author     Linus Torvalds <torvalds@linux-foundation.org>   2017-05-03 13:31:20 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-05-03 13:31:20 -0400
commit     d35a878ae1c50977b55e352fd46e36e35add72a0 (patch)
tree       7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-cache-target.c
parent     e5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff)
parent     390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff)
Merge tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- A major update for DM cache that reduces the latency for deciding
whether blocks should migrate to/from the cache. The bio-prison-v2
interface supports this improvement by enabling direct dispatch of
work to workqueues rather than having to delay the actual work
dispatch to the DM cache core. So the dm-cache policies are much more
nimble by being able to drive IO as they see fit. One immediate
benefit from the improved latency is a cache that should be much more
adaptive to changing workloads (a condensed excerpt of the new
continuation helper appears after this list).
- Add a new DM integrity target that emulates a block device that has
additional per-sector tags that can be used for storing integrity
information.
- Add a new authenticated encryption feature to the DM crypt target
that builds on the capabilities provided by the DM integrity target.
- Add MD interface for switching the raid4/5/6 journal mode and update
the DM raid target to use it to enable raid4/5/6 journal write-back
support.
- Switch the DM verity target over to using the asynchronous hash
crypto API (this helps work better with architectures that have
access to off-CPU algorithm providers, which should reduce CPU
utilization).
- Various request-based DM and DM multipath fixes and improvements from
Bart and Christoph.
- A DM thinp target fix for a bio structure leak that occurs for each
discard IFF discard passdown is enabled.
- A fix for a possible deadlock in DM bufio and a fix to re-check the
new buffer allocation watermark in the face of competing admin
changes to the 'max_cache_size_bytes' tunable.
- A couple DM core cleanups.
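For readers skimming the large dm-cache-target.c diff below: the latency improvement described in the first item above comes from dropping the old single-worker state machine in favour of small "continuation" work items that are queued straight onto the cache's workqueue once a bio-prison-v2 cell quiesces. The following is a condensed excerpt of that helper, lifted from the new code further down this page (not a standalone buildable unit):

/* Condensed from the new code in drivers/md/dm-cache-target.c below. */
struct continuation {
        struct work_struct ws;  /* dispatched directly to the cache's workqueue */
        int input;              /* carries an error code between chained steps */
};

static inline void init_continuation(struct continuation *k,
                                     void (*fn)(struct work_struct *))
{
        INIT_WORK(&k->ws, fn);
        k->input = 0;
}

static inline void queue_continuation(struct workqueue_struct *wq,
                                      struct continuation *k)
{
        queue_work(wq, &k->ws);
}

/*
 * Completion callbacks (kcopyd copies, overwrite bios, cell quiescing)
 * simply record any error in k.input and queue the next step, e.g.:
 */
static void copy_complete(int read_err, unsigned long write_err, void *context)
{
        struct dm_cache_migration *mg =
                container_of(context, struct dm_cache_migration, k);

        if (read_err || write_err)
                mg->k.input = -EIO;

        queue_continuation(mg->cache->wq, &mg->k);
}

Because these are ordinary work items, a policy can hand back a piece of background work and return immediately; the chain of continuations then drives the copy, metadata update and cell unlock without bouncing through a central worker.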
* tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (50 commits)
dm bufio: check new buffer allocation watermark every 30 seconds
dm bufio: avoid a possible ABBA deadlock
dm mpath: make it easier to detect unintended I/O request flushes
dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit()
dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH
dm: introduce enum dm_queue_mode to cleanup related code
dm mpath: verify __pg_init_all_paths locking assumptions at runtime
dm: verify suspend_locking assumptions at runtime
dm block manager: remove an unused argument from dm_block_manager_create()
dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue()
dm mpath: delay requeuing while path initialization is in progress
dm mpath: avoid that path removal can trigger an infinite loop
dm mpath: split and rename activate_path() to prepare for its expanded use
dm ioctl: prevent stack leak in dm ioctl call
dm integrity: use previously calculated log2 of sectors_per_block
dm integrity: use hex2bin instead of open-coded variant
dm crypt: replace custom implementation of hex2bin()
dm crypt: remove obsolete references to per-CPU state
dm verity: switch to using asynchronous hash crypto API
dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues
...
Diffstat (limited to 'drivers/md/dm-cache-target.c')
-rw-r--r--  drivers/md/dm-cache-target.c  2475
1 file changed, 1087 insertions, 1388 deletions
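The diff that follows also replaces the old deferred_flush_bios handling with a small "batcher": bios and continuations that need a metadata commit are queued under a spinlock, a single commit is issued from the workqueue, and everything queued behind it is then released (or errored if the commit failed). Its core, condensed from the new __commit() below:

/* Condensed from struct batcher / __commit() in the diff below. */
static void __commit(struct work_struct *_ws)
{
        struct batcher *b = container_of(_ws, struct batcher, commit_work);
        int r;
        unsigned long flags;
        struct list_head work_items;
        struct bio_list bios;
        struct work_struct *ws, *tmp;
        struct continuation *k;
        struct bio *bio;

        INIT_LIST_HEAD(&work_items);
        bio_list_init(&bios);

        /* Grab everything queued so far before running the commit op. */
        spin_lock_irqsave(&b->lock, flags);
        list_splice_init(&b->work_items, &work_items);
        bio_list_merge(&bios, &b->bios);
        bio_list_init(&b->bios);
        b->commit_scheduled = false;
        spin_unlock_irqrestore(&b->lock, flags);

        r = b->commit_op(b->commit_context);

        /* Wake every continuation that was waiting on this commit ... */
        list_for_each_entry_safe(ws, tmp, &work_items, entry) {
                k = container_of(ws, struct continuation, ws);
                k->input = r;
                INIT_LIST_HEAD(&ws->entry);     /* avoids a WARN_ON on requeue */
                queue_work(b->wq, ws);
        }

        /* ... then issue, or error, the bios that were held back. */
        while ((bio = bio_list_pop(&bios))) {
                if (r) {
                        bio->bi_error = r;
                        bio_endio(bio);
                } else
                        b->issue_op(bio, b->issue_context);
        }
}

Callers use issue_after_commit() for flush bios and continue_after_commit() for continuations; schedule_commit() only kicks the commit work item when something is actually waiting, which is how several flushes end up sharing a single commit.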
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 975922c8f231..1db375f50a13 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -5,7 +5,7 @@ | |||
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include "dm.h" | 7 | #include "dm.h" |
8 | #include "dm-bio-prison.h" | 8 | #include "dm-bio-prison-v2.h" |
9 | #include "dm-bio-record.h" | 9 | #include "dm-bio-record.h" |
10 | #include "dm-cache-metadata.h" | 10 | #include "dm-cache-metadata.h" |
11 | 11 | ||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/mempool.h> | 16 | #include <linux/mempool.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/rwsem.h> | ||
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
20 | 21 | ||
@@ -25,7 +26,18 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle, | |||
25 | 26 | ||
26 | /*----------------------------------------------------------------*/ | 27 | /*----------------------------------------------------------------*/ |
27 | 28 | ||
28 | #define IOT_RESOLUTION 4 | 29 | /* |
30 | * Glossary: | ||
31 | * | ||
32 | * oblock: index of an origin block | ||
33 | * cblock: index of a cache block | ||
34 | * promotion: movement of a block from origin to cache | ||
35 | * demotion: movement of a block from cache to origin | ||
36 | * migration: movement of a block between the origin and cache device, | ||
37 | * either direction | ||
38 | */ | ||
39 | |||
40 | /*----------------------------------------------------------------*/ | ||
29 | 41 | ||
30 | struct io_tracker { | 42 | struct io_tracker { |
31 | spinlock_t lock; | 43 | spinlock_t lock; |
@@ -99,19 +111,178 @@ static void iot_io_end(struct io_tracker *iot, sector_t len) | |||
99 | /*----------------------------------------------------------------*/ | 111 | /*----------------------------------------------------------------*/ |
100 | 112 | ||
101 | /* | 113 | /* |
102 | * Glossary: | 114 | * Represents a chunk of future work. 'input' allows continuations to pass |
103 | * | 115 | * values between themselves, typically error values. |
104 | * oblock: index of an origin block | ||
105 | * cblock: index of a cache block | ||
106 | * promotion: movement of a block from origin to cache | ||
107 | * demotion: movement of a block from cache to origin | ||
108 | * migration: movement of a block between the origin and cache device, | ||
109 | * either direction | ||
110 | */ | 116 | */ |
117 | struct continuation { | ||
118 | struct work_struct ws; | ||
119 | int input; | ||
120 | }; | ||
121 | |||
122 | static inline void init_continuation(struct continuation *k, | ||
123 | void (*fn)(struct work_struct *)) | ||
124 | { | ||
125 | INIT_WORK(&k->ws, fn); | ||
126 | k->input = 0; | ||
127 | } | ||
128 | |||
129 | static inline void queue_continuation(struct workqueue_struct *wq, | ||
130 | struct continuation *k) | ||
131 | { | ||
132 | queue_work(wq, &k->ws); | ||
133 | } | ||
111 | 134 | ||
112 | /*----------------------------------------------------------------*/ | 135 | /*----------------------------------------------------------------*/ |
113 | 136 | ||
114 | /* | 137 | /* |
138 | * The batcher collects together pieces of work that need a particular | ||
139 | * operation to occur before they can proceed (typically a commit). | ||
140 | */ | ||
141 | struct batcher { | ||
142 | /* | ||
143 | * The operation that everyone is waiting for. | ||
144 | */ | ||
145 | int (*commit_op)(void *context); | ||
146 | void *commit_context; | ||
147 | |||
148 | /* | ||
149 | * This is how bios should be issued once the commit op is complete | ||
150 | * (accounted_request). | ||
151 | */ | ||
152 | void (*issue_op)(struct bio *bio, void *context); | ||
153 | void *issue_context; | ||
154 | |||
155 | /* | ||
156 | * Queued work gets put on here after commit. | ||
157 | */ | ||
158 | struct workqueue_struct *wq; | ||
159 | |||
160 | spinlock_t lock; | ||
161 | struct list_head work_items; | ||
162 | struct bio_list bios; | ||
163 | struct work_struct commit_work; | ||
164 | |||
165 | bool commit_scheduled; | ||
166 | }; | ||
167 | |||
168 | static void __commit(struct work_struct *_ws) | ||
169 | { | ||
170 | struct batcher *b = container_of(_ws, struct batcher, commit_work); | ||
171 | |||
172 | int r; | ||
173 | unsigned long flags; | ||
174 | struct list_head work_items; | ||
175 | struct work_struct *ws, *tmp; | ||
176 | struct continuation *k; | ||
177 | struct bio *bio; | ||
178 | struct bio_list bios; | ||
179 | |||
180 | INIT_LIST_HEAD(&work_items); | ||
181 | bio_list_init(&bios); | ||
182 | |||
183 | /* | ||
184 | * We have to grab these before the commit_op to avoid a race | ||
185 | * condition. | ||
186 | */ | ||
187 | spin_lock_irqsave(&b->lock, flags); | ||
188 | list_splice_init(&b->work_items, &work_items); | ||
189 | bio_list_merge(&bios, &b->bios); | ||
190 | bio_list_init(&b->bios); | ||
191 | b->commit_scheduled = false; | ||
192 | spin_unlock_irqrestore(&b->lock, flags); | ||
193 | |||
194 | r = b->commit_op(b->commit_context); | ||
195 | |||
196 | list_for_each_entry_safe(ws, tmp, &work_items, entry) { | ||
197 | k = container_of(ws, struct continuation, ws); | ||
198 | k->input = r; | ||
199 | INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */ | ||
200 | queue_work(b->wq, ws); | ||
201 | } | ||
202 | |||
203 | while ((bio = bio_list_pop(&bios))) { | ||
204 | if (r) { | ||
205 | bio->bi_error = r; | ||
206 | bio_endio(bio); | ||
207 | } else | ||
208 | b->issue_op(bio, b->issue_context); | ||
209 | } | ||
210 | } | ||
211 | |||
212 | static void batcher_init(struct batcher *b, | ||
213 | int (*commit_op)(void *), | ||
214 | void *commit_context, | ||
215 | void (*issue_op)(struct bio *bio, void *), | ||
216 | void *issue_context, | ||
217 | struct workqueue_struct *wq) | ||
218 | { | ||
219 | b->commit_op = commit_op; | ||
220 | b->commit_context = commit_context; | ||
221 | b->issue_op = issue_op; | ||
222 | b->issue_context = issue_context; | ||
223 | b->wq = wq; | ||
224 | |||
225 | spin_lock_init(&b->lock); | ||
226 | INIT_LIST_HEAD(&b->work_items); | ||
227 | bio_list_init(&b->bios); | ||
228 | INIT_WORK(&b->commit_work, __commit); | ||
229 | b->commit_scheduled = false; | ||
230 | } | ||
231 | |||
232 | static void async_commit(struct batcher *b) | ||
233 | { | ||
234 | queue_work(b->wq, &b->commit_work); | ||
235 | } | ||
236 | |||
237 | static void continue_after_commit(struct batcher *b, struct continuation *k) | ||
238 | { | ||
239 | unsigned long flags; | ||
240 | bool commit_scheduled; | ||
241 | |||
242 | spin_lock_irqsave(&b->lock, flags); | ||
243 | commit_scheduled = b->commit_scheduled; | ||
244 | list_add_tail(&k->ws.entry, &b->work_items); | ||
245 | spin_unlock_irqrestore(&b->lock, flags); | ||
246 | |||
247 | if (commit_scheduled) | ||
248 | async_commit(b); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Bios are errored if commit failed. | ||
253 | */ | ||
254 | static void issue_after_commit(struct batcher *b, struct bio *bio) | ||
255 | { | ||
256 | unsigned long flags; | ||
257 | bool commit_scheduled; | ||
258 | |||
259 | spin_lock_irqsave(&b->lock, flags); | ||
260 | commit_scheduled = b->commit_scheduled; | ||
261 | bio_list_add(&b->bios, bio); | ||
262 | spin_unlock_irqrestore(&b->lock, flags); | ||
263 | |||
264 | if (commit_scheduled) | ||
265 | async_commit(b); | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Call this if some urgent work is waiting for the commit to complete. | ||
270 | */ | ||
271 | static void schedule_commit(struct batcher *b) | ||
272 | { | ||
273 | bool immediate; | ||
274 | unsigned long flags; | ||
275 | |||
276 | spin_lock_irqsave(&b->lock, flags); | ||
277 | immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios); | ||
278 | b->commit_scheduled = true; | ||
279 | spin_unlock_irqrestore(&b->lock, flags); | ||
280 | |||
281 | if (immediate) | ||
282 | async_commit(b); | ||
283 | } | ||
284 | |||
285 | /* | ||
115 | * There are a couple of places where we let a bio run, but want to do some | 286 | * There are a couple of places where we let a bio run, but want to do some |
116 | * work before calling its endio function. We do this by temporarily | 287 | * work before calling its endio function. We do this by temporarily |
117 | * changing the endio fn. | 288 | * changing the endio fn. |
@@ -189,31 +360,13 @@ struct cache_stats { | |||
189 | atomic_t write_miss; | 360 | atomic_t write_miss; |
190 | atomic_t demotion; | 361 | atomic_t demotion; |
191 | atomic_t promotion; | 362 | atomic_t promotion; |
363 | atomic_t writeback; | ||
192 | atomic_t copies_avoided; | 364 | atomic_t copies_avoided; |
193 | atomic_t cache_cell_clash; | 365 | atomic_t cache_cell_clash; |
194 | atomic_t commit_count; | 366 | atomic_t commit_count; |
195 | atomic_t discard_count; | 367 | atomic_t discard_count; |
196 | }; | 368 | }; |
197 | 369 | ||
198 | /* | ||
199 | * Defines a range of cblocks, begin to (end - 1) are in the range. end is | ||
200 | * the one-past-the-end value. | ||
201 | */ | ||
202 | struct cblock_range { | ||
203 | dm_cblock_t begin; | ||
204 | dm_cblock_t end; | ||
205 | }; | ||
206 | |||
207 | struct invalidation_request { | ||
208 | struct list_head list; | ||
209 | struct cblock_range *cblocks; | ||
210 | |||
211 | atomic_t complete; | ||
212 | int err; | ||
213 | |||
214 | wait_queue_head_t result_wait; | ||
215 | }; | ||
216 | |||
217 | struct cache { | 370 | struct cache { |
218 | struct dm_target *ti; | 371 | struct dm_target *ti; |
219 | struct dm_target_callbacks callbacks; | 372 | struct dm_target_callbacks callbacks; |
@@ -255,11 +408,7 @@ struct cache { | |||
255 | spinlock_t lock; | 408 | spinlock_t lock; |
256 | struct list_head deferred_cells; | 409 | struct list_head deferred_cells; |
257 | struct bio_list deferred_bios; | 410 | struct bio_list deferred_bios; |
258 | struct bio_list deferred_flush_bios; | ||
259 | struct bio_list deferred_writethrough_bios; | 411 | struct bio_list deferred_writethrough_bios; |
260 | struct list_head quiesced_migrations; | ||
261 | struct list_head completed_migrations; | ||
262 | struct list_head need_commit_migrations; | ||
263 | sector_t migration_threshold; | 412 | sector_t migration_threshold; |
264 | wait_queue_head_t migration_wait; | 413 | wait_queue_head_t migration_wait; |
265 | atomic_t nr_allocated_migrations; | 414 | atomic_t nr_allocated_migrations; |
@@ -270,9 +419,7 @@ struct cache { | |||
270 | */ | 419 | */ |
271 | atomic_t nr_io_migrations; | 420 | atomic_t nr_io_migrations; |
272 | 421 | ||
273 | wait_queue_head_t quiescing_wait; | 422 | struct rw_semaphore quiesce_lock; |
274 | atomic_t quiescing; | ||
275 | atomic_t quiescing_ack; | ||
276 | 423 | ||
277 | /* | 424 | /* |
278 | * cache_size entries, dirty if set | 425 | * cache_size entries, dirty if set |
@@ -296,13 +443,11 @@ struct cache { | |||
296 | 443 | ||
297 | struct dm_kcopyd_client *copier; | 444 | struct dm_kcopyd_client *copier; |
298 | struct workqueue_struct *wq; | 445 | struct workqueue_struct *wq; |
299 | struct work_struct worker; | 446 | struct work_struct deferred_bio_worker; |
300 | 447 | struct work_struct deferred_writethrough_worker; | |
448 | struct work_struct migration_worker; | ||
301 | struct delayed_work waker; | 449 | struct delayed_work waker; |
302 | unsigned long last_commit_jiffies; | 450 | struct dm_bio_prison_v2 *prison; |
303 | |||
304 | struct dm_bio_prison *prison; | ||
305 | struct dm_deferred_set *all_io_ds; | ||
306 | 451 | ||
307 | mempool_t *migration_pool; | 452 | mempool_t *migration_pool; |
308 | 453 | ||
@@ -330,12 +475,17 @@ struct cache { | |||
330 | struct list_head invalidation_requests; | 475 | struct list_head invalidation_requests; |
331 | 476 | ||
332 | struct io_tracker origin_tracker; | 477 | struct io_tracker origin_tracker; |
478 | |||
479 | struct work_struct commit_ws; | ||
480 | struct batcher committer; | ||
481 | |||
482 | struct rw_semaphore background_work_lock; | ||
333 | }; | 483 | }; |
334 | 484 | ||
335 | struct per_bio_data { | 485 | struct per_bio_data { |
336 | bool tick:1; | 486 | bool tick:1; |
337 | unsigned req_nr:2; | 487 | unsigned req_nr:2; |
338 | struct dm_deferred_entry *all_io_entry; | 488 | struct dm_bio_prison_cell_v2 *cell; |
339 | struct dm_hook_info hook_info; | 489 | struct dm_hook_info hook_info; |
340 | sector_t len; | 490 | sector_t len; |
341 | 491 | ||
@@ -350,55 +500,64 @@ struct per_bio_data { | |||
350 | }; | 500 | }; |
351 | 501 | ||
352 | struct dm_cache_migration { | 502 | struct dm_cache_migration { |
353 | struct list_head list; | 503 | struct continuation k; |
354 | struct cache *cache; | 504 | struct cache *cache; |
355 | 505 | ||
356 | unsigned long start_jiffies; | 506 | struct policy_work *op; |
357 | dm_oblock_t old_oblock; | 507 | struct bio *overwrite_bio; |
358 | dm_oblock_t new_oblock; | 508 | struct dm_bio_prison_cell_v2 *cell; |
359 | dm_cblock_t cblock; | ||
360 | |||
361 | bool err:1; | ||
362 | bool discard:1; | ||
363 | bool writeback:1; | ||
364 | bool demote:1; | ||
365 | bool promote:1; | ||
366 | bool requeue_holder:1; | ||
367 | bool invalidate:1; | ||
368 | 509 | ||
369 | struct dm_bio_prison_cell *old_ocell; | 510 | dm_cblock_t invalidate_cblock; |
370 | struct dm_bio_prison_cell *new_ocell; | 511 | dm_oblock_t invalidate_oblock; |
371 | }; | 512 | }; |
372 | 513 | ||
373 | /* | 514 | /*----------------------------------------------------------------*/ |
374 | * Processing a bio in the worker thread may require these memory | 515 | |
375 | * allocations. We prealloc to avoid deadlocks (the same worker thread | 516 | static bool writethrough_mode(struct cache_features *f) |
376 | * frees them back to the mempool). | 517 | { |
377 | */ | 518 | return f->io_mode == CM_IO_WRITETHROUGH; |
378 | struct prealloc { | 519 | } |
379 | struct dm_cache_migration *mg; | ||
380 | struct dm_bio_prison_cell *cell1; | ||
381 | struct dm_bio_prison_cell *cell2; | ||
382 | }; | ||
383 | 520 | ||
384 | static enum cache_metadata_mode get_cache_mode(struct cache *cache); | 521 | static bool writeback_mode(struct cache_features *f) |
522 | { | ||
523 | return f->io_mode == CM_IO_WRITEBACK; | ||
524 | } | ||
385 | 525 | ||
386 | static void wake_worker(struct cache *cache) | 526 | static inline bool passthrough_mode(struct cache_features *f) |
387 | { | 527 | { |
388 | queue_work(cache->wq, &cache->worker); | 528 | return unlikely(f->io_mode == CM_IO_PASSTHROUGH); |
389 | } | 529 | } |
390 | 530 | ||
391 | /*----------------------------------------------------------------*/ | 531 | /*----------------------------------------------------------------*/ |
392 | 532 | ||
393 | static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) | 533 | static void wake_deferred_bio_worker(struct cache *cache) |
394 | { | 534 | { |
395 | /* FIXME: change to use a local slab. */ | 535 | queue_work(cache->wq, &cache->deferred_bio_worker); |
396 | return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); | ||
397 | } | 536 | } |
398 | 537 | ||
399 | static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) | 538 | static void wake_deferred_writethrough_worker(struct cache *cache) |
400 | { | 539 | { |
401 | dm_bio_prison_free_cell(cache->prison, cell); | 540 | queue_work(cache->wq, &cache->deferred_writethrough_worker); |
541 | } | ||
542 | |||
543 | static void wake_migration_worker(struct cache *cache) | ||
544 | { | ||
545 | if (passthrough_mode(&cache->features)) | ||
546 | return; | ||
547 | |||
548 | queue_work(cache->wq, &cache->migration_worker); | ||
549 | } | ||
550 | |||
551 | /*----------------------------------------------------------------*/ | ||
552 | |||
553 | static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache) | ||
554 | { | ||
555 | return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT); | ||
556 | } | ||
557 | |||
558 | static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell) | ||
559 | { | ||
560 | dm_bio_prison_free_cell_v2(cache->prison, cell); | ||
402 | } | 561 | } |
403 | 562 | ||
404 | static struct dm_cache_migration *alloc_migration(struct cache *cache) | 563 | static struct dm_cache_migration *alloc_migration(struct cache *cache) |
@@ -424,146 +583,127 @@ static void free_migration(struct dm_cache_migration *mg) | |||
424 | mempool_free(mg, cache->migration_pool); | 583 | mempool_free(mg, cache->migration_pool); |
425 | } | 584 | } |
426 | 585 | ||
427 | static int prealloc_data_structs(struct cache *cache, struct prealloc *p) | 586 | /*----------------------------------------------------------------*/ |
428 | { | ||
429 | if (!p->mg) { | ||
430 | p->mg = alloc_migration(cache); | ||
431 | if (!p->mg) | ||
432 | return -ENOMEM; | ||
433 | } | ||
434 | |||
435 | if (!p->cell1) { | ||
436 | p->cell1 = alloc_prison_cell(cache); | ||
437 | if (!p->cell1) | ||
438 | return -ENOMEM; | ||
439 | } | ||
440 | |||
441 | if (!p->cell2) { | ||
442 | p->cell2 = alloc_prison_cell(cache); | ||
443 | if (!p->cell2) | ||
444 | return -ENOMEM; | ||
445 | } | ||
446 | |||
447 | return 0; | ||
448 | } | ||
449 | 587 | ||
450 | static void prealloc_free_structs(struct cache *cache, struct prealloc *p) | 588 | static inline dm_oblock_t oblock_succ(dm_oblock_t b) |
451 | { | 589 | { |
452 | if (p->cell2) | 590 | return to_oblock(from_oblock(b) + 1ull); |
453 | free_prison_cell(cache, p->cell2); | ||
454 | |||
455 | if (p->cell1) | ||
456 | free_prison_cell(cache, p->cell1); | ||
457 | |||
458 | if (p->mg) | ||
459 | free_migration(p->mg); | ||
460 | } | 591 | } |
461 | 592 | ||
462 | static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) | 593 | static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key) |
463 | { | 594 | { |
464 | struct dm_cache_migration *mg = p->mg; | 595 | key->virtual = 0; |
465 | 596 | key->dev = 0; | |
466 | BUG_ON(!mg); | 597 | key->block_begin = from_oblock(begin); |
467 | p->mg = NULL; | 598 | key->block_end = from_oblock(end); |
468 | |||
469 | return mg; | ||
470 | } | 599 | } |
471 | 600 | ||
472 | /* | 601 | /* |
473 | * You must have a cell within the prealloc struct to return. If not this | 602 | * We have two lock levels. Level 0, which is used to prevent WRITEs, and |
474 | * function will BUG() rather than returning NULL. | 603 | * level 1 which prevents *both* READs and WRITEs. |
475 | */ | 604 | */ |
476 | static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) | 605 | #define WRITE_LOCK_LEVEL 0 |
606 | #define READ_WRITE_LOCK_LEVEL 1 | ||
607 | |||
608 | static unsigned lock_level(struct bio *bio) | ||
477 | { | 609 | { |
478 | struct dm_bio_prison_cell *r = NULL; | 610 | return bio_data_dir(bio) == WRITE ? |
611 | WRITE_LOCK_LEVEL : | ||
612 | READ_WRITE_LOCK_LEVEL; | ||
613 | } | ||
479 | 614 | ||
480 | if (p->cell1) { | 615 | /*---------------------------------------------------------------- |
481 | r = p->cell1; | 616 | * Per bio data |
482 | p->cell1 = NULL; | 617 | *--------------------------------------------------------------*/ |
483 | 618 | ||
484 | } else if (p->cell2) { | 619 | /* |
485 | r = p->cell2; | 620 | * If using writeback, leave out struct per_bio_data's writethrough fields. |
486 | p->cell2 = NULL; | 621 | */ |
487 | } else | 622 | #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) |
488 | BUG(); | 623 | #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) |
489 | 624 | ||
490 | return r; | 625 | static size_t get_per_bio_data_size(struct cache *cache) |
626 | { | ||
627 | return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; | ||
491 | } | 628 | } |
492 | 629 | ||
493 | /* | 630 | static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) |
494 | * You can't have more than two cells in a prealloc struct. BUG() will be | ||
495 | * called if you try and overfill. | ||
496 | */ | ||
497 | static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) | ||
498 | { | 631 | { |
499 | if (!p->cell2) | 632 | struct per_bio_data *pb = dm_per_bio_data(bio, data_size); |
500 | p->cell2 = cell; | 633 | BUG_ON(!pb); |
634 | return pb; | ||
635 | } | ||
501 | 636 | ||
502 | else if (!p->cell1) | 637 | static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) |
503 | p->cell1 = cell; | 638 | { |
639 | struct per_bio_data *pb = get_per_bio_data(bio, data_size); | ||
504 | 640 | ||
505 | else | 641 | pb->tick = false; |
506 | BUG(); | 642 | pb->req_nr = dm_bio_get_target_bio_nr(bio); |
643 | pb->cell = NULL; | ||
644 | pb->len = 0; | ||
645 | |||
646 | return pb; | ||
507 | } | 647 | } |
508 | 648 | ||
509 | /*----------------------------------------------------------------*/ | 649 | /*----------------------------------------------------------------*/ |
510 | 650 | ||
511 | static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key) | 651 | static void defer_bio(struct cache *cache, struct bio *bio) |
512 | { | 652 | { |
513 | key->virtual = 0; | 653 | unsigned long flags; |
514 | key->dev = 0; | ||
515 | key->block_begin = from_oblock(begin); | ||
516 | key->block_end = from_oblock(end); | ||
517 | } | ||
518 | 654 | ||
519 | /* | 655 | spin_lock_irqsave(&cache->lock, flags); |
520 | * The caller hands in a preallocated cell, and a free function for it. | 656 | bio_list_add(&cache->deferred_bios, bio); |
521 | * The cell will be freed if there's an error, or if it wasn't used because | 657 | spin_unlock_irqrestore(&cache->lock, flags); |
522 | * a cell with that key already exists. | ||
523 | */ | ||
524 | typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); | ||
525 | 658 | ||
526 | static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end, | 659 | wake_deferred_bio_worker(cache); |
527 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, | 660 | } |
528 | cell_free_fn free_fn, void *free_context, | 661 | |
529 | struct dm_bio_prison_cell **cell_result) | 662 | static void defer_bios(struct cache *cache, struct bio_list *bios) |
530 | { | 663 | { |
531 | int r; | 664 | unsigned long flags; |
532 | struct dm_cell_key key; | ||
533 | 665 | ||
534 | build_key(oblock_begin, oblock_end, &key); | 666 | spin_lock_irqsave(&cache->lock, flags); |
535 | r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); | 667 | bio_list_merge(&cache->deferred_bios, bios); |
536 | if (r) | 668 | bio_list_init(bios); |
537 | free_fn(free_context, cell_prealloc); | 669 | spin_unlock_irqrestore(&cache->lock, flags); |
538 | 670 | ||
539 | return r; | 671 | wake_deferred_bio_worker(cache); |
540 | } | 672 | } |
541 | 673 | ||
542 | static int bio_detain(struct cache *cache, dm_oblock_t oblock, | 674 | /*----------------------------------------------------------------*/ |
543 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, | 675 | |
544 | cell_free_fn free_fn, void *free_context, | 676 | static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio) |
545 | struct dm_bio_prison_cell **cell_result) | ||
546 | { | 677 | { |
678 | bool r; | ||
679 | size_t pb_size; | ||
680 | struct per_bio_data *pb; | ||
681 | struct dm_cell_key_v2 key; | ||
547 | dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); | 682 | dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); |
548 | return bio_detain_range(cache, oblock, end, bio, | 683 | struct dm_bio_prison_cell_v2 *cell_prealloc, *cell; |
549 | cell_prealloc, free_fn, free_context, cell_result); | ||
550 | } | ||
551 | 684 | ||
552 | static int get_cell(struct cache *cache, | 685 | cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */ |
553 | dm_oblock_t oblock, | 686 | if (!cell_prealloc) { |
554 | struct prealloc *structs, | 687 | defer_bio(cache, bio); |
555 | struct dm_bio_prison_cell **cell_result) | 688 | return false; |
556 | { | 689 | } |
557 | int r; | ||
558 | struct dm_cell_key key; | ||
559 | struct dm_bio_prison_cell *cell_prealloc; | ||
560 | 690 | ||
561 | cell_prealloc = prealloc_get_cell(structs); | 691 | build_key(oblock, end, &key); |
692 | r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell); | ||
693 | if (!r) { | ||
694 | /* | ||
695 | * Failed to get the lock. | ||
696 | */ | ||
697 | free_prison_cell(cache, cell_prealloc); | ||
698 | return r; | ||
699 | } | ||
562 | 700 | ||
563 | build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key); | 701 | if (cell != cell_prealloc) |
564 | r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); | 702 | free_prison_cell(cache, cell_prealloc); |
565 | if (r) | 703 | |
566 | prealloc_put_cell(structs, cell_prealloc); | 704 | pb_size = get_per_bio_data_size(cache); |
705 | pb = get_per_bio_data(bio, pb_size); | ||
706 | pb->cell = cell; | ||
567 | 707 | ||
568 | return r; | 708 | return r; |
569 | } | 709 | } |
@@ -575,21 +715,33 @@ static bool is_dirty(struct cache *cache, dm_cblock_t b) | |||
575 | return test_bit(from_cblock(b), cache->dirty_bitset); | 715 | return test_bit(from_cblock(b), cache->dirty_bitset); |
576 | } | 716 | } |
577 | 717 | ||
578 | static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) | 718 | static void set_dirty(struct cache *cache, dm_cblock_t cblock) |
579 | { | 719 | { |
580 | if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { | 720 | if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { |
581 | atomic_inc(&cache->nr_dirty); | 721 | atomic_inc(&cache->nr_dirty); |
582 | policy_set_dirty(cache->policy, oblock); | 722 | policy_set_dirty(cache->policy, cblock); |
583 | } | 723 | } |
584 | } | 724 | } |
585 | 725 | ||
586 | static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) | 726 | /* |
727 | * These two are called when setting after migrations to force the policy | ||
728 | * and dirty bitset to be in sync. | ||
729 | */ | ||
730 | static void force_set_dirty(struct cache *cache, dm_cblock_t cblock) | ||
731 | { | ||
732 | if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) | ||
733 | atomic_inc(&cache->nr_dirty); | ||
734 | policy_set_dirty(cache->policy, cblock); | ||
735 | } | ||
736 | |||
737 | static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock) | ||
587 | { | 738 | { |
588 | if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { | 739 | if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { |
589 | policy_clear_dirty(cache->policy, oblock); | ||
590 | if (atomic_dec_return(&cache->nr_dirty) == 0) | 740 | if (atomic_dec_return(&cache->nr_dirty) == 0) |
591 | dm_table_event(cache->ti->table); | 741 | dm_table_event(cache->ti->table); |
592 | } | 742 | } |
743 | |||
744 | policy_clear_dirty(cache->policy, cblock); | ||
593 | } | 745 | } |
594 | 746 | ||
595 | /*----------------------------------------------------------------*/ | 747 | /*----------------------------------------------------------------*/ |
@@ -628,11 +780,6 @@ static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) | |||
628 | oblocks_per_dblock(cache))); | 780 | oblocks_per_dblock(cache))); |
629 | } | 781 | } |
630 | 782 | ||
631 | static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock) | ||
632 | { | ||
633 | return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache)); | ||
634 | } | ||
635 | |||
636 | static void set_discard(struct cache *cache, dm_dblock_t b) | 783 | static void set_discard(struct cache *cache, dm_dblock_t b) |
637 | { | 784 | { |
638 | unsigned long flags; | 785 | unsigned long flags; |
@@ -679,83 +826,6 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) | |||
679 | return r; | 826 | return r; |
680 | } | 827 | } |
681 | 828 | ||
682 | /*----------------------------------------------------------------*/ | ||
683 | |||
684 | static void load_stats(struct cache *cache) | ||
685 | { | ||
686 | struct dm_cache_statistics stats; | ||
687 | |||
688 | dm_cache_metadata_get_stats(cache->cmd, &stats); | ||
689 | atomic_set(&cache->stats.read_hit, stats.read_hits); | ||
690 | atomic_set(&cache->stats.read_miss, stats.read_misses); | ||
691 | atomic_set(&cache->stats.write_hit, stats.write_hits); | ||
692 | atomic_set(&cache->stats.write_miss, stats.write_misses); | ||
693 | } | ||
694 | |||
695 | static void save_stats(struct cache *cache) | ||
696 | { | ||
697 | struct dm_cache_statistics stats; | ||
698 | |||
699 | if (get_cache_mode(cache) >= CM_READ_ONLY) | ||
700 | return; | ||
701 | |||
702 | stats.read_hits = atomic_read(&cache->stats.read_hit); | ||
703 | stats.read_misses = atomic_read(&cache->stats.read_miss); | ||
704 | stats.write_hits = atomic_read(&cache->stats.write_hit); | ||
705 | stats.write_misses = atomic_read(&cache->stats.write_miss); | ||
706 | |||
707 | dm_cache_metadata_set_stats(cache->cmd, &stats); | ||
708 | } | ||
709 | |||
710 | /*---------------------------------------------------------------- | ||
711 | * Per bio data | ||
712 | *--------------------------------------------------------------*/ | ||
713 | |||
714 | /* | ||
715 | * If using writeback, leave out struct per_bio_data's writethrough fields. | ||
716 | */ | ||
717 | #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) | ||
718 | #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) | ||
719 | |||
720 | static bool writethrough_mode(struct cache_features *f) | ||
721 | { | ||
722 | return f->io_mode == CM_IO_WRITETHROUGH; | ||
723 | } | ||
724 | |||
725 | static bool writeback_mode(struct cache_features *f) | ||
726 | { | ||
727 | return f->io_mode == CM_IO_WRITEBACK; | ||
728 | } | ||
729 | |||
730 | static bool passthrough_mode(struct cache_features *f) | ||
731 | { | ||
732 | return f->io_mode == CM_IO_PASSTHROUGH; | ||
733 | } | ||
734 | |||
735 | static size_t get_per_bio_data_size(struct cache *cache) | ||
736 | { | ||
737 | return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; | ||
738 | } | ||
739 | |||
740 | static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) | ||
741 | { | ||
742 | struct per_bio_data *pb = dm_per_bio_data(bio, data_size); | ||
743 | BUG_ON(!pb); | ||
744 | return pb; | ||
745 | } | ||
746 | |||
747 | static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) | ||
748 | { | ||
749 | struct per_bio_data *pb = get_per_bio_data(bio, data_size); | ||
750 | |||
751 | pb->tick = false; | ||
752 | pb->req_nr = dm_bio_get_target_bio_nr(bio); | ||
753 | pb->all_io_entry = NULL; | ||
754 | pb->len = 0; | ||
755 | |||
756 | return pb; | ||
757 | } | ||
758 | |||
759 | /*---------------------------------------------------------------- | 829 | /*---------------------------------------------------------------- |
760 | * Remapping | 830 | * Remapping |
761 | *--------------------------------------------------------------*/ | 831 | *--------------------------------------------------------------*/ |
@@ -797,8 +867,9 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) | |||
797 | } | 867 | } |
798 | 868 | ||
799 | static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, | 869 | static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, |
800 | dm_oblock_t oblock) | 870 | dm_oblock_t oblock) |
801 | { | 871 | { |
872 | // FIXME: this is called way too much. | ||
802 | check_if_tick_bio_needed(cache, bio); | 873 | check_if_tick_bio_needed(cache, bio); |
803 | remap_to_origin(cache, bio); | 874 | remap_to_origin(cache, bio); |
804 | if (bio_data_dir(bio) == WRITE) | 875 | if (bio_data_dir(bio) == WRITE) |
@@ -811,7 +882,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, | |||
811 | check_if_tick_bio_needed(cache, bio); | 882 | check_if_tick_bio_needed(cache, bio); |
812 | remap_to_cache(cache, bio, cblock); | 883 | remap_to_cache(cache, bio, cblock); |
813 | if (bio_data_dir(bio) == WRITE) { | 884 | if (bio_data_dir(bio) == WRITE) { |
814 | set_dirty(cache, oblock, cblock); | 885 | set_dirty(cache, cblock); |
815 | clear_discard(cache, oblock_to_dblock(cache, oblock)); | 886 | clear_discard(cache, oblock_to_dblock(cache, oblock)); |
816 | } | 887 | } |
817 | } | 888 | } |
@@ -828,22 +899,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) | |||
828 | return to_oblock(block_nr); | 899 | return to_oblock(block_nr); |
829 | } | 900 | } |
830 | 901 | ||
831 | /* | ||
832 | * You must increment the deferred set whilst the prison cell is held. To | ||
833 | * encourage this, we ask for 'cell' to be passed in. | ||
834 | */ | ||
835 | static void inc_ds(struct cache *cache, struct bio *bio, | ||
836 | struct dm_bio_prison_cell *cell) | ||
837 | { | ||
838 | size_t pb_data_size = get_per_bio_data_size(cache); | ||
839 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
840 | |||
841 | BUG_ON(!cell); | ||
842 | BUG_ON(pb->all_io_entry); | ||
843 | |||
844 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
845 | } | ||
846 | |||
847 | static bool accountable_bio(struct cache *cache, struct bio *bio) | 902 | static bool accountable_bio(struct cache *cache, struct bio *bio) |
848 | { | 903 | { |
849 | return ((bio->bi_bdev == cache->origin_dev->bdev) && | 904 | return ((bio->bi_bdev == cache->origin_dev->bdev) && |
@@ -875,29 +930,10 @@ static void accounted_request(struct cache *cache, struct bio *bio) | |||
875 | generic_make_request(bio); | 930 | generic_make_request(bio); |
876 | } | 931 | } |
877 | 932 | ||
878 | static void issue(struct cache *cache, struct bio *bio) | 933 | static void issue_op(struct bio *bio, void *context) |
879 | { | ||
880 | unsigned long flags; | ||
881 | |||
882 | if (!op_is_flush(bio->bi_opf)) { | ||
883 | accounted_request(cache, bio); | ||
884 | return; | ||
885 | } | ||
886 | |||
887 | /* | ||
888 | * Batch together any bios that trigger commits and then issue a | ||
889 | * single commit for them in do_worker(). | ||
890 | */ | ||
891 | spin_lock_irqsave(&cache->lock, flags); | ||
892 | cache->commit_requested = true; | ||
893 | bio_list_add(&cache->deferred_flush_bios, bio); | ||
894 | spin_unlock_irqrestore(&cache->lock, flags); | ||
895 | } | ||
896 | |||
897 | static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell) | ||
898 | { | 934 | { |
899 | inc_ds(cache, bio, cell); | 935 | struct cache *cache = context; |
900 | issue(cache, bio); | 936 | accounted_request(cache, bio); |
901 | } | 937 | } |
902 | 938 | ||
903 | static void defer_writethrough_bio(struct cache *cache, struct bio *bio) | 939 | static void defer_writethrough_bio(struct cache *cache, struct bio *bio) |
@@ -908,7 +944,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) | |||
908 | bio_list_add(&cache->deferred_writethrough_bios, bio); | 944 | bio_list_add(&cache->deferred_writethrough_bios, bio); |
909 | spin_unlock_irqrestore(&cache->lock, flags); | 945 | spin_unlock_irqrestore(&cache->lock, flags); |
910 | 946 | ||
911 | wake_worker(cache); | 947 | wake_deferred_writethrough_worker(cache); |
912 | } | 948 | } |
913 | 949 | ||
914 | static void writethrough_endio(struct bio *bio) | 950 | static void writethrough_endio(struct bio *bio) |
@@ -934,6 +970,7 @@ static void writethrough_endio(struct bio *bio) | |||
934 | } | 970 | } |
935 | 971 | ||
936 | /* | 972 | /* |
973 | * FIXME: send in parallel, huge latency as is. | ||
937 | * When running in writethrough mode we need to send writes to clean blocks | 974 | * When running in writethrough mode we need to send writes to clean blocks |
938 | * to both the cache and origin devices. In future we'd like to clone the | 975 | * to both the cache and origin devices. In future we'd like to clone the |
939 | * bio and send them in parallel, but for now we're doing them in | 976 | * bio and send them in parallel, but for now we're doing them in |
@@ -1046,12 +1083,58 @@ static void metadata_operation_failed(struct cache *cache, const char *op, int r | |||
1046 | set_cache_mode(cache, CM_READ_ONLY); | 1083 | set_cache_mode(cache, CM_READ_ONLY); |
1047 | } | 1084 | } |
1048 | 1085 | ||
1086 | /*----------------------------------------------------------------*/ | ||
1087 | |||
1088 | static void load_stats(struct cache *cache) | ||
1089 | { | ||
1090 | struct dm_cache_statistics stats; | ||
1091 | |||
1092 | dm_cache_metadata_get_stats(cache->cmd, &stats); | ||
1093 | atomic_set(&cache->stats.read_hit, stats.read_hits); | ||
1094 | atomic_set(&cache->stats.read_miss, stats.read_misses); | ||
1095 | atomic_set(&cache->stats.write_hit, stats.write_hits); | ||
1096 | atomic_set(&cache->stats.write_miss, stats.write_misses); | ||
1097 | } | ||
1098 | |||
1099 | static void save_stats(struct cache *cache) | ||
1100 | { | ||
1101 | struct dm_cache_statistics stats; | ||
1102 | |||
1103 | if (get_cache_mode(cache) >= CM_READ_ONLY) | ||
1104 | return; | ||
1105 | |||
1106 | stats.read_hits = atomic_read(&cache->stats.read_hit); | ||
1107 | stats.read_misses = atomic_read(&cache->stats.read_miss); | ||
1108 | stats.write_hits = atomic_read(&cache->stats.write_hit); | ||
1109 | stats.write_misses = atomic_read(&cache->stats.write_miss); | ||
1110 | |||
1111 | dm_cache_metadata_set_stats(cache->cmd, &stats); | ||
1112 | } | ||
1113 | |||
1114 | static void update_stats(struct cache_stats *stats, enum policy_operation op) | ||
1115 | { | ||
1116 | switch (op) { | ||
1117 | case POLICY_PROMOTE: | ||
1118 | atomic_inc(&stats->promotion); | ||
1119 | break; | ||
1120 | |||
1121 | case POLICY_DEMOTE: | ||
1122 | atomic_inc(&stats->demotion); | ||
1123 | break; | ||
1124 | |||
1125 | case POLICY_WRITEBACK: | ||
1126 | atomic_inc(&stats->writeback); | ||
1127 | break; | ||
1128 | } | ||
1129 | } | ||
1130 | |||
1049 | /*---------------------------------------------------------------- | 1131 | /*---------------------------------------------------------------- |
1050 | * Migration processing | 1132 | * Migration processing |
1051 | * | 1133 | * |
1052 | * Migration covers moving data from the origin device to the cache, or | 1134 | * Migration covers moving data from the origin device to the cache, or |
1053 | * vice versa. | 1135 | * vice versa. |
1054 | *--------------------------------------------------------------*/ | 1136 | *--------------------------------------------------------------*/ |
1137 | |||
1055 | static void inc_io_migrations(struct cache *cache) | 1138 | static void inc_io_migrations(struct cache *cache) |
1056 | { | 1139 | { |
1057 | atomic_inc(&cache->nr_io_migrations); | 1140 | atomic_inc(&cache->nr_io_migrations); |
@@ -1067,213 +1150,109 @@ static bool discard_or_flush(struct bio *bio) | |||
1067 | return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf); | 1150 | return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf); |
1068 | } | 1151 | } |
1069 | 1152 | ||
1070 | static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) | 1153 | static void calc_discard_block_range(struct cache *cache, struct bio *bio, |
1071 | { | 1154 | dm_dblock_t *b, dm_dblock_t *e) |
1072 | if (discard_or_flush(cell->holder)) { | ||
1073 | /* | ||
1074 | * We have to handle these bios individually. | ||
1075 | */ | ||
1076 | dm_cell_release(cache->prison, cell, &cache->deferred_bios); | ||
1077 | free_prison_cell(cache, cell); | ||
1078 | } else | ||
1079 | list_add_tail(&cell->user_list, &cache->deferred_cells); | ||
1080 | } | ||
1081 | |||
1082 | static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder) | ||
1083 | { | 1155 | { |
1084 | unsigned long flags; | 1156 | sector_t sb = bio->bi_iter.bi_sector; |
1085 | 1157 | sector_t se = bio_end_sector(bio); | |
1086 | if (!holder && dm_cell_promote_or_release(cache->prison, cell)) { | ||
1087 | /* | ||
1088 | * There was no prisoner to promote to holder, the | ||
1089 | * cell has been released. | ||
1090 | */ | ||
1091 | free_prison_cell(cache, cell); | ||
1092 | return; | ||
1093 | } | ||
1094 | 1158 | ||
1095 | spin_lock_irqsave(&cache->lock, flags); | 1159 | *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); |
1096 | __cell_defer(cache, cell); | ||
1097 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1098 | 1160 | ||
1099 | wake_worker(cache); | 1161 | if (se - sb < cache->discard_block_size) |
1162 | *e = *b; | ||
1163 | else | ||
1164 | *e = to_dblock(block_div(se, cache->discard_block_size)); | ||
1100 | } | 1165 | } |
1101 | 1166 | ||
1102 | static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err) | 1167 | /*----------------------------------------------------------------*/ |
1103 | { | ||
1104 | dm_cell_error(cache->prison, cell, err); | ||
1105 | free_prison_cell(cache, cell); | ||
1106 | } | ||
1107 | 1168 | ||
1108 | static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell) | 1169 | static void prevent_background_work(struct cache *cache) |
1109 | { | 1170 | { |
1110 | cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE); | 1171 | lockdep_off(); |
1172 | down_write(&cache->background_work_lock); | ||
1173 | lockdep_on(); | ||
1111 | } | 1174 | } |
1112 | 1175 | ||
1113 | static void free_io_migration(struct dm_cache_migration *mg) | 1176 | static void allow_background_work(struct cache *cache) |
1114 | { | 1177 | { |
1115 | struct cache *cache = mg->cache; | 1178 | lockdep_off(); |
1116 | 1179 | up_write(&cache->background_work_lock); | |
1117 | dec_io_migrations(cache); | 1180 | lockdep_on(); |
1118 | free_migration(mg); | ||
1119 | wake_worker(cache); | ||
1120 | } | 1181 | } |
1121 | 1182 | ||
1122 | static void migration_failure(struct dm_cache_migration *mg) | 1183 | static bool background_work_begin(struct cache *cache) |
1123 | { | 1184 | { |
1124 | struct cache *cache = mg->cache; | 1185 | bool r; |
1125 | const char *dev_name = cache_device_name(cache); | ||
1126 | |||
1127 | if (mg->writeback) { | ||
1128 | DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name); | ||
1129 | set_dirty(cache, mg->old_oblock, mg->cblock); | ||
1130 | cell_defer(cache, mg->old_ocell, false); | ||
1131 | |||
1132 | } else if (mg->demote) { | ||
1133 | DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name); | ||
1134 | policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); | ||
1135 | 1186 | ||
1136 | cell_defer(cache, mg->old_ocell, mg->promote ? false : true); | 1187 | lockdep_off(); |
1137 | if (mg->promote) | 1188 | r = down_read_trylock(&cache->background_work_lock); |
1138 | cell_defer(cache, mg->new_ocell, true); | 1189 | lockdep_on(); |
1139 | } else { | ||
1140 | DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name); | ||
1141 | policy_remove_mapping(cache->policy, mg->new_oblock); | ||
1142 | cell_defer(cache, mg->new_ocell, true); | ||
1143 | } | ||
1144 | 1190 | ||
1145 | free_io_migration(mg); | 1191 | return r; |
1146 | } | 1192 | } |
1147 | 1193 | ||
1148 | static void migration_success_pre_commit(struct dm_cache_migration *mg) | 1194 | static void background_work_end(struct cache *cache) |
1149 | { | 1195 | { |
1150 | int r; | 1196 | lockdep_off(); |
1151 | unsigned long flags; | 1197 | up_read(&cache->background_work_lock); |
1152 | struct cache *cache = mg->cache; | 1198 | lockdep_on(); |
1153 | 1199 | } | |
1154 | if (mg->writeback) { | ||
1155 | clear_dirty(cache, mg->old_oblock, mg->cblock); | ||
1156 | cell_defer(cache, mg->old_ocell, false); | ||
1157 | free_io_migration(mg); | ||
1158 | return; | ||
1159 | 1200 | ||
1160 | } else if (mg->demote) { | 1201 | /*----------------------------------------------------------------*/ |
1161 | r = dm_cache_remove_mapping(cache->cmd, mg->cblock); | ||
1162 | if (r) { | ||
1163 | DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata", | ||
1164 | cache_device_name(cache)); | ||
1165 | metadata_operation_failed(cache, "dm_cache_remove_mapping", r); | ||
1166 | policy_force_mapping(cache->policy, mg->new_oblock, | ||
1167 | mg->old_oblock); | ||
1168 | if (mg->promote) | ||
1169 | cell_defer(cache, mg->new_ocell, true); | ||
1170 | free_io_migration(mg); | ||
1171 | return; | ||
1172 | } | ||
1173 | } else { | ||
1174 | r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock); | ||
1175 | if (r) { | ||
1176 | DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata", | ||
1177 | cache_device_name(cache)); | ||
1178 | metadata_operation_failed(cache, "dm_cache_insert_mapping", r); | ||
1179 | policy_remove_mapping(cache->policy, mg->new_oblock); | ||
1180 | free_io_migration(mg); | ||
1181 | return; | ||
1182 | } | ||
1183 | } | ||
1184 | 1202 | ||
1185 | spin_lock_irqsave(&cache->lock, flags); | 1203 | static void quiesce(struct dm_cache_migration *mg, |
1186 | list_add_tail(&mg->list, &cache->need_commit_migrations); | 1204 | void (*continuation)(struct work_struct *)) |
1187 | cache->commit_requested = true; | 1205 | { |
1188 | spin_unlock_irqrestore(&cache->lock, flags); | 1206 | init_continuation(&mg->k, continuation); |
1207 | dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws); | ||
1189 | } | 1208 | } |
1190 | 1209 | ||
1191 | static void migration_success_post_commit(struct dm_cache_migration *mg) | 1210 | static struct dm_cache_migration *ws_to_mg(struct work_struct *ws) |
1192 | { | 1211 | { |
1193 | unsigned long flags; | 1212 | struct continuation *k = container_of(ws, struct continuation, ws); |
1194 | struct cache *cache = mg->cache; | 1213 | return container_of(k, struct dm_cache_migration, k); |
1195 | |||
1196 | if (mg->writeback) { | ||
1197 | DMWARN_LIMIT("%s: writeback unexpectedly triggered commit", | ||
1198 | cache_device_name(cache)); | ||
1199 | return; | ||
1200 | |||
1201 | } else if (mg->demote) { | ||
1202 | cell_defer(cache, mg->old_ocell, mg->promote ? false : true); | ||
1203 | |||
1204 | if (mg->promote) { | ||
1205 | mg->demote = false; | ||
1206 | |||
1207 | spin_lock_irqsave(&cache->lock, flags); | ||
1208 | list_add_tail(&mg->list, &cache->quiesced_migrations); | ||
1209 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1210 | |||
1211 | } else { | ||
1212 | if (mg->invalidate) | ||
1213 | policy_remove_mapping(cache->policy, mg->old_oblock); | ||
1214 | free_io_migration(mg); | ||
1215 | } | ||
1216 | |||
1217 | } else { | ||
1218 | if (mg->requeue_holder) { | ||
1219 | clear_dirty(cache, mg->new_oblock, mg->cblock); | ||
1220 | cell_defer(cache, mg->new_ocell, true); | ||
1221 | } else { | ||
1222 | /* | ||
1223 | * The block was promoted via an overwrite, so it's dirty. | ||
1224 | */ | ||
1225 | set_dirty(cache, mg->new_oblock, mg->cblock); | ||
1226 | bio_endio(mg->new_ocell->holder); | ||
1227 | cell_defer(cache, mg->new_ocell, false); | ||
1228 | } | ||
1229 | free_io_migration(mg); | ||
1230 | } | ||
1231 | } | 1214 | } |
1232 | 1215 | ||
1233 | static void copy_complete(int read_err, unsigned long write_err, void *context) | 1216 | static void copy_complete(int read_err, unsigned long write_err, void *context) |
1234 | { | 1217 | { |
1235 | unsigned long flags; | 1218 | struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k); |
1236 | struct dm_cache_migration *mg = (struct dm_cache_migration *) context; | ||
1237 | struct cache *cache = mg->cache; | ||
1238 | 1219 | ||
1239 | if (read_err || write_err) | 1220 | if (read_err || write_err) |
1240 | mg->err = true; | 1221 | mg->k.input = -EIO; |
1241 | 1222 | ||
1242 | spin_lock_irqsave(&cache->lock, flags); | 1223 | queue_continuation(mg->cache->wq, &mg->k); |
1243 | list_add_tail(&mg->list, &cache->completed_migrations); | ||
1244 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1245 | |||
1246 | wake_worker(cache); | ||
1247 | } | 1224 | } |
1248 | 1225 | ||
1249 | static void issue_copy(struct dm_cache_migration *mg) | 1226 | static int copy(struct dm_cache_migration *mg, bool promote) |
1250 | { | 1227 | { |
1251 | int r; | 1228 | int r; |
1252 | struct dm_io_region o_region, c_region; | 1229 | struct dm_io_region o_region, c_region; |
1253 | struct cache *cache = mg->cache; | 1230 | struct cache *cache = mg->cache; |
1254 | sector_t cblock = from_cblock(mg->cblock); | ||
1255 | 1231 | ||
1256 | o_region.bdev = cache->origin_dev->bdev; | 1232 | o_region.bdev = cache->origin_dev->bdev; |
1233 | o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block; | ||
1257 | o_region.count = cache->sectors_per_block; | 1234 | o_region.count = cache->sectors_per_block; |
1258 | 1235 | ||
1259 | c_region.bdev = cache->cache_dev->bdev; | 1236 | c_region.bdev = cache->cache_dev->bdev; |
1260 | c_region.sector = cblock * cache->sectors_per_block; | 1237 | c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block; |
1261 | c_region.count = cache->sectors_per_block; | 1238 | c_region.count = cache->sectors_per_block; |
1262 | 1239 | ||
1263 | if (mg->writeback || mg->demote) { | 1240 | if (promote) |
1264 | /* demote */ | 1241 | r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); |
1265 | o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block; | 1242 | else |
1266 | r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg); | 1243 | r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); |
1267 | } else { | ||
1268 | /* promote */ | ||
1269 | o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block; | ||
1270 | r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); | ||
1271 | } | ||
1272 | 1244 | ||
1273 | if (r < 0) { | 1245 | return r; |
1274 | DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache)); | 1246 | } |
1275 | migration_failure(mg); | 1247 | |
1276 | } | 1248 | static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) |
1249 | { | ||
1250 | size_t pb_data_size = get_per_bio_data_size(cache); | ||
1251 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
1252 | |||
1253 | if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell)) | ||
1254 | free_prison_cell(cache, pb->cell); | ||
1255 | pb->cell = NULL; | ||
1277 | } | 1256 | } |
1278 | 1257 | ||
1279 | static void overwrite_endio(struct bio *bio) | 1258 | static void overwrite_endio(struct bio *bio) |
@@ -1282,368 +1261,475 @@ static void overwrite_endio(struct bio *bio) | |||
1282 | struct cache *cache = mg->cache; | 1261 | struct cache *cache = mg->cache; |
1283 | size_t pb_data_size = get_per_bio_data_size(cache); | 1262 | size_t pb_data_size = get_per_bio_data_size(cache); |
1284 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | 1263 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); |
1285 | unsigned long flags; | ||
1286 | 1264 | ||
1287 | dm_unhook_bio(&pb->hook_info, bio); | 1265 | dm_unhook_bio(&pb->hook_info, bio); |
1288 | 1266 | ||
1289 | if (bio->bi_error) | 1267 | if (bio->bi_error) |
1290 | mg->err = true; | 1268 | mg->k.input = bio->bi_error; |
1291 | |||
1292 | mg->requeue_holder = false; | ||
1293 | 1269 | ||
1294 | spin_lock_irqsave(&cache->lock, flags); | 1270 | queue_continuation(mg->cache->wq, &mg->k); |
1295 | list_add_tail(&mg->list, &cache->completed_migrations); | ||
1296 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1297 | |||
1298 | wake_worker(cache); | ||
1299 | } | 1271 | } |
1300 | 1272 | ||
1301 | static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) | 1273 | static void overwrite(struct dm_cache_migration *mg, |
1274 | void (*continuation)(struct work_struct *)) | ||
1302 | { | 1275 | { |
1276 | struct bio *bio = mg->overwrite_bio; | ||
1303 | size_t pb_data_size = get_per_bio_data_size(mg->cache); | 1277 | size_t pb_data_size = get_per_bio_data_size(mg->cache); |
1304 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | 1278 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); |
1305 | 1279 | ||
1306 | dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); | 1280 | dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); |
1307 | remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); | ||
1308 | 1281 | ||
1309 | /* | 1282 | /* |
1310 | * No need to inc_ds() here, since the cell will be held for the | 1283 | * The overwrite bio is part of the copy operation, as such it does |
1311 | * duration of the io. | 1284 | * not set/clear discard or dirty flags. |
1312 | */ | 1285 | */ |
1286 | if (mg->op->op == POLICY_PROMOTE) | ||
1287 | remap_to_cache(mg->cache, bio, mg->op->cblock); | ||
1288 | else | ||
1289 | remap_to_origin(mg->cache, bio); | ||
1290 | |||
1291 | init_continuation(&mg->k, continuation); | ||
1313 | accounted_request(mg->cache, bio); | 1292 | accounted_request(mg->cache, bio); |
1314 | } | 1293 | } |
1315 | 1294 | ||
1316 | static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) | 1295 | /* |
1296 | * Migration steps: | ||
1297 | * | ||
1298 | * 1) exclusive lock preventing WRITEs | ||
1299 | * 2) quiesce | ||
1300 | * 3) copy or issue overwrite bio | ||
1301 | * 4) upgrade to exclusive lock preventing READs and WRITEs | ||
1302 | * 5) quiesce | ||
1303 | * 6) update metadata and commit | ||
1304 | * 7) unlock | ||
1305 | */ | ||
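
The steps above are driven by chained continuations rather than by the old do_worker() loop: each stage records its outcome in mg->k.input and queues the next stage on the cache's workqueue. Below is a minimal user-space sketch of that chaining, assuming a simplified continuation struct and direct calls in place of queue_work(); the step_* names are illustrative, not kernel functions.

#include <stdio.h>

/* Simplified stand-in for the kernel continuation: a function pointer plus
 * an input slot carrying the error code handed on by the previous stage. */
struct continuation {
	void (*fn)(struct continuation *k);
	int input;
};

static void init_continuation(struct continuation *k,
			      void (*fn)(struct continuation *k))
{
	k->fn = fn;
	k->input = 0;
}

/* The kernel queues the work item on a workqueue; this sketch runs it inline. */
static void queue_continuation(struct continuation *k)
{
	k->fn(k);
}

static void step_complete(struct continuation *k)
{
	printf("migration %s\n", k->input ? "failed" : "succeeded");
}

static void step_update_metadata(struct continuation *k)
{
	int copy_result = k->input;

	init_continuation(k, step_complete);
	if (copy_result) {
		k->input = copy_result;	/* propagate the copy error */
	} else {
		printf("updating metadata and committing\n");
		k->input = 0;
	}
	queue_continuation(k);
}

static void step_copy(struct continuation *k)
{
	printf("copying block\n");
	init_continuation(k, step_update_metadata);
	k->input = 0;			/* a failed copy would store -EIO here */
	queue_continuation(k);
}

int main(void)
{
	struct continuation k;

	init_continuation(&k, step_copy);
	queue_continuation(&k);
	return 0;
}

In the real target the continuation also embeds the work_struct and the queueing is asynchronous, but the error-propagation idea is the same.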
1306 | static void mg_complete(struct dm_cache_migration *mg, bool success) | ||
1317 | { | 1307 | { |
1318 | return (bio_data_dir(bio) == WRITE) && | 1308 | struct bio_list bios; |
1319 | (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); | 1309 | struct cache *cache = mg->cache; |
1320 | } | 1310 | struct policy_work *op = mg->op; |
1311 | dm_cblock_t cblock = op->cblock; | ||
1312 | |||
1313 | if (success) | ||
1314 | update_stats(&cache->stats, op->op); | ||
1315 | |||
1316 | switch (op->op) { | ||
1317 | case POLICY_PROMOTE: | ||
1318 | clear_discard(cache, oblock_to_dblock(cache, op->oblock)); | ||
1319 | policy_complete_background_work(cache->policy, op, success); | ||
1320 | |||
1321 | if (mg->overwrite_bio) { | ||
1322 | if (success) | ||
1323 | force_set_dirty(cache, cblock); | ||
1324 | else | ||
1325 | mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO); | ||
1326 | bio_endio(mg->overwrite_bio); | ||
1327 | } else { | ||
1328 | if (success) | ||
1329 | force_clear_dirty(cache, cblock); | ||
1330 | dec_io_migrations(cache); | ||
1331 | } | ||
1332 | break; | ||
1321 | 1333 | ||
1322 | static void avoid_copy(struct dm_cache_migration *mg) | 1334 | case POLICY_DEMOTE: |
1323 | { | 1335 | /* |
1324 | atomic_inc(&mg->cache->stats.copies_avoided); | 1336 | * We clear dirty here to update the nr_dirty counter. |
1325 | migration_success_pre_commit(mg); | 1337 | */ |
1326 | } | 1338 | if (success) |
1339 | force_clear_dirty(cache, cblock); | ||
1340 | policy_complete_background_work(cache->policy, op, success); | ||
1341 | dec_io_migrations(cache); | ||
1342 | break; | ||
1327 | 1343 | ||
1328 | static void calc_discard_block_range(struct cache *cache, struct bio *bio, | 1344 | case POLICY_WRITEBACK: |
1329 | dm_dblock_t *b, dm_dblock_t *e) | 1345 | if (success) |
1330 | { | 1346 | force_clear_dirty(cache, cblock); |
1331 | sector_t sb = bio->bi_iter.bi_sector; | 1347 | policy_complete_background_work(cache->policy, op, success); |
1332 | sector_t se = bio_end_sector(bio); | 1348 | dec_io_migrations(cache); |
1349 | break; | ||
1350 | } | ||
1333 | 1351 | ||
1334 | *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); | 1352 | bio_list_init(&bios); |
1353 | if (mg->cell) { | ||
1354 | if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios)) | ||
1355 | free_prison_cell(cache, mg->cell); | ||
1356 | } | ||
1335 | 1357 | ||
1336 | if (se - sb < cache->discard_block_size) | 1358 | free_migration(mg); |
1337 | *e = *b; | 1359 | defer_bios(cache, &bios); |
1338 | else | 1360 | wake_migration_worker(cache); |
1339 | *e = to_dblock(block_div(se, cache->discard_block_size)); | 1361 | |
1362 | background_work_end(cache); | ||
1340 | } | 1363 | } |
1341 | 1364 | ||
1342 | static void issue_discard(struct dm_cache_migration *mg) | 1365 | static void mg_success(struct work_struct *ws) |
1343 | { | 1366 | { |
1344 | dm_dblock_t b, e; | 1367 | struct dm_cache_migration *mg = ws_to_mg(ws); |
1345 | struct bio *bio = mg->new_ocell->holder; | 1368 | mg_complete(mg, mg->k.input == 0); |
1346 | struct cache *cache = mg->cache; | ||
1347 | |||
1348 | calc_discard_block_range(cache, bio, &b, &e); | ||
1349 | while (b != e) { | ||
1350 | set_discard(cache, b); | ||
1351 | b = to_dblock(from_dblock(b) + 1); | ||
1352 | } | ||
1353 | |||
1354 | bio_endio(bio); | ||
1355 | cell_defer(cache, mg->new_ocell, false); | ||
1356 | free_migration(mg); | ||
1357 | wake_worker(cache); | ||
1358 | } | 1369 | } |
1359 | 1370 | ||
1360 | static void issue_copy_or_discard(struct dm_cache_migration *mg) | 1371 | static void mg_update_metadata(struct work_struct *ws) |
1361 | { | 1372 | { |
1362 | bool avoid; | 1373 | int r; |
1374 | struct dm_cache_migration *mg = ws_to_mg(ws); | ||
1363 | struct cache *cache = mg->cache; | 1375 | struct cache *cache = mg->cache; |
1376 | struct policy_work *op = mg->op; | ||
1364 | 1377 | ||
1365 | if (mg->discard) { | 1378 | switch (op->op) { |
1366 | issue_discard(mg); | 1379 | case POLICY_PROMOTE: |
1367 | return; | 1380 | r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock); |
1368 | } | 1381 | if (r) { |
1382 | DMERR_LIMIT("%s: migration failed; couldn't insert mapping", | ||
1383 | cache_device_name(cache)); | ||
1384 | metadata_operation_failed(cache, "dm_cache_insert_mapping", r); | ||
1369 | 1385 | ||
1370 | if (mg->writeback || mg->demote) | 1386 | mg_complete(mg, false); |
1371 | avoid = !is_dirty(cache, mg->cblock) || | 1387 | return; |
1372 | is_discarded_oblock(cache, mg->old_oblock); | 1388 | } |
1373 | else { | 1389 | mg_complete(mg, true); |
1374 | struct bio *bio = mg->new_ocell->holder; | 1390 | break; |
1375 | 1391 | ||
1376 | avoid = is_discarded_oblock(cache, mg->new_oblock); | 1392 | case POLICY_DEMOTE: |
1393 | r = dm_cache_remove_mapping(cache->cmd, op->cblock); | ||
1394 | if (r) { | ||
1395 | DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata", | ||
1396 | cache_device_name(cache)); | ||
1397 | metadata_operation_failed(cache, "dm_cache_remove_mapping", r); | ||
1377 | 1398 | ||
1378 | if (writeback_mode(&cache->features) && | 1399 | mg_complete(mg, false); |
1379 | !avoid && bio_writes_complete_block(cache, bio)) { | ||
1380 | issue_overwrite(mg, bio); | ||
1381 | return; | 1400 | return; |
1382 | } | 1401 | } |
1383 | } | ||
1384 | 1402 | ||
1385 | avoid ? avoid_copy(mg) : issue_copy(mg); | 1403 | /* |
1404 | * It would be nice if we only had to commit when a REQ_FLUSH | ||
1405 | * comes through. But there's one scenario that we have to | ||
1406 | * look out for: | ||
1407 | * | ||
1408 | * - vblock x in a cache block | ||
1409 | * - demotion occurs | ||
1410 | * - cache block gets reallocated and overwritten | ||
1411 | * - crash | ||
1412 | * | ||
1413 | * When we recover, because there was no commit the cache will | ||
1414 | * roll back to having the data for vblock x in the cache block. | ||
1415 | * But the cache block has since been overwritten, so it'll end | ||
1416 | * up pointing to data that was never in 'x' during the history | ||
1417 | * of the device. | ||
1418 | * | ||
1419 | * To avoid this issue we require a commit as part of the | ||
1420 | * demotion operation. | ||
1421 | */ | ||
1422 | init_continuation(&mg->k, mg_success); | ||
1423 | continue_after_commit(&cache->committer, &mg->k); | ||
1424 | schedule_commit(&cache->committer); | ||
1425 | break; | ||
1426 | |||
1427 | case POLICY_WRITEBACK: | ||
1428 | mg_complete(mg, true); | ||
1429 | break; | ||
1430 | } | ||
1386 | } | 1431 | } |
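
For demotions the final mg_success() step is therefore not called directly: the continuation is handed to the cache's committer with continue_after_commit() and only runs once schedule_commit() has flushed the metadata, which is what enforces the ordering described above. The user-space sketch below shows only that batching idea; the function names are borrowed from the code above, but the signatures, the plain linked list and the synchronous stubbed commit are simplifications, not the kernel batcher's API.

#include <stdio.h>

struct continuation {
	void (*fn)(struct continuation *k);
	int input;			/* result of the commit */
	struct continuation *next;
};

/* Gathers continuations that must not run before the next metadata commit. */
struct batcher {
	struct continuation *pending;
	int (*commit)(void);
};

static void continue_after_commit(struct batcher *b, struct continuation *k)
{
	k->next = b->pending;
	b->pending = k;
}

static void schedule_commit(struct batcher *b)
{
	int r = b->commit();		/* one commit covers everything queued */
	struct continuation *k = b->pending;

	b->pending = NULL;
	while (k) {
		struct continuation *next = k->next;

		k->input = r;		/* every waiter sees the commit result */
		k->fn(k);
		k = next;
	}
}

static int fake_commit(void)
{
	printf("committing metadata\n");
	return 0;
}

static void demotion_done(struct continuation *k)
{
	printf("demotion finished, commit result %d\n", k->input);
}

int main(void)
{
	struct batcher committer = { .pending = NULL, .commit = fake_commit };
	struct continuation k = { .fn = demotion_done };

	continue_after_commit(&committer, &k);
	schedule_commit(&committer);
	return 0;
}

The real batcher also coalesces commit requests from many issuers, runs asynchronously and holds deferred bios (issue_after_commit()) as well as continuations.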
1387 | 1432 | ||
1388 | static void complete_migration(struct dm_cache_migration *mg) | 1433 | static void mg_update_metadata_after_copy(struct work_struct *ws) |
1389 | { | 1434 | { |
1390 | if (mg->err) | 1435 | struct dm_cache_migration *mg = ws_to_mg(ws); |
1391 | migration_failure(mg); | 1436 | |
1437 | /* | ||
1438 | * Did the copy succeed? | ||
1439 | */ | ||
1440 | if (mg->k.input) | ||
1441 | mg_complete(mg, false); | ||
1392 | else | 1442 | else |
1393 | migration_success_pre_commit(mg); | 1443 | mg_update_metadata(ws); |
1394 | } | 1444 | } |
1395 | 1445 | ||
1396 | static void process_migrations(struct cache *cache, struct list_head *head, | 1446 | static void mg_upgrade_lock(struct work_struct *ws) |
1397 | void (*fn)(struct dm_cache_migration *)) | ||
1398 | { | 1447 | { |
1399 | unsigned long flags; | 1448 | int r; |
1400 | struct list_head list; | 1449 | struct dm_cache_migration *mg = ws_to_mg(ws); |
1401 | struct dm_cache_migration *mg, *tmp; | ||
1402 | 1450 | ||
1403 | INIT_LIST_HEAD(&list); | 1451 | /* |
1404 | spin_lock_irqsave(&cache->lock, flags); | 1452 | * Did the copy succeed? |
1405 | list_splice_init(head, &list); | 1453 | */ |
1406 | spin_unlock_irqrestore(&cache->lock, flags); | 1454 | if (mg->k.input) |
1455 | mg_complete(mg, false); | ||
1407 | 1456 | ||
1408 | list_for_each_entry_safe(mg, tmp, &list, list) | 1457 | else { |
1409 | fn(mg); | 1458 | /* |
1410 | } | 1459 | * Now we want the lock to prevent both reads and writes. |
1460 | */ | ||
1461 | r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell, | ||
1462 | READ_WRITE_LOCK_LEVEL); | ||
1463 | if (r < 0) | ||
1464 | mg_complete(mg, false); | ||
1411 | 1465 | ||
1412 | static void __queue_quiesced_migration(struct dm_cache_migration *mg) | 1466 | else if (r) |
1413 | { | 1467 | quiesce(mg, mg_update_metadata); |
1414 | list_add_tail(&mg->list, &mg->cache->quiesced_migrations); | 1468 | |
1469 | else | ||
1470 | mg_update_metadata(ws); | ||
1471 | } | ||
1415 | } | 1472 | } |
1416 | 1473 | ||
1417 | static void queue_quiesced_migration(struct dm_cache_migration *mg) | 1474 | static void mg_copy(struct work_struct *ws) |
1418 | { | 1475 | { |
1419 | unsigned long flags; | 1476 | int r; |
1420 | struct cache *cache = mg->cache; | 1477 | struct dm_cache_migration *mg = ws_to_mg(ws); |
1421 | 1478 | ||
1422 | spin_lock_irqsave(&cache->lock, flags); | 1479 | if (mg->overwrite_bio) { |
1423 | __queue_quiesced_migration(mg); | 1480 | /* |
1424 | spin_unlock_irqrestore(&cache->lock, flags); | 1481 | * It's safe to do this here, even though it's new data |
1482 | * because all IO has been locked out of the block. | ||
1483 | * | ||
1484 | * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL | ||
1485 | * so _not_ using mg_upgrade_lock() as continuation. | ||
1486 | */ | ||
1487 | overwrite(mg, mg_update_metadata_after_copy); | ||
1425 | 1488 | ||
1426 | wake_worker(cache); | 1489 | } else { |
1427 | } | 1490 | struct cache *cache = mg->cache; |
1491 | struct policy_work *op = mg->op; | ||
1492 | bool is_policy_promote = (op->op == POLICY_PROMOTE); | ||
1428 | 1493 | ||
1429 | static void queue_quiesced_migrations(struct cache *cache, struct list_head *work) | 1494 | if ((!is_policy_promote && !is_dirty(cache, op->cblock)) || |
1430 | { | 1495 | is_discarded_oblock(cache, op->oblock)) { |
1431 | unsigned long flags; | 1496 | mg_upgrade_lock(ws); |
1432 | struct dm_cache_migration *mg, *tmp; | 1497 | return; |
1498 | } | ||
1433 | 1499 | ||
1434 | spin_lock_irqsave(&cache->lock, flags); | 1500 | init_continuation(&mg->k, mg_upgrade_lock); |
1435 | list_for_each_entry_safe(mg, tmp, work, list) | ||
1436 | __queue_quiesced_migration(mg); | ||
1437 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1438 | 1501 | ||
1439 | wake_worker(cache); | 1502 | r = copy(mg, is_policy_promote); |
1503 | if (r) { | ||
1504 | DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); | ||
1505 | mg->k.input = -EIO; | ||
1506 | mg_complete(mg, false); | ||
1507 | } | ||
1508 | } | ||
1440 | } | 1509 | } |
1441 | 1510 | ||
1442 | static void check_for_quiesced_migrations(struct cache *cache, | 1511 | static int mg_lock_writes(struct dm_cache_migration *mg) |
1443 | struct per_bio_data *pb) | ||
1444 | { | 1512 | { |
1445 | struct list_head work; | 1513 | int r; |
1514 | struct dm_cell_key_v2 key; | ||
1515 | struct cache *cache = mg->cache; | ||
1516 | struct dm_bio_prison_cell_v2 *prealloc; | ||
1446 | 1517 | ||
1447 | if (!pb->all_io_entry) | 1518 | prealloc = alloc_prison_cell(cache); |
1448 | return; | 1519 | if (!prealloc) { |
1520 | DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache)); | ||
1521 | mg_complete(mg, false); | ||
1522 | return -ENOMEM; | ||
1523 | } | ||
1524 | |||
1525 | /* | ||
1526 | * Prevent writes to the block, but allow reads to continue. | ||
1527 | * Unless we're using an overwrite bio, in which case we lock | ||
1528 | * everything. | ||
1529 | */ | ||
1530 | build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key); | ||
1531 | r = dm_cell_lock_v2(cache->prison, &key, | ||
1532 | mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL, | ||
1533 | prealloc, &mg->cell); | ||
1534 | if (r < 0) { | ||
1535 | free_prison_cell(cache, prealloc); | ||
1536 | mg_complete(mg, false); | ||
1537 | return r; | ||
1538 | } | ||
1449 | 1539 | ||
1450 | INIT_LIST_HEAD(&work); | 1540 | if (mg->cell != prealloc) |
1451 | dm_deferred_entry_dec(pb->all_io_entry, &work); | 1541 | free_prison_cell(cache, prealloc); |
1452 | 1542 | ||
1453 | if (!list_empty(&work)) | 1543 | if (r == 0) |
1454 | queue_quiesced_migrations(cache, &work); | 1544 | mg_copy(&mg->k.ws); |
1455 | } | 1545 | else |
1546 | quiesce(mg, mg_copy); | ||
1456 | 1547 | ||
1457 | static void quiesce_migration(struct dm_cache_migration *mg) | 1548 | return 0; |
1458 | { | ||
1459 | if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list)) | ||
1460 | queue_quiesced_migration(mg); | ||
1461 | } | 1549 | } |
1462 | 1550 | ||
1463 | static void promote(struct cache *cache, struct prealloc *structs, | 1551 | static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio) |
1464 | dm_oblock_t oblock, dm_cblock_t cblock, | ||
1465 | struct dm_bio_prison_cell *cell) | ||
1466 | { | 1552 | { |
1467 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1553 | struct dm_cache_migration *mg; |
1554 | |||
1555 | if (!background_work_begin(cache)) { | ||
1556 | policy_complete_background_work(cache->policy, op, false); | ||
1557 | return -EPERM; | ||
1558 | } | ||
1559 | |||
1560 | mg = alloc_migration(cache); | ||
1561 | if (!mg) { | ||
1562 | policy_complete_background_work(cache->policy, op, false); | ||
1563 | background_work_end(cache); | ||
1564 | return -ENOMEM; | ||
1565 | } | ||
1566 | |||
1567 | memset(mg, 0, sizeof(*mg)); | ||
1468 | 1568 | ||
1469 | mg->err = false; | ||
1470 | mg->discard = false; | ||
1471 | mg->writeback = false; | ||
1472 | mg->demote = false; | ||
1473 | mg->promote = true; | ||
1474 | mg->requeue_holder = true; | ||
1475 | mg->invalidate = false; | ||
1476 | mg->cache = cache; | 1569 | mg->cache = cache; |
1477 | mg->new_oblock = oblock; | 1570 | mg->op = op; |
1478 | mg->cblock = cblock; | 1571 | mg->overwrite_bio = bio; |
1479 | mg->old_ocell = NULL; | 1572 | |
1480 | mg->new_ocell = cell; | 1573 | if (!bio) |
1481 | mg->start_jiffies = jiffies; | 1574 | inc_io_migrations(cache); |
1482 | 1575 | ||
1483 | inc_io_migrations(cache); | 1576 | return mg_lock_writes(mg); |
1484 | quiesce_migration(mg); | ||
1485 | } | 1577 | } |
1486 | 1578 | ||
1487 | static void writeback(struct cache *cache, struct prealloc *structs, | 1579 | /*---------------------------------------------------------------- |
1488 | dm_oblock_t oblock, dm_cblock_t cblock, | 1580 | * invalidation processing |
1489 | struct dm_bio_prison_cell *cell) | 1581 | *--------------------------------------------------------------*/ |
1582 | |||
1583 | static void invalidate_complete(struct dm_cache_migration *mg, bool success) | ||
1490 | { | 1584 | { |
1491 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1585 | struct bio_list bios; |
1586 | struct cache *cache = mg->cache; | ||
1492 | 1587 | ||
1493 | mg->err = false; | 1588 | bio_list_init(&bios); |
1494 | mg->discard = false; | 1589 | if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios)) |
1495 | mg->writeback = true; | 1590 | free_prison_cell(cache, mg->cell); |
1496 | mg->demote = false; | ||
1497 | mg->promote = false; | ||
1498 | mg->requeue_holder = true; | ||
1499 | mg->invalidate = false; | ||
1500 | mg->cache = cache; | ||
1501 | mg->old_oblock = oblock; | ||
1502 | mg->cblock = cblock; | ||
1503 | mg->old_ocell = cell; | ||
1504 | mg->new_ocell = NULL; | ||
1505 | mg->start_jiffies = jiffies; | ||
1506 | |||
1507 | inc_io_migrations(cache); | ||
1508 | quiesce_migration(mg); | ||
1509 | } | ||
1510 | |||
1511 | static void demote_then_promote(struct cache *cache, struct prealloc *structs, | ||
1512 | dm_oblock_t old_oblock, dm_oblock_t new_oblock, | ||
1513 | dm_cblock_t cblock, | ||
1514 | struct dm_bio_prison_cell *old_ocell, | ||
1515 | struct dm_bio_prison_cell *new_ocell) | ||
1516 | { | ||
1517 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | ||
1518 | |||
1519 | mg->err = false; | ||
1520 | mg->discard = false; | ||
1521 | mg->writeback = false; | ||
1522 | mg->demote = true; | ||
1523 | mg->promote = true; | ||
1524 | mg->requeue_holder = true; | ||
1525 | mg->invalidate = false; | ||
1526 | mg->cache = cache; | ||
1527 | mg->old_oblock = old_oblock; | ||
1528 | mg->new_oblock = new_oblock; | ||
1529 | mg->cblock = cblock; | ||
1530 | mg->old_ocell = old_ocell; | ||
1531 | mg->new_ocell = new_ocell; | ||
1532 | mg->start_jiffies = jiffies; | ||
1533 | 1591 | ||
1534 | inc_io_migrations(cache); | 1592 | if (!success && mg->overwrite_bio) |
1535 | quiesce_migration(mg); | 1593 | bio_io_error(mg->overwrite_bio); |
1536 | } | ||
1537 | 1594 | ||
1538 | /* | 1595 | free_migration(mg); |
1539 | * Invalidate a cache entry. No writeback occurs; any changes in the cache | 1596 | defer_bios(cache, &bios); |
1540 | * block are thrown away. | ||
1541 | */ | ||
1542 | static void invalidate(struct cache *cache, struct prealloc *structs, | ||
1543 | dm_oblock_t oblock, dm_cblock_t cblock, | ||
1544 | struct dm_bio_prison_cell *cell) | ||
1545 | { | ||
1546 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | ||
1547 | |||
1548 | mg->err = false; | ||
1549 | mg->discard = false; | ||
1550 | mg->writeback = false; | ||
1551 | mg->demote = true; | ||
1552 | mg->promote = false; | ||
1553 | mg->requeue_holder = true; | ||
1554 | mg->invalidate = true; | ||
1555 | mg->cache = cache; | ||
1556 | mg->old_oblock = oblock; | ||
1557 | mg->cblock = cblock; | ||
1558 | mg->old_ocell = cell; | ||
1559 | mg->new_ocell = NULL; | ||
1560 | mg->start_jiffies = jiffies; | ||
1561 | 1597 | ||
1562 | inc_io_migrations(cache); | 1598 | background_work_end(cache); |
1563 | quiesce_migration(mg); | ||
1564 | } | 1599 | } |
1565 | 1600 | ||
1566 | static void discard(struct cache *cache, struct prealloc *structs, | 1601 | static void invalidate_completed(struct work_struct *ws) |
1567 | struct dm_bio_prison_cell *cell) | ||
1568 | { | 1602 | { |
1569 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1603 | struct dm_cache_migration *mg = ws_to_mg(ws); |
1604 | invalidate_complete(mg, !mg->k.input); | ||
1605 | } | ||
1570 | 1606 | ||
1571 | mg->err = false; | 1607 | static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock) |
1572 | mg->discard = true; | 1608 | { |
1573 | mg->writeback = false; | 1609 | int r = policy_invalidate_mapping(cache->policy, cblock); |
1574 | mg->demote = false; | 1610 | if (!r) { |
1575 | mg->promote = false; | 1611 | r = dm_cache_remove_mapping(cache->cmd, cblock); |
1576 | mg->requeue_holder = false; | 1612 | if (r) { |
1577 | mg->invalidate = false; | 1613 | DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata", |
1578 | mg->cache = cache; | 1614 | cache_device_name(cache)); |
1579 | mg->old_ocell = NULL; | 1615 | metadata_operation_failed(cache, "dm_cache_remove_mapping", r); |
1580 | mg->new_ocell = cell; | 1616 | } |
1581 | mg->start_jiffies = jiffies; | 1617 | |
1618 | } else if (r == -ENODATA) { | ||
1619 | /* | ||
1620 | * Harmless, already unmapped. | ||
1621 | */ | ||
1622 | r = 0; | ||
1623 | |||
1624 | } else | ||
1625 | DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache)); | ||
1582 | 1626 | ||
1583 | quiesce_migration(mg); | 1627 | return r; |
1584 | } | 1628 | } |
1585 | 1629 | ||
1586 | /*---------------------------------------------------------------- | 1630 | static void invalidate_remove(struct work_struct *ws) |
1587 | * bio processing | ||
1588 | *--------------------------------------------------------------*/ | ||
1589 | static void defer_bio(struct cache *cache, struct bio *bio) | ||
1590 | { | 1631 | { |
1591 | unsigned long flags; | 1632 | int r; |
1633 | struct dm_cache_migration *mg = ws_to_mg(ws); | ||
1634 | struct cache *cache = mg->cache; | ||
1592 | 1635 | ||
1593 | spin_lock_irqsave(&cache->lock, flags); | 1636 | r = invalidate_cblock(cache, mg->invalidate_cblock); |
1594 | bio_list_add(&cache->deferred_bios, bio); | 1637 | if (r) { |
1595 | spin_unlock_irqrestore(&cache->lock, flags); | 1638 | invalidate_complete(mg, false); |
1639 | return; | ||
1640 | } | ||
1596 | 1641 | ||
1597 | wake_worker(cache); | 1642 | init_continuation(&mg->k, invalidate_completed); |
1643 | continue_after_commit(&cache->committer, &mg->k); | ||
1644 | remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock); | ||
1645 | mg->overwrite_bio = NULL; | ||
1646 | schedule_commit(&cache->committer); | ||
1598 | } | 1647 | } |
1599 | 1648 | ||
1600 | static void process_flush_bio(struct cache *cache, struct bio *bio) | 1649 | static int invalidate_lock(struct dm_cache_migration *mg) |
1601 | { | 1650 | { |
1602 | size_t pb_data_size = get_per_bio_data_size(cache); | 1651 | int r; |
1603 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | 1652 | struct dm_cell_key_v2 key; |
1653 | struct cache *cache = mg->cache; | ||
1654 | struct dm_bio_prison_cell_v2 *prealloc; | ||
1604 | 1655 | ||
1605 | BUG_ON(bio->bi_iter.bi_size); | 1656 | prealloc = alloc_prison_cell(cache); |
1606 | if (!pb->req_nr) | 1657 | if (!prealloc) { |
1607 | remap_to_origin(cache, bio); | 1658 | invalidate_complete(mg, false); |
1608 | else | 1659 | return -ENOMEM; |
1609 | remap_to_cache(cache, bio, 0); | 1660 | } |
1610 | 1661 | ||
1611 | /* | 1662 | build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key); |
1612 | * REQ_PREFLUSH is not directed at any particular block so we don't | 1663 | r = dm_cell_lock_v2(cache->prison, &key, |
1613 | * need to inc_ds(). REQ_FUA's are split into a write + REQ_PREFLUSH | 1664 | READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell); |
1614 | * by dm-core. | 1665 | if (r < 0) { |
1615 | */ | 1666 | free_prison_cell(cache, prealloc); |
1616 | issue(cache, bio); | 1667 | invalidate_complete(mg, false); |
1668 | return r; | ||
1669 | } | ||
1670 | |||
1671 | if (mg->cell != prealloc) | ||
1672 | free_prison_cell(cache, prealloc); | ||
1673 | |||
1674 | if (r) | ||
1675 | quiesce(mg, invalidate_remove); | ||
1676 | |||
1677 | else { | ||
1678 | /* | ||
1679 | * We can't call invalidate_remove() directly here because we | ||
1680 | * might still be in request context. | ||
1681 | */ | ||
1682 | init_continuation(&mg->k, invalidate_remove); | ||
1683 | queue_work(cache->wq, &mg->k.ws); | ||
1684 | } | ||
1685 | |||
1686 | return 0; | ||
1617 | } | 1687 | } |
1618 | 1688 | ||
1619 | static void process_discard_bio(struct cache *cache, struct prealloc *structs, | 1689 | static int invalidate_start(struct cache *cache, dm_cblock_t cblock, |
1620 | struct bio *bio) | 1690 | dm_oblock_t oblock, struct bio *bio) |
1621 | { | 1691 | { |
1622 | int r; | 1692 | struct dm_cache_migration *mg; |
1623 | dm_dblock_t b, e; | ||
1624 | struct dm_bio_prison_cell *cell_prealloc, *new_ocell; | ||
1625 | 1693 | ||
1626 | calc_discard_block_range(cache, bio, &b, &e); | 1694 | if (!background_work_begin(cache)) |
1627 | if (b == e) { | 1695 | return -EPERM; |
1628 | bio_endio(bio); | 1696 | |
1629 | return; | 1697 | mg = alloc_migration(cache); |
1698 | if (!mg) { | ||
1699 | background_work_end(cache); | ||
1700 | return -ENOMEM; | ||
1630 | } | 1701 | } |
1631 | 1702 | ||
1632 | cell_prealloc = prealloc_get_cell(structs); | 1703 | memset(mg, 0, sizeof(*mg)); |
1633 | r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc, | 1704 | |
1634 | (cell_free_fn) prealloc_put_cell, | 1705 | mg->cache = cache; |
1635 | structs, &new_ocell); | 1706 | mg->overwrite_bio = bio; |
1636 | if (r > 0) | 1707 | mg->invalidate_cblock = cblock; |
1637 | return; | 1708 | mg->invalidate_oblock = oblock; |
1638 | 1709 | ||
1639 | discard(cache, structs, new_ocell); | 1710 | return invalidate_lock(mg); |
1640 | } | 1711 | } |
1641 | 1712 | ||
1642 | static bool spare_migration_bandwidth(struct cache *cache) | 1713 | /*---------------------------------------------------------------- |
1714 | * bio processing | ||
1715 | *--------------------------------------------------------------*/ | ||
1716 | |||
1717 | enum busy { | ||
1718 | IDLE, | ||
1719 | MODERATE, | ||
1720 | BUSY | ||
1721 | }; | ||
1722 | |||
1723 | static enum busy spare_migration_bandwidth(struct cache *cache) | ||
1643 | { | 1724 | { |
1725 | bool idle = iot_idle_for(&cache->origin_tracker, HZ); | ||
1644 | sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * | 1726 | sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * |
1645 | cache->sectors_per_block; | 1727 | cache->sectors_per_block; |
1646 | return current_volume < cache->migration_threshold; | 1728 | |
1729 | if (current_volume <= cache->migration_threshold) | ||
1730 | return idle ? IDLE : MODERATE; | ||
1731 | else | ||
1732 | return idle ? MODERATE : BUSY; | ||
1647 | } | 1733 | } |
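
The new heuristic combines two signals: whether the origin has been idle for the last second (iot_idle_for(..., HZ)) and whether the in-flight copy volume is still under migration_threshold (2048 sectors by default, per DEFAULT_MIGRATION_THRESHOLD below). With 128-sector (64 KiB) cache blocks and three migrations already in flight, current_volume is (3 + 1) * 128 = 512 sectors, so an idle origin reports IDLE and a busy one MODERATE. A stand-alone version of the arithmetic, with the block size and counts as example values only:

#include <stdbool.h>
#include <stdio.h>

enum busy { IDLE, MODERATE, BUSY };

/* Same decision as spare_migration_bandwidth(), with the inputs made explicit. */
static enum busy spare_bandwidth(bool origin_idle, unsigned nr_io_migrations,
				 unsigned sectors_per_block, unsigned threshold)
{
	unsigned current_volume = (nr_io_migrations + 1) * sectors_per_block;

	if (current_volume <= threshold)
		return origin_idle ? IDLE : MODERATE;
	else
		return origin_idle ? MODERATE : BUSY;
}

int main(void)
{
	/* 3 in-flight migrations, 64 KiB blocks, default 2048-sector threshold */
	printf("%d\n", spare_bandwidth(true, 3, 128, 2048));	/* 0 = IDLE */
	/* 20 in flight: (20 + 1) * 128 = 2688 > 2048, origin busy */
	printf("%d\n", spare_bandwidth(false, 20, 128, 2048));	/* 2 = BUSY */
	return 0;
}

check_migrations() below passes b == IDLE to policy_get_background_work(), letting the policy hold back non-urgent work while the origin is busy.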
1648 | 1734 | ||
1649 | static void inc_hit_counter(struct cache *cache, struct bio *bio) | 1735 | static void inc_hit_counter(struct cache *cache, struct bio *bio) |
@@ -1660,255 +1746,143 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) | |||
1660 | 1746 | ||
1661 | /*----------------------------------------------------------------*/ | 1747 | /*----------------------------------------------------------------*/ |
1662 | 1748 | ||
1663 | struct inc_detail { | 1749 | static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) |
1664 | struct cache *cache; | ||
1665 | struct bio_list bios_for_issue; | ||
1666 | struct bio_list unhandled_bios; | ||
1667 | bool any_writes; | ||
1668 | }; | ||
1669 | |||
1670 | static void inc_fn(void *context, struct dm_bio_prison_cell *cell) | ||
1671 | { | 1750 | { |
1672 | struct bio *bio; | 1751 | return (bio_data_dir(bio) == WRITE) && |
1673 | struct inc_detail *detail = context; | 1752 | (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); |
1674 | struct cache *cache = detail->cache; | ||
1675 | |||
1676 | inc_ds(cache, cell->holder, cell); | ||
1677 | if (bio_data_dir(cell->holder) == WRITE) | ||
1678 | detail->any_writes = true; | ||
1679 | |||
1680 | while ((bio = bio_list_pop(&cell->bios))) { | ||
1681 | if (discard_or_flush(bio)) { | ||
1682 | bio_list_add(&detail->unhandled_bios, bio); | ||
1683 | continue; | ||
1684 | } | ||
1685 | |||
1686 | if (bio_data_dir(bio) == WRITE) | ||
1687 | detail->any_writes = true; | ||
1688 | |||
1689 | bio_list_add(&detail->bios_for_issue, bio); | ||
1690 | inc_ds(cache, bio, cell); | ||
1691 | } | ||
1692 | } | 1753 | } |
1693 | 1754 | ||
1694 | // FIXME: refactor these two | 1755 | static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block) |
1695 | static void remap_cell_to_origin_clear_discard(struct cache *cache, | ||
1696 | struct dm_bio_prison_cell *cell, | ||
1697 | dm_oblock_t oblock, bool issue_holder) | ||
1698 | { | 1756 | { |
1699 | struct bio *bio; | 1757 | return writeback_mode(&cache->features) && |
1700 | unsigned long flags; | 1758 | (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio)); |
1701 | struct inc_detail detail; | ||
1702 | |||
1703 | detail.cache = cache; | ||
1704 | bio_list_init(&detail.bios_for_issue); | ||
1705 | bio_list_init(&detail.unhandled_bios); | ||
1706 | detail.any_writes = false; | ||
1707 | |||
1708 | spin_lock_irqsave(&cache->lock, flags); | ||
1709 | dm_cell_visit_release(cache->prison, inc_fn, &detail, cell); | ||
1710 | bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios); | ||
1711 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1712 | |||
1713 | remap_to_origin(cache, cell->holder); | ||
1714 | if (issue_holder) | ||
1715 | issue(cache, cell->holder); | ||
1716 | else | ||
1717 | accounted_begin(cache, cell->holder); | ||
1718 | |||
1719 | if (detail.any_writes) | ||
1720 | clear_discard(cache, oblock_to_dblock(cache, oblock)); | ||
1721 | |||
1722 | while ((bio = bio_list_pop(&detail.bios_for_issue))) { | ||
1723 | remap_to_origin(cache, bio); | ||
1724 | issue(cache, bio); | ||
1725 | } | ||
1726 | |||
1727 | free_prison_cell(cache, cell); | ||
1728 | } | 1759 | } |
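
optimisable_bio() spots promotions that do not need the origin-to-cache copy at all: in writeback mode, if the origin block is already discarded or the incoming WRITE covers the whole cache block, the data about to be written makes the copy pointless and the promotion can be serviced by an overwrite bio instead (see mg_copy() above). The size test is plain arithmetic; with 128-sector blocks only a bio of exactly 128 << 9 = 65536 bytes qualifies. A stand-alone sketch of the check, with the relevant fields passed in directly (struct fake_bio and the values are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define SECTOR_SHIFT	9
#define WRITE		1

/* Just the bio fields the check actually looks at. */
struct fake_bio {
	int data_dir;		/* READ (0) or WRITE (1) */
	unsigned int bi_size;	/* payload size in bytes */
};

static bool writes_complete_block(struct fake_bio *bio, unsigned sectors_per_block)
{
	return bio->data_dir == WRITE &&
	       bio->bi_size == (sectors_per_block << SECTOR_SHIFT);
}

static bool optimisable(struct fake_bio *bio, bool writeback_mode,
			bool oblock_discarded, unsigned sectors_per_block)
{
	return writeback_mode &&
	       (oblock_discarded || writes_complete_block(bio, sectors_per_block));
}

int main(void)
{
	struct fake_bio full = { WRITE, 128 << SECTOR_SHIFT };	/* 65536 bytes */
	struct fake_bio partial = { WRITE, 4096 };

	printf("%d\n", optimisable(&full, true, false, 128));	 /* 1: skip the copy */
	printf("%d\n", optimisable(&partial, true, false, 128)); /* 0: must copy first */
	printf("%d\n", optimisable(&partial, true, true, 128));	 /* 1: old data discarded */
	return 0;
}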
1729 | 1760 | ||
1730 | static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell, | 1761 | static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, |
1731 | dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder) | 1762 | bool *commit_needed) |
1732 | { | 1763 | { |
1733 | struct bio *bio; | 1764 | int r, data_dir; |
1734 | unsigned long flags; | 1765 | bool rb, background_queued; |
1735 | struct inc_detail detail; | 1766 | dm_cblock_t cblock; |
1736 | 1767 | size_t pb_data_size = get_per_bio_data_size(cache); | |
1737 | detail.cache = cache; | 1768 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); |
1738 | bio_list_init(&detail.bios_for_issue); | ||
1739 | bio_list_init(&detail.unhandled_bios); | ||
1740 | detail.any_writes = false; | ||
1741 | |||
1742 | spin_lock_irqsave(&cache->lock, flags); | ||
1743 | dm_cell_visit_release(cache->prison, inc_fn, &detail, cell); | ||
1744 | bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios); | ||
1745 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1746 | |||
1747 | remap_to_cache(cache, cell->holder, cblock); | ||
1748 | if (issue_holder) | ||
1749 | issue(cache, cell->holder); | ||
1750 | else | ||
1751 | accounted_begin(cache, cell->holder); | ||
1752 | 1769 | ||
1753 | if (detail.any_writes) { | 1770 | *commit_needed = false; |
1754 | set_dirty(cache, oblock, cblock); | ||
1755 | clear_discard(cache, oblock_to_dblock(cache, oblock)); | ||
1756 | } | ||
1757 | 1771 | ||
1758 | while ((bio = bio_list_pop(&detail.bios_for_issue))) { | 1772 | rb = bio_detain_shared(cache, block, bio); |
1759 | remap_to_cache(cache, bio, cblock); | 1773 | if (!rb) { |
1760 | issue(cache, bio); | 1774 | /* |
1775 | * An exclusive lock is held for this block, so we have to | ||
1776 | * wait. We set the commit_needed flag so the current | ||
1777 | * transaction will be committed asap, allowing this lock | ||
1778 | * to be dropped. | ||
1779 | */ | ||
1780 | *commit_needed = true; | ||
1781 | return DM_MAPIO_SUBMITTED; | ||
1761 | } | 1782 | } |
1762 | 1783 | ||
1763 | free_prison_cell(cache, cell); | 1784 | data_dir = bio_data_dir(bio); |
1764 | } | ||
1765 | 1785 | ||
1766 | /*----------------------------------------------------------------*/ | 1786 | if (optimisable_bio(cache, bio, block)) { |
1787 | struct policy_work *op = NULL; | ||
1767 | 1788 | ||
1768 | struct old_oblock_lock { | 1789 | r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op); |
1769 | struct policy_locker locker; | 1790 | if (unlikely(r && r != -ENOENT)) { |
1770 | struct cache *cache; | 1791 | DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d", |
1771 | struct prealloc *structs; | 1792 | cache_device_name(cache), r); |
1772 | struct dm_bio_prison_cell *cell; | 1793 | bio_io_error(bio); |
1773 | }; | 1794 | return DM_MAPIO_SUBMITTED; |
1774 | 1795 | } | |
1775 | static int null_locker(struct policy_locker *locker, dm_oblock_t b) | ||
1776 | { | ||
1777 | /* This should never be called */ | ||
1778 | BUG(); | ||
1779 | return 0; | ||
1780 | } | ||
1781 | 1796 | ||
1782 | static int cell_locker(struct policy_locker *locker, dm_oblock_t b) | 1797 | if (r == -ENOENT && op) { |
1783 | { | 1798 | bio_drop_shared_lock(cache, bio); |
1784 | struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker); | 1799 | BUG_ON(op->op != POLICY_PROMOTE); |
1785 | struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs); | 1800 | mg_start(cache, op, bio); |
1801 | return DM_MAPIO_SUBMITTED; | ||
1802 | } | ||
1803 | } else { | ||
1804 | r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued); | ||
1805 | if (unlikely(r && r != -ENOENT)) { | ||
1806 | DMERR_LIMIT("%s: policy_lookup() failed with r = %d", | ||
1807 | cache_device_name(cache), r); | ||
1808 | bio_io_error(bio); | ||
1809 | return DM_MAPIO_SUBMITTED; | ||
1810 | } | ||
1786 | 1811 | ||
1787 | return bio_detain(l->cache, b, NULL, cell_prealloc, | 1812 | if (background_queued) |
1788 | (cell_free_fn) prealloc_put_cell, | 1813 | wake_migration_worker(cache); |
1789 | l->structs, &l->cell); | 1814 | } |
1790 | } | ||
1791 | 1815 | ||
1792 | static void process_cell(struct cache *cache, struct prealloc *structs, | 1816 | if (r == -ENOENT) { |
1793 | struct dm_bio_prison_cell *new_ocell) | 1817 | /* |
1794 | { | 1818 | * Miss. |
1795 | int r; | 1819 | */ |
1796 | bool release_cell = true; | 1820 | inc_miss_counter(cache, bio); |
1797 | struct bio *bio = new_ocell->holder; | 1821 | if (pb->req_nr == 0) { |
1798 | dm_oblock_t block = get_bio_block(cache, bio); | 1822 | accounted_begin(cache, bio); |
1799 | struct policy_result lookup_result; | 1823 | remap_to_origin_clear_discard(cache, bio, block); |
1800 | bool passthrough = passthrough_mode(&cache->features); | ||
1801 | bool fast_promotion, can_migrate; | ||
1802 | struct old_oblock_lock ool; | ||
1803 | |||
1804 | fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio); | ||
1805 | can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache)); | ||
1806 | |||
1807 | ool.locker.fn = cell_locker; | ||
1808 | ool.cache = cache; | ||
1809 | ool.structs = structs; | ||
1810 | ool.cell = NULL; | ||
1811 | r = policy_map(cache->policy, block, true, can_migrate, fast_promotion, | ||
1812 | bio, &ool.locker, &lookup_result); | ||
1813 | |||
1814 | if (r == -EWOULDBLOCK) | ||
1815 | /* migration has been denied */ | ||
1816 | lookup_result.op = POLICY_MISS; | ||
1817 | |||
1818 | switch (lookup_result.op) { | ||
1819 | case POLICY_HIT: | ||
1820 | if (passthrough) { | ||
1821 | inc_miss_counter(cache, bio); | ||
1822 | 1824 | ||
1825 | } else { | ||
1823 | /* | 1826 | /* |
1824 | * Passthrough always maps to the origin, | 1827 | * This is a duplicate writethrough io that is no |
1825 | * invalidating any cache blocks that are written | 1828 | * longer needed because the block has been demoted. |
1826 | * to. | ||
1827 | */ | 1829 | */ |
1830 | bio_endio(bio); | ||
1831 | return DM_MAPIO_SUBMITTED; | ||
1832 | } | ||
1833 | } else { | ||
1834 | /* | ||
1835 | * Hit. | ||
1836 | */ | ||
1837 | inc_hit_counter(cache, bio); | ||
1828 | 1838 | ||
1839 | /* | ||
1840 | * Passthrough always maps to the origin, invalidating any | ||
1841 | * cache blocks that are written to. | ||
1842 | */ | ||
1843 | if (passthrough_mode(&cache->features)) { | ||
1829 | if (bio_data_dir(bio) == WRITE) { | 1844 | if (bio_data_dir(bio) == WRITE) { |
1845 | bio_drop_shared_lock(cache, bio); | ||
1830 | atomic_inc(&cache->stats.demotion); | 1846 | atomic_inc(&cache->stats.demotion); |
1831 | invalidate(cache, structs, block, lookup_result.cblock, new_ocell); | 1847 | invalidate_start(cache, cblock, block, bio); |
1832 | release_cell = false; | 1848 | } else |
1833 | |||
1834 | } else { | ||
1835 | /* FIXME: factor out issue_origin() */ | ||
1836 | remap_to_origin_clear_discard(cache, bio, block); | 1849 | remap_to_origin_clear_discard(cache, bio, block); |
1837 | inc_and_issue(cache, bio, new_ocell); | 1850 | |
1838 | } | ||
1839 | } else { | 1851 | } else { |
1840 | inc_hit_counter(cache, bio); | 1852 | if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && |
1841 | 1853 | !is_dirty(cache, cblock)) { | |
1842 | if (bio_data_dir(bio) == WRITE && | 1854 | remap_to_origin_then_cache(cache, bio, block, cblock); |
1843 | writethrough_mode(&cache->features) && | 1855 | accounted_begin(cache, bio); |
1844 | !is_dirty(cache, lookup_result.cblock)) { | 1856 | } else |
1845 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | 1857 | remap_to_cache_dirty(cache, bio, block, cblock); |
1846 | inc_and_issue(cache, bio, new_ocell); | ||
1847 | |||
1848 | } else { | ||
1849 | remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true); | ||
1850 | release_cell = false; | ||
1851 | } | ||
1852 | } | 1858 | } |
1853 | |||
1854 | break; | ||
1855 | |||
1856 | case POLICY_MISS: | ||
1857 | inc_miss_counter(cache, bio); | ||
1858 | remap_cell_to_origin_clear_discard(cache, new_ocell, block, true); | ||
1859 | release_cell = false; | ||
1860 | break; | ||
1861 | |||
1862 | case POLICY_NEW: | ||
1863 | atomic_inc(&cache->stats.promotion); | ||
1864 | promote(cache, structs, block, lookup_result.cblock, new_ocell); | ||
1865 | release_cell = false; | ||
1866 | break; | ||
1867 | |||
1868 | case POLICY_REPLACE: | ||
1869 | atomic_inc(&cache->stats.demotion); | ||
1870 | atomic_inc(&cache->stats.promotion); | ||
1871 | demote_then_promote(cache, structs, lookup_result.old_oblock, | ||
1872 | block, lookup_result.cblock, | ||
1873 | ool.cell, new_ocell); | ||
1874 | release_cell = false; | ||
1875 | break; | ||
1876 | |||
1877 | default: | ||
1878 | DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u", | ||
1879 | cache_device_name(cache), __func__, | ||
1880 | (unsigned) lookup_result.op); | ||
1881 | bio_io_error(bio); | ||
1882 | } | 1859 | } |
1883 | 1860 | ||
1884 | if (release_cell) | ||
1885 | cell_defer(cache, new_ocell, false); | ||
1886 | } | ||
1887 | |||
1888 | static void process_bio(struct cache *cache, struct prealloc *structs, | ||
1889 | struct bio *bio) | ||
1890 | { | ||
1891 | int r; | ||
1892 | dm_oblock_t block = get_bio_block(cache, bio); | ||
1893 | struct dm_bio_prison_cell *cell_prealloc, *new_ocell; | ||
1894 | |||
1895 | /* | 1861 | /* |
1896 | * Check to see if that block is currently migrating. | 1862 | * dm core turns FUA requests into a separate payload and FLUSH req. |
1897 | */ | 1863 | */ |
1898 | cell_prealloc = prealloc_get_cell(structs); | 1864 | if (bio->bi_opf & REQ_FUA) { |
1899 | r = bio_detain(cache, block, bio, cell_prealloc, | 1865 | /* |
1900 | (cell_free_fn) prealloc_put_cell, | 1866 | * issue_after_commit will call accounted_begin a second time. So |
1901 | structs, &new_ocell); | 1867 | * we call accounted_complete() to avoid double accounting. |
1902 | if (r > 0) | 1868 | */ |
1903 | return; | 1869 | accounted_complete(cache, bio); |
1870 | issue_after_commit(&cache->committer, bio); | ||
1871 | *commit_needed = true; | ||
1872 | return DM_MAPIO_SUBMITTED; | ||
1873 | } | ||
1904 | 1874 | ||
1905 | process_cell(cache, structs, new_ocell); | 1875 | return DM_MAPIO_REMAPPED; |
1906 | } | 1876 | } |
1907 | 1877 | ||
1908 | static int need_commit_due_to_time(struct cache *cache) | 1878 | static bool process_bio(struct cache *cache, struct bio *bio) |
1909 | { | 1879 | { |
1910 | return jiffies < cache->last_commit_jiffies || | 1880 | bool commit_needed; |
1911 | jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; | 1881 | |
1882 | if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED) | ||
1883 | generic_make_request(bio); | ||
1884 | |||
1885 | return commit_needed; | ||
1912 | } | 1886 | } |
1913 | 1887 | ||
1914 | /* | 1888 | /* |
@@ -1929,123 +1903,88 @@ static int commit(struct cache *cache, bool clean_shutdown) | |||
1929 | return r; | 1903 | return r; |
1930 | } | 1904 | } |
1931 | 1905 | ||
1932 | static int commit_if_needed(struct cache *cache) | 1906 | /* |
1907 | * Used by the batcher. | ||
1908 | */ | ||
1909 | static int commit_op(void *context) | ||
1933 | { | 1910 | { |
1934 | int r = 0; | 1911 | struct cache *cache = context; |
1935 | 1912 | ||
1936 | if ((cache->commit_requested || need_commit_due_to_time(cache)) && | 1913 | if (dm_cache_changed_this_transaction(cache->cmd)) |
1937 | dm_cache_changed_this_transaction(cache->cmd)) { | 1914 | return commit(cache, false); |
1938 | r = commit(cache, false); | ||
1939 | cache->commit_requested = false; | ||
1940 | cache->last_commit_jiffies = jiffies; | ||
1941 | } | ||
1942 | 1915 | ||
1943 | return r; | 1916 | return 0; |
1944 | } | 1917 | } |
1945 | 1918 | ||
1946 | static void process_deferred_bios(struct cache *cache) | 1919 | /*----------------------------------------------------------------*/ |
1947 | { | ||
1948 | bool prealloc_used = false; | ||
1949 | unsigned long flags; | ||
1950 | struct bio_list bios; | ||
1951 | struct bio *bio; | ||
1952 | struct prealloc structs; | ||
1953 | |||
1954 | memset(&structs, 0, sizeof(structs)); | ||
1955 | bio_list_init(&bios); | ||
1956 | |||
1957 | spin_lock_irqsave(&cache->lock, flags); | ||
1958 | bio_list_merge(&bios, &cache->deferred_bios); | ||
1959 | bio_list_init(&cache->deferred_bios); | ||
1960 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1961 | |||
1962 | while (!bio_list_empty(&bios)) { | ||
1963 | /* | ||
1964 | * If we've got no free migration structs, and processing | ||
1965 | * this bio might require one, we pause until there are some | ||
1966 | * prepared mappings to process. | ||
1967 | */ | ||
1968 | prealloc_used = true; | ||
1969 | if (prealloc_data_structs(cache, &structs)) { | ||
1970 | spin_lock_irqsave(&cache->lock, flags); | ||
1971 | bio_list_merge(&cache->deferred_bios, &bios); | ||
1972 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1973 | break; | ||
1974 | } | ||
1975 | 1920 | ||
1976 | bio = bio_list_pop(&bios); | 1921 | static bool process_flush_bio(struct cache *cache, struct bio *bio) |
1922 | { | ||
1923 | size_t pb_data_size = get_per_bio_data_size(cache); | ||
1924 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
1977 | 1925 | ||
1978 | if (bio->bi_opf & REQ_PREFLUSH) | 1926 | if (!pb->req_nr) |
1979 | process_flush_bio(cache, bio); | 1927 | remap_to_origin(cache, bio); |
1980 | else if (bio_op(bio) == REQ_OP_DISCARD) | 1928 | else |
1981 | process_discard_bio(cache, &structs, bio); | 1929 | remap_to_cache(cache, bio, 0); |
1982 | else | ||
1983 | process_bio(cache, &structs, bio); | ||
1984 | } | ||
1985 | 1930 | ||
1986 | if (prealloc_used) | 1931 | issue_after_commit(&cache->committer, bio); |
1987 | prealloc_free_structs(cache, &structs); | 1932 | return true; |
1988 | } | 1933 | } |
1989 | 1934 | ||
1990 | static void process_deferred_cells(struct cache *cache) | 1935 | static bool process_discard_bio(struct cache *cache, struct bio *bio) |
1991 | { | 1936 | { |
1992 | bool prealloc_used = false; | 1937 | dm_dblock_t b, e; |
1993 | unsigned long flags; | ||
1994 | struct dm_bio_prison_cell *cell, *tmp; | ||
1995 | struct list_head cells; | ||
1996 | struct prealloc structs; | ||
1997 | |||
1998 | memset(&structs, 0, sizeof(structs)); | ||
1999 | |||
2000 | INIT_LIST_HEAD(&cells); | ||
2001 | |||
2002 | spin_lock_irqsave(&cache->lock, flags); | ||
2003 | list_splice_init(&cache->deferred_cells, &cells); | ||
2004 | spin_unlock_irqrestore(&cache->lock, flags); | ||
2005 | |||
2006 | list_for_each_entry_safe(cell, tmp, &cells, user_list) { | ||
2007 | /* | ||
2008 | * If we've got no free migration structs, and processing | ||
2009 | * this bio might require one, we pause until there are some | ||
2010 | * prepared mappings to process. | ||
2011 | */ | ||
2012 | prealloc_used = true; | ||
2013 | if (prealloc_data_structs(cache, &structs)) { | ||
2014 | spin_lock_irqsave(&cache->lock, flags); | ||
2015 | list_splice(&cells, &cache->deferred_cells); | ||
2016 | spin_unlock_irqrestore(&cache->lock, flags); | ||
2017 | break; | ||
2018 | } | ||
2019 | 1938 | ||
2020 | process_cell(cache, &structs, cell); | 1939 | // FIXME: do we need to lock the region? Or can we just assume the |
1940 | // user won't be so foolish as to issue discard concurrently with | ||
1941 | // other IO? | ||
1942 | calc_discard_block_range(cache, bio, &b, &e); | ||
1943 | while (b != e) { | ||
1944 | set_discard(cache, b); | ||
1945 | b = to_dblock(from_dblock(b) + 1); | ||
2021 | } | 1946 | } |
2022 | 1947 | ||
2023 | if (prealloc_used) | 1948 | bio_endio(bio); |
2024 | prealloc_free_structs(cache, &structs); | 1949 | |
1950 | return false; | ||
2025 | } | 1951 | } |
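
The discard path only records state: calc_discard_block_range() rounds the bio's start sector up and its end sector down to discard-block boundaries, so only discard blocks completely covered by the bio are marked; a block that is only partly discarded still holds live data, so it is left alone. For example, with 128-sector discard blocks a discard of sectors 100 through 599 marks blocks 1, 2 and 3. A small stand-alone version of that rounding, with plain integer division standing in for dm_sector_div_up()/block_div():

#include <stdio.h>

typedef unsigned long long sector_t;

static sector_t div_up(sector_t n, sector_t d)
{
	return (n + d - 1) / d;
}

/* Mirrors calc_discard_block_range(): mark only whole discard blocks. */
static void discard_range(sector_t start, sector_t end, sector_t block_size)
{
	sector_t b = div_up(start, block_size);
	sector_t e = (end - start < block_size) ? b : end / block_size;

	printf("sectors [%llu, %llu) -> discard blocks [%llu, %llu)\n",
	       start, end, b, e);
}

int main(void)
{
	discard_range(100, 600, 128);	/* blocks 1, 2, 3 are marked */
	discard_range(130, 200, 128);	/* shorter than a block: nothing marked */
	return 0;
}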
2026 | 1952 | ||
2027 | static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) | 1953 | static void process_deferred_bios(struct work_struct *ws) |
2028 | { | 1954 | { |
1955 | struct cache *cache = container_of(ws, struct cache, deferred_bio_worker); | ||
1956 | |||
2029 | unsigned long flags; | 1957 | unsigned long flags; |
1958 | bool commit_needed = false; | ||
2030 | struct bio_list bios; | 1959 | struct bio_list bios; |
2031 | struct bio *bio; | 1960 | struct bio *bio; |
2032 | 1961 | ||
2033 | bio_list_init(&bios); | 1962 | bio_list_init(&bios); |
2034 | 1963 | ||
2035 | spin_lock_irqsave(&cache->lock, flags); | 1964 | spin_lock_irqsave(&cache->lock, flags); |
2036 | bio_list_merge(&bios, &cache->deferred_flush_bios); | 1965 | bio_list_merge(&bios, &cache->deferred_bios); |
2037 | bio_list_init(&cache->deferred_flush_bios); | 1966 | bio_list_init(&cache->deferred_bios); |
2038 | spin_unlock_irqrestore(&cache->lock, flags); | 1967 | spin_unlock_irqrestore(&cache->lock, flags); |
2039 | 1968 | ||
2040 | /* | 1969 | while ((bio = bio_list_pop(&bios))) { |
2041 | * These bios have already been through inc_ds() | 1970 | if (bio->bi_opf & REQ_PREFLUSH) |
2042 | */ | 1971 | commit_needed = process_flush_bio(cache, bio) || commit_needed; |
2043 | while ((bio = bio_list_pop(&bios))) | 1972 | |
2044 | submit_bios ? accounted_request(cache, bio) : bio_io_error(bio); | 1973 | else if (bio_op(bio) == REQ_OP_DISCARD) |
1974 | commit_needed = process_discard_bio(cache, bio) || commit_needed; | ||
1975 | |||
1976 | else | ||
1977 | commit_needed = process_bio(cache, bio) || commit_needed; | ||
1978 | } | ||
1979 | |||
1980 | if (commit_needed) | ||
1981 | schedule_commit(&cache->committer); | ||
2045 | } | 1982 | } |
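
process_deferred_bios() follows the usual drain-then-process shape: the shared list is spliced out while cache->lock is held, the lock is dropped, the bios are handled without it, and a single commit is scheduled at the end if any of them needs one. A user-space sketch of that pattern with a pthread mutex and a trivial list, illustrative only:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	int id;
	struct item *next;
};

struct worker {
	pthread_mutex_t lock;
	struct item *deferred;		/* producers add here under 'lock' */
};

/* The fast path just queues the item and returns. */
static void defer_item(struct worker *w, struct item *it)
{
	pthread_mutex_lock(&w->lock);
	it->next = w->deferred;
	w->deferred = it;
	pthread_mutex_unlock(&w->lock);
}

/* Splice the whole list out under the lock, then do the slow processing with
 * the lock dropped, like process_deferred_bios() does with cache->lock and
 * cache->deferred_bios. */
static void process_deferred(struct worker *w)
{
	struct item *list;
	bool commit_needed = false;

	pthread_mutex_lock(&w->lock);
	list = w->deferred;
	w->deferred = NULL;
	pthread_mutex_unlock(&w->lock);

	while (list) {
		struct item *next = list->next;

		printf("processing item %d\n", list->id);
		commit_needed = true;	/* pretend each item needs a commit */
		list = next;
	}

	if (commit_needed)
		printf("schedule_commit()\n");
}

int main(void)
{
	struct worker w = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct item a = { 1, NULL }, b = { 2, NULL };

	defer_item(&w, &a);
	defer_item(&w, &b);
	process_deferred(&w);
	return 0;
}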
2046 | 1983 | ||
2047 | static void process_deferred_writethrough_bios(struct cache *cache) | 1984 | static void process_deferred_writethrough_bios(struct work_struct *ws) |
2048 | { | 1985 | { |
1986 | struct cache *cache = container_of(ws, struct cache, deferred_writethrough_worker); | ||
1987 | |||
2049 | unsigned long flags; | 1988 | unsigned long flags; |
2050 | struct bio_list bios; | 1989 | struct bio_list bios; |
2051 | struct bio *bio; | 1990 | struct bio *bio; |
@@ -2058,153 +1997,15 @@ static void process_deferred_writethrough_bios(struct cache *cache) | |||
2058 | spin_unlock_irqrestore(&cache->lock, flags); | 1997 | spin_unlock_irqrestore(&cache->lock, flags); |
2059 | 1998 | ||
2060 | /* | 1999 | /* |
2061 | * These bios have already been through inc_ds() | 2000 | * These bios have already been through accounted_begin() |
2062 | */ | 2001 | */ |
2063 | while ((bio = bio_list_pop(&bios))) | 2002 | while ((bio = bio_list_pop(&bios))) |
2064 | accounted_request(cache, bio); | 2003 | generic_make_request(bio); |
2065 | } | ||
2066 | |||
2067 | static void writeback_some_dirty_blocks(struct cache *cache) | ||
2068 | { | ||
2069 | bool prealloc_used = false; | ||
2070 | dm_oblock_t oblock; | ||
2071 | dm_cblock_t cblock; | ||
2072 | struct prealloc structs; | ||
2073 | struct dm_bio_prison_cell *old_ocell; | ||
2074 | bool busy = !iot_idle_for(&cache->origin_tracker, HZ); | ||
2075 | |||
2076 | memset(&structs, 0, sizeof(structs)); | ||
2077 | |||
2078 | while (spare_migration_bandwidth(cache)) { | ||
2079 | if (policy_writeback_work(cache->policy, &oblock, &cblock, busy)) | ||
2080 | break; /* no work to do */ | ||
2081 | |||
2082 | prealloc_used = true; | ||
2083 | if (prealloc_data_structs(cache, &structs) || | ||
2084 | get_cell(cache, oblock, &structs, &old_ocell)) { | ||
2085 | policy_set_dirty(cache->policy, oblock); | ||
2086 | break; | ||
2087 | } | ||
2088 | |||
2089 | writeback(cache, &structs, oblock, cblock, old_ocell); | ||
2090 | } | ||
2091 | |||
2092 | if (prealloc_used) | ||
2093 | prealloc_free_structs(cache, &structs); | ||
2094 | } | ||
2095 | |||
2096 | /*---------------------------------------------------------------- | ||
2097 | * Invalidations. | ||
2098 | * Dropping something from the cache *without* writing back. | ||
2099 | *--------------------------------------------------------------*/ | ||
2100 | |||
2101 | static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) | ||
2102 | { | ||
2103 | int r = 0; | ||
2104 | uint64_t begin = from_cblock(req->cblocks->begin); | ||
2105 | uint64_t end = from_cblock(req->cblocks->end); | ||
2106 | |||
2107 | while (begin != end) { | ||
2108 | r = policy_remove_cblock(cache->policy, to_cblock(begin)); | ||
2109 | if (!r) { | ||
2110 | r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); | ||
2111 | if (r) { | ||
2112 | metadata_operation_failed(cache, "dm_cache_remove_mapping", r); | ||
2113 | break; | ||
2114 | } | ||
2115 | |||
2116 | } else if (r == -ENODATA) { | ||
2117 | /* harmless, already unmapped */ | ||
2118 | r = 0; | ||
2119 | |||
2120 | } else { | ||
2121 | DMERR("%s: policy_remove_cblock failed", cache_device_name(cache)); | ||
2122 | break; | ||
2123 | } | ||
2124 | |||
2125 | begin++; | ||
2126 | } | ||
2127 | |||
2128 | cache->commit_requested = true; | ||
2129 | |||
2130 | req->err = r; | ||
2131 | atomic_set(&req->complete, 1); | ||
2132 | |||
2133 | wake_up(&req->result_wait); | ||
2134 | } | ||
2135 | |||
2136 | static void process_invalidation_requests(struct cache *cache) | ||
2137 | { | ||
2138 | struct list_head list; | ||
2139 | struct invalidation_request *req, *tmp; | ||
2140 | |||
2141 | INIT_LIST_HEAD(&list); | ||
2142 | spin_lock(&cache->invalidation_lock); | ||
2143 | list_splice_init(&cache->invalidation_requests, &list); | ||
2144 | spin_unlock(&cache->invalidation_lock); | ||
2145 | |||
2146 | list_for_each_entry_safe (req, tmp, &list, list) | ||
2147 | process_invalidation_request(cache, req); | ||
2148 | } | 2004 | } |
2149 | 2005 | ||
2150 | /*---------------------------------------------------------------- | 2006 | /*---------------------------------------------------------------- |
2151 | * Main worker loop | 2007 | * Main worker loop |
2152 | *--------------------------------------------------------------*/ | 2008 | *--------------------------------------------------------------*/ |
2153 | static bool is_quiescing(struct cache *cache) | ||
2154 | { | ||
2155 | return atomic_read(&cache->quiescing); | ||
2156 | } | ||
2157 | |||
2158 | static void ack_quiescing(struct cache *cache) | ||
2159 | { | ||
2160 | if (is_quiescing(cache)) { | ||
2161 | atomic_inc(&cache->quiescing_ack); | ||
2162 | wake_up(&cache->quiescing_wait); | ||
2163 | } | ||
2164 | } | ||
2165 | |||
2166 | static void wait_for_quiescing_ack(struct cache *cache) | ||
2167 | { | ||
2168 | wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); | ||
2169 | } | ||
2170 | |||
2171 | static void start_quiescing(struct cache *cache) | ||
2172 | { | ||
2173 | atomic_inc(&cache->quiescing); | ||
2174 | wait_for_quiescing_ack(cache); | ||
2175 | } | ||
2176 | |||
2177 | static void stop_quiescing(struct cache *cache) | ||
2178 | { | ||
2179 | atomic_set(&cache->quiescing, 0); | ||
2180 | atomic_set(&cache->quiescing_ack, 0); | ||
2181 | } | ||
2182 | |||
2183 | static void wait_for_migrations(struct cache *cache) | ||
2184 | { | ||
2185 | wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations)); | ||
2186 | } | ||
2187 | |||
2188 | static void stop_worker(struct cache *cache) | ||
2189 | { | ||
2190 | cancel_delayed_work(&cache->waker); | ||
2191 | flush_workqueue(cache->wq); | ||
2192 | } | ||
2193 | |||
2194 | static void requeue_deferred_cells(struct cache *cache) | ||
2195 | { | ||
2196 | unsigned long flags; | ||
2197 | struct list_head cells; | ||
2198 | struct dm_bio_prison_cell *cell, *tmp; | ||
2199 | |||
2200 | INIT_LIST_HEAD(&cells); | ||
2201 | spin_lock_irqsave(&cache->lock, flags); | ||
2202 | list_splice_init(&cache->deferred_cells, &cells); | ||
2203 | spin_unlock_irqrestore(&cache->lock, flags); | ||
2204 | |||
2205 | list_for_each_entry_safe(cell, tmp, &cells, user_list) | ||
2206 | cell_requeue(cache, cell); | ||
2207 | } | ||
2208 | 2009 | ||
2209 | static void requeue_deferred_bios(struct cache *cache) | 2010 | static void requeue_deferred_bios(struct cache *cache) |
2210 | { | 2011 | { |
@@ -2221,53 +2022,6 @@ static void requeue_deferred_bios(struct cache *cache) | |||
2221 | } | 2022 | } |
2222 | } | 2023 | } |
2223 | 2024 | ||
2224 | static int more_work(struct cache *cache) | ||
2225 | { | ||
2226 | if (is_quiescing(cache)) | ||
2227 | return !list_empty(&cache->quiesced_migrations) || | ||
2228 | !list_empty(&cache->completed_migrations) || | ||
2229 | !list_empty(&cache->need_commit_migrations); | ||
2230 | else | ||
2231 | return !bio_list_empty(&cache->deferred_bios) || | ||
2232 | !list_empty(&cache->deferred_cells) || | ||
2233 | !bio_list_empty(&cache->deferred_flush_bios) || | ||
2234 | !bio_list_empty(&cache->deferred_writethrough_bios) || | ||
2235 | !list_empty(&cache->quiesced_migrations) || | ||
2236 | !list_empty(&cache->completed_migrations) || | ||
2237 | !list_empty(&cache->need_commit_migrations) || | ||
2238 | cache->invalidate; | ||
2239 | } | ||
2240 | |||
2241 | static void do_worker(struct work_struct *ws) | ||
2242 | { | ||
2243 | struct cache *cache = container_of(ws, struct cache, worker); | ||
2244 | |||
2245 | do { | ||
2246 | if (!is_quiescing(cache)) { | ||
2247 | writeback_some_dirty_blocks(cache); | ||
2248 | process_deferred_writethrough_bios(cache); | ||
2249 | process_deferred_bios(cache); | ||
2250 | process_deferred_cells(cache); | ||
2251 | process_invalidation_requests(cache); | ||
2252 | } | ||
2253 | |||
2254 | process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard); | ||
2255 | process_migrations(cache, &cache->completed_migrations, complete_migration); | ||
2256 | |||
2257 | if (commit_if_needed(cache)) { | ||
2258 | process_deferred_flush_bios(cache, false); | ||
2259 | process_migrations(cache, &cache->need_commit_migrations, migration_failure); | ||
2260 | } else { | ||
2261 | process_deferred_flush_bios(cache, true); | ||
2262 | process_migrations(cache, &cache->need_commit_migrations, | ||
2263 | migration_success_post_commit); | ||
2264 | } | ||
2265 | |||
2266 | ack_quiescing(cache); | ||
2267 | |||
2268 | } while (more_work(cache)); | ||
2269 | } | ||
2270 | |||
2271 | /* | 2025 | /* |
2272 | * We want to commit periodically so that not too much | 2026 | * We want to commit periodically so that not too much |
2273 | * unwritten metadata builds up. | 2027 | * unwritten metadata builds up. |
@@ -2275,25 +2029,39 @@ static void do_worker(struct work_struct *ws) | |||
2275 | static void do_waker(struct work_struct *ws) | 2029 | static void do_waker(struct work_struct *ws) |
2276 | { | 2030 | { |
2277 | struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); | 2031 | struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); |
2032 | |||
2278 | policy_tick(cache->policy, true); | 2033 | policy_tick(cache->policy, true); |
2279 | wake_worker(cache); | 2034 | wake_migration_worker(cache); |
2035 | schedule_commit(&cache->committer); | ||
2280 | queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); | 2036 | queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); |
2281 | } | 2037 | } |
2282 | 2038 | ||
2283 | /*----------------------------------------------------------------*/ | 2039 | static void check_migrations(struct work_struct *ws) |
2284 | |||
2285 | static int is_congested(struct dm_dev *dev, int bdi_bits) | ||
2286 | { | 2040 | { |
2287 | struct request_queue *q = bdev_get_queue(dev->bdev); | 2041 | int r; |
2288 | return bdi_congested(q->backing_dev_info, bdi_bits); | 2042 | struct policy_work *op; |
2289 | } | 2043 | struct cache *cache = container_of(ws, struct cache, migration_worker); |
2044 | enum busy b; | ||
2290 | 2045 | ||
2291 | static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) | 2046 | for (;;) { |
2292 | { | 2047 | b = spare_migration_bandwidth(cache); |
2293 | struct cache *cache = container_of(cb, struct cache, callbacks); | 2048 | if (b == BUSY) |
2049 | break; | ||
2294 | 2050 | ||
2295 | return is_congested(cache->origin_dev, bdi_bits) || | 2051 | r = policy_get_background_work(cache->policy, b == IDLE, &op); |
2296 | is_congested(cache->cache_dev, bdi_bits); | 2052 | if (r == -ENODATA) |
2053 | break; | ||
2054 | |||
2055 | if (r) { | ||
2056 | DMERR_LIMIT("%s: policy_background_work failed", | ||
2057 | cache_device_name(cache)); | ||
2058 | break; | ||
2059 | } | ||
2060 | |||
2061 | r = mg_start(cache, op, NULL); | ||
2062 | if (r) | ||
2063 | break; | ||
2064 | } | ||
2297 | } | 2065 | } |
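check_migrations(), added above, is the new background engine: it repeatedly asks the policy for work and starts a migration for each item, stopping as soon as the device looks busy, the policy runs dry (-ENODATA), or anything fails. The userspace sketch below mimics only that control flow; the stubbed policy, bandwidth check and migration starter are hypothetical stand-ins, not the dm-cache interfaces.

#include <stdio.h>
#include <errno.h>

enum busy { IDLE, MODERATELY_BUSY, BUSY };

struct work_item { int id; };

/* Stub policy: hands out three work items, then reports "nothing to do". */
static int get_background_work(int idle, struct work_item **op)
{
	static struct work_item items[3] = { {0}, {1}, {2} };
	static int next;

	(void)idle;
	if (next >= 3)
		return -ENODATA;
	*op = &items[next++];
	return 0;
}

static enum busy spare_bandwidth(void) { return IDLE; }

static int start_migration(struct work_item *op)
{
	printf("starting migration for item %d\n", op->id);
	return 0;
}

/* Same shape as check_migrations(): pull work until busy, empty or error. */
static void drain_background_work(void)
{
	struct work_item *op;
	int r;

	for (;;) {
		enum busy b = spare_bandwidth();
		if (b == BUSY)
			break;			/* no spare IO bandwidth right now */

		r = get_background_work(b == IDLE, &op);
		if (r == -ENODATA)
			break;			/* policy has no more work */
		if (r)
			break;			/* unexpected policy error */

		if (start_migration(op))
			break;			/* could not start this migration */
	}
}

int main(void)
{
	drain_background_work();
	return 0;
}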
2298 | 2066 | ||
2299 | /*---------------------------------------------------------------- | 2067 | /*---------------------------------------------------------------- |
@@ -2310,11 +2078,8 @@ static void destroy(struct cache *cache) | |||
2310 | 2078 | ||
2311 | mempool_destroy(cache->migration_pool); | 2079 | mempool_destroy(cache->migration_pool); |
2312 | 2080 | ||
2313 | if (cache->all_io_ds) | ||
2314 | dm_deferred_set_destroy(cache->all_io_ds); | ||
2315 | |||
2316 | if (cache->prison) | 2081 | if (cache->prison) |
2317 | dm_bio_prison_destroy(cache->prison); | 2082 | dm_bio_prison_destroy_v2(cache->prison); |
2318 | 2083 | ||
2319 | if (cache->wq) | 2084 | if (cache->wq) |
2320 | destroy_workqueue(cache->wq); | 2085 | destroy_workqueue(cache->wq); |
@@ -2707,6 +2472,7 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, | |||
2707 | return PTR_ERR(p); | 2472 | return PTR_ERR(p); |
2708 | } | 2473 | } |
2709 | cache->policy = p; | 2474 | cache->policy = p; |
2475 | BUG_ON(!cache->policy); | ||
2710 | 2476 | ||
2711 | return 0; | 2477 | return 0; |
2712 | } | 2478 | } |
@@ -2750,6 +2516,20 @@ static void set_cache_size(struct cache *cache, dm_cblock_t size) | |||
2750 | cache->cache_size = size; | 2516 | cache->cache_size = size; |
2751 | } | 2517 | } |
2752 | 2518 | ||
2519 | static int is_congested(struct dm_dev *dev, int bdi_bits) | ||
2520 | { | ||
2521 | struct request_queue *q = bdev_get_queue(dev->bdev); | ||
2522 | return bdi_congested(q->backing_dev_info, bdi_bits); | ||
2523 | } | ||
2524 | |||
2525 | static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) | ||
2526 | { | ||
2527 | struct cache *cache = container_of(cb, struct cache, callbacks); | ||
2528 | |||
2529 | return is_congested(cache->origin_dev, bdi_bits) || | ||
2530 | is_congested(cache->cache_dev, bdi_bits); | ||
2531 | } | ||
2532 | |||
2753 | #define DEFAULT_MIGRATION_THRESHOLD 2048 | 2533 | #define DEFAULT_MIGRATION_THRESHOLD 2048 |
2754 | 2534 | ||
2755 | static int cache_create(struct cache_args *ca, struct cache **result) | 2535 | static int cache_create(struct cache_args *ca, struct cache **result) |
@@ -2787,7 +2567,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2787 | 2567 | ||
2788 | ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; | 2568 | ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; |
2789 | 2569 | ||
2790 | /* FIXME: factor out this whole section */ | ||
2791 | origin_blocks = cache->origin_sectors = ca->origin_sectors; | 2570 | origin_blocks = cache->origin_sectors = ca->origin_sectors; |
2792 | origin_blocks = block_div(origin_blocks, ca->block_size); | 2571 | origin_blocks = block_div(origin_blocks, ca->block_size); |
2793 | cache->origin_blocks = to_oblock(origin_blocks); | 2572 | cache->origin_blocks = to_oblock(origin_blocks); |
@@ -2853,24 +2632,18 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2853 | r = -EINVAL; | 2632 | r = -EINVAL; |
2854 | goto bad; | 2633 | goto bad; |
2855 | } | 2634 | } |
2635 | |||
2636 | policy_allow_migrations(cache->policy, false); | ||
2856 | } | 2637 | } |
2857 | 2638 | ||
2858 | spin_lock_init(&cache->lock); | 2639 | spin_lock_init(&cache->lock); |
2859 | INIT_LIST_HEAD(&cache->deferred_cells); | 2640 | INIT_LIST_HEAD(&cache->deferred_cells); |
2860 | bio_list_init(&cache->deferred_bios); | 2641 | bio_list_init(&cache->deferred_bios); |
2861 | bio_list_init(&cache->deferred_flush_bios); | ||
2862 | bio_list_init(&cache->deferred_writethrough_bios); | 2642 | bio_list_init(&cache->deferred_writethrough_bios); |
2863 | INIT_LIST_HEAD(&cache->quiesced_migrations); | ||
2864 | INIT_LIST_HEAD(&cache->completed_migrations); | ||
2865 | INIT_LIST_HEAD(&cache->need_commit_migrations); | ||
2866 | atomic_set(&cache->nr_allocated_migrations, 0); | 2643 | atomic_set(&cache->nr_allocated_migrations, 0); |
2867 | atomic_set(&cache->nr_io_migrations, 0); | 2644 | atomic_set(&cache->nr_io_migrations, 0); |
2868 | init_waitqueue_head(&cache->migration_wait); | 2645 | init_waitqueue_head(&cache->migration_wait); |
2869 | 2646 | ||
2870 | init_waitqueue_head(&cache->quiescing_wait); | ||
2871 | atomic_set(&cache->quiescing, 0); | ||
2872 | atomic_set(&cache->quiescing_ack, 0); | ||
2873 | |||
2874 | r = -ENOMEM; | 2647 | r = -ENOMEM; |
2875 | atomic_set(&cache->nr_dirty, 0); | 2648 | atomic_set(&cache->nr_dirty, 0); |
2876 | cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); | 2649 | cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); |
@@ -2899,27 +2672,23 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2899 | goto bad; | 2672 | goto bad; |
2900 | } | 2673 | } |
2901 | 2674 | ||
2902 | cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); | 2675 | cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0); |
2903 | if (!cache->wq) { | 2676 | if (!cache->wq) { |
2904 | *error = "could not create workqueue for metadata object"; | 2677 | *error = "could not create workqueue for metadata object"; |
2905 | goto bad; | 2678 | goto bad; |
2906 | } | 2679 | } |
2907 | INIT_WORK(&cache->worker, do_worker); | 2680 | INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios); |
2681 | INIT_WORK(&cache->deferred_writethrough_worker, | ||
2682 | process_deferred_writethrough_bios); | ||
2683 | INIT_WORK(&cache->migration_worker, check_migrations); | ||
2908 | INIT_DELAYED_WORK(&cache->waker, do_waker); | 2684 | INIT_DELAYED_WORK(&cache->waker, do_waker); |
2909 | cache->last_commit_jiffies = jiffies; | ||
2910 | 2685 | ||
2911 | cache->prison = dm_bio_prison_create(); | 2686 | cache->prison = dm_bio_prison_create_v2(cache->wq); |
2912 | if (!cache->prison) { | 2687 | if (!cache->prison) { |
2913 | *error = "could not create bio prison"; | 2688 | *error = "could not create bio prison"; |
2914 | goto bad; | 2689 | goto bad; |
2915 | } | 2690 | } |
2916 | 2691 | ||
2917 | cache->all_io_ds = dm_deferred_set_create(); | ||
2918 | if (!cache->all_io_ds) { | ||
2919 | *error = "could not create all_io deferred set"; | ||
2920 | goto bad; | ||
2921 | } | ||
2922 | |||
2923 | cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, | 2692 | cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, |
2924 | migration_cache); | 2693 | migration_cache); |
2925 | if (!cache->migration_pool) { | 2694 | if (!cache->migration_pool) { |
@@ -2946,11 +2715,15 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2946 | spin_lock_init(&cache->invalidation_lock); | 2715 | spin_lock_init(&cache->invalidation_lock); |
2947 | INIT_LIST_HEAD(&cache->invalidation_requests); | 2716 | INIT_LIST_HEAD(&cache->invalidation_requests); |
2948 | 2717 | ||
2718 | batcher_init(&cache->committer, commit_op, cache, | ||
2719 | issue_op, cache, cache->wq); | ||
2949 | iot_init(&cache->origin_tracker); | 2720 | iot_init(&cache->origin_tracker); |
2950 | 2721 | ||
2722 | init_rwsem(&cache->background_work_lock); | ||
2723 | prevent_background_work(cache); | ||
2724 | |||
2951 | *result = cache; | 2725 | *result = cache; |
2952 | return 0; | 2726 | return 0; |
2953 | |||
2954 | bad: | 2727 | bad: |
2955 | destroy(cache); | 2728 | destroy(cache); |
2956 | return r; | 2729 | return r; |
@@ -3008,7 +2781,6 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3008 | } | 2781 | } |
3009 | 2782 | ||
3010 | ti->private = cache; | 2783 | ti->private = cache; |
3011 | |||
3012 | out: | 2784 | out: |
3013 | destroy_cache_args(ca); | 2785 | destroy_cache_args(ca); |
3014 | return r; | 2786 | return r; |
@@ -3021,17 +2793,11 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
3021 | struct cache *cache = ti->private; | 2793 | struct cache *cache = ti->private; |
3022 | 2794 | ||
3023 | int r; | 2795 | int r; |
3024 | struct dm_bio_prison_cell *cell = NULL; | 2796 | bool commit_needed; |
3025 | dm_oblock_t block = get_bio_block(cache, bio); | 2797 | dm_oblock_t block = get_bio_block(cache, bio); |
3026 | size_t pb_data_size = get_per_bio_data_size(cache); | 2798 | size_t pb_data_size = get_per_bio_data_size(cache); |
3027 | bool can_migrate = false; | ||
3028 | bool fast_promotion; | ||
3029 | struct policy_result lookup_result; | ||
3030 | struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); | ||
3031 | struct old_oblock_lock ool; | ||
3032 | |||
3033 | ool.locker.fn = null_locker; | ||
3034 | 2799 | ||
2800 | init_per_bio_data(bio, pb_data_size); | ||
3035 | if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { | 2801 | if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { |
3036 | /* | 2802 | /* |
3037 | * This can only occur if the io goes to a partial block at | 2803 | * This can only occur if the io goes to a partial block at |
@@ -3048,101 +2814,9 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
3048 | return DM_MAPIO_SUBMITTED; | 2814 | return DM_MAPIO_SUBMITTED; |
3049 | } | 2815 | } |
3050 | 2816 | ||
3051 | /* | 2817 | r = map_bio(cache, bio, block, &commit_needed); |
3052 | * Check to see if that block is currently migrating. | 2818 | if (commit_needed) |
3053 | */ | 2819 | schedule_commit(&cache->committer); |
3054 | cell = alloc_prison_cell(cache); | ||
3055 | if (!cell) { | ||
3056 | defer_bio(cache, bio); | ||
3057 | return DM_MAPIO_SUBMITTED; | ||
3058 | } | ||
3059 | |||
3060 | r = bio_detain(cache, block, bio, cell, | ||
3061 | (cell_free_fn) free_prison_cell, | ||
3062 | cache, &cell); | ||
3063 | if (r) { | ||
3064 | if (r < 0) | ||
3065 | defer_bio(cache, bio); | ||
3066 | |||
3067 | return DM_MAPIO_SUBMITTED; | ||
3068 | } | ||
3069 | |||
3070 | fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio); | ||
3071 | |||
3072 | r = policy_map(cache->policy, block, false, can_migrate, fast_promotion, | ||
3073 | bio, &ool.locker, &lookup_result); | ||
3074 | if (r == -EWOULDBLOCK) { | ||
3075 | cell_defer(cache, cell, true); | ||
3076 | return DM_MAPIO_SUBMITTED; | ||
3077 | |||
3078 | } else if (r) { | ||
3079 | DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d", | ||
3080 | cache_device_name(cache), r); | ||
3081 | cell_defer(cache, cell, false); | ||
3082 | bio_io_error(bio); | ||
3083 | return DM_MAPIO_SUBMITTED; | ||
3084 | } | ||
3085 | |||
3086 | r = DM_MAPIO_REMAPPED; | ||
3087 | switch (lookup_result.op) { | ||
3088 | case POLICY_HIT: | ||
3089 | if (passthrough_mode(&cache->features)) { | ||
3090 | if (bio_data_dir(bio) == WRITE) { | ||
3091 | /* | ||
3092 | * We need to invalidate this block, so | ||
3093 | * defer for the worker thread. | ||
3094 | */ | ||
3095 | cell_defer(cache, cell, true); | ||
3096 | r = DM_MAPIO_SUBMITTED; | ||
3097 | |||
3098 | } else { | ||
3099 | inc_miss_counter(cache, bio); | ||
3100 | remap_to_origin_clear_discard(cache, bio, block); | ||
3101 | accounted_begin(cache, bio); | ||
3102 | inc_ds(cache, bio, cell); | ||
3103 | // FIXME: we want to remap hits or misses straight | ||
3104 | // away rather than passing over to the worker. | ||
3105 | cell_defer(cache, cell, false); | ||
3106 | } | ||
3107 | |||
3108 | } else { | ||
3109 | inc_hit_counter(cache, bio); | ||
3110 | if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && | ||
3111 | !is_dirty(cache, lookup_result.cblock)) { | ||
3112 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | ||
3113 | accounted_begin(cache, bio); | ||
3114 | inc_ds(cache, bio, cell); | ||
3115 | cell_defer(cache, cell, false); | ||
3116 | |||
3117 | } else | ||
3118 | remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false); | ||
3119 | } | ||
3120 | break; | ||
3121 | |||
3122 | case POLICY_MISS: | ||
3123 | inc_miss_counter(cache, bio); | ||
3124 | if (pb->req_nr != 0) { | ||
3125 | /* | ||
3126 | * This is a duplicate writethrough io that is no | ||
3127 | * longer needed because the block has been demoted. | ||
3128 | */ | ||
3129 | bio_endio(bio); | ||
3130 | // FIXME: remap everything as a miss | ||
3131 | cell_defer(cache, cell, false); | ||
3132 | r = DM_MAPIO_SUBMITTED; | ||
3133 | |||
3134 | } else | ||
3135 | remap_cell_to_origin_clear_discard(cache, cell, block, false); | ||
3136 | break; | ||
3137 | |||
3138 | default: | ||
3139 | DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u", | ||
3140 | cache_device_name(cache), __func__, | ||
3141 | (unsigned) lookup_result.op); | ||
3142 | cell_defer(cache, cell, false); | ||
3143 | bio_io_error(bio); | ||
3144 | r = DM_MAPIO_SUBMITTED; | ||
3145 | } | ||
3146 | 2820 | ||
3147 | return r; | 2821 | return r; |
3148 | } | 2822 | } |
@@ -3162,7 +2836,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) | |||
3162 | spin_unlock_irqrestore(&cache->lock, flags); | 2836 | spin_unlock_irqrestore(&cache->lock, flags); |
3163 | } | 2837 | } |
3164 | 2838 | ||
3165 | check_for_quiesced_migrations(cache, pb); | 2839 | bio_drop_shared_lock(cache, bio); |
3166 | accounted_complete(cache, bio); | 2840 | accounted_complete(cache, bio); |
3167 | 2841 | ||
3168 | return 0; | 2842 | return 0; |
@@ -3262,12 +2936,18 @@ static void cache_postsuspend(struct dm_target *ti) | |||
3262 | { | 2936 | { |
3263 | struct cache *cache = ti->private; | 2937 | struct cache *cache = ti->private; |
3264 | 2938 | ||
3265 | start_quiescing(cache); | 2939 | prevent_background_work(cache); |
3266 | wait_for_migrations(cache); | 2940 | BUG_ON(atomic_read(&cache->nr_io_migrations)); |
3267 | stop_worker(cache); | 2941 | |
2942 | cancel_delayed_work(&cache->waker); | ||
2943 | flush_workqueue(cache->wq); | ||
2944 | WARN_ON(cache->origin_tracker.in_flight); | ||
2945 | |||
2946 | /* | ||
2947 | * If it's a flush suspend there won't be any deferred bios, so this | ||
2948 | * call is harmless. | ||
2949 | */ | ||
3268 | requeue_deferred_bios(cache); | 2950 | requeue_deferred_bios(cache); |
3269 | requeue_deferred_cells(cache); | ||
3270 | stop_quiescing(cache); | ||
3271 | 2951 | ||
3272 | if (get_cache_mode(cache) == CM_WRITE) | 2952 | if (get_cache_mode(cache) == CM_WRITE) |
3273 | (void) sync_metadata(cache); | 2953 | (void) sync_metadata(cache); |
@@ -3279,15 +2959,16 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, | |||
3279 | int r; | 2959 | int r; |
3280 | struct cache *cache = context; | 2960 | struct cache *cache = context; |
3281 | 2961 | ||
3282 | r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); | 2962 | if (dirty) { |
2963 | set_bit(from_cblock(cblock), cache->dirty_bitset); | ||
2964 | atomic_inc(&cache->nr_dirty); | ||
2965 | } else | ||
2966 | clear_bit(from_cblock(cblock), cache->dirty_bitset); | ||
2967 | |||
2968 | r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); | ||
3283 | if (r) | 2969 | if (r) |
3284 | return r; | 2970 | return r; |
3285 | 2971 | ||
3286 | if (dirty) | ||
3287 | set_dirty(cache, oblock, cblock); | ||
3288 | else | ||
3289 | clear_dirty(cache, oblock, cblock); | ||
3290 | |||
3291 | return 0; | 2972 | return 0; |
3292 | } | 2973 | } |
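In the new load_mapping() above, the dirty state read back from the metadata seeds cache->dirty_bitset (and bumps nr_dirty) before the mapping is handed to the policy. The short sketch below shows the same bitset-plus-counter bookkeeping in plain C; the array layout and names are illustrative assumptions rather than the kernel's bitmap helpers.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CBLOCKS	1024
#define BITS_PER_WORD	(sizeof(unsigned long) * CHAR_BIT)

static unsigned long dirty_bitset[(NR_CBLOCKS + BITS_PER_WORD - 1) / BITS_PER_WORD];
static unsigned long nr_dirty;

/* Record the dirty flag loaded from metadata for one cache block. */
static void load_dirty_state(unsigned int cblock, bool dirty)
{
	unsigned long mask = 1UL << (cblock % BITS_PER_WORD);
	unsigned long *word = &dirty_bitset[cblock / BITS_PER_WORD];

	if (dirty) {
		*word |= mask;		/* like set_bit(from_cblock(cblock), ...) */
		nr_dirty++;		/* like atomic_inc(&cache->nr_dirty) */
	} else {
		*word &= ~mask;		/* like clear_bit(from_cblock(cblock), ...) */
	}
}

int main(void)
{
	load_dirty_state(3, true);
	load_dirty_state(7, false);
	printf("dirty blocks after load: %lu\n", nr_dirty);
	return 0;
}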
3293 | 2974 | ||
@@ -3486,6 +3167,7 @@ static void cache_resume(struct dm_target *ti) | |||
3486 | struct cache *cache = ti->private; | 3167 | struct cache *cache = ti->private; |
3487 | 3168 | ||
3488 | cache->need_tick_bio = true; | 3169 | cache->need_tick_bio = true; |
3170 | allow_background_work(cache); | ||
3489 | do_waker(&cache->waker.work); | 3171 | do_waker(&cache->waker.work); |
3490 | } | 3172 | } |
3491 | 3173 | ||
@@ -3620,10 +3302,19 @@ err: | |||
3620 | } | 3302 | } |
3621 | 3303 | ||
3622 | /* | 3304 | /* |
3305 | * Defines a range of cblocks, begin to (end - 1) are in the range. end is | ||
3306 | * the one-past-the-end value. | ||
3307 | */ | ||
3308 | struct cblock_range { | ||
3309 | dm_cblock_t begin; | ||
3310 | dm_cblock_t end; | ||
3311 | }; | ||
3312 | |||
3313 | /* | ||
3623 | * A cache block range can take two forms: | 3314 | * A cache block range can take two forms: |
3624 | * | 3315 | * |
3625 | * i) A single cblock, eg. '3456' | 3316 | * i) A single cblock, eg. '3456' |
3626 | * ii) A begin and end cblock with dots between, eg. 123-234 | 3317 | * ii) A begin and end cblock with a dash between, eg. 123-234 |
3627 | */ | 3318 | */ |
3628 | static int parse_cblock_range(struct cache *cache, const char *str, | 3319 | static int parse_cblock_range(struct cache *cache, const char *str, |
3629 | struct cblock_range *result) | 3320 | struct cblock_range *result) |
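Per the comments above, the invalidate message accepts either a single cblock or a begin-end pair, and the result is stored as a half-open range. The hypothetical userspace parser below illustrates one way to turn those two text forms into such a range; treating the second number as the one-past-the-end value, and all names, are assumptions for the sketch rather than the dm-cache implementation.

#include <stdio.h>
#include <stdint.h>

struct cblock_range_ex {
	uint64_t begin;
	uint64_t end;	/* one past the last block in the range */
};

/* Parse "3456" or "123-234" into a half-open [begin, end) range. */
static int parse_cblock_range_ex(const char *str, struct cblock_range_ex *result)
{
	unsigned long long b, e;
	char dummy;

	/* form (ii): begin and end with a dash between */
	if (sscanf(str, "%llu-%llu%c", &b, &e, &dummy) == 2) {
		result->begin = b;
		result->end = e;	/* assumed to already be one-past-the-end */
		return 0;
	}

	/* form (i): a single cblock */
	if (sscanf(str, "%llu%c", &b, &dummy) == 1) {
		result->begin = b;
		result->end = b + 1;
		return 0;
	}

	return -1;	/* unrecognised format */
}

int main(void)
{
	struct cblock_range_ex r;

	if (!parse_cblock_range_ex("123-234", &r))
		printf("range [%llu, %llu)\n",
		       (unsigned long long)r.begin, (unsigned long long)r.end);
	return 0;
}

The trailing %c conversion is only there to reject strings with junk after the number(s): it fails at end of input, so a clean parse returns exactly 2 or 1 matches.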
@@ -3689,23 +3380,31 @@ static int validate_cblock_range(struct cache *cache, struct cblock_range *range | |||
3689 | return 0; | 3380 | return 0; |
3690 | } | 3381 | } |
3691 | 3382 | ||
3383 | static inline dm_cblock_t cblock_succ(dm_cblock_t b) | ||
3384 | { | ||
3385 | return to_cblock(from_cblock(b) + 1); | ||
3386 | } | ||
3387 | |||
3692 | static int request_invalidation(struct cache *cache, struct cblock_range *range) | 3388 | static int request_invalidation(struct cache *cache, struct cblock_range *range) |
3693 | { | 3389 | { |
3694 | struct invalidation_request req; | 3390 | int r = 0; |
3695 | 3391 | ||
3696 | INIT_LIST_HEAD(&req.list); | 3392 | /* |
3697 | req.cblocks = range; | 3393 | * We don't need to do any locking here because we know we're in |
3698 | atomic_set(&req.complete, 0); | 3394 | * passthrough mode. There's is potential for a race between an |
3699 | req.err = 0; | 3395 | * invalidation triggered by an io and an invalidation message. This |
3700 | init_waitqueue_head(&req.result_wait); | 3396 | * is harmless, we must not worry if the policy call fails. |
3397 | */ | ||
3398 | while (range->begin != range->end) { | ||
3399 | r = invalidate_cblock(cache, range->begin); | ||
3400 | if (r) | ||
3401 | return r; | ||
3701 | 3402 | ||
3702 | spin_lock(&cache->invalidation_lock); | 3403 | range->begin = cblock_succ(range->begin); |
3703 | list_add(&req.list, &cache->invalidation_requests); | 3404 | } |
3704 | spin_unlock(&cache->invalidation_lock); | ||
3705 | wake_worker(cache); | ||
3706 | 3405 | ||
3707 | wait_event(req.result_wait, atomic_read(&req.complete)); | 3406 | cache->commit_requested = true; |
3708 | return req.err; | 3407 | return r; |
3709 | } | 3408 | } |
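The comment above explains why request_invalidation() can simply walk the range without locking; the walk itself leans on the half-open [begin, end) convention of struct cblock_range, where begin == end means an empty range and cblock_succ() advances one block at a time. A tiny illustrative sketch of that idiom, using hypothetical names and plain integers instead of dm_cblock_t, is:

#include <stdio.h>

struct range { unsigned long begin, end; };	/* end is one-past-the-end */

/* Stand-in for invalidate_cblock(); pretend block 5 cannot be invalidated. */
static int invalidate_block(unsigned long b)
{
	return b == 5 ? -1 : 0;
}

/* Walk [begin, end); an empty range (begin == end) does nothing. */
static int invalidate_range(struct range *r)
{
	while (r->begin != r->end) {
		int rc = invalidate_block(r->begin);
		if (rc)
			return rc;	/* stop early, as request_invalidation() does */
		r->begin++;		/* plays the role of cblock_succ() */
	}
	return 0;
}

int main(void)
{
	struct range r = { 3, 8 };
	printf("result: %d, stopped at %lu\n", invalidate_range(&r), r.begin);
	return 0;
}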
3710 | 3409 | ||
3711 | static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, | 3410 | static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, |
@@ -3815,7 +3514,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3815 | 3514 | ||
3816 | static struct target_type cache_target = { | 3515 | static struct target_type cache_target = { |
3817 | .name = "cache", | 3516 | .name = "cache", |
3818 | .version = {1, 10, 0}, | 3517 | .version = {2, 0, 0}, |
3819 | .module = THIS_MODULE, | 3518 | .module = THIS_MODULE, |
3820 | .ctr = cache_ctr, | 3519 | .ctr = cache_ctr, |
3821 | .dtr = cache_dtr, | 3520 | .dtr = cache_dtr, |