diff options
author | Heinz Mauelshagen <hjm@redhat.com> | 2008-10-21 12:45:06 -0400 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2008-10-21 12:45:06 -0400 |
commit | 1f965b19437017cea6d3f3f46acdc5acae5fd011 (patch) | |
tree | f70fd0684d1afbde7f0031a6f8cb6aa58880723c /drivers/md/dm-region-hash.c | |
parent | f3e1d26ede3fb15c06904d700f1d7b21bba2215e (diff) |
dm raid1: separate region_hash interface part1
Separate the region hash code from raid1 so it can be shared by forthcoming
targets. Use BUG_ON() for failed async dm_io() calls.
Signed-off-by: Heinz Mauelshagen <hjm@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md/dm-region-hash.c')
-rw-r--r-- | drivers/md/dm-region-hash.c | 704 |
1 files changed, 704 insertions, 0 deletions
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c new file mode 100644 index 000000000000..59f8d9df9e1a --- /dev/null +++ b/drivers/md/dm-region-hash.c | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2003 Sistina Software Limited. | ||
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include <linux/dm-dirty-log.h> | ||
9 | #include <linux/dm-region-hash.h> | ||
10 | |||
11 | #include <linux/ctype.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | |||
16 | #include "dm.h" | ||
17 | #include "dm-bio-list.h" | ||
18 | |||
19 | #define DM_MSG_PREFIX "region hash" | ||
20 | |||
21 | /*----------------------------------------------------------------- | ||
22 | * Region hash | ||
23 | * | ||
24 | * The mirror splits itself up into discrete regions. Each | ||
25 | * region can be in one of three states: clean, dirty, | ||
26 | * nosync. There is no need to put clean regions in the hash. | ||
27 | * | ||
28 | * In addition to being present in the hash table a region _may_ | ||
29 | * be present on one of three lists. | ||
30 | * | ||
31 | * clean_regions: Regions on this list have no io pending to | ||
32 | * them, they are in sync, we are no longer interested in them, | ||
33 | * they are dull. dm_rh_update_states() will remove them from the | ||
34 | * hash table. | ||
35 | * | ||
36 | * quiesced_regions: These regions have been spun down, ready | ||
37 | * for recovery. dm_rh_recovery_start() will remove regions from | |
38 | * this list and hand them to kmirrord, which will schedule the | ||
39 | * recovery io with kcopyd. | ||
40 | * | ||
41 | * recovered_regions: Regions that kcopyd has successfully | ||
42 | * recovered. dm_rh_update_states() will now schedule any delayed | ||
43 | * io, up the recovery_count, and remove the region from the | ||
44 | * hash. | ||
45 | * | ||
46 | * There are 2 locks: | ||
47 | * A rw spin lock 'hash_lock' protects just the hash table, | ||
48 | * this is never held in write mode from interrupt context, | ||
49 | * which I believe means that we only have to disable irqs when | ||
50 | * doing a write lock. | ||
51 | * | ||
52 | * An ordinary spin lock 'region_lock' that protects the three | ||
53 | * lists in the region_hash, with the 'state', 'list' and | ||
54 | * 'delayed_bios' fields of the regions. This is used from irq | ||
55 | * context, so all other uses will have to suspend local irqs. | ||
56 | *---------------------------------------------------------------*/ | ||
/*
 * State of one region hash instance: region geometry, the dirty log,
 * the hash table of tracked regions, recovery bookkeeping and the
 * callbacks supplied by the owner in dm_region_hash_create().
 */
struct dm_region_hash {
	uint32_t region_size;	/* as passed to dm_region_hash_create() */
	unsigned region_shift;	/* ffs(region_size) - 1; sector <-> region shift */

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;	/* allocator for struct dm_region entries */
	unsigned mask;		/* nr_buckets - 1 */
	unsigned nr_buckets;
	unsigned prime;		/* hash multiplier, set to RH_HASH_MULT */
	unsigned shift;		/* hash right shift, set to RH_HASH_SHIFT */
	struct list_head *buckets;	/* vmalloc'ed array of nr_buckets chains */

	unsigned max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	void *context;		/* opaque pointer handed back to every callback */
	sector_t target_begin;	/* subtracted from bio->bi_sector before region lookup */

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};
95 | |||
/*
 * One region tracked in the hash.  Entries are allocated in __rh_alloc()
 * and freed in dm_rh_update_states() or dm_region_hash_destroy().
 */
struct dm_region {
	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
	region_t key;			/* region number; the hash lookup key */
	int state;			/* one of the DM_RH_* states */

	struct list_head hash_list;	/* link in one rh->buckets[] chain */
	struct list_head list;		/* link on one of rh's per-state lists */

	atomic_t pending;		/* raised by rh_inc(), dropped by dm_rh_dec() */
	struct bio_list delayed_bios;	/* bios queued by dm_rh_delay() */
};
107 | |||
108 | /* | ||
109 | * Conversion fns | ||
110 | */ | ||
111 | static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector) | ||
112 | { | ||
113 | return sector >> rh->region_shift; | ||
114 | } | ||
115 | |||
116 | sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region) | ||
117 | { | ||
118 | return region << rh->region_shift; | ||
119 | } | ||
120 | EXPORT_SYMBOL_GPL(dm_rh_region_to_sector); | ||
121 | |||
122 | region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio) | ||
123 | { | ||
124 | return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin); | ||
125 | } | ||
126 | EXPORT_SYMBOL_GPL(dm_rh_bio_to_region); | ||
127 | |||
128 | void *dm_rh_region_context(struct dm_region *reg) | ||
129 | { | ||
130 | return reg->rh->context; | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(dm_rh_region_context); | ||
133 | |||
134 | region_t dm_rh_get_region_key(struct dm_region *reg) | ||
135 | { | ||
136 | return reg->key; | ||
137 | } | ||
138 | EXPORT_SYMBOL_GPL(dm_rh_get_region_key); | ||
139 | |||
140 | sector_t dm_rh_get_region_size(struct dm_region_hash *rh) | ||
141 | { | ||
142 | return rh->region_size; | ||
143 | } | ||
144 | EXPORT_SYMBOL_GPL(dm_rh_get_region_size); | ||
145 | |||
146 | /* | ||
147 | * FIXME: shall we pass in a structure instead of all these args to | ||
148 | * dm_region_hash_create()???? | ||
149 | */ | ||
150 | #define RH_HASH_MULT 2654435387U | ||
151 | #define RH_HASH_SHIFT 12 | ||
152 | |||
153 | #define MIN_REGIONS 64 | ||
154 | struct dm_region_hash *dm_region_hash_create( | ||
155 | void *context, void (*dispatch_bios)(void *context, | ||
156 | struct bio_list *bios), | ||
157 | void (*wakeup_workers)(void *context), | ||
158 | void (*wakeup_all_recovery_waiters)(void *context), | ||
159 | sector_t target_begin, unsigned max_recovery, | ||
160 | struct dm_dirty_log *log, uint32_t region_size, | ||
161 | region_t nr_regions) | ||
162 | { | ||
163 | struct dm_region_hash *rh; | ||
164 | unsigned nr_buckets, max_buckets; | ||
165 | size_t i; | ||
166 | |||
167 | /* | ||
168 | * Calculate a suitable number of buckets for our hash | ||
169 | * table. | ||
170 | */ | ||
171 | max_buckets = nr_regions >> 6; | ||
172 | for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) | ||
173 | ; | ||
174 | nr_buckets >>= 1; | ||
175 | |||
176 | rh = kmalloc(sizeof(*rh), GFP_KERNEL); | ||
177 | if (!rh) { | ||
178 | DMERR("unable to allocate region hash memory"); | ||
179 | return ERR_PTR(-ENOMEM); | ||
180 | } | ||
181 | |||
182 | rh->context = context; | ||
183 | rh->dispatch_bios = dispatch_bios; | ||
184 | rh->wakeup_workers = wakeup_workers; | ||
185 | rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters; | ||
186 | rh->target_begin = target_begin; | ||
187 | rh->max_recovery = max_recovery; | ||
188 | rh->log = log; | ||
189 | rh->region_size = region_size; | ||
190 | rh->region_shift = ffs(region_size) - 1; | ||
191 | rwlock_init(&rh->hash_lock); | ||
192 | rh->mask = nr_buckets - 1; | ||
193 | rh->nr_buckets = nr_buckets; | ||
194 | |||
195 | rh->shift = RH_HASH_SHIFT; | ||
196 | rh->prime = RH_HASH_MULT; | ||
197 | |||
198 | rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); | ||
199 | if (!rh->buckets) { | ||
200 | DMERR("unable to allocate region hash bucket memory"); | ||
201 | kfree(rh); | ||
202 | return ERR_PTR(-ENOMEM); | ||
203 | } | ||
204 | |||
205 | for (i = 0; i < nr_buckets; i++) | ||
206 | INIT_LIST_HEAD(rh->buckets + i); | ||
207 | |||
208 | spin_lock_init(&rh->region_lock); | ||
209 | sema_init(&rh->recovery_count, 0); | ||
210 | atomic_set(&rh->recovery_in_flight, 0); | ||
211 | INIT_LIST_HEAD(&rh->clean_regions); | ||
212 | INIT_LIST_HEAD(&rh->quiesced_regions); | ||
213 | INIT_LIST_HEAD(&rh->recovered_regions); | ||
214 | INIT_LIST_HEAD(&rh->failed_recovered_regions); | ||
215 | |||
216 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, | ||
217 | sizeof(struct dm_region)); | ||
218 | if (!rh->region_pool) { | ||
219 | vfree(rh->buckets); | ||
220 | kfree(rh); | ||
221 | rh = ERR_PTR(-ENOMEM); | ||
222 | } | ||
223 | |||
224 | return rh; | ||
225 | } | ||
226 | EXPORT_SYMBOL_GPL(dm_region_hash_create); | ||
227 | |||
228 | void dm_region_hash_destroy(struct dm_region_hash *rh) | ||
229 | { | ||
230 | unsigned h; | ||
231 | struct dm_region *reg, *nreg; | ||
232 | |||
233 | BUG_ON(!list_empty(&rh->quiesced_regions)); | ||
234 | for (h = 0; h < rh->nr_buckets; h++) { | ||
235 | list_for_each_entry_safe(reg, nreg, rh->buckets + h, | ||
236 | hash_list) { | ||
237 | BUG_ON(atomic_read(®->pending)); | ||
238 | mempool_free(reg, rh->region_pool); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | if (rh->log) | ||
243 | dm_dirty_log_destroy(rh->log); | ||
244 | |||
245 | if (rh->region_pool) | ||
246 | mempool_destroy(rh->region_pool); | ||
247 | |||
248 | vfree(rh->buckets); | ||
249 | kfree(rh); | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(dm_region_hash_destroy); | ||
252 | |||
253 | struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh) | ||
254 | { | ||
255 | return rh->log; | ||
256 | } | ||
257 | EXPORT_SYMBOL_GPL(dm_rh_dirty_log); | ||
258 | |||
259 | static unsigned rh_hash(struct dm_region_hash *rh, region_t region) | ||
260 | { | ||
261 | return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask; | ||
262 | } | ||
263 | |||
264 | static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region) | ||
265 | { | ||
266 | struct dm_region *reg; | ||
267 | struct list_head *bucket = rh->buckets + rh_hash(rh, region); | ||
268 | |||
269 | list_for_each_entry(reg, bucket, hash_list) | ||
270 | if (reg->key == region) | ||
271 | return reg; | ||
272 | |||
273 | return NULL; | ||
274 | } | ||
275 | |||
276 | static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg) | ||
277 | { | ||
278 | list_add(®->hash_list, rh->buckets + rh_hash(rh, reg->key)); | ||
279 | } | ||
280 | |||
281 | static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region) | ||
282 | { | ||
283 | struct dm_region *reg, *nreg; | ||
284 | |||
285 | nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); | ||
286 | if (unlikely(!nreg)) | ||
287 | nreg = kmalloc(sizeof(*nreg), GFP_NOIO); | ||
288 | |||
289 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? | ||
290 | DM_RH_CLEAN : DM_RH_NOSYNC; | ||
291 | nreg->rh = rh; | ||
292 | nreg->key = region; | ||
293 | INIT_LIST_HEAD(&nreg->list); | ||
294 | atomic_set(&nreg->pending, 0); | ||
295 | bio_list_init(&nreg->delayed_bios); | ||
296 | |||
297 | write_lock_irq(&rh->hash_lock); | ||
298 | reg = __rh_lookup(rh, region); | ||
299 | if (reg) | ||
300 | /* We lost the race. */ | ||
301 | mempool_free(nreg, rh->region_pool); | ||
302 | else { | ||
303 | __rh_insert(rh, nreg); | ||
304 | if (nreg->state == DM_RH_CLEAN) { | ||
305 | spin_lock(&rh->region_lock); | ||
306 | list_add(&nreg->list, &rh->clean_regions); | ||
307 | spin_unlock(&rh->region_lock); | ||
308 | } | ||
309 | |||
310 | reg = nreg; | ||
311 | } | ||
312 | write_unlock_irq(&rh->hash_lock); | ||
313 | |||
314 | return reg; | ||
315 | } | ||
316 | |||
317 | static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region) | ||
318 | { | ||
319 | struct dm_region *reg; | ||
320 | |||
321 | reg = __rh_lookup(rh, region); | ||
322 | if (!reg) { | ||
323 | read_unlock(&rh->hash_lock); | ||
324 | reg = __rh_alloc(rh, region); | ||
325 | read_lock(&rh->hash_lock); | ||
326 | } | ||
327 | |||
328 | return reg; | ||
329 | } | ||
330 | |||
331 | int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block) | ||
332 | { | ||
333 | int r; | ||
334 | struct dm_region *reg; | ||
335 | |||
336 | read_lock(&rh->hash_lock); | ||
337 | reg = __rh_lookup(rh, region); | ||
338 | read_unlock(&rh->hash_lock); | ||
339 | |||
340 | if (reg) | ||
341 | return reg->state; | ||
342 | |||
343 | /* | ||
344 | * The region wasn't in the hash, so we fall back to the | ||
345 | * dirty log. | ||
346 | */ | ||
347 | r = rh->log->type->in_sync(rh->log, region, may_block); | ||
348 | |||
349 | /* | ||
350 | * Any error from the dirty log (eg. -EWOULDBLOCK) gets | ||
351 | * taken as a DM_RH_NOSYNC | ||
352 | */ | ||
353 | return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC; | ||
354 | } | ||
355 | EXPORT_SYMBOL_GPL(dm_rh_get_state); | ||
356 | |||
357 | static void complete_resync_work(struct dm_region *reg, int success) | ||
358 | { | ||
359 | struct dm_region_hash *rh = reg->rh; | ||
360 | |||
361 | rh->log->type->set_region_sync(rh->log, reg->key, success); | ||
362 | |||
363 | /* | ||
364 | * Dispatch the bios before we call 'wake_up_all'. | ||
365 | * This is important because if we are suspending, | ||
366 | * we want to know that recovery is complete and | ||
367 | * the work queue is flushed. If we wake_up_all | ||
368 | * before we dispatch_bios (queue bios and call wake()), | ||
369 | * then we risk suspending before the work queue | ||
370 | * has been properly flushed. | ||
371 | */ | ||
372 | rh->dispatch_bios(rh->context, ®->delayed_bios); | ||
373 | if (atomic_dec_and_test(&rh->recovery_in_flight)) | ||
374 | rh->wakeup_all_recovery_waiters(rh->context); | ||
375 | up(&rh->recovery_count); | ||
376 | } | ||
377 | |||
378 | /* dm_rh_mark_nosync | ||
379 | * @ms | ||
380 | * @bio | ||
381 | * @done | ||
382 | * @error | ||
383 | * | ||
384 | * The bio was written on some mirror(s) but failed on other mirror(s). | ||
385 | * We can successfully endio the bio but should avoid the region being | ||
386 | * marked clean by setting the state DM_RH_NOSYNC. | ||
387 | * | ||
388 | * This function is _not_ safe in interrupt context! | ||
389 | */ | ||
390 | void dm_rh_mark_nosync(struct dm_region_hash *rh, | ||
391 | struct bio *bio, unsigned done, int error) | ||
392 | { | ||
393 | unsigned long flags; | ||
394 | struct dm_dirty_log *log = rh->log; | ||
395 | struct dm_region *reg; | ||
396 | region_t region = dm_rh_bio_to_region(rh, bio); | ||
397 | int recovering = 0; | ||
398 | |||
399 | /* We must inform the log that the sync count has changed. */ | ||
400 | log->type->set_region_sync(log, region, 0); | ||
401 | |||
402 | read_lock(&rh->hash_lock); | ||
403 | reg = __rh_find(rh, region); | ||
404 | read_unlock(&rh->hash_lock); | ||
405 | |||
406 | /* region hash entry should exist because write was in-flight */ | ||
407 | BUG_ON(!reg); | ||
408 | BUG_ON(!list_empty(®->list)); | ||
409 | |||
410 | spin_lock_irqsave(&rh->region_lock, flags); | ||
411 | /* | ||
412 | * Possible cases: | ||
413 | * 1) DM_RH_DIRTY | ||
414 | * 2) DM_RH_NOSYNC: was dirty, other preceeding writes failed | ||
415 | * 3) DM_RH_RECOVERING: flushing pending writes | ||
416 | * Either case, the region should have not been connected to list. | ||
417 | */ | ||
418 | recovering = (reg->state == DM_RH_RECOVERING); | ||
419 | reg->state = DM_RH_NOSYNC; | ||
420 | BUG_ON(!list_empty(®->list)); | ||
421 | spin_unlock_irqrestore(&rh->region_lock, flags); | ||
422 | |||
423 | bio_endio(bio, error); | ||
424 | if (recovering) | ||
425 | complete_resync_work(reg, 0); | ||
426 | } | ||
427 | EXPORT_SYMBOL_GPL(dm_rh_mark_nosync); | ||
428 | |||
429 | void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled) | ||
430 | { | ||
431 | struct dm_region *reg, *next; | ||
432 | |||
433 | LIST_HEAD(clean); | ||
434 | LIST_HEAD(recovered); | ||
435 | LIST_HEAD(failed_recovered); | ||
436 | |||
437 | /* | ||
438 | * Quickly grab the lists. | ||
439 | */ | ||
440 | write_lock_irq(&rh->hash_lock); | ||
441 | spin_lock(&rh->region_lock); | ||
442 | if (!list_empty(&rh->clean_regions)) { | ||
443 | list_splice_init(&rh->clean_regions, &clean); | ||
444 | |||
445 | list_for_each_entry(reg, &clean, list) | ||
446 | list_del(®->hash_list); | ||
447 | } | ||
448 | |||
449 | if (!list_empty(&rh->recovered_regions)) { | ||
450 | list_splice_init(&rh->recovered_regions, &recovered); | ||
451 | |||
452 | list_for_each_entry(reg, &recovered, list) | ||
453 | list_del(®->hash_list); | ||
454 | } | ||
455 | |||
456 | if (!list_empty(&rh->failed_recovered_regions)) { | ||
457 | list_splice_init(&rh->failed_recovered_regions, | ||
458 | &failed_recovered); | ||
459 | |||
460 | list_for_each_entry(reg, &failed_recovered, list) | ||
461 | list_del(®->hash_list); | ||
462 | } | ||
463 | |||
464 | spin_unlock(&rh->region_lock); | ||
465 | write_unlock_irq(&rh->hash_lock); | ||
466 | |||
467 | /* | ||
468 | * All the regions on the recovered and clean lists have | ||
469 | * now been pulled out of the system, so no need to do | ||
470 | * any more locking. | ||
471 | */ | ||
472 | list_for_each_entry_safe(reg, next, &recovered, list) { | ||
473 | rh->log->type->clear_region(rh->log, reg->key); | ||
474 | complete_resync_work(reg, 1); | ||
475 | mempool_free(reg, rh->region_pool); | ||
476 | } | ||
477 | |||
478 | list_for_each_entry_safe(reg, next, &failed_recovered, list) { | ||
479 | complete_resync_work(reg, errors_handled ? 0 : 1); | ||
480 | mempool_free(reg, rh->region_pool); | ||
481 | } | ||
482 | |||
483 | list_for_each_entry_safe(reg, next, &clean, list) { | ||
484 | rh->log->type->clear_region(rh->log, reg->key); | ||
485 | mempool_free(reg, rh->region_pool); | ||
486 | } | ||
487 | |||
488 | rh->log->type->flush(rh->log); | ||
489 | } | ||
490 | EXPORT_SYMBOL_GPL(dm_rh_update_states); | ||
491 | |||
492 | static void rh_inc(struct dm_region_hash *rh, region_t region) | ||
493 | { | ||
494 | struct dm_region *reg; | ||
495 | |||
496 | read_lock(&rh->hash_lock); | ||
497 | reg = __rh_find(rh, region); | ||
498 | |||
499 | spin_lock_irq(&rh->region_lock); | ||
500 | atomic_inc(®->pending); | ||
501 | |||
502 | if (reg->state == DM_RH_CLEAN) { | ||
503 | reg->state = DM_RH_DIRTY; | ||
504 | list_del_init(®->list); /* take off the clean list */ | ||
505 | spin_unlock_irq(&rh->region_lock); | ||
506 | |||
507 | rh->log->type->mark_region(rh->log, reg->key); | ||
508 | } else | ||
509 | spin_unlock_irq(&rh->region_lock); | ||
510 | |||
511 | |||
512 | read_unlock(&rh->hash_lock); | ||
513 | } | ||
514 | |||
515 | void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) | ||
516 | { | ||
517 | struct bio *bio; | ||
518 | |||
519 | for (bio = bios->head; bio; bio = bio->bi_next) | ||
520 | rh_inc(rh, dm_rh_bio_to_region(rh, bio)); | ||
521 | } | ||
522 | EXPORT_SYMBOL_GPL(dm_rh_inc_pending); | ||
523 | |||
524 | void dm_rh_dec(struct dm_region_hash *rh, region_t region) | ||
525 | { | ||
526 | unsigned long flags; | ||
527 | struct dm_region *reg; | ||
528 | int should_wake = 0; | ||
529 | |||
530 | read_lock(&rh->hash_lock); | ||
531 | reg = __rh_lookup(rh, region); | ||
532 | read_unlock(&rh->hash_lock); | ||
533 | |||
534 | spin_lock_irqsave(&rh->region_lock, flags); | ||
535 | if (atomic_dec_and_test(®->pending)) { | ||
536 | /* | ||
537 | * There is no pending I/O for this region. | ||
538 | * We can move the region to corresponding list for next action. | ||
539 | * At this point, the region is not yet connected to any list. | ||
540 | * | ||
541 | * If the state is DM_RH_NOSYNC, the region should be kept off | ||
542 | * from clean list. | ||
543 | * The hash entry for DM_RH_NOSYNC will remain in memory | ||
544 | * until the region is recovered or the map is reloaded. | ||
545 | */ | ||
546 | |||
547 | /* do nothing for DM_RH_NOSYNC */ | ||
548 | if (reg->state == DM_RH_RECOVERING) { | ||
549 | list_add_tail(®->list, &rh->quiesced_regions); | ||
550 | } else if (reg->state == DM_RH_DIRTY) { | ||
551 | reg->state = DM_RH_CLEAN; | ||
552 | list_add(®->list, &rh->clean_regions); | ||
553 | } | ||
554 | should_wake = 1; | ||
555 | } | ||
556 | spin_unlock_irqrestore(&rh->region_lock, flags); | ||
557 | |||
558 | if (should_wake) | ||
559 | rh->wakeup_workers(rh->context); | ||
560 | } | ||
561 | EXPORT_SYMBOL_GPL(dm_rh_dec); | ||
562 | |||
563 | /* | ||
564 | * Starts quiescing a region in preparation for recovery. | ||
565 | */ | ||
566 | static int __rh_recovery_prepare(struct dm_region_hash *rh) | ||
567 | { | ||
568 | int r; | ||
569 | region_t region; | ||
570 | struct dm_region *reg; | ||
571 | |||
572 | /* | ||
573 | * Ask the dirty log what's next. | ||
574 | */ | ||
575 | r = rh->log->type->get_resync_work(rh->log, ®ion); | ||
576 | if (r <= 0) | ||
577 | return r; | ||
578 | |||
579 | /* | ||
580 | * Get this region, and start it quiescing by setting the | ||
581 | * recovering flag. | ||
582 | */ | ||
583 | read_lock(&rh->hash_lock); | ||
584 | reg = __rh_find(rh, region); | ||
585 | read_unlock(&rh->hash_lock); | ||
586 | |||
587 | spin_lock_irq(&rh->region_lock); | ||
588 | reg->state = DM_RH_RECOVERING; | ||
589 | |||
590 | /* Already quiesced ? */ | ||
591 | if (atomic_read(®->pending)) | ||
592 | list_del_init(®->list); | ||
593 | else | ||
594 | list_move(®->list, &rh->quiesced_regions); | ||
595 | |||
596 | spin_unlock_irq(&rh->region_lock); | ||
597 | |||
598 | return 1; | ||
599 | } | ||
600 | |||
601 | void dm_rh_recovery_prepare(struct dm_region_hash *rh) | ||
602 | { | ||
603 | /* Extra reference to avoid race with dm_rh_stop_recovery */ | ||
604 | atomic_inc(&rh->recovery_in_flight); | ||
605 | |||
606 | while (!down_trylock(&rh->recovery_count)) { | ||
607 | atomic_inc(&rh->recovery_in_flight); | ||
608 | if (__rh_recovery_prepare(rh) <= 0) { | ||
609 | atomic_dec(&rh->recovery_in_flight); | ||
610 | up(&rh->recovery_count); | ||
611 | break; | ||
612 | } | ||
613 | } | ||
614 | |||
615 | /* Drop the extra reference */ | ||
616 | if (atomic_dec_and_test(&rh->recovery_in_flight)) | ||
617 | rh->wakeup_all_recovery_waiters(rh->context); | ||
618 | } | ||
619 | EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare); | ||
620 | |||
621 | /* | ||
622 | * Returns any quiesced regions. | ||
623 | */ | ||
624 | struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh) | ||
625 | { | ||
626 | struct dm_region *reg = NULL; | ||
627 | |||
628 | spin_lock_irq(&rh->region_lock); | ||
629 | if (!list_empty(&rh->quiesced_regions)) { | ||
630 | reg = list_entry(rh->quiesced_regions.next, | ||
631 | struct dm_region, list); | ||
632 | list_del_init(®->list); /* remove from the quiesced list */ | ||
633 | } | ||
634 | spin_unlock_irq(&rh->region_lock); | ||
635 | |||
636 | return reg; | ||
637 | } | ||
638 | EXPORT_SYMBOL_GPL(dm_rh_recovery_start); | ||
639 | |||
640 | void dm_rh_recovery_end(struct dm_region *reg, int success) | ||
641 | { | ||
642 | struct dm_region_hash *rh = reg->rh; | ||
643 | |||
644 | spin_lock_irq(&rh->region_lock); | ||
645 | if (success) | ||
646 | list_add(®->list, ®->rh->recovered_regions); | ||
647 | else { | ||
648 | reg->state = DM_RH_NOSYNC; | ||
649 | list_add(®->list, ®->rh->failed_recovered_regions); | ||
650 | } | ||
651 | spin_unlock_irq(&rh->region_lock); | ||
652 | |||
653 | rh->wakeup_workers(rh->context); | ||
654 | } | ||
655 | EXPORT_SYMBOL_GPL(dm_rh_recovery_end); | ||
656 | |||
657 | /* Return recovery in flight count. */ | ||
658 | int dm_rh_recovery_in_flight(struct dm_region_hash *rh) | ||
659 | { | ||
660 | return atomic_read(&rh->recovery_in_flight); | ||
661 | } | ||
662 | EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight); | ||
663 | |||
664 | int dm_rh_flush(struct dm_region_hash *rh) | ||
665 | { | ||
666 | return rh->log->type->flush(rh->log); | ||
667 | } | ||
668 | EXPORT_SYMBOL_GPL(dm_rh_flush); | ||
669 | |||
670 | void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio) | ||
671 | { | ||
672 | struct dm_region *reg; | ||
673 | |||
674 | read_lock(&rh->hash_lock); | ||
675 | reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio)); | ||
676 | bio_list_add(®->delayed_bios, bio); | ||
677 | read_unlock(&rh->hash_lock); | ||
678 | } | ||
679 | EXPORT_SYMBOL_GPL(dm_rh_delay); | ||
680 | |||
681 | void dm_rh_stop_recovery(struct dm_region_hash *rh) | ||
682 | { | ||
683 | int i; | ||
684 | |||
685 | /* wait for any recovering regions */ | ||
686 | for (i = 0; i < rh->max_recovery; i++) | ||
687 | down(&rh->recovery_count); | ||
688 | } | ||
689 | EXPORT_SYMBOL_GPL(dm_rh_stop_recovery); | ||
690 | |||
691 | void dm_rh_start_recovery(struct dm_region_hash *rh) | ||
692 | { | ||
693 | int i; | ||
694 | |||
695 | for (i = 0; i < rh->max_recovery; i++) | ||
696 | up(&rh->recovery_count); | ||
697 | |||
698 | rh->wakeup_workers(rh->context); | ||
699 | } | ||
700 | EXPORT_SYMBOL_GPL(dm_rh_start_recovery); | ||
701 | |||
/* Module metadata: description, author and licence strings. */
MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");