author     Linus Torvalds <torvalds@linux-foundation.org>  2009-04-03 13:02:45 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2009-04-03 13:02:45 -0400
commit     d9b9be024a6628a01d8730d1fd0b5f25658a2794 (patch)
tree       9f8e606f975f6dff4213747e85fedaccd148eb60
parent     9b59f0316bc556a1b63518f0b1224cf9be48467b (diff)
parent     99360b4c18f7675b50d283301d46d755affe75fd (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (36 commits)
  dm: set queue ordered mode
  dm: move wait queue declaration
  dm: merge pushback and deferred bio lists
  dm: allow uninterruptible wait for pending io
  dm: merge __flush_deferred_io into caller
  dm: move bio_io_error into __split_and_process_bio
  dm: rename __split_bio
  dm: remove unnecessary struct dm_wq_req
  dm: remove unnecessary work queue context field
  dm: remove unnecessary work queue type field
  dm: bio list add bio_list_add_head
  dm snapshot: persistent fix dtr cleanup
  dm snapshot: move status to exception store
  dm snapshot: move ctr parsing to exception store
  dm snapshot: use DMEMIT macro for status
  dm snapshot: remove dm_snap header
  dm snapshot: remove dm_snap header use
  dm exception store: move cow pointer
  dm exception store: move chunk_fields
  dm exception store: move dm_target pointer
  ...
-rw-r--r--  drivers/md/dm-bio-list.h          10
-rw-r--r--  drivers/md/dm-bio-record.h        26
-rw-r--r--  drivers/md/dm-crypt.c              6
-rw-r--r--  drivers/md/dm-exception-store.c  252
-rw-r--r--  drivers/md/dm-exception-store.h   58
-rw-r--r--  drivers/md/dm-io.c                 5
-rw-r--r--  drivers/md/dm-log.c               75
-rw-r--r--  drivers/md/dm-path-selector.c     21
-rw-r--r--  drivers/md/dm-raid1.c             50
-rw-r--r--  drivers/md/dm-snap-persistent.c  153
-rw-r--r--  drivers/md/dm-snap-transient.c    86
-rw-r--r--  drivers/md/dm-snap.c             384
-rw-r--r--  drivers/md/dm-snap.h             105
-rw-r--r--  drivers/md/dm-table.c             26
-rw-r--r--  drivers/md/dm-target.c           104
-rw-r--r--  drivers/md/dm.c                  134
-rw-r--r--  drivers/md/dm.h                    2
-rw-r--r--  include/linux/device-mapper.h      3
-rw-r--r--  include/linux/dm-dirty-log.h      13
19 files changed, 893 insertions, 620 deletions
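
The bulk of this merge turns the snapshot exception store into a pluggable interface: dm-exception-store.c below gains a type registry (dm_exception_store_type_register/_unregister) plus a generic constructor, dm_exception_store_create(), that parses the <COW device> <P|N> <chunk size> arguments on behalf of dm-snap.c, and the persistent and transient stores re-register themselves as types. As a rough sketch only (not code from this tree; the "example" name and the stub callbacks are hypothetical), an additional store built against this interface would look something like:

/*
 * Illustrative skeleton of an exception store type using the interface
 * added by this series.  Only the struct fields and the register/
 * unregister calls are taken from the patches below; everything else
 * is a placeholder.
 */
#include <linux/module.h>
#include "dm-exception-store.h"

static int example_ctr(struct dm_exception_store *store,
		       unsigned argc, char **argv)
{
	/* store->cow and store->chunk_size are already filled in by
	 * dm_exception_store_create() before ->ctr() runs. */
	return 0;
}

static void example_dtr(struct dm_exception_store *store)
{
}

static struct dm_exception_store_type _example_type = {
	.name   = "example",
	.module = THIS_MODULE,
	.ctr    = example_ctr,
	.dtr    = example_dtr,
	/* A real store also supplies .read_metadata, .prepare_exception,
	 * .commit_exception, .fraction_full and .status, exactly as the
	 * persistent and transient types do further down. */
};

static int __init dm_example_exstore_init(void)
{
	/* Build this as dm-exstore-example.ko so get_type() can autoload it. */
	return dm_exception_store_type_register(&_example_type);
}

static void __exit dm_example_exstore_exit(void)
{
	dm_exception_store_type_unregister(&_example_type);
}

module_init(dm_example_exstore_init);
module_exit(dm_example_exstore_exit);
MODULE_LICENSE("GPL");
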
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index d4509be0fe67..345098b4ca77 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
 	bl->tail = bio;
 }
 
+static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
+{
+	bio->bi_next = bl->head;
+
+	bl->head = bio;
+
+	if (!bl->tail)
+		bl->tail = bio;
+}
+
 static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
 {
 	if (!bl2->head)
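
bio_list_add_head() is the counterpart of bio_list_add(): it pushes the bio at the head instead of appending it at the tail, so it becomes the next bio a bio_list_pop() returns. A tiny hedged sketch, with bio_a and bio_b as placeholder bios:

	struct bio_list list;

	bio_list_init(&list);
	bio_list_add(&list, bio_a);		/* list: a */
	bio_list_add_head(&list, bio_b);	/* list: b, a */
	/* bio_list_pop(&list) now hands back bio_b first. */
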
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index d3ec217847d6..3a8cfa2645c7 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -16,30 +16,56 @@
  * functions in this file help the target record and restore the
  * original bio state.
  */
+
+struct dm_bio_vec_details {
+#if PAGE_SIZE < 65536
+	__u16 bv_len;
+	__u16 bv_offset;
+#else
+	unsigned bv_len;
+	unsigned bv_offset;
+#endif
+};
+
 struct dm_bio_details {
 	sector_t bi_sector;
 	struct block_device *bi_bdev;
 	unsigned int bi_size;
 	unsigned short bi_idx;
 	unsigned long bi_flags;
+	struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
 };
 
 static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
 {
+	unsigned i;
+
 	bd->bi_sector = bio->bi_sector;
 	bd->bi_bdev = bio->bi_bdev;
 	bd->bi_size = bio->bi_size;
 	bd->bi_idx = bio->bi_idx;
 	bd->bi_flags = bio->bi_flags;
+
+	for (i = 0; i < bio->bi_vcnt; i++) {
+		bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
+		bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
+	}
 }
 
 static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
 {
+	unsigned i;
+
 	bio->bi_sector = bd->bi_sector;
 	bio->bi_bdev = bd->bi_bdev;
 	bio->bi_size = bd->bi_size;
 	bio->bi_idx = bd->bi_idx;
 	bio->bi_flags = bd->bi_flags;
+
+	for (i = 0; i < bio->bi_vcnt; i++) {
+		bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
+		bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
+	}
 }
 
 #endif
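
With the per-bvec lengths and offsets recorded as well, a target can now rewind a partially-processed bio before resubmitting it. The intended pattern, sketched with illustrative names (dm-raid1 below keeps a dm_bio_details in its read_record for exactly this purpose):

	struct dm_bio_details details;

	dm_bio_record(&details, bio);	/* capture sector, size, idx, flags and every bvec */
	/* ... remap and submit the bio; if the submission fails ... */
	dm_bio_restore(&details, bio);	/* put the bio back exactly as it was, ready for a retry */
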
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bfefd079a955..53394e863c74 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1156,8 +1156,7 @@ bad_ivmode:
 	crypto_free_ablkcipher(tfm);
 bad_cipher:
 	/* Must zero key material before freeing */
-	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-	kfree(cc);
+	kzfree(cc);
 	return -EINVAL;
 }
 
@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
 	dm_put_device(ti, cc->dev);
 
 	/* Must zero key material before freeing */
-	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-	kfree(cc);
+	kzfree(cc);
 }
 
 static int crypt_map(struct dm_target *ti, struct bio *bio,
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index dccbfb0e010f..a2e26c242141 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,6 +7,7 @@
7 7
8#include "dm-exception-store.h" 8#include "dm-exception-store.h"
9 9
10#include <linux/ctype.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/pagemap.h> 12#include <linux/pagemap.h>
12#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
@@ -14,6 +15,257 @@
14 15
15#define DM_MSG_PREFIX "snapshot exception stores" 16#define DM_MSG_PREFIX "snapshot exception stores"
16 17
18static LIST_HEAD(_exception_store_types);
19static DEFINE_SPINLOCK(_lock);
20
21static struct dm_exception_store_type *__find_exception_store_type(const char *name)
22{
23 struct dm_exception_store_type *type;
24
25 list_for_each_entry(type, &_exception_store_types, list)
26 if (!strcmp(name, type->name))
27 return type;
28
29 return NULL;
30}
31
32static struct dm_exception_store_type *_get_exception_store_type(const char *name)
33{
34 struct dm_exception_store_type *type;
35
36 spin_lock(&_lock);
37
38 type = __find_exception_store_type(name);
39
40 if (type && !try_module_get(type->module))
41 type = NULL;
42
43 spin_unlock(&_lock);
44
45 return type;
46}
47
48/*
49 * get_type
50 * @type_name
51 *
52 * Attempt to retrieve the dm_exception_store_type by name. If not already
53 * available, attempt to load the appropriate module.
54 *
55 * Exstore modules are named "dm-exstore-" followed by the 'type_name'.
56 * Modules may contain multiple types.
57 * This function will first try the module "dm-exstore-<type_name>",
58 * then truncate 'type_name' on the last '-' and try again.
59 *
60 * For example, if type_name was "clustered-shared", it would search
61 * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
62 *
63 * 'dm-exception-store-<type_name>' is too long of a name in my
64 * opinion, which is why I've chosen to have the files
65 * containing exception store implementations be 'dm-exstore-<type_name>'.
66 * If you want your module to be autoloaded, you will follow this
67 * naming convention.
68 *
69 * Returns: dm_exception_store_type* on success, NULL on failure
70 */
71static struct dm_exception_store_type *get_type(const char *type_name)
72{
73 char *p, *type_name_dup;
74 struct dm_exception_store_type *type;
75
76 type = _get_exception_store_type(type_name);
77 if (type)
78 return type;
79
80 type_name_dup = kstrdup(type_name, GFP_KERNEL);
81 if (!type_name_dup) {
82 DMERR("No memory left to attempt load for \"%s\"", type_name);
83 return NULL;
84 }
85
86 while (request_module("dm-exstore-%s", type_name_dup) ||
87 !(type = _get_exception_store_type(type_name))) {
88 p = strrchr(type_name_dup, '-');
89 if (!p)
90 break;
91 p[0] = '\0';
92 }
93
94 if (!type)
95 DMWARN("Module for exstore type \"%s\" not found.", type_name);
96
97 kfree(type_name_dup);
98
99 return type;
100}
101
102static void put_type(struct dm_exception_store_type *type)
103{
104 spin_lock(&_lock);
105 module_put(type->module);
106 spin_unlock(&_lock);
107}
108
109int dm_exception_store_type_register(struct dm_exception_store_type *type)
110{
111 int r = 0;
112
113 spin_lock(&_lock);
114 if (!__find_exception_store_type(type->name))
115 list_add(&type->list, &_exception_store_types);
116 else
117 r = -EEXIST;
118 spin_unlock(&_lock);
119
120 return r;
121}
122EXPORT_SYMBOL(dm_exception_store_type_register);
123
124int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
125{
126 spin_lock(&_lock);
127
128 if (!__find_exception_store_type(type->name)) {
129 spin_unlock(&_lock);
130 return -EINVAL;
131 }
132
133 list_del(&type->list);
134
135 spin_unlock(&_lock);
136
137 return 0;
138}
139EXPORT_SYMBOL(dm_exception_store_type_unregister);
140
141/*
142 * Round a number up to the nearest 'size' boundary. size must
143 * be a power of 2.
144 */
145static ulong round_up(ulong n, ulong size)
146{
147 size--;
148 return (n + size) & ~size;
149}
150
151static int set_chunk_size(struct dm_exception_store *store,
152 const char *chunk_size_arg, char **error)
153{
154 unsigned long chunk_size_ulong;
155 char *value;
156
157 chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
158 if (*chunk_size_arg == '\0' || *value != '\0') {
159 *error = "Invalid chunk size";
160 return -EINVAL;
161 }
162
163 if (!chunk_size_ulong) {
164 store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
165 return 0;
166 }
167
168 /*
169 * Chunk size must be multiple of page size. Silently
170 * round up if it's not.
171 */
172 chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
173
174 /* Check chunk_size is a power of 2 */
175 if (!is_power_of_2(chunk_size_ulong)) {
176 *error = "Chunk size is not a power of 2";
177 return -EINVAL;
178 }
179
180 /* Validate the chunk size against the device block size */
181 if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
182 *error = "Chunk size is not a multiple of device blocksize";
183 return -EINVAL;
184 }
185
186 store->chunk_size = chunk_size_ulong;
187 store->chunk_mask = chunk_size_ulong - 1;
188 store->chunk_shift = ffs(chunk_size_ulong) - 1;
189
190 return 0;
191}
192
193int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
194 unsigned *args_used,
195 struct dm_exception_store **store)
196{
197 int r = 0;
198 struct dm_exception_store_type *type;
199 struct dm_exception_store *tmp_store;
200 char persistent;
201
202 if (argc < 3) {
203 ti->error = "Insufficient exception store arguments";
204 return -EINVAL;
205 }
206
207 tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
208 if (!tmp_store) {
209 ti->error = "Exception store allocation failed";
210 return -ENOMEM;
211 }
212
213 persistent = toupper(*argv[1]);
214 if (persistent != 'P' && persistent != 'N') {
215 ti->error = "Persistent flag is not P or N";
216 return -EINVAL;
217 }
218
219 type = get_type(argv[1]);
220 if (!type) {
221 ti->error = "Exception store type not recognised";
222 r = -EINVAL;
223 goto bad_type;
224 }
225
226 tmp_store->type = type;
227 tmp_store->ti = ti;
228
229 r = dm_get_device(ti, argv[0], 0, 0,
230 FMODE_READ | FMODE_WRITE, &tmp_store->cow);
231 if (r) {
232 ti->error = "Cannot get COW device";
233 goto bad_cow;
234 }
235
236 r = set_chunk_size(tmp_store, argv[2], &ti->error);
237 if (r)
238 goto bad_cow;
239
240 r = type->ctr(tmp_store, 0, NULL);
241 if (r) {
242 ti->error = "Exception store type constructor failed";
243 goto bad_ctr;
244 }
245
246 *args_used = 3;
247 *store = tmp_store;
248 return 0;
249
250bad_ctr:
251 dm_put_device(ti, tmp_store->cow);
252bad_cow:
253 put_type(type);
254bad_type:
255 kfree(tmp_store);
256 return r;
257}
258EXPORT_SYMBOL(dm_exception_store_create);
259
260void dm_exception_store_destroy(struct dm_exception_store *store)
261{
262 store->type->dtr(store);
263 dm_put_device(store->ti, store->cow);
264 put_type(store->type);
265 kfree(store);
266}
267EXPORT_SYMBOL(dm_exception_store_destroy);
268
17int dm_exception_store_init(void) 269int dm_exception_store_init(void)
18{ 270{
19 int r; 271 int r;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb9f33d5daa2..0a2e6e7f67b3 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -37,11 +37,18 @@ struct dm_snap_exception {
37 * Abstraction to handle the meta/layout of exception stores (the 37 * Abstraction to handle the meta/layout of exception stores (the
38 * COW device). 38 * COW device).
39 */ 39 */
40struct dm_exception_store { 40struct dm_exception_store;
41struct dm_exception_store_type {
42 const char *name;
43 struct module *module;
44
45 int (*ctr) (struct dm_exception_store *store,
46 unsigned argc, char **argv);
47
41 /* 48 /*
42 * Destroys this object when you've finished with it. 49 * Destroys this object when you've finished with it.
43 */ 50 */
44 void (*destroy) (struct dm_exception_store *store); 51 void (*dtr) (struct dm_exception_store *store);
45 52
46 /* 53 /*
47 * The target shouldn't read the COW device until this is 54 * The target shouldn't read the COW device until this is
@@ -72,8 +79,9 @@ struct dm_exception_store {
72 */ 79 */
73 void (*drop_snapshot) (struct dm_exception_store *store); 80 void (*drop_snapshot) (struct dm_exception_store *store);
74 81
75 int (*status) (struct dm_exception_store *store, status_type_t status, 82 unsigned (*status) (struct dm_exception_store *store,
76 char *result, unsigned int maxlen); 83 status_type_t status, char *result,
84 unsigned maxlen);
77 85
78 /* 86 /*
79 * Return how full the snapshot is. 87 * Return how full the snapshot is.
@@ -82,7 +90,21 @@ struct dm_exception_store {
82 sector_t *numerator, 90 sector_t *numerator,
83 sector_t *denominator); 91 sector_t *denominator);
84 92
85 struct dm_snapshot *snap; 93 /* For internal device-mapper use only. */
94 struct list_head list;
95};
96
97struct dm_exception_store {
98 struct dm_exception_store_type *type;
99 struct dm_target *ti;
100
101 struct dm_dev *cow;
102
103 /* Size of data blocks saved - must be a power of 2 */
104 chunk_t chunk_size;
105 chunk_t chunk_mask;
106 chunk_t chunk_shift;
107
86 void *context; 108 void *context;
87}; 109};
88 110
@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
129 151
130# endif 152# endif
131 153
154/*
155 * Return the number of sectors in the device.
156 */
157static inline sector_t get_dev_size(struct block_device *bdev)
158{
159 return bdev->bd_inode->i_size >> SECTOR_SHIFT;
160}
161
162static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
163 sector_t sector)
164{
165 return (sector & ~store->chunk_mask) >> store->chunk_shift;
166}
167
168int dm_exception_store_type_register(struct dm_exception_store_type *type);
169int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
170
171int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
172 unsigned *args_used,
173 struct dm_exception_store **store);
174void dm_exception_store_destroy(struct dm_exception_store *store);
175
132int dm_exception_store_init(void); 176int dm_exception_store_init(void);
133void dm_exception_store_exit(void); 177void dm_exception_store_exit(void);
134 178
@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
141int dm_transient_snapshot_init(void); 185int dm_transient_snapshot_init(void);
142void dm_transient_snapshot_exit(void); 186void dm_transient_snapshot_exit(void);
143 187
144int dm_create_persistent(struct dm_exception_store *store);
145
146int dm_create_transient(struct dm_exception_store *store);
147
148#endif /* _LINUX_DM_EXCEPTION_STORE */ 188#endif /* _LINUX_DM_EXCEPTION_STORE */
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 36e2b5e46a6b..e73aabd61cd7 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&io.count) || signal_pending(current))
+		if (!atomic_read(&io.count))
 			break;
 
 		io_schedule();
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (atomic_read(&io.count))
-		return -EINTR;
-
 	if (error_bits)
 		*error_bits = io.error_bits;
 
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 737961f275c1..be233bc4d917 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -16,40 +16,29 @@
 
 #define DM_MSG_PREFIX "dirty region log"
 
-struct dm_dirty_log_internal {
-	struct dm_dirty_log_type *type;
-
-	struct list_head list;
-	long use;
-};
-
 static LIST_HEAD(_log_types);
 static DEFINE_SPINLOCK(_lock);
 
-static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
 {
-	struct dm_dirty_log_internal *log_type;
+	struct dm_dirty_log_type *log_type;
 
 	list_for_each_entry(log_type, &_log_types, list)
-		if (!strcmp(name, log_type->type->name))
+		if (!strcmp(name, log_type->name))
 			return log_type;
 
 	return NULL;
 }
 
-static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
 {
-	struct dm_dirty_log_internal *log_type;
+	struct dm_dirty_log_type *log_type;
 
 	spin_lock(&_lock);
 
 	log_type = __find_dirty_log_type(name);
-	if (log_type) {
-		if (!log_type->use && !try_module_get(log_type->type->module))
-			log_type = NULL;
-		else
-			log_type->use++;
-	}
+	if (log_type && !try_module_get(log_type->module))
+		log_type = NULL;
 
 	spin_unlock(&_lock);
 
@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
 static struct dm_dirty_log_type *get_type(const char *type_name)
 {
 	char *p, *type_name_dup;
-	struct dm_dirty_log_internal *log_type;
+	struct dm_dirty_log_type *log_type;
 
 	if (!type_name)
 		return NULL;
 
 	log_type = _get_dirty_log_type(type_name);
 	if (log_type)
-		return log_type->type;
+		return log_type;
 
 	type_name_dup = kstrdup(type_name, GFP_KERNEL);
 	if (!type_name_dup) {
@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
 
 	kfree(type_name_dup);
 
-	return log_type ? log_type->type : NULL;
+	return log_type;
 }
 
 static void put_type(struct dm_dirty_log_type *type)
 {
-	struct dm_dirty_log_internal *log_type;
-
 	if (!type)
 		return;
 
 	spin_lock(&_lock);
-	log_type = __find_dirty_log_type(type->name);
-	if (!log_type)
+	if (!__find_dirty_log_type(type->name))
 		goto out;
 
-	if (!--log_type->use)
-		module_put(type->module);
-
-	BUG_ON(log_type->use < 0);
+	module_put(type->module);
 
 out:
 	spin_unlock(&_lock);
 }
 
-static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
-{
-	struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
-							 GFP_KERNEL);
-
-	if (log_type)
-		log_type->type = type;
-
-	return log_type;
-}
-
 int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
 {
-	struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
 	int r = 0;
 
-	if (!log_type)
-		return -ENOMEM;
-
 	spin_lock(&_lock);
 	if (!__find_dirty_log_type(type->name))
-		list_add(&log_type->list, &_log_types);
-	else {
-		kfree(log_type);
+		list_add(&type->list, &_log_types);
+	else
 		r = -EEXIST;
-	}
 	spin_unlock(&_lock);
 
 	return r;
@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
 
 int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
 {
-	struct dm_dirty_log_internal *log_type;
-
 	spin_lock(&_lock);
 
-	log_type = __find_dirty_log_type(type->name);
-	if (!log_type) {
+	if (!__find_dirty_log_type(type->name)) {
 		spin_unlock(&_lock);
 		return -EINVAL;
 	}
 
-	if (log_type->use) {
-		spin_unlock(&_lock);
-		return -ETXTBSY;
-	}
-
-	list_del(&log_type->list);
+	list_del(&type->list);
 
 	spin_unlock(&_lock);
-	kfree(log_type);
 
 	return 0;
 }
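
After this cleanup the dirty-log core hangs everything off struct dm_dirty_log_type itself: the type carries its own list_head and the plain module reference count replaces the old 'use' counter, so unregister can no longer fail with -ETXTBSY. A registration sketch under those assumptions (the "example" type and its callbacks are hypothetical; only the .name/.module fields and the register/unregister calls appear in the hunks above):

static struct dm_dirty_log_type _example_log_type = {
	.name   = "example",
	.module = THIS_MODULE,
	/* .ctr, .dtr and the region callbacks are omitted from this sketch;
	 * .list is initialised by dm_dirty_log_type_register() via list_add(). */
};

	/* in module init */
	r = dm_dirty_log_type_register(&_example_log_type);

	/* in module exit */
	dm_dirty_log_type_unregister(&_example_log_type);
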
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index 96ea226155b1..42c04f04a0c4 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -17,9 +17,7 @@
 
 struct ps_internal {
 	struct path_selector_type pst;
-
 	struct list_head list;
-	long use;
 };
 
 #define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
 
 	down_read(&_ps_lock);
 	psi = __find_path_selector_type(name);
-	if (psi) {
-		if ((psi->use == 0) && !try_module_get(psi->pst.module))
-			psi = NULL;
-		else
-			psi->use++;
-	}
+	if (psi && !try_module_get(psi->pst.module))
+		psi = NULL;
 	up_read(&_ps_lock);
 
 	return psi;
@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
 	if (!psi)
 		goto out;
 
-	if (--psi->use == 0)
-		module_put(psi->pst.module);
-
-	BUG_ON(psi->use < 0);
-
+	module_put(psi->pst.module);
 out:
 	up_read(&_ps_lock);
 }
@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
 		return -EINVAL;
 	}
 
-	if (psi->use) {
-		up_write(&_ps_lock);
-		return -ETXTBSY;
-	}
-
 	list_del(&psi->list);
 
 	up_write(&_ps_lock);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4d6bc101962e..536ef0bef154 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
145 struct dm_bio_details details; 145 struct dm_bio_details details;
146}; 146};
147 147
148static struct kmem_cache *_dm_raid1_read_record_cache;
149
148/* 150/*
149 * Every mirror should look like this one. 151 * Every mirror should look like this one.
150 */ 152 */
@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
586 int state; 588 int state;
587 struct bio *bio; 589 struct bio *bio;
588 struct bio_list sync, nosync, recover, *this_list = NULL; 590 struct bio_list sync, nosync, recover, *this_list = NULL;
591 struct bio_list requeue;
592 struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
593 region_t region;
589 594
590 if (!writes->head) 595 if (!writes->head)
591 return; 596 return;
@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
596 bio_list_init(&sync); 601 bio_list_init(&sync);
597 bio_list_init(&nosync); 602 bio_list_init(&nosync);
598 bio_list_init(&recover); 603 bio_list_init(&recover);
604 bio_list_init(&requeue);
599 605
600 while ((bio = bio_list_pop(writes))) { 606 while ((bio = bio_list_pop(writes))) {
601 state = dm_rh_get_state(ms->rh, 607 region = dm_rh_bio_to_region(ms->rh, bio);
602 dm_rh_bio_to_region(ms->rh, bio), 1); 608
609 if (log->type->is_remote_recovering &&
610 log->type->is_remote_recovering(log, region)) {
611 bio_list_add(&requeue, bio);
612 continue;
613 }
614
615 state = dm_rh_get_state(ms->rh, region, 1);
603 switch (state) { 616 switch (state) {
604 case DM_RH_CLEAN: 617 case DM_RH_CLEAN:
605 case DM_RH_DIRTY: 618 case DM_RH_DIRTY:
@@ -619,6 +632,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
619 } 632 }
620 633
621 /* 634 /*
635 * Add bios that are delayed due to remote recovery
636 * back on to the write queue
637 */
638 if (unlikely(requeue.head)) {
639 spin_lock_irq(&ms->lock);
640 bio_list_merge(&ms->writes, &requeue);
641 spin_unlock_irq(&ms->lock);
642 }
643
644 /*
622 * Increment the pending counts for any regions that will 645 * Increment the pending counts for any regions that will
623 * be written to (writes to recover regions are going to 646 * be written to (writes to recover regions are going to
624 * be delayed). 647 * be delayed).
@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
764 atomic_set(&ms->suspend, 0); 787 atomic_set(&ms->suspend, 0);
765 atomic_set(&ms->default_mirror, DEFAULT_MIRROR); 788 atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
766 789
767 len = sizeof(struct dm_raid1_read_record); 790 ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
768 ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS, 791 _dm_raid1_read_record_cache);
769 len); 792
770 if (!ms->read_record_pool) { 793 if (!ms->read_record_pool) {
771 ti->error = "Error creating mirror read_record_pool"; 794 ti->error = "Error creating mirror read_record_pool";
772 kfree(ms); 795 kfree(ms);
@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
1279{ 1302{
1280 int r; 1303 int r;
1281 1304
1305 _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
1306 if (!_dm_raid1_read_record_cache) {
1307 DMERR("Can't allocate dm_raid1_read_record cache");
1308 r = -ENOMEM;
1309 goto bad_cache;
1310 }
1311
1282 r = dm_register_target(&mirror_target); 1312 r = dm_register_target(&mirror_target);
1283 if (r < 0) 1313 if (r < 0) {
1284 DMERR("Failed to register mirror target"); 1314 DMERR("Failed to register mirror target");
1315 goto bad_target;
1316 }
1317
1318 return 0;
1285 1319
1320bad_target:
1321 kmem_cache_destroy(_dm_raid1_read_record_cache);
1322bad_cache:
1286 return r; 1323 return r;
1287} 1324}
1288 1325
1289static void __exit dm_mirror_exit(void) 1326static void __exit dm_mirror_exit(void)
1290{ 1327{
1291 dm_unregister_target(&mirror_target); 1328 dm_unregister_target(&mirror_target);
1329 kmem_cache_destroy(_dm_raid1_read_record_cache);
1292} 1330}
1293 1331
1294/* Module hooks */ 1332/* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 936b34e0959f..e75c6dd76a9a 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include "dm-exception-store.h" 8#include "dm-exception-store.h"
9#include "dm-snap.h"
10 9
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/pagemap.h> 11#include <linux/pagemap.h>
@@ -89,7 +88,7 @@ struct commit_callback {
89 * The top level structure for a persistent exception store. 88 * The top level structure for a persistent exception store.
90 */ 89 */
91struct pstore { 90struct pstore {
92 struct dm_snapshot *snap; /* up pointer to my snapshot */ 91 struct dm_exception_store *store;
93 int version; 92 int version;
94 int valid; 93 int valid;
95 uint32_t exceptions_per_area; 94 uint32_t exceptions_per_area;
@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
141 int r = -ENOMEM; 140 int r = -ENOMEM;
142 size_t len; 141 size_t len;
143 142
144 len = ps->snap->chunk_size << SECTOR_SHIFT; 143 len = ps->store->chunk_size << SECTOR_SHIFT;
145 144
146 /* 145 /*
147 * Allocate the chunk_size block of memory that will hold 146 * Allocate the chunk_size block of memory that will hold
@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
163 162
164static void free_area(struct pstore *ps) 163static void free_area(struct pstore *ps)
165{ 164{
166 vfree(ps->area); 165 if (ps->area)
166 vfree(ps->area);
167 ps->area = NULL; 167 ps->area = NULL;
168 vfree(ps->zero_area); 168
169 if (ps->zero_area)
170 vfree(ps->zero_area);
169 ps->zero_area = NULL; 171 ps->zero_area = NULL;
170} 172}
171 173
@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
189static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) 191static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
190{ 192{
191 struct dm_io_region where = { 193 struct dm_io_region where = {
192 .bdev = ps->snap->cow->bdev, 194 .bdev = ps->store->cow->bdev,
193 .sector = ps->snap->chunk_size * chunk, 195 .sector = ps->store->chunk_size * chunk,
194 .count = ps->snap->chunk_size, 196 .count = ps->store->chunk_size,
195 }; 197 };
196 struct dm_io_request io_req = { 198 struct dm_io_request io_req = {
197 .bi_rw = rw, 199 .bi_rw = rw,
@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
247 249
248static void zero_memory_area(struct pstore *ps) 250static void zero_memory_area(struct pstore *ps)
249{ 251{
250 memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 252 memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
251} 253}
252 254
253static int zero_disk_area(struct pstore *ps, chunk_t area) 255static int zero_disk_area(struct pstore *ps, chunk_t area)
254{ 256{
255 struct dm_io_region where = { 257 struct dm_io_region where = {
256 .bdev = ps->snap->cow->bdev, 258 .bdev = ps->store->cow->bdev,
257 .sector = ps->snap->chunk_size * area_location(ps, area), 259 .sector = ps->store->chunk_size * area_location(ps, area),
258 .count = ps->snap->chunk_size, 260 .count = ps->store->chunk_size,
259 }; 261 };
260 struct dm_io_request io_req = { 262 struct dm_io_request io_req = {
261 .bi_rw = WRITE, 263 .bi_rw = WRITE,
@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
278 /* 280 /*
279 * Use default chunk size (or hardsect_size, if larger) if none supplied 281 * Use default chunk size (or hardsect_size, if larger) if none supplied
280 */ 282 */
281 if (!ps->snap->chunk_size) { 283 if (!ps->store->chunk_size) {
282 ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, 284 ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
283 bdev_hardsect_size(ps->snap->cow->bdev) >> 9); 285 bdev_hardsect_size(ps->store->cow->bdev) >> 9);
284 ps->snap->chunk_mask = ps->snap->chunk_size - 1; 286 ps->store->chunk_mask = ps->store->chunk_size - 1;
285 ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; 287 ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
286 chunk_size_supplied = 0; 288 chunk_size_supplied = 0;
287 } 289 }
288 290
289 ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> 291 ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
290 chunk_size)); 292 chunk_size));
291 if (IS_ERR(ps->io_client)) 293 if (IS_ERR(ps->io_client))
292 return PTR_ERR(ps->io_client); 294 return PTR_ERR(ps->io_client);
@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
317 ps->version = le32_to_cpu(dh->version); 319 ps->version = le32_to_cpu(dh->version);
318 chunk_size = le32_to_cpu(dh->chunk_size); 320 chunk_size = le32_to_cpu(dh->chunk_size);
319 321
320 if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) 322 if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
321 return 0; 323 return 0;
322 324
323 DMWARN("chunk size %llu in device metadata overrides " 325 DMWARN("chunk size %llu in device metadata overrides "
324 "table chunk size of %llu.", 326 "table chunk size of %llu.",
325 (unsigned long long)chunk_size, 327 (unsigned long long)chunk_size,
326 (unsigned long long)ps->snap->chunk_size); 328 (unsigned long long)ps->store->chunk_size);
327 329
328 /* We had a bogus chunk_size. Fix stuff up. */ 330 /* We had a bogus chunk_size. Fix stuff up. */
329 free_area(ps); 331 free_area(ps);
330 332
331 ps->snap->chunk_size = chunk_size; 333 ps->store->chunk_size = chunk_size;
332 ps->snap->chunk_mask = chunk_size - 1; 334 ps->store->chunk_mask = chunk_size - 1;
333 ps->snap->chunk_shift = ffs(chunk_size) - 1; 335 ps->store->chunk_shift = ffs(chunk_size) - 1;
334 336
335 r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), 337 r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
336 ps->io_client); 338 ps->io_client);
337 if (r) 339 if (r)
338 return r; 340 return r;
@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
349{ 351{
350 struct disk_header *dh; 352 struct disk_header *dh;
351 353
352 memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 354 memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
353 355
354 dh = (struct disk_header *) ps->area; 356 dh = (struct disk_header *) ps->area;
355 dh->magic = cpu_to_le32(SNAP_MAGIC); 357 dh->magic = cpu_to_le32(SNAP_MAGIC);
356 dh->valid = cpu_to_le32(ps->valid); 358 dh->valid = cpu_to_le32(ps->valid);
357 dh->version = cpu_to_le32(ps->version); 359 dh->version = cpu_to_le32(ps->version);
358 dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); 360 dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
359 361
360 return chunk_io(ps, 0, WRITE, 1); 362 return chunk_io(ps, 0, WRITE, 1);
361} 363}
@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
474static void persistent_fraction_full(struct dm_exception_store *store, 476static void persistent_fraction_full(struct dm_exception_store *store,
475 sector_t *numerator, sector_t *denominator) 477 sector_t *numerator, sector_t *denominator)
476{ 478{
477 *numerator = get_info(store)->next_free * store->snap->chunk_size; 479 *numerator = get_info(store)->next_free * store->chunk_size;
478 *denominator = get_dev_size(store->snap->cow->bdev); 480 *denominator = get_dev_size(store->cow->bdev);
479} 481}
480 482
481static void persistent_destroy(struct dm_exception_store *store) 483static void persistent_dtr(struct dm_exception_store *store)
482{ 484{
483 struct pstore *ps = get_info(store); 485 struct pstore *ps = get_info(store);
484 486
485 destroy_workqueue(ps->metadata_wq); 487 destroy_workqueue(ps->metadata_wq);
486 dm_io_client_destroy(ps->io_client); 488
487 vfree(ps->callbacks); 489 /* Created in read_header */
490 if (ps->io_client)
491 dm_io_client_destroy(ps->io_client);
488 free_area(ps); 492 free_area(ps);
493
494 /* Allocated in persistent_read_metadata */
495 if (ps->callbacks)
496 vfree(ps->callbacks);
497
489 kfree(ps); 498 kfree(ps);
490} 499}
491 500
@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
507 /* 516 /*
508 * Now we know correct chunk_size, complete the initialisation. 517 * Now we know correct chunk_size, complete the initialisation.
509 */ 518 */
510 ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / 519 ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
511 sizeof(struct disk_exception); 520 sizeof(struct disk_exception);
512 ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 521 ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
513 sizeof(*ps->callbacks)); 522 sizeof(*ps->callbacks));
@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
564 struct pstore *ps = get_info(store); 573 struct pstore *ps = get_info(store);
565 uint32_t stride; 574 uint32_t stride;
566 chunk_t next_free; 575 chunk_t next_free;
567 sector_t size = get_dev_size(store->snap->cow->bdev); 576 sector_t size = get_dev_size(store->cow->bdev);
568 577
569 /* Is there enough room ? */ 578 /* Is there enough room ? */
570 if (size < ((ps->next_free + 1) * store->snap->chunk_size)) 579 if (size < ((ps->next_free + 1) * store->chunk_size))
571 return -ENOSPC; 580 return -ENOSPC;
572 581
573 e->new_chunk = ps->next_free; 582 e->new_chunk = ps->next_free;
@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
656 DMWARN("write header failed"); 665 DMWARN("write header failed");
657} 666}
658 667
659int dm_create_persistent(struct dm_exception_store *store) 668static int persistent_ctr(struct dm_exception_store *store,
669 unsigned argc, char **argv)
660{ 670{
661 struct pstore *ps; 671 struct pstore *ps;
662 672
663 /* allocate the pstore */ 673 /* allocate the pstore */
664 ps = kmalloc(sizeof(*ps), GFP_KERNEL); 674 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
665 if (!ps) 675 if (!ps)
666 return -ENOMEM; 676 return -ENOMEM;
667 677
668 ps->snap = store->snap; 678 ps->store = store;
669 ps->valid = 1; 679 ps->valid = 1;
670 ps->version = SNAPSHOT_DISK_VERSION; 680 ps->version = SNAPSHOT_DISK_VERSION;
671 ps->area = NULL; 681 ps->area = NULL;
@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
683 return -ENOMEM; 693 return -ENOMEM;
684 } 694 }
685 695
686 store->destroy = persistent_destroy;
687 store->read_metadata = persistent_read_metadata;
688 store->prepare_exception = persistent_prepare_exception;
689 store->commit_exception = persistent_commit_exception;
690 store->drop_snapshot = persistent_drop_snapshot;
691 store->fraction_full = persistent_fraction_full;
692 store->context = ps; 696 store->context = ps;
693 697
694 return 0; 698 return 0;
695} 699}
696 700
701static unsigned persistent_status(struct dm_exception_store *store,
702 status_type_t status, char *result,
703 unsigned maxlen)
704{
705 unsigned sz = 0;
706
707 switch (status) {
708 case STATUSTYPE_INFO:
709 break;
710 case STATUSTYPE_TABLE:
711 DMEMIT(" %s P %llu", store->cow->name,
712 (unsigned long long)store->chunk_size);
713 }
714
715 return sz;
716}
717
718static struct dm_exception_store_type _persistent_type = {
719 .name = "persistent",
720 .module = THIS_MODULE,
721 .ctr = persistent_ctr,
722 .dtr = persistent_dtr,
723 .read_metadata = persistent_read_metadata,
724 .prepare_exception = persistent_prepare_exception,
725 .commit_exception = persistent_commit_exception,
726 .drop_snapshot = persistent_drop_snapshot,
727 .fraction_full = persistent_fraction_full,
728 .status = persistent_status,
729};
730
731static struct dm_exception_store_type _persistent_compat_type = {
732 .name = "P",
733 .module = THIS_MODULE,
734 .ctr = persistent_ctr,
735 .dtr = persistent_dtr,
736 .read_metadata = persistent_read_metadata,
737 .prepare_exception = persistent_prepare_exception,
738 .commit_exception = persistent_commit_exception,
739 .drop_snapshot = persistent_drop_snapshot,
740 .fraction_full = persistent_fraction_full,
741 .status = persistent_status,
742};
743
697int dm_persistent_snapshot_init(void) 744int dm_persistent_snapshot_init(void)
698{ 745{
699 return 0; 746 int r;
747
748 r = dm_exception_store_type_register(&_persistent_type);
749 if (r) {
750 DMERR("Unable to register persistent exception store type");
751 return r;
752 }
753
754 r = dm_exception_store_type_register(&_persistent_compat_type);
755 if (r) {
756 DMERR("Unable to register old-style persistent exception "
757 "store type");
758 dm_exception_store_type_unregister(&_persistent_type);
759 return r;
760 }
761
762 return r;
700} 763}
701 764
702void dm_persistent_snapshot_exit(void) 765void dm_persistent_snapshot_exit(void)
703{ 766{
767 dm_exception_store_type_unregister(&_persistent_type);
768 dm_exception_store_type_unregister(&_persistent_compat_type);
704} 769}
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 7f6e2e6dcb0d..cde5aa558e6d 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include "dm-exception-store.h" 8#include "dm-exception-store.h"
9#include "dm-snap.h"
10 9
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/pagemap.h> 11#include <linux/pagemap.h>
@@ -23,7 +22,7 @@ struct transient_c {
23 sector_t next_free; 22 sector_t next_free;
24}; 23};
25 24
26static void transient_destroy(struct dm_exception_store *store) 25static void transient_dtr(struct dm_exception_store *store)
27{ 26{
28 kfree(store->context); 27 kfree(store->context);
29} 28}
@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
39static int transient_prepare_exception(struct dm_exception_store *store, 38static int transient_prepare_exception(struct dm_exception_store *store,
40 struct dm_snap_exception *e) 39 struct dm_snap_exception *e)
41{ 40{
42 struct transient_c *tc = (struct transient_c *) store->context; 41 struct transient_c *tc = store->context;
43 sector_t size = get_dev_size(store->snap->cow->bdev); 42 sector_t size = get_dev_size(store->cow->bdev);
44 43
45 if (size < (tc->next_free + store->snap->chunk_size)) 44 if (size < (tc->next_free + store->chunk_size))
46 return -1; 45 return -1;
47 46
48 e->new_chunk = sector_to_chunk(store->snap, tc->next_free); 47 e->new_chunk = sector_to_chunk(store, tc->next_free);
49 tc->next_free += store->snap->chunk_size; 48 tc->next_free += store->chunk_size;
50 49
51 return 0; 50 return 0;
52} 51}
@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
64 sector_t *numerator, sector_t *denominator) 63 sector_t *numerator, sector_t *denominator)
65{ 64{
66 *numerator = ((struct transient_c *) store->context)->next_free; 65 *numerator = ((struct transient_c *) store->context)->next_free;
67 *denominator = get_dev_size(store->snap->cow->bdev); 66 *denominator = get_dev_size(store->cow->bdev);
68} 67}
69 68
70int dm_create_transient(struct dm_exception_store *store) 69static int transient_ctr(struct dm_exception_store *store,
70 unsigned argc, char **argv)
71{ 71{
72 struct transient_c *tc; 72 struct transient_c *tc;
73 73
74 store->destroy = transient_destroy;
75 store->read_metadata = transient_read_metadata;
76 store->prepare_exception = transient_prepare_exception;
77 store->commit_exception = transient_commit_exception;
78 store->drop_snapshot = NULL;
79 store->fraction_full = transient_fraction_full;
80
81 tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); 74 tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
82 if (!tc) 75 if (!tc)
83 return -ENOMEM; 76 return -ENOMEM;
@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
88 return 0; 81 return 0;
89} 82}
90 83
84static unsigned transient_status(struct dm_exception_store *store,
85 status_type_t status, char *result,
86 unsigned maxlen)
87{
88 unsigned sz = 0;
89
90 switch (status) {
91 case STATUSTYPE_INFO:
92 break;
93 case STATUSTYPE_TABLE:
94 DMEMIT(" %s N %llu", store->cow->name,
95 (unsigned long long)store->chunk_size);
96 }
97
98 return sz;
99}
100
101static struct dm_exception_store_type _transient_type = {
102 .name = "transient",
103 .module = THIS_MODULE,
104 .ctr = transient_ctr,
105 .dtr = transient_dtr,
106 .read_metadata = transient_read_metadata,
107 .prepare_exception = transient_prepare_exception,
108 .commit_exception = transient_commit_exception,
109 .fraction_full = transient_fraction_full,
110 .status = transient_status,
111};
112
113static struct dm_exception_store_type _transient_compat_type = {
114 .name = "N",
115 .module = THIS_MODULE,
116 .ctr = transient_ctr,
117 .dtr = transient_dtr,
118 .read_metadata = transient_read_metadata,
119 .prepare_exception = transient_prepare_exception,
120 .commit_exception = transient_commit_exception,
121 .fraction_full = transient_fraction_full,
122 .status = transient_status,
123};
124
91int dm_transient_snapshot_init(void) 125int dm_transient_snapshot_init(void)
92{ 126{
93 return 0; 127 int r;
128
129 r = dm_exception_store_type_register(&_transient_type);
130 if (r) {
131 DMWARN("Unable to register transient exception store type");
132 return r;
133 }
134
135 r = dm_exception_store_type_register(&_transient_compat_type);
136 if (r) {
137 DMWARN("Unable to register old-style transient "
138 "exception store type");
139 dm_exception_store_type_unregister(&_transient_type);
140 return r;
141 }
142
143 return r;
94} 144}
95 145
96void dm_transient_snapshot_exit(void) 146void dm_transient_snapshot_exit(void)
97{ 147{
148 dm_exception_store_type_unregister(&_transient_type);
149 dm_exception_store_type_unregister(&_transient_compat_type);
98} 150}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 65ff82ff124e..981a0413068f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include <linux/ctype.h>
11#include <linux/device-mapper.h> 10#include <linux/device-mapper.h>
12#include <linux/delay.h> 11#include <linux/delay.h>
13#include <linux/fs.h> 12#include <linux/fs.h>
@@ -20,9 +19,9 @@
20#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
21#include <linux/log2.h> 20#include <linux/log2.h>
22#include <linux/dm-kcopyd.h> 21#include <linux/dm-kcopyd.h>
22#include <linux/workqueue.h>
23 23
24#include "dm-exception-store.h" 24#include "dm-exception-store.h"
25#include "dm-snap.h"
26#include "dm-bio-list.h" 25#include "dm-bio-list.h"
27 26
28#define DM_MSG_PREFIX "snapshots" 27#define DM_MSG_PREFIX "snapshots"
@@ -47,9 +46,76 @@
47 */ 46 */
48#define MIN_IOS 256 47#define MIN_IOS 256
49 48
49#define DM_TRACKED_CHUNK_HASH_SIZE 16
50#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
51 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
52
53struct exception_table {
54 uint32_t hash_mask;
55 unsigned hash_shift;
56 struct list_head *table;
57};
58
59struct dm_snapshot {
60 struct rw_semaphore lock;
61
62 struct dm_dev *origin;
63
64 /* List of snapshots per Origin */
65 struct list_head list;
66
67 /* You can't use a snapshot if this is 0 (e.g. if full) */
68 int valid;
69
70 /* Origin writes don't trigger exceptions until this is set */
71 int active;
72
73 mempool_t *pending_pool;
74
75 atomic_t pending_exceptions_count;
76
77 struct exception_table pending;
78 struct exception_table complete;
79
80 /*
81 * pe_lock protects all pending_exception operations and access
82 * as well as the snapshot_bios list.
83 */
84 spinlock_t pe_lock;
85
86 /* The on disk metadata handler */
87 struct dm_exception_store *store;
88
89 struct dm_kcopyd_client *kcopyd_client;
90
91 /* Queue of snapshot writes for ksnapd to flush */
92 struct bio_list queued_bios;
93 struct work_struct queued_bios_work;
94
95 /* Chunks with outstanding reads */
96 mempool_t *tracked_chunk_pool;
97 spinlock_t tracked_chunk_lock;
98 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
99};
100
50static struct workqueue_struct *ksnapd; 101static struct workqueue_struct *ksnapd;
51static void flush_queued_bios(struct work_struct *work); 102static void flush_queued_bios(struct work_struct *work);
52 103
104static sector_t chunk_to_sector(struct dm_exception_store *store,
105 chunk_t chunk)
106{
107 return chunk << store->chunk_shift;
108}
109
110static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
111{
112 /*
113 * There is only ever one instance of a particular block
114 * device so we can compare pointers safely.
115 */
116 return lhs == rhs;
117}
118
53struct dm_snap_pending_exception { 119struct dm_snap_pending_exception {
54 struct dm_snap_exception e; 120 struct dm_snap_exception e;
55 121
@@ -476,11 +542,11 @@ static int init_hash_tables(struct dm_snapshot *s)
476 * Calculate based on the size of the original volume or 542 * Calculate based on the size of the original volume or
477 * the COW volume... 543 * the COW volume...
478 */ 544 */
479 cow_dev_size = get_dev_size(s->cow->bdev); 545 cow_dev_size = get_dev_size(s->store->cow->bdev);
480 origin_dev_size = get_dev_size(s->origin->bdev); 546 origin_dev_size = get_dev_size(s->origin->bdev);
481 max_buckets = calc_max_buckets(); 547 max_buckets = calc_max_buckets();
482 548
483 hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift; 549 hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
484 hash_size = min(hash_size, max_buckets); 550 hash_size = min(hash_size, max_buckets);
485 551
486 hash_size = rounddown_pow_of_two(hash_size); 552 hash_size = rounddown_pow_of_two(hash_size);
@@ -505,58 +571,6 @@ static int init_hash_tables(struct dm_snapshot *s)
505} 571}
506 572
507/* 573/*
508 * Round a number up to the nearest 'size' boundary. size must
509 * be a power of 2.
510 */
511static ulong round_up(ulong n, ulong size)
512{
513 size--;
514 return (n + size) & ~size;
515}
516
517static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
518 char **error)
519{
520 unsigned long chunk_size;
521 char *value;
522
523 chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
524 if (*chunk_size_arg == '\0' || *value != '\0') {
525 *error = "Invalid chunk size";
526 return -EINVAL;
527 }
528
529 if (!chunk_size) {
530 s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
531 return 0;
532 }
533
534 /*
535 * Chunk size must be multiple of page size. Silently
536 * round up if it's not.
537 */
538 chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
539
540 /* Check chunk_size is a power of 2 */
541 if (!is_power_of_2(chunk_size)) {
542 *error = "Chunk size is not a power of 2";
543 return -EINVAL;
544 }
545
546 /* Validate the chunk size against the device block size */
547 if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
548 *error = "Chunk size is not a multiple of device blocksize";
549 return -EINVAL;
550 }
551
552 s->chunk_size = chunk_size;
553 s->chunk_mask = chunk_size - 1;
554 s->chunk_shift = ffs(chunk_size) - 1;
555
556 return 0;
557}
558
559/*
560 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 574 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
561 */ 575 */
562static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 576static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
@@ -564,91 +578,68 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
564 struct dm_snapshot *s; 578 struct dm_snapshot *s;
565 int i; 579 int i;
566 int r = -EINVAL; 580 int r = -EINVAL;
567 char persistent;
568 char *origin_path; 581 char *origin_path;
569 char *cow_path; 582 struct dm_exception_store *store;
583 unsigned args_used;
570 584
571 if (argc != 4) { 585 if (argc != 4) {
572 ti->error = "requires exactly 4 arguments"; 586 ti->error = "requires exactly 4 arguments";
573 r = -EINVAL; 587 r = -EINVAL;
574 goto bad1; 588 goto bad_args;
575 } 589 }
576 590
577 origin_path = argv[0]; 591 origin_path = argv[0];
578 cow_path = argv[1]; 592 argv++;
579 persistent = toupper(*argv[2]); 593 argc--;
580 594
581 if (persistent != 'P' && persistent != 'N') { 595 r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
582 ti->error = "Persistent flag is not P or N"; 596 if (r) {
597 ti->error = "Couldn't create exception store";
583 r = -EINVAL; 598 r = -EINVAL;
584 goto bad1; 599 goto bad_args;
585 } 600 }
586 601
602 argv += args_used;
603 argc -= args_used;
604
587 s = kmalloc(sizeof(*s), GFP_KERNEL); 605 s = kmalloc(sizeof(*s), GFP_KERNEL);
588 if (s == NULL) { 606 if (!s) {
589 ti->error = "Cannot allocate snapshot context private " 607 ti->error = "Cannot allocate snapshot context private "
590 "structure"; 608 "structure";
591 r = -ENOMEM; 609 r = -ENOMEM;
592 goto bad1; 610 goto bad_snap;
593 } 611 }
594 612
595 r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); 613 r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
596 if (r) { 614 if (r) {
597 ti->error = "Cannot get origin device"; 615 ti->error = "Cannot get origin device";
598 goto bad2; 616 goto bad_origin;
599 }
600
601 r = dm_get_device(ti, cow_path, 0, 0,
602 FMODE_READ | FMODE_WRITE, &s->cow);
603 if (r) {
604 dm_put_device(ti, s->origin);
605 ti->error = "Cannot get COW device";
606 goto bad2;
607 } 617 }
608 618
609 r = set_chunk_size(s, argv[3], &ti->error); 619 s->store = store;
610 if (r)
611 goto bad3;
612
613 s->type = persistent;
614
615 s->valid = 1; 620 s->valid = 1;
616 s->active = 0; 621 s->active = 0;
617 atomic_set(&s->pending_exceptions_count, 0); 622 atomic_set(&s->pending_exceptions_count, 0);
618 init_rwsem(&s->lock); 623 init_rwsem(&s->lock);
619 spin_lock_init(&s->pe_lock); 624 spin_lock_init(&s->pe_lock);
620 s->ti = ti;
621 625
622 /* Allocate hash table for COW data */ 626 /* Allocate hash table for COW data */
623 if (init_hash_tables(s)) { 627 if (init_hash_tables(s)) {
624 ti->error = "Unable to allocate hash table space"; 628 ti->error = "Unable to allocate hash table space";
625 r = -ENOMEM; 629 r = -ENOMEM;
626 goto bad3; 630 goto bad_hash_tables;
627 }
628
629 s->store.snap = s;
630
631 if (persistent == 'P')
632 r = dm_create_persistent(&s->store);
633 else
634 r = dm_create_transient(&s->store);
635
636 if (r) {
637 ti->error = "Couldn't create exception store";
638 r = -EINVAL;
639 goto bad4;
640 } 631 }
641 632
642 r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 633 r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
643 if (r) { 634 if (r) {
644 ti->error = "Could not create kcopyd client"; 635 ti->error = "Could not create kcopyd client";
645 goto bad5; 636 goto bad_kcopyd;
646 } 637 }
647 638
648 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 639 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
649 if (!s->pending_pool) { 640 if (!s->pending_pool) {
650 ti->error = "Could not allocate mempool for pending exceptions"; 641 ti->error = "Could not allocate mempool for pending exceptions";
651 goto bad6; 642 goto bad_pending_pool;
652 } 643 }
653 644
654 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 645 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
@@ -665,7 +656,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
665 spin_lock_init(&s->tracked_chunk_lock); 656 spin_lock_init(&s->tracked_chunk_lock);
666 657
667 /* Metadata must only be loaded into one table at once */ 658 /* Metadata must only be loaded into one table at once */
668 r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); 659 r = s->store->type->read_metadata(s->store, dm_add_exception,
660 (void *)s);
669 if (r < 0) { 661 if (r < 0) {
670 ti->error = "Failed to read snapshot metadata"; 662 ti->error = "Failed to read snapshot metadata";
671 goto bad_load_and_register; 663 goto bad_load_and_register;
@@ -686,34 +678,33 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
686 } 678 }
687 679
688 ti->private = s; 680 ti->private = s;
689 ti->split_io = s->chunk_size; 681 ti->split_io = s->store->chunk_size;
690 682
691 return 0; 683 return 0;
692 684
693 bad_load_and_register: 685bad_load_and_register:
694 mempool_destroy(s->tracked_chunk_pool); 686 mempool_destroy(s->tracked_chunk_pool);
695 687
696 bad_tracked_chunk_pool: 688bad_tracked_chunk_pool:
697 mempool_destroy(s->pending_pool); 689 mempool_destroy(s->pending_pool);
698 690
699 bad6: 691bad_pending_pool:
700 dm_kcopyd_client_destroy(s->kcopyd_client); 692 dm_kcopyd_client_destroy(s->kcopyd_client);
701 693
702 bad5: 694bad_kcopyd:
703 s->store.destroy(&s->store);
704
705 bad4:
706 exit_exception_table(&s->pending, pending_cache); 695 exit_exception_table(&s->pending, pending_cache);
707 exit_exception_table(&s->complete, exception_cache); 696 exit_exception_table(&s->complete, exception_cache);
708 697
709 bad3: 698bad_hash_tables:
710 dm_put_device(ti, s->cow);
711 dm_put_device(ti, s->origin); 699 dm_put_device(ti, s->origin);
712 700
713 bad2: 701bad_origin:
714 kfree(s); 702 kfree(s);
715 703
716 bad1: 704bad_snap:
705 dm_exception_store_destroy(store);
706
707bad_args:
717 return r; 708 return r;
718} 709}
719 710
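The constructor cleanup above renames the numeric error labels (bad1..bad5) to descriptive ones and unwinds resources in the reverse order they were acquired. A minimal userspace sketch of that goto-unwind pattern, with hypothetical resource names rather than the real snapshot fields:

#include <stdio.h>
#include <stdlib.h>

struct snapshot_like {
	void *store;
	void *origin;
	void *pool;
};

/* Acquire in order; on failure, fall through the labels in reverse order. */
static struct snapshot_like *snapshot_like_create(void)
{
	struct snapshot_like *s = malloc(sizeof(*s));

	if (!s)
		goto bad_snap;

	s->store = malloc(64);
	if (!s->store)
		goto bad_store;

	s->origin = malloc(64);
	if (!s->origin)
		goto bad_origin;

	s->pool = malloc(64);
	if (!s->pool)
		goto bad_pool;

	return s;

bad_pool:
	free(s->origin);
bad_origin:
	free(s->store);
bad_store:
	free(s);
bad_snap:
	return NULL;
}

int main(void)
{
	struct snapshot_like *s = snapshot_like_create();

	printf("create %s\n", s ? "succeeded" : "failed");
	if (s) {
		free(s->pool);
		free(s->origin);
		free(s->store);
		free(s);
	}
	return 0;
}
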
@@ -724,8 +715,6 @@ static void __free_exceptions(struct dm_snapshot *s)
724 715
725 exit_exception_table(&s->pending, pending_cache); 716 exit_exception_table(&s->pending, pending_cache);
726 exit_exception_table(&s->complete, exception_cache); 717 exit_exception_table(&s->complete, exception_cache);
727
728 s->store.destroy(&s->store);
729} 718}
730 719
731static void snapshot_dtr(struct dm_target *ti) 720static void snapshot_dtr(struct dm_target *ti)
@@ -761,7 +750,8 @@ static void snapshot_dtr(struct dm_target *ti)
761 mempool_destroy(s->pending_pool); 750 mempool_destroy(s->pending_pool);
762 751
763 dm_put_device(ti, s->origin); 752 dm_put_device(ti, s->origin);
764 dm_put_device(ti, s->cow); 753
754 dm_exception_store_destroy(s->store);
765 755
766 kfree(s); 756 kfree(s);
767} 757}
@@ -820,12 +810,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
820 else if (err == -ENOMEM) 810 else if (err == -ENOMEM)
821 DMERR("Invalidating snapshot: Unable to allocate exception."); 811 DMERR("Invalidating snapshot: Unable to allocate exception.");
822 812
823 if (s->store.drop_snapshot) 813 if (s->store->type->drop_snapshot)
824 s->store.drop_snapshot(&s->store); 814 s->store->type->drop_snapshot(s->store);
825 815
826 s->valid = 0; 816 s->valid = 0;
827 817
828 dm_table_event(s->ti->table); 818 dm_table_event(s->store->ti->table);
829} 819}
830 820
831static void get_pending_exception(struct dm_snap_pending_exception *pe) 821static void get_pending_exception(struct dm_snap_pending_exception *pe)
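Throughout these hunks the exception-store callbacks move from members of an embedded struct (s->store.drop_snapshot(&s->store)) to an ops table reached through a pointer (s->store->type->drop_snapshot(s->store)). A self-contained userspace sketch of that ops-table indirection, using made-up names, illustrates the shape of the change:

#include <stdio.h>

struct store;

/* The "type" carries the callbacks; each store instance points at one. */
struct store_type {
	const char *name;
	void (*drop_snapshot)(struct store *s);
};

struct store {
	const struct store_type *type;
	int valid;
};

static void persistent_drop(struct store *s)
{
	s->valid = 0;
	printf("%s: snapshot dropped\n", s->type->name);
}

static const struct store_type persistent_type = {
	.name = "persistent",
	.drop_snapshot = persistent_drop,
};

int main(void)
{
	struct store s = { .type = &persistent_type, .valid = 1 };

	/* Call through the type's ops table, as the snapshot code now does. */
	if (s.type->drop_snapshot)
		s.type->drop_snapshot(&s);
	return 0;
}
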
@@ -943,8 +933,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
943 933
944 else 934 else
945 /* Update the metadata if we are persistent */ 935 /* Update the metadata if we are persistent */
946 s->store.commit_exception(&s->store, &pe->e, commit_callback, 936 s->store->type->commit_exception(s->store, &pe->e,
947 pe); 937 commit_callback, pe);
948} 938}
949 939
950/* 940/*
@@ -960,11 +950,11 @@ static void start_copy(struct dm_snap_pending_exception *pe)
960 dev_size = get_dev_size(bdev); 950 dev_size = get_dev_size(bdev);
961 951
962 src.bdev = bdev; 952 src.bdev = bdev;
963 src.sector = chunk_to_sector(s, pe->e.old_chunk); 953 src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
964 src.count = min(s->chunk_size, dev_size - src.sector); 954 src.count = min(s->store->chunk_size, dev_size - src.sector);
965 955
966 dest.bdev = s->cow->bdev; 956 dest.bdev = s->store->cow->bdev;
967 dest.sector = chunk_to_sector(s, pe->e.new_chunk); 957 dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
968 dest.count = src.count; 958 dest.count = src.count;
969 959
970 /* Hand over to kcopyd */ 960 /* Hand over to kcopyd */
@@ -972,6 +962,17 @@ static void start_copy(struct dm_snap_pending_exception *pe)
972 &src, 1, &dest, 0, copy_callback, pe); 962 &src, 1, &dest, 0, copy_callback, pe);
973} 963}
974 964
965static struct dm_snap_pending_exception *
966__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
967{
968 struct dm_snap_exception *e = lookup_exception(&s->pending, chunk);
969
970 if (!e)
971 return NULL;
972
973 return container_of(e, struct dm_snap_pending_exception, e);
974}
975
975/* 976/*
976 * Looks to see if this snapshot already has a pending exception 977 * Looks to see if this snapshot already has a pending exception
977 * for this chunk, otherwise it allocates a new one and inserts 978 * for this chunk, otherwise it allocates a new one and inserts
@@ -981,40 +982,15 @@ static void start_copy(struct dm_snap_pending_exception *pe)
981 * this. 982 * this.
982 */ 983 */
983static struct dm_snap_pending_exception * 984static struct dm_snap_pending_exception *
984__find_pending_exception(struct dm_snapshot *s, struct bio *bio) 985__find_pending_exception(struct dm_snapshot *s,
986 struct dm_snap_pending_exception *pe, chunk_t chunk)
985{ 987{
986 struct dm_snap_exception *e; 988 struct dm_snap_pending_exception *pe2;
987 struct dm_snap_pending_exception *pe;
988 chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
989 989
990 /* 990 pe2 = __lookup_pending_exception(s, chunk);
991 * Is there a pending exception for this already ? 991 if (pe2) {
992 */
993 e = lookup_exception(&s->pending, chunk);
994 if (e) {
995 /* cast the exception to a pending exception */
996 pe = container_of(e, struct dm_snap_pending_exception, e);
997 goto out;
998 }
999
1000 /*
1001 * Create a new pending exception, we don't want
1002 * to hold the lock while we do this.
1003 */
1004 up_write(&s->lock);
1005 pe = alloc_pending_exception(s);
1006 down_write(&s->lock);
1007
1008 if (!s->valid) {
1009 free_pending_exception(pe);
1010 return NULL;
1011 }
1012
1013 e = lookup_exception(&s->pending, chunk);
1014 if (e) {
1015 free_pending_exception(pe); 992 free_pending_exception(pe);
1016 pe = container_of(e, struct dm_snap_pending_exception, e); 993 return pe2;
1017 goto out;
1018 } 994 }
1019 995
1020 pe->e.old_chunk = chunk; 996 pe->e.old_chunk = chunk;
@@ -1024,7 +1000,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
1024 atomic_set(&pe->ref_count, 0); 1000 atomic_set(&pe->ref_count, 0);
1025 pe->started = 0; 1001 pe->started = 0;
1026 1002
1027 if (s->store.prepare_exception(&s->store, &pe->e)) { 1003 if (s->store->type->prepare_exception(s->store, &pe->e)) {
1028 free_pending_exception(pe); 1004 free_pending_exception(pe);
1029 return NULL; 1005 return NULL;
1030 } 1006 }
@@ -1032,17 +1008,18 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
1032 get_pending_exception(pe); 1008 get_pending_exception(pe);
1033 insert_exception(&s->pending, &pe->e); 1009 insert_exception(&s->pending, &pe->e);
1034 1010
1035 out:
1036 return pe; 1011 return pe;
1037} 1012}
1038 1013
1039static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, 1014static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
1040 struct bio *bio, chunk_t chunk) 1015 struct bio *bio, chunk_t chunk)
1041{ 1016{
1042 bio->bi_bdev = s->cow->bdev; 1017 bio->bi_bdev = s->store->cow->bdev;
1043 bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) + 1018 bio->bi_sector = chunk_to_sector(s->store,
1044 (chunk - e->old_chunk)) + 1019 dm_chunk_number(e->new_chunk) +
1045 (bio->bi_sector & s->chunk_mask); 1020 (chunk - e->old_chunk)) +
1021 (bio->bi_sector &
1022 s->store->chunk_mask);
1046} 1023}
1047 1024
1048static int snapshot_map(struct dm_target *ti, struct bio *bio, 1025static int snapshot_map(struct dm_target *ti, struct bio *bio,
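The helper __lookup_pending_exception() added above returns the outer pending exception by way of container_of() on the generic exception embedded inside it. A standalone sketch of that embed-and-recover idiom, with the macro written out via offsetof() and hypothetical structure names:

#include <stdio.h>
#include <stddef.h>

/* Recover the enclosing structure from a pointer to one of its members. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct exception {
	unsigned long old_chunk;
};

struct pending_exception {
	struct exception e;	/* embedded generic part, kept in the table */
	int started;
};

int main(void)
{
	struct pending_exception pe = { .e = { .old_chunk = 42 }, .started = 1 };
	struct exception *e = &pe.e;	/* what a table lookup would hand back */
	struct pending_exception *back =
		container_of(e, struct pending_exception, e);

	printf("old_chunk=%lu started=%d\n", back->e.old_chunk, back->started);
	return 0;
}
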
@@ -1054,7 +1031,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1054 chunk_t chunk; 1031 chunk_t chunk;
1055 struct dm_snap_pending_exception *pe = NULL; 1032 struct dm_snap_pending_exception *pe = NULL;
1056 1033
1057 chunk = sector_to_chunk(s, bio->bi_sector); 1034 chunk = sector_to_chunk(s->store, bio->bi_sector);
1058 1035
1059 /* Full snapshots are not usable */ 1036 /* Full snapshots are not usable */
1060 /* To get here the table must be live so s->active is always set. */ 1037 /* To get here the table must be live so s->active is always set. */
@@ -1083,11 +1060,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1083 * writeable. 1060 * writeable.
1084 */ 1061 */
1085 if (bio_rw(bio) == WRITE) { 1062 if (bio_rw(bio) == WRITE) {
1086 pe = __find_pending_exception(s, bio); 1063 pe = __lookup_pending_exception(s, chunk);
1087 if (!pe) { 1064 if (!pe) {
1088 __invalidate_snapshot(s, -ENOMEM); 1065 up_write(&s->lock);
1089 r = -EIO; 1066 pe = alloc_pending_exception(s);
1090 goto out_unlock; 1067 down_write(&s->lock);
1068
1069 if (!s->valid) {
1070 free_pending_exception(pe);
1071 r = -EIO;
1072 goto out_unlock;
1073 }
1074
1075 e = lookup_exception(&s->complete, chunk);
1076 if (e) {
1077 free_pending_exception(pe);
1078 remap_exception(s, e, bio, chunk);
1079 goto out_unlock;
1080 }
1081
1082 pe = __find_pending_exception(s, pe, chunk);
1083 if (!pe) {
1084 __invalidate_snapshot(s, -ENOMEM);
1085 r = -EIO;
1086 goto out_unlock;
1087 }
1091 } 1088 }
1092 1089
1093 remap_exception(s, &pe->e, bio, chunk); 1090 remap_exception(s, &pe->e, bio, chunk);
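The rewritten write path in snapshot_map() makes the allocation dance explicit: drop the lock, allocate a pending exception, retake the lock, then recheck snapshot validity and the exception tables in case another writer raced in. A rough pthreads sketch of the same drop-allocate-recheck pattern; a plain mutex stands in for the snapshot rwsem and all names are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* s->lock */
static int valid = 1;						/* s->valid */
static void *pending;			/* stands in for the pending table */

static void *get_or_create(void)
{
	void *obj;

	pthread_mutex_lock(&lock);
	if (pending) {			/* fast path: already present */
		obj = pending;
		pthread_mutex_unlock(&lock);
		return obj;
	}
	pthread_mutex_unlock(&lock);	/* drop the lock to allocate */

	obj = malloc(32);

	pthread_mutex_lock(&lock);	/* retake it and revalidate */
	if (!valid) {			/* snapshot may have been invalidated */
		free(obj);
		obj = NULL;
	} else if (pending) {		/* another writer beat us to it */
		free(obj);
		obj = pending;
	} else {
		pending = obj;
	}
	pthread_mutex_unlock(&lock);
	return obj;
}

int main(void)
{
	printf("pending exception %p\n", get_or_create());
	free(pending);
	return 0;
}
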
@@ -1137,24 +1134,25 @@ static void snapshot_resume(struct dm_target *ti)
1137static int snapshot_status(struct dm_target *ti, status_type_t type, 1134static int snapshot_status(struct dm_target *ti, status_type_t type,
1138 char *result, unsigned int maxlen) 1135 char *result, unsigned int maxlen)
1139{ 1136{
1137 unsigned sz = 0;
1140 struct dm_snapshot *snap = ti->private; 1138 struct dm_snapshot *snap = ti->private;
1141 1139
1142 switch (type) { 1140 switch (type) {
1143 case STATUSTYPE_INFO: 1141 case STATUSTYPE_INFO:
1144 if (!snap->valid) 1142 if (!snap->valid)
1145 snprintf(result, maxlen, "Invalid"); 1143 DMEMIT("Invalid");
1146 else { 1144 else {
1147 if (snap->store.fraction_full) { 1145 if (snap->store->type->fraction_full) {
1148 sector_t numerator, denominator; 1146 sector_t numerator, denominator;
1149 snap->store.fraction_full(&snap->store, 1147 snap->store->type->fraction_full(snap->store,
1150 &numerator, 1148 &numerator,
1151 &denominator); 1149 &denominator);
1152 snprintf(result, maxlen, "%llu/%llu", 1150 DMEMIT("%llu/%llu",
1153 (unsigned long long)numerator, 1151 (unsigned long long)numerator,
1154 (unsigned long long)denominator); 1152 (unsigned long long)denominator);
1155 } 1153 }
1156 else 1154 else
1157 snprintf(result, maxlen, "Unknown"); 1155 DMEMIT("Unknown");
1158 } 1156 }
1159 break; 1157 break;
1160 1158
@@ -1164,10 +1162,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
1164 * to make private copies if the output is to 1162 * to make private copies if the output is to
1165 * make sense. 1163 * make sense.
1166 */ 1164 */
1167 snprintf(result, maxlen, "%s %s %c %llu", 1165 DMEMIT("%s", snap->origin->name);
1168 snap->origin->name, snap->cow->name, 1166 snap->store->type->status(snap->store, type, result + sz,
1169 snap->type, 1167 maxlen - sz);
1170 (unsigned long long)snap->chunk_size);
1171 break; 1168 break;
1172 } 1169 }
1173 1170
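The status hunks swap direct snprintf() calls for DMEMIT, which appends to the result buffer while tracking the running length in sz, so the exception store's own status output can continue where the target's left off. A userspace approximation of that bounded append-and-accumulate pattern (the EMIT macro below is a stand-in, not the kernel's DMEMIT):

#include <stdio.h>

/* Append formatted output at offset sz, never writing past maxlen. */
#define EMIT(buf, sz, maxlen, ...) \
	do { \
		if ((sz) < (maxlen)) \
			(sz) += snprintf((buf) + (sz), (maxlen) - (sz), \
					 __VA_ARGS__); \
	} while (0)

int main(void)
{
	char result[64];
	unsigned sz = 0, maxlen = sizeof(result);
	unsigned long long numerator = 3, denominator = 16;

	EMIT(result, sz, maxlen, "%llu/%llu", numerator, denominator);
	EMIT(result, sz, maxlen, " %s", "PO");	/* e.g. store-specific status */
	printf("status: \"%s\" (len %u)\n", result, sz);
	return 0;
}
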
@@ -1196,14 +1193,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
1196 goto next_snapshot; 1193 goto next_snapshot;
1197 1194
1198 /* Nothing to do if writing beyond end of snapshot */ 1195 /* Nothing to do if writing beyond end of snapshot */
1199 if (bio->bi_sector >= dm_table_get_size(snap->ti->table)) 1196 if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
1200 goto next_snapshot; 1197 goto next_snapshot;
1201 1198
1202 /* 1199 /*
1203 * Remember, different snapshots can have 1200 * Remember, different snapshots can have
1204 * different chunk sizes. 1201 * different chunk sizes.
1205 */ 1202 */
1206 chunk = sector_to_chunk(snap, bio->bi_sector); 1203 chunk = sector_to_chunk(snap->store, bio->bi_sector);
1207 1204
1208 /* 1205 /*
1209 * Check exception table to see if block 1206 * Check exception table to see if block
@@ -1217,10 +1214,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
1217 if (e) 1214 if (e)
1218 goto next_snapshot; 1215 goto next_snapshot;
1219 1216
1220 pe = __find_pending_exception(snap, bio); 1217 pe = __lookup_pending_exception(snap, chunk);
1221 if (!pe) { 1218 if (!pe) {
1222 __invalidate_snapshot(snap, -ENOMEM); 1219 up_write(&snap->lock);
1223 goto next_snapshot; 1220 pe = alloc_pending_exception(snap);
1221 down_write(&snap->lock);
1222
1223 if (!snap->valid) {
1224 free_pending_exception(pe);
1225 goto next_snapshot;
1226 }
1227
1228 e = lookup_exception(&snap->complete, chunk);
1229 if (e) {
1230 free_pending_exception(pe);
1231 goto next_snapshot;
1232 }
1233
1234 pe = __find_pending_exception(snap, pe, chunk);
1235 if (!pe) {
1236 __invalidate_snapshot(snap, -ENOMEM);
1237 goto next_snapshot;
1238 }
1224 } 1239 }
1225 1240
1226 if (!primary_pe) { 1241 if (!primary_pe) {
@@ -1360,7 +1375,8 @@ static void origin_resume(struct dm_target *ti)
1360 o = __lookup_origin(dev->bdev); 1375 o = __lookup_origin(dev->bdev);
1361 if (o) 1376 if (o)
1362 list_for_each_entry (snap, &o->snapshots, list) 1377 list_for_each_entry (snap, &o->snapshots, list)
1363 chunk_size = min_not_zero(chunk_size, snap->chunk_size); 1378 chunk_size = min_not_zero(chunk_size,
1379 snap->store->chunk_size);
1364 up_read(&_origins_lock); 1380 up_read(&_origins_lock);
1365 1381
1366 ti->split_io = chunk_size; 1382 ti->split_io = chunk_size;
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
deleted file mode 100644
index d9e62b43cf85..000000000000
--- a/drivers/md/dm-snap.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
3 *
4 * This file is released under the GPL.
5 */
6
7#ifndef DM_SNAPSHOT_H
8#define DM_SNAPSHOT_H
9
10#include <linux/device-mapper.h>
11#include "dm-exception-store.h"
12#include "dm-bio-list.h"
13#include <linux/blkdev.h>
14#include <linux/workqueue.h>
15
16struct exception_table {
17 uint32_t hash_mask;
18 unsigned hash_shift;
19 struct list_head *table;
20};
21
22#define DM_TRACKED_CHUNK_HASH_SIZE 16
23#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
24 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
25
26struct dm_snapshot {
27 struct rw_semaphore lock;
28 struct dm_target *ti;
29
30 struct dm_dev *origin;
31 struct dm_dev *cow;
32
33 /* List of snapshots per Origin */
34 struct list_head list;
35
36 /* Size of data blocks saved - must be a power of 2 */
37 chunk_t chunk_size;
38 chunk_t chunk_mask;
39 chunk_t chunk_shift;
40
41 /* You can't use a snapshot if this is 0 (e.g. if full) */
42 int valid;
43
44 /* Origin writes don't trigger exceptions until this is set */
45 int active;
46
47 /* Used for display of table */
48 char type;
49
50 mempool_t *pending_pool;
51
52 atomic_t pending_exceptions_count;
53
54 struct exception_table pending;
55 struct exception_table complete;
56
57 /*
58 * pe_lock protects all pending_exception operations and access
59 * as well as the snapshot_bios list.
60 */
61 spinlock_t pe_lock;
62
63 /* The on disk metadata handler */
64 struct dm_exception_store store;
65
66 struct dm_kcopyd_client *kcopyd_client;
67
68 /* Queue of snapshot writes for ksnapd to flush */
69 struct bio_list queued_bios;
70 struct work_struct queued_bios_work;
71
72 /* Chunks with outstanding reads */
73 mempool_t *tracked_chunk_pool;
74 spinlock_t tracked_chunk_lock;
75 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
76};
77
78/*
79 * Return the number of sectors in the device.
80 */
81static inline sector_t get_dev_size(struct block_device *bdev)
82{
83 return bdev->bd_inode->i_size >> SECTOR_SHIFT;
84}
85
86static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
87{
88 return (sector & ~s->chunk_mask) >> s->chunk_shift;
89}
90
91static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
92{
93 return chunk << s->chunk_shift;
94}
95
96static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
97{
98 /*
99 * There is only ever one instance of a particular block
100 * device so we can compare pointers safely.
101 */
102 return lhs == rhs;
103}
104
105#endif
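The deleted header defined the chunk helpers in terms of a power-of-two chunk size, its mask and its shift; after the merge those fields live in the exception store instead. A standalone check of the same sector/chunk conversions, with illustrative values:

#include <assert.h>
#include <stdio.h>

typedef unsigned long long sector_t;
typedef unsigned long long chunk_t;

struct chunking {
	sector_t chunk_size;	/* power of two, in sectors */
	sector_t chunk_mask;	/* chunk_size - 1 */
	unsigned chunk_shift;	/* log2(chunk_size) */
};

static chunk_t sector_to_chunk(const struct chunking *c, sector_t sector)
{
	return (sector & ~c->chunk_mask) >> c->chunk_shift;
}

static sector_t chunk_to_sector(const struct chunking *c, chunk_t chunk)
{
	return chunk << c->chunk_shift;
}

int main(void)
{
	struct chunking c = { .chunk_size = 16, .chunk_mask = 15, .chunk_shift = 4 };
	sector_t s = 1000;
	chunk_t ch = sector_to_chunk(&c, s);

	/* Converting back lands on the start of the chunk containing s. */
	assert(chunk_to_sector(&c, ch) <= s);
	assert(s - chunk_to_sector(&c, ch) < c.chunk_size);
	printf("sector %llu -> chunk %llu -> sector %llu\n",
	       s, ch, chunk_to_sector(&c, ch));
	return 0;
}
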
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2fd66c30f7f8..e8361b191b9b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
399} 399}
400 400
401/* 401/*
402 * This upgrades the mode on an already open dm_dev. Being 402 * This upgrades the mode on an already open dm_dev, being
403 * careful to leave things as they were if we fail to reopen the 403 * careful to leave things as they were if we fail to reopen the
404 * device. 404 * device and not to touch the existing bdev field in case
405 * it is accessed concurrently inside dm_table_any_congested().
405 */ 406 */
406static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, 407static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
407 struct mapped_device *md) 408 struct mapped_device *md)
408{ 409{
409 int r; 410 int r;
410 struct dm_dev_internal dd_copy; 411 struct dm_dev_internal dd_new, dd_old;
411 dev_t dev = dd->dm_dev.bdev->bd_dev;
412 412
413 dd_copy = *dd; 413 dd_new = dd_old = *dd;
414
415 dd_new.dm_dev.mode |= new_mode;
416 dd_new.dm_dev.bdev = NULL;
417
418 r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
419 if (r)
420 return r;
414 421
415 dd->dm_dev.mode |= new_mode; 422 dd->dm_dev.mode |= new_mode;
416 dd->dm_dev.bdev = NULL; 423 close_dev(&dd_old, md);
417 r = open_dev(dd, dev, md);
418 if (!r)
419 close_dev(&dd_copy, md);
420 else
421 *dd = dd_copy;
422 424
423 return r; 425 return 0;
424} 426}
425 427
426/* 428/*
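The upgrade_mode() rewrite opens the device with the widened mode into a scratch copy first and only publishes the new fields once that open succeeds, so dm_table_any_congested() never observes a stale or NULL bdev. A userspace sketch of that prepare-then-swap ordering, with a file descriptor standing in for the bdev:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

struct dev_handle {
	int fd;
	int mode;
};

static int upgrade_mode(struct dev_handle *dd, const char *path, int new_mode)
{
	struct dev_handle dd_new = *dd, dd_old = *dd;

	dd_new.mode |= new_mode;
	dd_new.fd = open(path, dd_new.mode);	/* open the replacement first */
	if (dd_new.fd < 0)
		return -1;			/* failure leaves *dd untouched */

	*dd = dd_new;				/* publish the new handle */
	close(dd_old.fd);			/* only then retire the old one */
	return 0;
}

int main(void)
{
	struct dev_handle dd = {
		.fd = open("/dev/null", O_RDONLY),
		.mode = O_RDONLY,
	};

	if (upgrade_mode(&dd, "/dev/null", O_RDWR) == 0)
		printf("upgraded, fd=%d mode=%#x\n", dd.fd, dd.mode);
	close(dd.fd);
	return 0;
}
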
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 7decf10006e4..04feccf2a997 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -14,45 +14,34 @@
14 14
15#define DM_MSG_PREFIX "target" 15#define DM_MSG_PREFIX "target"
16 16
17struct tt_internal {
18 struct target_type tt;
19
20 struct list_head list;
21 long use;
22};
23
24static LIST_HEAD(_targets); 17static LIST_HEAD(_targets);
25static DECLARE_RWSEM(_lock); 18static DECLARE_RWSEM(_lock);
26 19
27#define DM_MOD_NAME_SIZE 32 20#define DM_MOD_NAME_SIZE 32
28 21
29static inline struct tt_internal *__find_target_type(const char *name) 22static inline struct target_type *__find_target_type(const char *name)
30{ 23{
31 struct tt_internal *ti; 24 struct target_type *tt;
32 25
33 list_for_each_entry (ti, &_targets, list) 26 list_for_each_entry(tt, &_targets, list)
34 if (!strcmp(name, ti->tt.name)) 27 if (!strcmp(name, tt->name))
35 return ti; 28 return tt;
36 29
37 return NULL; 30 return NULL;
38} 31}
39 32
40static struct tt_internal *get_target_type(const char *name) 33static struct target_type *get_target_type(const char *name)
41{ 34{
42 struct tt_internal *ti; 35 struct target_type *tt;
43 36
44 down_read(&_lock); 37 down_read(&_lock);
45 38
46 ti = __find_target_type(name); 39 tt = __find_target_type(name);
47 if (ti) { 40 if (tt && !try_module_get(tt->module))
48 if ((ti->use == 0) && !try_module_get(ti->tt.module)) 41 tt = NULL;
49 ti = NULL;
50 else
51 ti->use++;
52 }
53 42
54 up_read(&_lock); 43 up_read(&_lock);
55 return ti; 44 return tt;
56} 45}
57 46
58static void load_module(const char *name) 47static void load_module(const char *name)
@@ -62,92 +51,59 @@ static void load_module(const char *name)
62 51
63struct target_type *dm_get_target_type(const char *name) 52struct target_type *dm_get_target_type(const char *name)
64{ 53{
65 struct tt_internal *ti = get_target_type(name); 54 struct target_type *tt = get_target_type(name);
66 55
67 if (!ti) { 56 if (!tt) {
68 load_module(name); 57 load_module(name);
69 ti = get_target_type(name); 58 tt = get_target_type(name);
70 } 59 }
71 60
72 return ti ? &ti->tt : NULL; 61 return tt;
73} 62}
74 63
75void dm_put_target_type(struct target_type *t) 64void dm_put_target_type(struct target_type *tt)
76{ 65{
77 struct tt_internal *ti = (struct tt_internal *) t;
78
79 down_read(&_lock); 66 down_read(&_lock);
80 if (--ti->use == 0) 67 module_put(tt->module);
81 module_put(ti->tt.module);
82
83 BUG_ON(ti->use < 0);
84 up_read(&_lock); 68 up_read(&_lock);
85
86 return;
87}
88
89static struct tt_internal *alloc_target(struct target_type *t)
90{
91 struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
92
93 if (ti)
94 ti->tt = *t;
95
96 return ti;
97} 69}
98 70
99
100int dm_target_iterate(void (*iter_func)(struct target_type *tt, 71int dm_target_iterate(void (*iter_func)(struct target_type *tt,
101 void *param), void *param) 72 void *param), void *param)
102{ 73{
103 struct tt_internal *ti; 74 struct target_type *tt;
104 75
105 down_read(&_lock); 76 down_read(&_lock);
106 list_for_each_entry (ti, &_targets, list) 77 list_for_each_entry(tt, &_targets, list)
107 iter_func(&ti->tt, param); 78 iter_func(tt, param);
108 up_read(&_lock); 79 up_read(&_lock);
109 80
110 return 0; 81 return 0;
111} 82}
112 83
113int dm_register_target(struct target_type *t) 84int dm_register_target(struct target_type *tt)
114{ 85{
115 int rv = 0; 86 int rv = 0;
116 struct tt_internal *ti = alloc_target(t);
117
118 if (!ti)
119 return -ENOMEM;
120 87
121 down_write(&_lock); 88 down_write(&_lock);
122 if (__find_target_type(t->name)) 89 if (__find_target_type(tt->name))
123 rv = -EEXIST; 90 rv = -EEXIST;
124 else 91 else
125 list_add(&ti->list, &_targets); 92 list_add(&tt->list, &_targets);
126 93
127 up_write(&_lock); 94 up_write(&_lock);
128 if (rv)
129 kfree(ti);
130 return rv; 95 return rv;
131} 96}
132 97
133void dm_unregister_target(struct target_type *t) 98void dm_unregister_target(struct target_type *tt)
134{ 99{
135 struct tt_internal *ti;
136
137 down_write(&_lock); 100 down_write(&_lock);
138 if (!(ti = __find_target_type(t->name))) { 101 if (!__find_target_type(tt->name)) {
139 DMCRIT("Unregistering unrecognised target: %s", t->name); 102 DMCRIT("Unregistering unrecognised target: %s", tt->name);
140 BUG();
141 }
142
143 if (ti->use) {
144 DMCRIT("Attempt to unregister target still in use: %s",
145 t->name);
146 BUG(); 103 BUG();
147 } 104 }
148 105
149 list_del(&ti->list); 106 list_del(&tt->list);
150 kfree(ti);
151 107
152 up_write(&_lock); 108 up_write(&_lock);
153} 109}
@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
156 * io-err: always fails an io, useful for bringing 112 * io-err: always fails an io, useful for bringing
157 * up LVs that have holes in them. 113 * up LVs that have holes in them.
158 */ 114 */
159static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args) 115static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
160{ 116{
161 return 0; 117 return 0;
162} 118}
163 119
164static void io_err_dtr(struct dm_target *ti) 120static void io_err_dtr(struct dm_target *tt)
165{ 121{
166 /* empty */ 122 /* empty */
167} 123}
168 124
169static int io_err_map(struct dm_target *ti, struct bio *bio, 125static int io_err_map(struct dm_target *tt, struct bio *bio,
170 union map_info *map_context) 126 union map_info *map_context)
171{ 127{
172 return -EIO; 128 return -EIO;
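The dm-target.c rewrite drops the tt_internal wrapper: the registration list_head now sits inside struct target_type itself, and the hand-rolled use counter gives way to the module refcount via try_module_get()/module_put(). A userspace sketch of registering types on an embedded link and looking them up by name (the list here is a minimal hand-rolled one, not the kernel's list.h):

#include <stdio.h>
#include <string.h>

struct target_type {
	const char *name;
	struct target_type *next;	/* embedded registration link */
};

static struct target_type *targets;

static int register_target(struct target_type *tt)
{
	struct target_type *t;

	for (t = targets; t; t = t->next)
		if (!strcmp(t->name, tt->name))
			return -1;	/* -EEXIST in the kernel */
	tt->next = targets;
	targets = tt;
	return 0;
}

static struct target_type *find_target(const char *name)
{
	struct target_type *t;

	for (t = targets; t; t = t->next)
		if (!strcmp(t->name, name))
			return t;
	return NULL;
}

int main(void)
{
	struct target_type snap = { .name = "snapshot" };
	struct target_type err = { .name = "error" };

	register_target(&snap);
	register_target(&err);
	printf("lookup snapshot: %s\n",
	       find_target("snapshot") ? "found" : "missing");
	printf("duplicate register: %d\n", register_target(&snap));
	return 0;
}
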
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8d40f27cce89..788ba96a6256 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
99/* 99/*
100 * Work processed by per-device workqueue. 100 * Work processed by per-device workqueue.
101 */ 101 */
102struct dm_wq_req {
103 enum {
104 DM_WQ_FLUSH_DEFERRED,
105 } type;
106 struct work_struct work;
107 struct mapped_device *md;
108 void *context;
109};
110
111struct mapped_device { 102struct mapped_device {
112 struct rw_semaphore io_lock; 103 struct rw_semaphore io_lock;
113 struct mutex suspend_lock; 104 struct mutex suspend_lock;
114 spinlock_t pushback_lock;
115 rwlock_t map_lock; 105 rwlock_t map_lock;
116 atomic_t holders; 106 atomic_t holders;
117 atomic_t open_count; 107 atomic_t open_count;
@@ -129,8 +119,9 @@ struct mapped_device {
129 */ 119 */
130 atomic_t pending; 120 atomic_t pending;
131 wait_queue_head_t wait; 121 wait_queue_head_t wait;
122 struct work_struct work;
132 struct bio_list deferred; 123 struct bio_list deferred;
133 struct bio_list pushback; 124 spinlock_t deferred_lock;
134 125
135 /* 126 /*
136 * Processing queue (flush/barriers) 127 * Processing queue (flush/barriers)
@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
453 return 1; 444 return 1;
454 } 445 }
455 446
447 spin_lock_irq(&md->deferred_lock);
456 bio_list_add(&md->deferred, bio); 448 bio_list_add(&md->deferred, bio);
449 spin_unlock_irq(&md->deferred_lock);
457 450
458 up_write(&md->io_lock); 451 up_write(&md->io_lock);
459 return 0; /* deferred successfully */ 452 return 0; /* deferred successfully */
@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
537 if (io->error == DM_ENDIO_REQUEUE) { 530 if (io->error == DM_ENDIO_REQUEUE) {
538 /* 531 /*
539 * Target requested pushing back the I/O. 532 * Target requested pushing back the I/O.
540 * This must be handled before the sleeper on
541 * suspend queue merges the pushback list.
542 */ 533 */
543 spin_lock_irqsave(&md->pushback_lock, flags); 534 spin_lock_irqsave(&md->deferred_lock, flags);
544 if (__noflush_suspending(md)) 535 if (__noflush_suspending(md))
545 bio_list_add(&md->pushback, io->bio); 536 bio_list_add(&md->deferred, io->bio);
546 else 537 else
547 /* noflush suspend was interrupted. */ 538 /* noflush suspend was interrupted. */
548 io->error = -EIO; 539 io->error = -EIO;
549 spin_unlock_irqrestore(&md->pushback_lock, flags); 540 spin_unlock_irqrestore(&md->deferred_lock, flags);
550 } 541 }
551 542
552 end_io_acct(io); 543 end_io_acct(io);
@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
834} 825}
835 826
836/* 827/*
837 * Split the bio into several clones. 828 * Split the bio into several clones and submit it to targets.
838 */ 829 */
839static int __split_bio(struct mapped_device *md, struct bio *bio) 830static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
840{ 831{
841 struct clone_info ci; 832 struct clone_info ci;
842 int error = 0; 833 int error = 0;
843 834
844 ci.map = dm_get_table(md); 835 ci.map = dm_get_table(md);
845 if (unlikely(!ci.map)) 836 if (unlikely(!ci.map)) {
846 return -EIO; 837 bio_io_error(bio);
838 return;
839 }
847 if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { 840 if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
848 dm_table_put(ci.map); 841 dm_table_put(ci.map);
849 bio_endio(bio, -EOPNOTSUPP); 842 bio_endio(bio, -EOPNOTSUPP);
850 return 0; 843 return;
851 } 844 }
852 ci.md = md; 845 ci.md = md;
853 ci.bio = bio; 846 ci.bio = bio;
@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
867 /* drop the extra reference count */ 860 /* drop the extra reference count */
868 dec_pending(ci.io, error); 861 dec_pending(ci.io, error);
869 dm_table_put(ci.map); 862 dm_table_put(ci.map);
870
871 return 0;
872} 863}
873/*----------------------------------------------------------------- 864/*-----------------------------------------------------------------
874 * CRUD END 865 * CRUD END
@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
959 down_read(&md->io_lock); 950 down_read(&md->io_lock);
960 } 951 }
961 952
962 r = __split_bio(md, bio); 953 __split_and_process_bio(md, bio);
963 up_read(&md->io_lock); 954 up_read(&md->io_lock);
955 return 0;
964 956
965out_req: 957out_req:
966 if (r < 0) 958 if (r < 0)
@@ -1074,6 +1066,8 @@ out:
1074 1066
1075static struct block_device_operations dm_blk_dops; 1067static struct block_device_operations dm_blk_dops;
1076 1068
1069static void dm_wq_work(struct work_struct *work);
1070
1077/* 1071/*
1078 * Allocate and initialise a blank device with a given minor. 1072 * Allocate and initialise a blank device with a given minor.
1079 */ 1073 */
@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
1101 1095
1102 init_rwsem(&md->io_lock); 1096 init_rwsem(&md->io_lock);
1103 mutex_init(&md->suspend_lock); 1097 mutex_init(&md->suspend_lock);
1104 spin_lock_init(&md->pushback_lock); 1098 spin_lock_init(&md->deferred_lock);
1105 rwlock_init(&md->map_lock); 1099 rwlock_init(&md->map_lock);
1106 atomic_set(&md->holders, 1); 1100 atomic_set(&md->holders, 1);
1107 atomic_set(&md->open_count, 0); 1101 atomic_set(&md->open_count, 0);
@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
1118 md->queue->backing_dev_info.congested_fn = dm_any_congested; 1112 md->queue->backing_dev_info.congested_fn = dm_any_congested;
1119 md->queue->backing_dev_info.congested_data = md; 1113 md->queue->backing_dev_info.congested_data = md;
1120 blk_queue_make_request(md->queue, dm_request); 1114 blk_queue_make_request(md->queue, dm_request);
1115 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
1121 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1116 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1122 md->queue->unplug_fn = dm_unplug_all; 1117 md->queue->unplug_fn = dm_unplug_all;
1123 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1118 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
1140 1135
1141 atomic_set(&md->pending, 0); 1136 atomic_set(&md->pending, 0);
1142 init_waitqueue_head(&md->wait); 1137 init_waitqueue_head(&md->wait);
1138 INIT_WORK(&md->work, dm_wq_work);
1143 init_waitqueue_head(&md->eventq); 1139 init_waitqueue_head(&md->eventq);
1144 1140
1145 md->disk->major = _major; 1141 md->disk->major = _major;
@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
1379} 1375}
1380EXPORT_SYMBOL_GPL(dm_put); 1376EXPORT_SYMBOL_GPL(dm_put);
1381 1377
1382static int dm_wait_for_completion(struct mapped_device *md) 1378static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
1383{ 1379{
1384 int r = 0; 1380 int r = 0;
1381 DECLARE_WAITQUEUE(wait, current);
1382
1383 dm_unplug_all(md->queue);
1384
1385 add_wait_queue(&md->wait, &wait);
1385 1386
1386 while (1) { 1387 while (1) {
1387 set_current_state(TASK_INTERRUPTIBLE); 1388 set_current_state(interruptible);
1388 1389
1389 smp_mb(); 1390 smp_mb();
1390 if (!atomic_read(&md->pending)) 1391 if (!atomic_read(&md->pending))
1391 break; 1392 break;
1392 1393
1393 if (signal_pending(current)) { 1394 if (interruptible == TASK_INTERRUPTIBLE &&
1395 signal_pending(current)) {
1394 r = -EINTR; 1396 r = -EINTR;
1395 break; 1397 break;
1396 } 1398 }
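dm_wait_for_completion() now owns its wait-queue entry and takes the task state as a parameter, so one helper serves both the interruptible suspend path and an uninterruptible wait. A rough userspace analogue with a condition variable: wait until the pending count drains, optionally bailing out when an interrupt flag is set (all names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int pending = 2;		/* in-flight ios still to complete */
static int interrupted;		/* stands in for signal_pending() */

static int wait_for_completion(int interruptible)
{
	int r = 0;

	pthread_mutex_lock(&lock);
	while (pending) {
		if (interruptible && interrupted) {
			r = -1;		/* -EINTR in the kernel */
			break;
		}
		pthread_cond_wait(&cond, &lock);
	}
	pthread_mutex_unlock(&lock);
	return r;
}

static void *io_done(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (pending) {
		pending--;		/* one in-flight io completes */
		pthread_cond_broadcast(&cond);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, io_done, NULL);
	printf("wait returned %d\n", wait_for_completion(1));
	pthread_join(t, NULL);
	return 0;
}
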
@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
1399 } 1401 }
1400 set_current_state(TASK_RUNNING); 1402 set_current_state(TASK_RUNNING);
1401 1403
1404 remove_wait_queue(&md->wait, &wait);
1405
1402 return r; 1406 return r;
1403} 1407}
1404 1408
1405/* 1409/*
1406 * Process the deferred bios 1410 * Process the deferred bios
1407 */ 1411 */
1408static void __flush_deferred_io(struct mapped_device *md) 1412static void dm_wq_work(struct work_struct *work)
1409{ 1413{
1414 struct mapped_device *md = container_of(work, struct mapped_device,
1415 work);
1410 struct bio *c; 1416 struct bio *c;
1411 1417
1412 while ((c = bio_list_pop(&md->deferred))) { 1418 down_write(&md->io_lock);
1413 if (__split_bio(md, c))
1414 bio_io_error(c);
1415 }
1416
1417 clear_bit(DMF_BLOCK_IO, &md->flags);
1418}
1419 1419
1420static void __merge_pushback_list(struct mapped_device *md) 1420next_bio:
1421{ 1421 spin_lock_irq(&md->deferred_lock);
1422 unsigned long flags; 1422 c = bio_list_pop(&md->deferred);
1423 spin_unlock_irq(&md->deferred_lock);
1423 1424
1424 spin_lock_irqsave(&md->pushback_lock, flags); 1425 if (c) {
1425 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 1426 __split_and_process_bio(md, c);
1426 bio_list_merge_head(&md->deferred, &md->pushback); 1427 goto next_bio;
1427 bio_list_init(&md->pushback); 1428 }
1428 spin_unlock_irqrestore(&md->pushback_lock, flags);
1429}
1430 1429
1431static void dm_wq_work(struct work_struct *work) 1430 clear_bit(DMF_BLOCK_IO, &md->flags);
1432{
1433 struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
1434 struct mapped_device *md = req->md;
1435 1431
1436 down_write(&md->io_lock);
1437 switch (req->type) {
1438 case DM_WQ_FLUSH_DEFERRED:
1439 __flush_deferred_io(md);
1440 break;
1441 default:
1442 DMERR("dm_wq_work: unrecognised work type %d", req->type);
1443 BUG();
1444 }
1445 up_write(&md->io_lock); 1432 up_write(&md->io_lock);
1446} 1433}
1447 1434
1448static void dm_wq_queue(struct mapped_device *md, int type, void *context, 1435static void dm_queue_flush(struct mapped_device *md)
1449 struct dm_wq_req *req)
1450{
1451 req->type = type;
1452 req->md = md;
1453 req->context = context;
1454 INIT_WORK(&req->work, dm_wq_work);
1455 queue_work(md->wq, &req->work);
1456}
1457
1458static void dm_queue_flush(struct mapped_device *md, int type, void *context)
1459{ 1436{
1460 struct dm_wq_req req; 1437 queue_work(md->wq, &md->work);
1461
1462 dm_wq_queue(md, type, context, &req);
1463 flush_workqueue(md->wq); 1438 flush_workqueue(md->wq);
1464} 1439}
1465 1440
@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
1534int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 1509int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1535{ 1510{
1536 struct dm_table *map = NULL; 1511 struct dm_table *map = NULL;
1537 DECLARE_WAITQUEUE(wait, current);
1538 int r = 0; 1512 int r = 0;
1539 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; 1513 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
1540 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; 1514 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1584 down_write(&md->io_lock); 1558 down_write(&md->io_lock);
1585 set_bit(DMF_BLOCK_IO, &md->flags); 1559 set_bit(DMF_BLOCK_IO, &md->flags);
1586 1560
1587 add_wait_queue(&md->wait, &wait);
1588 up_write(&md->io_lock); 1561 up_write(&md->io_lock);
1589 1562
1590 /* unplug */
1591 if (map)
1592 dm_table_unplug_all(map);
1593
1594 /* 1563 /*
1595 * Wait for the already-mapped ios to complete. 1564 * Wait for the already-mapped ios to complete.
1596 */ 1565 */
1597 r = dm_wait_for_completion(md); 1566 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
1598 1567
1599 down_write(&md->io_lock); 1568 down_write(&md->io_lock);
1600 remove_wait_queue(&md->wait, &wait);
1601 1569
1602 if (noflush) 1570 if (noflush)
1603 __merge_pushback_list(md); 1571 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
1604 up_write(&md->io_lock); 1572 up_write(&md->io_lock);
1605 1573
1606 /* were we interrupted ? */ 1574 /* were we interrupted ? */
1607 if (r < 0) { 1575 if (r < 0) {
1608 dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); 1576 dm_queue_flush(md);
1609 1577
1610 unlock_fs(md); 1578 unlock_fs(md);
1611 goto out; /* pushback list is already flushed, so skip flush */ 1579 goto out; /* pushback list is already flushed, so skip flush */
@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
1645 if (r) 1613 if (r)
1646 goto out; 1614 goto out;
1647 1615
1648 dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); 1616 dm_queue_flush(md);
1649 1617
1650 unlock_fs(md); 1618 unlock_fs(md);
1651 1619
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 20194e000c5a..b48397c0abbd 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
60int dm_target_init(void); 60int dm_target_init(void);
61void dm_target_exit(void); 61void dm_target_exit(void);
62struct target_type *dm_get_target_type(const char *name); 62struct target_type *dm_get_target_type(const char *name);
63void dm_put_target_type(struct target_type *t); 63void dm_put_target_type(struct target_type *tt);
64int dm_target_iterate(void (*iter_func)(struct target_type *tt, 64int dm_target_iterate(void (*iter_func)(struct target_type *tt,
65 void *param), void *param); 65 void *param), void *param);
66 66
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8209e08969f9..66ec05a57955 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -139,6 +139,9 @@ struct target_type {
139 dm_ioctl_fn ioctl; 139 dm_ioctl_fn ioctl;
140 dm_merge_fn merge; 140 dm_merge_fn merge;
141 dm_busy_fn busy; 141 dm_busy_fn busy;
142
143 /* For internal device-mapper use. */
144 struct list_head list;
142}; 145};
143 146
144struct io_restrictions { 147struct io_restrictions {
diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h
index 600c5fb2daad..5e8b11d88f6f 100644
--- a/include/linux/dm-dirty-log.h
+++ b/include/linux/dm-dirty-log.h
@@ -28,6 +28,9 @@ struct dm_dirty_log_type {
28 const char *name; 28 const char *name;
29 struct module *module; 29 struct module *module;
30 30
31 /* For internal device-mapper use */
32 struct list_head list;
33
31 int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, 34 int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
32 unsigned argc, char **argv); 35 unsigned argc, char **argv);
33 void (*dtr)(struct dm_dirty_log *log); 36 void (*dtr)(struct dm_dirty_log *log);
@@ -113,6 +116,16 @@ struct dm_dirty_log_type {
113 */ 116 */
114 int (*status)(struct dm_dirty_log *log, status_type_t status_type, 117 int (*status)(struct dm_dirty_log *log, status_type_t status_type,
115 char *result, unsigned maxlen); 118 char *result, unsigned maxlen);
119
120 /*
121 * is_remote_recovering is necessary for cluster mirroring. It provides
122 * a way to detect recovery on another node, so we aren't writing
123 * concurrently. This function is likely to block (when a cluster log
124 * is used).
125 *
126 * Returns: 0, 1
127 */
128 int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
116}; 129};
117 130
118int dm_dirty_log_type_register(struct dm_dirty_log_type *type); 131int dm_dirty_log_type_register(struct dm_dirty_log_type *type);
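The new is_remote_recovering() hook lets a mirror ask whether some other cluster node is recovering a region before writing to it; a single-node log has nothing remote to report. A reduced userspace mock of the callback and the guard a caller might place around a write (struct and function names are hypothetical, not the kernel's dm_dirty_log_type):

#include <stdio.h>

typedef unsigned long region_t;

struct dirty_log {
	/* Returns 1 while some other node is recovering the region. */
	int (*is_remote_recovering)(struct dirty_log *log, region_t region);
};

static int local_is_remote_recovering(struct dirty_log *log, region_t region)
{
	(void)log;
	(void)region;
	return 0;	/* single node: recovery is never remote */
}

static void write_region(struct dirty_log *log, region_t region)
{
	if (log->is_remote_recovering &&
	    log->is_remote_recovering(log, region)) {
		printf("region %lu busy: delaying write\n", region);
		return;
	}
	printf("region %lu: write allowed\n", region);
}

int main(void)
{
	struct dirty_log log = {
		.is_remote_recovering = local_is_remote_recovering,
	};

	write_region(&log, 5);
	return 0;
}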