diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/Kconfig | 11 | ||||
-rw-r--r-- | drivers/md/Makefile | 3 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-base.c | 696 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.c | 276 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.h | 18 |
5 files changed, 1004 insertions, 0 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 09f93fa68912..020f9573fd82 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -231,6 +231,17 @@ config DM_MIRROR | |||
231 | Allow volume managers to mirror logical volumes, also | 231 | Allow volume managers to mirror logical volumes, also |
232 | needed for live data migration tools such as 'pvmove'. | 232 | needed for live data migration tools such as 'pvmove'. |
233 | 233 | ||
234 | config DM_LOG_USERSPACE | ||
235 | tristate "Mirror userspace logging (EXPERIMENTAL)" | ||
236 | depends on DM_MIRROR && EXPERIMENTAL && NET | ||
237 | select CONNECTOR | ||
238 | ---help--- | ||
239 | The userspace logging module provides a mechanism for | ||
240 | relaying the dm-dirty-log API to userspace. Log designs | ||
241 | which are more suited to userspace implementation (e.g. | ||
242 | shared storage logs) or experimental logs can be implemented | ||
243 | by leveraging this framework. | ||
244 | |||
234 | config DM_ZERO | 245 | config DM_ZERO |
235 | tristate "Zero target" | 246 | tristate "Zero target" |
236 | depends on BLK_DEV_DM | 247 | depends on BLK_DEV_DM |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index dade52f60733..1dc4185bd781 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o | |||
8 | dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ | 8 | dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ |
9 | dm-snap-persistent.o | 9 | dm-snap-persistent.o |
10 | dm-mirror-y += dm-raid1.o | 10 | dm-mirror-y += dm-raid1.o |
11 | dm-log-userspace-y \ | ||
12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o | ||
11 | md-mod-y += md.o bitmap.o | 13 | md-mod-y += md.o bitmap.o |
12 | raid456-y += raid5.o | 14 | raid456-y += raid5.o |
13 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | 15 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ |
@@ -40,6 +42,7 @@ obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o | |||
40 | obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o | 42 | obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o |
41 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | 43 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
42 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o | 44 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o |
45 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o | ||
43 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 46 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
44 | 47 | ||
45 | quiet_cmd_unroll = UNROLL $@ | 48 | quiet_cmd_unroll = UNROLL $@ |
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c new file mode 100644 index 000000000000..e69b96560997 --- /dev/null +++ b/drivers/md/dm-log-userspace-base.c | |||
@@ -0,0 +1,696 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/bio.h> | ||
8 | #include <linux/dm-dirty-log.h> | ||
9 | #include <linux/device-mapper.h> | ||
10 | #include <linux/dm-log-userspace.h> | ||
11 | |||
12 | #include "dm-log-userspace-transfer.h" | ||
13 | |||
14 | struct flush_entry { | ||
15 | int type; | ||
16 | region_t region; | ||
17 | struct list_head list; | ||
18 | }; | ||
19 | |||
20 | struct log_c { | ||
21 | struct dm_target *ti; | ||
22 | uint32_t region_size; | ||
23 | region_t region_count; | ||
24 | char uuid[DM_UUID_LEN]; | ||
25 | |||
26 | char *usr_argv_str; | ||
27 | uint32_t usr_argc; | ||
28 | |||
29 | /* | ||
30 | * in_sync_hint gets set when doing is_remote_recovering. It | ||
31 | * represents the first region that needs recovery. IOW, the | ||
32 | * first zero bit of sync_bits. This can be useful for to limit | ||
33 | * traffic for calls like is_remote_recovering and get_resync_work, | ||
34 | * but be take care in its use for anything else. | ||
35 | */ | ||
36 | uint64_t in_sync_hint; | ||
37 | |||
38 | spinlock_t flush_lock; | ||
39 | struct list_head flush_list; /* only for clear and mark requests */ | ||
40 | }; | ||
41 | |||
42 | static mempool_t *flush_entry_pool; | ||
43 | |||
44 | static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) | ||
45 | { | ||
46 | return kmalloc(sizeof(struct flush_entry), gfp_mask); | ||
47 | } | ||
48 | |||
49 | static void flush_entry_free(void *element, void *pool_data) | ||
50 | { | ||
51 | kfree(element); | ||
52 | } | ||
53 | |||
54 | static int userspace_do_request(struct log_c *lc, const char *uuid, | ||
55 | int request_type, char *data, size_t data_size, | ||
56 | char *rdata, size_t *rdata_size) | ||
57 | { | ||
58 | int r; | ||
59 | |||
60 | /* | ||
61 | * If the server isn't there, -ESRCH is returned, | ||
62 | * and we must keep trying until the server is | ||
63 | * restored. | ||
64 | */ | ||
65 | retry: | ||
66 | r = dm_consult_userspace(uuid, request_type, data, | ||
67 | data_size, rdata, rdata_size); | ||
68 | |||
69 | if (r != -ESRCH) | ||
70 | return r; | ||
71 | |||
72 | DMERR(" Userspace log server not found."); | ||
73 | while (1) { | ||
74 | set_current_state(TASK_INTERRUPTIBLE); | ||
75 | schedule_timeout(2*HZ); | ||
76 | DMWARN("Attempting to contact userspace log server..."); | ||
77 | r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, | ||
78 | strlen(lc->usr_argv_str) + 1, | ||
79 | NULL, NULL); | ||
80 | if (!r) | ||
81 | break; | ||
82 | } | ||
83 | DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); | ||
84 | r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, | ||
85 | 0, NULL, NULL); | ||
86 | if (!r) | ||
87 | goto retry; | ||
88 | |||
89 | DMERR("Error trying to resume userspace log: %d", r); | ||
90 | |||
91 | return -ESRCH; | ||
92 | } | ||
93 | |||
94 | static int build_constructor_string(struct dm_target *ti, | ||
95 | unsigned argc, char **argv, | ||
96 | char **ctr_str) | ||
97 | { | ||
98 | int i, str_size; | ||
99 | char *str = NULL; | ||
100 | |||
101 | *ctr_str = NULL; | ||
102 | |||
103 | for (i = 0, str_size = 0; i < argc; i++) | ||
104 | str_size += strlen(argv[i]) + 1; /* +1 for space between args */ | ||
105 | |||
106 | str_size += 20; /* Max number of chars in a printed u64 number */ | ||
107 | |||
108 | str = kzalloc(str_size, GFP_KERNEL); | ||
109 | if (!str) { | ||
110 | DMWARN("Unable to allocate memory for constructor string"); | ||
111 | return -ENOMEM; | ||
112 | } | ||
113 | |||
114 | for (i = 0, str_size = 0; i < argc; i++) | ||
115 | str_size += sprintf(str + str_size, "%s ", argv[i]); | ||
116 | str_size += sprintf(str + str_size, "%llu", | ||
117 | (unsigned long long)ti->len); | ||
118 | |||
119 | *ctr_str = str; | ||
120 | return str_size; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * userspace_ctr | ||
125 | * | ||
126 | * argv contains: | ||
127 | * <UUID> <other args> | ||
128 | * Where 'other args' is the userspace implementation specific log | ||
129 | * arguments. An example might be: | ||
130 | * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] | ||
131 | * | ||
132 | * So, this module will strip off the <UUID> for identification purposes | ||
133 | * when communicating with userspace about a log; but will pass on everything | ||
134 | * else. | ||
135 | */ | ||
136 | static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, | ||
137 | unsigned argc, char **argv) | ||
138 | { | ||
139 | int r = 0; | ||
140 | int str_size; | ||
141 | char *ctr_str = NULL; | ||
142 | struct log_c *lc = NULL; | ||
143 | uint64_t rdata; | ||
144 | size_t rdata_size = sizeof(rdata); | ||
145 | |||
146 | if (argc < 3) { | ||
147 | DMWARN("Too few arguments to userspace dirty log"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | lc = kmalloc(sizeof(*lc), GFP_KERNEL); | ||
152 | if (!lc) { | ||
153 | DMWARN("Unable to allocate userspace log context."); | ||
154 | return -ENOMEM; | ||
155 | } | ||
156 | |||
157 | lc->ti = ti; | ||
158 | |||
159 | if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { | ||
160 | DMWARN("UUID argument too long."); | ||
161 | kfree(lc); | ||
162 | return -EINVAL; | ||
163 | } | ||
164 | |||
165 | strncpy(lc->uuid, argv[0], DM_UUID_LEN); | ||
166 | spin_lock_init(&lc->flush_lock); | ||
167 | INIT_LIST_HEAD(&lc->flush_list); | ||
168 | |||
169 | str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); | ||
170 | if (str_size < 0) { | ||
171 | kfree(lc); | ||
172 | return str_size; | ||
173 | } | ||
174 | |||
175 | /* Send table string */ | ||
176 | r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, | ||
177 | ctr_str, str_size, NULL, NULL); | ||
178 | |||
179 | if (r == -ESRCH) { | ||
180 | DMERR("Userspace log server not found"); | ||
181 | goto out; | ||
182 | } | ||
183 | |||
184 | /* Since the region size does not change, get it now */ | ||
185 | rdata_size = sizeof(rdata); | ||
186 | r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, | ||
187 | NULL, 0, (char *)&rdata, &rdata_size); | ||
188 | |||
189 | if (r) { | ||
190 | DMERR("Failed to get region size of dirty log"); | ||
191 | goto out; | ||
192 | } | ||
193 | |||
194 | lc->region_size = (uint32_t)rdata; | ||
195 | lc->region_count = dm_sector_div_up(ti->len, lc->region_size); | ||
196 | |||
197 | out: | ||
198 | if (r) { | ||
199 | kfree(lc); | ||
200 | kfree(ctr_str); | ||
201 | } else { | ||
202 | lc->usr_argv_str = ctr_str; | ||
203 | lc->usr_argc = argc; | ||
204 | log->context = lc; | ||
205 | } | ||
206 | |||
207 | return r; | ||
208 | } | ||
209 | |||
210 | static void userspace_dtr(struct dm_dirty_log *log) | ||
211 | { | ||
212 | int r; | ||
213 | struct log_c *lc = log->context; | ||
214 | |||
215 | r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, | ||
216 | NULL, 0, | ||
217 | NULL, NULL); | ||
218 | |||
219 | kfree(lc->usr_argv_str); | ||
220 | kfree(lc); | ||
221 | |||
222 | return; | ||
223 | } | ||
224 | |||
225 | static int userspace_presuspend(struct dm_dirty_log *log) | ||
226 | { | ||
227 | int r; | ||
228 | struct log_c *lc = log->context; | ||
229 | |||
230 | r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, | ||
231 | NULL, 0, | ||
232 | NULL, NULL); | ||
233 | |||
234 | return r; | ||
235 | } | ||
236 | |||
237 | static int userspace_postsuspend(struct dm_dirty_log *log) | ||
238 | { | ||
239 | int r; | ||
240 | struct log_c *lc = log->context; | ||
241 | |||
242 | r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, | ||
243 | NULL, 0, | ||
244 | NULL, NULL); | ||
245 | |||
246 | return r; | ||
247 | } | ||
248 | |||
249 | static int userspace_resume(struct dm_dirty_log *log) | ||
250 | { | ||
251 | int r; | ||
252 | struct log_c *lc = log->context; | ||
253 | |||
254 | lc->in_sync_hint = 0; | ||
255 | r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, | ||
256 | NULL, 0, | ||
257 | NULL, NULL); | ||
258 | |||
259 | return r; | ||
260 | } | ||
261 | |||
262 | static uint32_t userspace_get_region_size(struct dm_dirty_log *log) | ||
263 | { | ||
264 | struct log_c *lc = log->context; | ||
265 | |||
266 | return lc->region_size; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * userspace_is_clean | ||
271 | * | ||
272 | * Check whether a region is clean. If there is any sort of | ||
273 | * failure when consulting the server, we return not clean. | ||
274 | * | ||
275 | * Returns: 1 if clean, 0 otherwise | ||
276 | */ | ||
277 | static int userspace_is_clean(struct dm_dirty_log *log, region_t region) | ||
278 | { | ||
279 | int r; | ||
280 | uint64_t region64 = (uint64_t)region; | ||
281 | int64_t is_clean; | ||
282 | size_t rdata_size; | ||
283 | struct log_c *lc = log->context; | ||
284 | |||
285 | rdata_size = sizeof(is_clean); | ||
286 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, | ||
287 | (char *)®ion64, sizeof(region64), | ||
288 | (char *)&is_clean, &rdata_size); | ||
289 | |||
290 | return (r) ? 0 : (int)is_clean; | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * userspace_in_sync | ||
295 | * | ||
296 | * Check if the region is in-sync. If there is any sort | ||
297 | * of failure when consulting the server, we assume that | ||
298 | * the region is not in sync. | ||
299 | * | ||
300 | * If 'can_block' is set, return immediately | ||
301 | * | ||
302 | * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK | ||
303 | */ | ||
304 | static int userspace_in_sync(struct dm_dirty_log *log, region_t region, | ||
305 | int can_block) | ||
306 | { | ||
307 | int r; | ||
308 | uint64_t region64 = region; | ||
309 | int64_t in_sync; | ||
310 | size_t rdata_size; | ||
311 | struct log_c *lc = log->context; | ||
312 | |||
313 | /* | ||
314 | * We can never respond directly - even if in_sync_hint is | ||
315 | * set. This is because another machine could see a device | ||
316 | * failure and mark the region out-of-sync. If we don't go | ||
317 | * to userspace to ask, we might think the region is in-sync | ||
318 | * and allow a read to pick up data that is stale. (This is | ||
319 | * very unlikely if a device actually fails; but it is very | ||
320 | * likely if a connection to one device from one machine fails.) | ||
321 | * | ||
322 | * There still might be a problem if the mirror caches the region | ||
323 | * state as in-sync... but then this call would not be made. So, | ||
324 | * that is a mirror problem. | ||
325 | */ | ||
326 | if (!can_block) | ||
327 | return -EWOULDBLOCK; | ||
328 | |||
329 | rdata_size = sizeof(in_sync); | ||
330 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, | ||
331 | (char *)®ion64, sizeof(region64), | ||
332 | (char *)&in_sync, &rdata_size); | ||
333 | return (r) ? 0 : (int)in_sync; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * userspace_flush | ||
338 | * | ||
339 | * This function is ok to block. | ||
340 | * The flush happens in two stages. First, it sends all | ||
341 | * clear/mark requests that are on the list. Then it | ||
342 | * tells the server to commit them. This gives the | ||
343 | * server a chance to optimise the commit, instead of | ||
344 | * doing it for every request. | ||
345 | * | ||
346 | * Additionally, we could implement another thread that | ||
347 | * sends the requests up to the server - reducing the | ||
348 | * load on flush. Then the flush would have less in | ||
349 | * the list and be responsible for the finishing commit. | ||
350 | * | ||
351 | * Returns: 0 on success, < 0 on failure | ||
352 | */ | ||
353 | static int userspace_flush(struct dm_dirty_log *log) | ||
354 | { | ||
355 | int r = 0; | ||
356 | unsigned long flags; | ||
357 | struct log_c *lc = log->context; | ||
358 | LIST_HEAD(flush_list); | ||
359 | struct flush_entry *fe, *tmp_fe; | ||
360 | |||
361 | spin_lock_irqsave(&lc->flush_lock, flags); | ||
362 | list_splice_init(&lc->flush_list, &flush_list); | ||
363 | spin_unlock_irqrestore(&lc->flush_lock, flags); | ||
364 | |||
365 | if (list_empty(&flush_list)) | ||
366 | return 0; | ||
367 | |||
368 | /* | ||
369 | * FIXME: Count up requests, group request types, | ||
370 | * allocate memory to stick all requests in and | ||
371 | * send to server in one go. Failing the allocation, | ||
372 | * do it one by one. | ||
373 | */ | ||
374 | |||
375 | list_for_each_entry(fe, &flush_list, list) { | ||
376 | r = userspace_do_request(lc, lc->uuid, fe->type, | ||
377 | (char *)&fe->region, | ||
378 | sizeof(fe->region), | ||
379 | NULL, NULL); | ||
380 | if (r) | ||
381 | goto fail; | ||
382 | } | ||
383 | |||
384 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, | ||
385 | NULL, 0, NULL, NULL); | ||
386 | |||
387 | fail: | ||
388 | /* | ||
389 | * We can safely remove these entries, even if failure. | ||
390 | * Calling code will receive an error and will know that | ||
391 | * the log facility has failed. | ||
392 | */ | ||
393 | list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { | ||
394 | list_del(&fe->list); | ||
395 | mempool_free(fe, flush_entry_pool); | ||
396 | } | ||
397 | |||
398 | if (r) | ||
399 | dm_table_event(lc->ti->table); | ||
400 | |||
401 | return r; | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * userspace_mark_region | ||
406 | * | ||
407 | * This function should avoid blocking unless absolutely required. | ||
408 | * (Memory allocation is valid for blocking.) | ||
409 | */ | ||
410 | static void userspace_mark_region(struct dm_dirty_log *log, region_t region) | ||
411 | { | ||
412 | unsigned long flags; | ||
413 | struct log_c *lc = log->context; | ||
414 | struct flush_entry *fe; | ||
415 | |||
416 | /* Wait for an allocation, but _never_ fail */ | ||
417 | fe = mempool_alloc(flush_entry_pool, GFP_NOIO); | ||
418 | BUG_ON(!fe); | ||
419 | |||
420 | spin_lock_irqsave(&lc->flush_lock, flags); | ||
421 | fe->type = DM_ULOG_MARK_REGION; | ||
422 | fe->region = region; | ||
423 | list_add(&fe->list, &lc->flush_list); | ||
424 | spin_unlock_irqrestore(&lc->flush_lock, flags); | ||
425 | |||
426 | return; | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * userspace_clear_region | ||
431 | * | ||
432 | * This function must not block. | ||
433 | * So, the alloc can't block. In the worst case, it is ok to | ||
434 | * fail. It would simply mean we can't clear the region. | ||
435 | * Does nothing to current sync context, but does mean | ||
436 | * the region will be re-sync'ed on a reload of the mirror | ||
437 | * even though it is in-sync. | ||
438 | */ | ||
439 | static void userspace_clear_region(struct dm_dirty_log *log, region_t region) | ||
440 | { | ||
441 | unsigned long flags; | ||
442 | struct log_c *lc = log->context; | ||
443 | struct flush_entry *fe; | ||
444 | |||
445 | /* | ||
446 | * If we fail to allocate, we skip the clearing of | ||
447 | * the region. This doesn't hurt us in any way, except | ||
448 | * to cause the region to be resync'ed when the | ||
449 | * device is activated next time. | ||
450 | */ | ||
451 | fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); | ||
452 | if (!fe) { | ||
453 | DMERR("Failed to allocate memory to clear region."); | ||
454 | return; | ||
455 | } | ||
456 | |||
457 | spin_lock_irqsave(&lc->flush_lock, flags); | ||
458 | fe->type = DM_ULOG_CLEAR_REGION; | ||
459 | fe->region = region; | ||
460 | list_add(&fe->list, &lc->flush_list); | ||
461 | spin_unlock_irqrestore(&lc->flush_lock, flags); | ||
462 | |||
463 | return; | ||
464 | } | ||
465 | |||
466 | /* | ||
467 | * userspace_get_resync_work | ||
468 | * | ||
469 | * Get a region that needs recovery. It is valid to return | ||
470 | * an error for this function. | ||
471 | * | ||
472 | * Returns: 1 if region filled, 0 if no work, <0 on error | ||
473 | */ | ||
474 | static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) | ||
475 | { | ||
476 | int r; | ||
477 | size_t rdata_size; | ||
478 | struct log_c *lc = log->context; | ||
479 | struct { | ||
480 | int64_t i; /* 64-bit for mix arch compatibility */ | ||
481 | region_t r; | ||
482 | } pkg; | ||
483 | |||
484 | if (lc->in_sync_hint >= lc->region_count) | ||
485 | return 0; | ||
486 | |||
487 | rdata_size = sizeof(pkg); | ||
488 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, | ||
489 | NULL, 0, | ||
490 | (char *)&pkg, &rdata_size); | ||
491 | |||
492 | *region = pkg.r; | ||
493 | return (r) ? r : (int)pkg.i; | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * userspace_set_region_sync | ||
498 | * | ||
499 | * Set the sync status of a given region. This function | ||
500 | * must not fail. | ||
501 | */ | ||
502 | static void userspace_set_region_sync(struct dm_dirty_log *log, | ||
503 | region_t region, int in_sync) | ||
504 | { | ||
505 | int r; | ||
506 | struct log_c *lc = log->context; | ||
507 | struct { | ||
508 | region_t r; | ||
509 | int64_t i; | ||
510 | } pkg; | ||
511 | |||
512 | pkg.r = region; | ||
513 | pkg.i = (int64_t)in_sync; | ||
514 | |||
515 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, | ||
516 | (char *)&pkg, sizeof(pkg), | ||
517 | NULL, NULL); | ||
518 | |||
519 | /* | ||
520 | * It would be nice to be able to report failures. | ||
521 | * However, it is easy emough to detect and resolve. | ||
522 | */ | ||
523 | return; | ||
524 | } | ||
525 | |||
526 | /* | ||
527 | * userspace_get_sync_count | ||
528 | * | ||
529 | * If there is any sort of failure when consulting the server, | ||
530 | * we assume that the sync count is zero. | ||
531 | * | ||
532 | * Returns: sync count on success, 0 on failure | ||
533 | */ | ||
534 | static region_t userspace_get_sync_count(struct dm_dirty_log *log) | ||
535 | { | ||
536 | int r; | ||
537 | size_t rdata_size; | ||
538 | uint64_t sync_count; | ||
539 | struct log_c *lc = log->context; | ||
540 | |||
541 | rdata_size = sizeof(sync_count); | ||
542 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, | ||
543 | NULL, 0, | ||
544 | (char *)&sync_count, &rdata_size); | ||
545 | |||
546 | if (r) | ||
547 | return 0; | ||
548 | |||
549 | if (sync_count >= lc->region_count) | ||
550 | lc->in_sync_hint = lc->region_count; | ||
551 | |||
552 | return (region_t)sync_count; | ||
553 | } | ||
554 | |||
555 | /* | ||
556 | * userspace_status | ||
557 | * | ||
558 | * Returns: amount of space consumed | ||
559 | */ | ||
560 | static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, | ||
561 | char *result, unsigned maxlen) | ||
562 | { | ||
563 | int r = 0; | ||
564 | size_t sz = (size_t)maxlen; | ||
565 | struct log_c *lc = log->context; | ||
566 | |||
567 | switch (status_type) { | ||
568 | case STATUSTYPE_INFO: | ||
569 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, | ||
570 | NULL, 0, | ||
571 | result, &sz); | ||
572 | |||
573 | if (r) { | ||
574 | sz = 0; | ||
575 | DMEMIT("%s 1 COM_FAILURE", log->type->name); | ||
576 | } | ||
577 | break; | ||
578 | case STATUSTYPE_TABLE: | ||
579 | sz = 0; | ||
580 | DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1, | ||
581 | lc->uuid, lc->usr_argv_str); | ||
582 | break; | ||
583 | } | ||
584 | return (r) ? 0 : (int)sz; | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * userspace_is_remote_recovering | ||
589 | * | ||
590 | * Returns: 1 if region recovering, 0 otherwise | ||
591 | */ | ||
592 | static int userspace_is_remote_recovering(struct dm_dirty_log *log, | ||
593 | region_t region) | ||
594 | { | ||
595 | int r; | ||
596 | uint64_t region64 = region; | ||
597 | struct log_c *lc = log->context; | ||
598 | static unsigned long long limit; | ||
599 | struct { | ||
600 | int64_t is_recovering; | ||
601 | uint64_t in_sync_hint; | ||
602 | } pkg; | ||
603 | size_t rdata_size = sizeof(pkg); | ||
604 | |||
605 | /* | ||
606 | * Once the mirror has been reported to be in-sync, | ||
607 | * it will never again ask for recovery work. So, | ||
608 | * we can safely say there is not a remote machine | ||
609 | * recovering if the device is in-sync. (in_sync_hint | ||
610 | * must be reset at resume time.) | ||
611 | */ | ||
612 | if (region < lc->in_sync_hint) | ||
613 | return 0; | ||
614 | else if (jiffies < limit) | ||
615 | return 1; | ||
616 | |||
617 | limit = jiffies + (HZ / 4); | ||
618 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, | ||
619 | (char *)®ion64, sizeof(region64), | ||
620 | (char *)&pkg, &rdata_size); | ||
621 | if (r) | ||
622 | return 1; | ||
623 | |||
624 | lc->in_sync_hint = pkg.in_sync_hint; | ||
625 | |||
626 | return (int)pkg.is_recovering; | ||
627 | } | ||
628 | |||
629 | static struct dm_dirty_log_type _userspace_type = { | ||
630 | .name = "userspace", | ||
631 | .module = THIS_MODULE, | ||
632 | .ctr = userspace_ctr, | ||
633 | .dtr = userspace_dtr, | ||
634 | .presuspend = userspace_presuspend, | ||
635 | .postsuspend = userspace_postsuspend, | ||
636 | .resume = userspace_resume, | ||
637 | .get_region_size = userspace_get_region_size, | ||
638 | .is_clean = userspace_is_clean, | ||
639 | .in_sync = userspace_in_sync, | ||
640 | .flush = userspace_flush, | ||
641 | .mark_region = userspace_mark_region, | ||
642 | .clear_region = userspace_clear_region, | ||
643 | .get_resync_work = userspace_get_resync_work, | ||
644 | .set_region_sync = userspace_set_region_sync, | ||
645 | .get_sync_count = userspace_get_sync_count, | ||
646 | .status = userspace_status, | ||
647 | .is_remote_recovering = userspace_is_remote_recovering, | ||
648 | }; | ||
649 | |||
650 | static int __init userspace_dirty_log_init(void) | ||
651 | { | ||
652 | int r = 0; | ||
653 | |||
654 | flush_entry_pool = mempool_create(100, flush_entry_alloc, | ||
655 | flush_entry_free, NULL); | ||
656 | |||
657 | if (!flush_entry_pool) { | ||
658 | DMWARN("Unable to create flush_entry_pool: No memory."); | ||
659 | return -ENOMEM; | ||
660 | } | ||
661 | |||
662 | r = dm_ulog_tfr_init(); | ||
663 | if (r) { | ||
664 | DMWARN("Unable to initialize userspace log communications"); | ||
665 | mempool_destroy(flush_entry_pool); | ||
666 | return r; | ||
667 | } | ||
668 | |||
669 | r = dm_dirty_log_type_register(&_userspace_type); | ||
670 | if (r) { | ||
671 | DMWARN("Couldn't register userspace dirty log type"); | ||
672 | dm_ulog_tfr_exit(); | ||
673 | mempool_destroy(flush_entry_pool); | ||
674 | return r; | ||
675 | } | ||
676 | |||
677 | DMINFO("version 1.0.0 loaded"); | ||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | static void __exit userspace_dirty_log_exit(void) | ||
682 | { | ||
683 | dm_dirty_log_type_unregister(&_userspace_type); | ||
684 | dm_ulog_tfr_exit(); | ||
685 | mempool_destroy(flush_entry_pool); | ||
686 | |||
687 | DMINFO("version 1.0.0 unloaded"); | ||
688 | return; | ||
689 | } | ||
690 | |||
691 | module_init(userspace_dirty_log_init); | ||
692 | module_exit(userspace_dirty_log_exit); | ||
693 | |||
694 | MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); | ||
695 | MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); | ||
696 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c new file mode 100644 index 000000000000..0ca1ee768a1f --- /dev/null +++ b/drivers/md/dm-log-userspace-transfer.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <net/sock.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | #include <linux/connector.h> | ||
12 | #include <linux/device-mapper.h> | ||
13 | #include <linux/dm-log-userspace.h> | ||
14 | |||
15 | #include "dm-log-userspace-transfer.h" | ||
16 | |||
17 | static uint32_t dm_ulog_seq; | ||
18 | |||
19 | /* | ||
20 | * Netlink/Connector is an unreliable protocol. How long should | ||
21 | * we wait for a response before assuming it was lost and retrying? | ||
22 | * (If we do receive a response after this time, it will be discarded | ||
23 | * and the response to the resent request will be waited for. | ||
24 | */ | ||
25 | #define DM_ULOG_RETRY_TIMEOUT (15 * HZ) | ||
26 | |||
27 | /* | ||
28 | * Pre-allocated space for speed | ||
29 | */ | ||
30 | #define DM_ULOG_PREALLOCED_SIZE 512 | ||
31 | static struct cn_msg *prealloced_cn_msg; | ||
32 | static struct dm_ulog_request *prealloced_ulog_tfr; | ||
33 | |||
34 | static struct cb_id ulog_cn_id = { | ||
35 | .idx = CN_IDX_DM, | ||
36 | .val = CN_VAL_DM_USERSPACE_LOG | ||
37 | }; | ||
38 | |||
39 | static DEFINE_MUTEX(dm_ulog_lock); | ||
40 | |||
41 | struct receiving_pkg { | ||
42 | struct list_head list; | ||
43 | struct completion complete; | ||
44 | |||
45 | uint32_t seq; | ||
46 | |||
47 | int error; | ||
48 | size_t *data_size; | ||
49 | char *data; | ||
50 | }; | ||
51 | |||
52 | static DEFINE_SPINLOCK(receiving_list_lock); | ||
53 | static struct list_head receiving_list; | ||
54 | |||
55 | static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) | ||
56 | { | ||
57 | int r; | ||
58 | struct cn_msg *msg = prealloced_cn_msg; | ||
59 | |||
60 | memset(msg, 0, sizeof(struct cn_msg)); | ||
61 | |||
62 | msg->id.idx = ulog_cn_id.idx; | ||
63 | msg->id.val = ulog_cn_id.val; | ||
64 | msg->ack = 0; | ||
65 | msg->seq = tfr->seq; | ||
66 | msg->len = sizeof(struct dm_ulog_request) + tfr->data_size; | ||
67 | |||
68 | r = cn_netlink_send(msg, 0, gfp_any()); | ||
69 | |||
70 | return r; | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Parameters for this function can be either msg or tfr, but not | ||
75 | * both. This function fills in the reply for a waiting request. | ||
76 | * If just msg is given, then the reply is simply an ACK from userspace | ||
77 | * that the request was received. | ||
78 | * | ||
79 | * Returns: 0 on success, -ENOENT on failure | ||
80 | */ | ||
81 | static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) | ||
82 | { | ||
83 | uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; | ||
84 | struct receiving_pkg *pkg; | ||
85 | |||
86 | /* | ||
87 | * The 'receiving_pkg' entries in this list are statically | ||
88 | * allocated on the stack in 'dm_consult_userspace'. | ||
89 | * Each process that is waiting for a reply from the user | ||
90 | * space server will have an entry in this list. | ||
91 | * | ||
92 | * We are safe to do it this way because the stack space | ||
93 | * is unique to each process, but still addressable by | ||
94 | * other processes. | ||
95 | */ | ||
96 | list_for_each_entry(pkg, &receiving_list, list) { | ||
97 | if (rtn_seq != pkg->seq) | ||
98 | continue; | ||
99 | |||
100 | if (msg) { | ||
101 | pkg->error = -msg->ack; | ||
102 | /* | ||
103 | * If we are trying again, we will need to know our | ||
104 | * storage capacity. Otherwise, along with the | ||
105 | * error code, we make explicit that we have no data. | ||
106 | */ | ||
107 | if (pkg->error != -EAGAIN) | ||
108 | *(pkg->data_size) = 0; | ||
109 | } else if (tfr->data_size > *(pkg->data_size)) { | ||
110 | DMERR("Insufficient space to receive package [%u] " | ||
111 | "(%u vs %lu)", tfr->request_type, | ||
112 | tfr->data_size, *(pkg->data_size)); | ||
113 | |||
114 | *(pkg->data_size) = 0; | ||
115 | pkg->error = -ENOSPC; | ||
116 | } else { | ||
117 | pkg->error = tfr->error; | ||
118 | memcpy(pkg->data, tfr->data, tfr->data_size); | ||
119 | *(pkg->data_size) = tfr->data_size; | ||
120 | } | ||
121 | complete(&pkg->complete); | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | return -ENOENT; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * This is the connector callback that delivers data | ||
130 | * that was sent from userspace. | ||
131 | */ | ||
132 | static void cn_ulog_callback(void *data) | ||
133 | { | ||
134 | struct cn_msg *msg = (struct cn_msg *)data; | ||
135 | struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1); | ||
136 | |||
137 | spin_lock(&receiving_list_lock); | ||
138 | if (msg->len == 0) | ||
139 | fill_pkg(msg, NULL); | ||
140 | else if (msg->len < sizeof(*tfr)) | ||
141 | DMERR("Incomplete message received (expected %u, got %u): [%u]", | ||
142 | (unsigned)sizeof(*tfr), msg->len, msg->seq); | ||
143 | else | ||
144 | fill_pkg(NULL, tfr); | ||
145 | spin_unlock(&receiving_list_lock); | ||
146 | } | ||
147 | |||
148 | /** | ||
149 | * dm_consult_userspace | ||
150 | * @uuid: log's uuid (must be DM_UUID_LEN in size) | ||
151 | * @request_type: found in include/linux/dm-log-userspace.h | ||
152 | * @data: data to tx to the server | ||
153 | * @data_size: size of data in bytes | ||
154 | * @rdata: place to put return data from server | ||
155 | * @rdata_size: value-result (amount of space given/amount of space used) | ||
156 | * | ||
157 | * rdata_size is undefined on failure. | ||
158 | * | ||
159 | * Memory used to communicate with userspace is zero'ed | ||
160 | * before populating to ensure that no unwanted bits leak | ||
161 | * from kernel space to user-space. All userspace log communications | ||
162 | * between kernel and user space go through this function. | ||
163 | * | ||
164 | * Returns: 0 on success, -EXXX on failure | ||
165 | **/ | ||
166 | int dm_consult_userspace(const char *uuid, int request_type, | ||
167 | char *data, size_t data_size, | ||
168 | char *rdata, size_t *rdata_size) | ||
169 | { | ||
170 | int r = 0; | ||
171 | size_t dummy = 0; | ||
172 | int overhead_size = | ||
173 | sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg); | ||
174 | struct dm_ulog_request *tfr = prealloced_ulog_tfr; | ||
175 | struct receiving_pkg pkg; | ||
176 | |||
177 | if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) { | ||
178 | DMINFO("Size of tfr exceeds preallocated size"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | |||
182 | if (!rdata_size) | ||
183 | rdata_size = &dummy; | ||
184 | resend: | ||
185 | /* | ||
186 | * We serialize the sending of requests so we can | ||
187 | * use the preallocated space. | ||
188 | */ | ||
189 | mutex_lock(&dm_ulog_lock); | ||
190 | |||
191 | memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); | ||
192 | memcpy(tfr->uuid, uuid, DM_UUID_LEN); | ||
193 | tfr->seq = dm_ulog_seq++; | ||
194 | |||
195 | /* | ||
196 | * Must be valid request type (all other bits set to | ||
197 | * zero). This reserves other bits for possible future | ||
198 | * use. | ||
199 | */ | ||
200 | tfr->request_type = request_type & DM_ULOG_REQUEST_MASK; | ||
201 | |||
202 | tfr->data_size = data_size; | ||
203 | if (data && data_size) | ||
204 | memcpy(tfr->data, data, data_size); | ||
205 | |||
206 | memset(&pkg, 0, sizeof(pkg)); | ||
207 | init_completion(&pkg.complete); | ||
208 | pkg.seq = tfr->seq; | ||
209 | pkg.data_size = rdata_size; | ||
210 | pkg.data = rdata; | ||
211 | spin_lock(&receiving_list_lock); | ||
212 | list_add(&(pkg.list), &receiving_list); | ||
213 | spin_unlock(&receiving_list_lock); | ||
214 | |||
215 | r = dm_ulog_sendto_server(tfr); | ||
216 | |||
217 | mutex_unlock(&dm_ulog_lock); | ||
218 | |||
219 | if (r) { | ||
220 | DMERR("Unable to send log request [%u] to userspace: %d", | ||
221 | request_type, r); | ||
222 | spin_lock(&receiving_list_lock); | ||
223 | list_del_init(&(pkg.list)); | ||
224 | spin_unlock(&receiving_list_lock); | ||
225 | |||
226 | goto out; | ||
227 | } | ||
228 | |||
229 | r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT); | ||
230 | spin_lock(&receiving_list_lock); | ||
231 | list_del_init(&(pkg.list)); | ||
232 | spin_unlock(&receiving_list_lock); | ||
233 | if (!r) { | ||
234 | DMWARN("[%s] Request timed out: [%u/%u] - retrying", | ||
235 | (strlen(uuid) > 8) ? | ||
236 | (uuid + (strlen(uuid) - 8)) : (uuid), | ||
237 | request_type, pkg.seq); | ||
238 | goto resend; | ||
239 | } | ||
240 | |||
241 | r = pkg.error; | ||
242 | if (r == -EAGAIN) | ||
243 | goto resend; | ||
244 | |||
245 | out: | ||
246 | return r; | ||
247 | } | ||
248 | |||
249 | int dm_ulog_tfr_init(void) | ||
250 | { | ||
251 | int r; | ||
252 | void *prealloced; | ||
253 | |||
254 | INIT_LIST_HEAD(&receiving_list); | ||
255 | |||
256 | prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL); | ||
257 | if (!prealloced) | ||
258 | return -ENOMEM; | ||
259 | |||
260 | prealloced_cn_msg = prealloced; | ||
261 | prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg); | ||
262 | |||
263 | r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback); | ||
264 | if (r) { | ||
265 | cn_del_callback(&ulog_cn_id); | ||
266 | return r; | ||
267 | } | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | void dm_ulog_tfr_exit(void) | ||
273 | { | ||
274 | cn_del_callback(&ulog_cn_id); | ||
275 | kfree(prealloced_cn_msg); | ||
276 | } | ||
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h new file mode 100644 index 000000000000..c26d8e4e2710 --- /dev/null +++ b/drivers/md/dm-log-userspace-transfer.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef __DM_LOG_USERSPACE_TRANSFER_H__ | ||
8 | #define __DM_LOG_USERSPACE_TRANSFER_H__ | ||
9 | |||
10 | #define DM_MSG_PREFIX "dm-log-userspace" | ||
11 | |||
12 | int dm_ulog_tfr_init(void); | ||
13 | void dm_ulog_tfr_exit(void); | ||
14 | int dm_consult_userspace(const char *uuid, int request_type, | ||
15 | char *data, size_t data_size, | ||
16 | char *rdata, size_t *rdata_size); | ||
17 | |||
18 | #endif /* __DM_LOG_USERSPACE_TRANSFER_H__ */ | ||