diff options
-rw-r--r-- | Documentation/device-mapper/dm-log.txt | 54 | ||||
-rw-r--r-- | drivers/md/Kconfig | 11 | ||||
-rw-r--r-- | drivers/md/Makefile | 3 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-base.c | 696 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.c | 276 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.h | 18 | ||||
-rw-r--r-- | include/linux/Kbuild | 1 | ||||
-rw-r--r-- | include/linux/connector.h | 4 | ||||
-rw-r--r-- | include/linux/dm-log-userspace.h | 386 |
9 files changed, 1448 insertions, 1 deletions
diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt new file mode 100644 index 000000000000..994dd75475a6 --- /dev/null +++ b/Documentation/device-mapper/dm-log.txt | |||
@@ -0,0 +1,54 @@ | |||
1 | Device-Mapper Logging | ||
2 | ===================== | ||
3 | The device-mapper logging code is used by some of the device-mapper | ||
4 | RAID targets to track regions of the disk that are not consistent. | ||
5 | A region (or portion of the address space) of the disk may be | ||
6 | inconsistent because a RAID stripe is currently being operated on or | ||
7 | a machine died while the region was being altered. In the case of | ||
8 | mirrors, a region would be considered dirty/inconsistent while you | ||
9 | are writing to it because the writes need to be replicated for all | ||
10 | the legs of the mirror and may not reach the legs at the same time. | ||
11 | Once all writes are complete, the region is considered clean again. | ||
12 | |||
13 | There is a generic logging interface that the device-mapper RAID | ||
14 | implementations use to perform logging operations (see | ||
15 | dm_dirty_log_type in include/linux/dm-dirty-log.h). Various different | ||
16 | logging implementations are available and provide different | ||
17 | capabilities. The list includes: | ||
18 | |||
19 | Type Files | ||
20 | ==== ===== | ||
21 | disk drivers/md/dm-log.c | ||
22 | core drivers/md/dm-log.c | ||
23 | userspace drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h | ||
24 | |||
25 | The "disk" log type | ||
26 | ------------------- | ||
27 | This log implementation commits the log state to disk. This way, the | ||
28 | logging state survives reboots/crashes. | ||
29 | |||
30 | The "core" log type | ||
31 | ------------------- | ||
32 | This log implementation keeps the log state in memory. The log state | ||
33 | will not survive a reboot or crash, but there may be a small boost in | ||
34 | performance. This method can also be used if no storage device is | ||
35 | available for storing log state. | ||
36 | |||
37 | The "userspace" log type | ||
38 | ------------------------ | ||
39 | This log type simply provides a way to export the log API to userspace, | ||
40 | so log implementations can be done there. This is done by forwarding most | ||
41 | logging requests to userspace, where a daemon receives and processes the | ||
42 | request. | ||
43 | |||
44 | The structure used for communication between kernel and userspace are | ||
45 | located in include/linux/dm-log-userspace.h. Due to the frequency, | ||
46 | diversity, and 2-way communication nature of the exchanges between | ||
47 | kernel and userspace, 'connector' is used as the interface for | ||
48 | communication. | ||
49 | |||
50 | There are currently two userspace log implementations that leverage this | ||
51 | framework - "clustered_disk" and "clustered_core". These implementations | ||
52 | provide a cluster-coherent log for shared-storage. Device-mapper mirroring | ||
53 | can be used in a shared-storage environment when the cluster log implementations | ||
54 | are employed. | ||
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 09f93fa68912..020f9573fd82 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -231,6 +231,17 @@ config DM_MIRROR | |||
231 | Allow volume managers to mirror logical volumes, also | 231 | Allow volume managers to mirror logical volumes, also |
232 | needed for live data migration tools such as 'pvmove'. | 232 | needed for live data migration tools such as 'pvmove'. |
233 | 233 | ||
234 | config DM_LOG_USERSPACE | ||
235 | tristate "Mirror userspace logging (EXPERIMENTAL)" | ||
236 | depends on DM_MIRROR && EXPERIMENTAL && NET | ||
237 | select CONNECTOR | ||
238 | ---help--- | ||
239 | The userspace logging module provides a mechanism for | ||
240 | relaying the dm-dirty-log API to userspace. Log designs | ||
241 | which are more suited to userspace implementation (e.g. | ||
242 | shared storage logs) or experimental logs can be implemented | ||
243 | by leveraging this framework. | ||
244 | |||
234 | config DM_ZERO | 245 | config DM_ZERO |
235 | tristate "Zero target" | 246 | tristate "Zero target" |
236 | depends on BLK_DEV_DM | 247 | depends on BLK_DEV_DM |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index dade52f60733..1dc4185bd781 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o | |||
8 | dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ | 8 | dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ |
9 | dm-snap-persistent.o | 9 | dm-snap-persistent.o |
10 | dm-mirror-y += dm-raid1.o | 10 | dm-mirror-y += dm-raid1.o |
11 | dm-log-userspace-y \ | ||
12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o | ||
11 | md-mod-y += md.o bitmap.o | 13 | md-mod-y += md.o bitmap.o |
12 | raid456-y += raid5.o | 14 | raid456-y += raid5.o |
13 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | 15 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ |
@@ -40,6 +42,7 @@ obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o | |||
40 | obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o | 42 | obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o |
41 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | 43 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
42 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o | 44 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o |
45 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o | ||
43 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 46 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
44 | 47 | ||
45 | quiet_cmd_unroll = UNROLL $@ | 48 | quiet_cmd_unroll = UNROLL $@ |
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c new file mode 100644 index 000000000000..e69b96560997 --- /dev/null +++ b/drivers/md/dm-log-userspace-base.c | |||
@@ -0,0 +1,696 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/bio.h> | ||
8 | #include <linux/dm-dirty-log.h> | ||
9 | #include <linux/device-mapper.h> | ||
10 | #include <linux/dm-log-userspace.h> | ||
11 | |||
12 | #include "dm-log-userspace-transfer.h" | ||
13 | |||
14 | struct flush_entry { | ||
15 | int type; | ||
16 | region_t region; | ||
17 | struct list_head list; | ||
18 | }; | ||
19 | |||
20 | struct log_c { | ||
21 | struct dm_target *ti; | ||
22 | uint32_t region_size; | ||
23 | region_t region_count; | ||
24 | char uuid[DM_UUID_LEN]; | ||
25 | |||
26 | char *usr_argv_str; | ||
27 | uint32_t usr_argc; | ||
28 | |||
29 | /* | ||
30 | * in_sync_hint gets set when doing is_remote_recovering. It | ||
31 | * represents the first region that needs recovery. IOW, the | ||
32 | * first zero bit of sync_bits. This can be useful for to limit | ||
33 | * traffic for calls like is_remote_recovering and get_resync_work, | ||
34 | * but be take care in its use for anything else. | ||
35 | */ | ||
36 | uint64_t in_sync_hint; | ||
37 | |||
38 | spinlock_t flush_lock; | ||
39 | struct list_head flush_list; /* only for clear and mark requests */ | ||
40 | }; | ||
41 | |||
42 | static mempool_t *flush_entry_pool; | ||
43 | |||
44 | static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) | ||
45 | { | ||
46 | return kmalloc(sizeof(struct flush_entry), gfp_mask); | ||
47 | } | ||
48 | |||
49 | static void flush_entry_free(void *element, void *pool_data) | ||
50 | { | ||
51 | kfree(element); | ||
52 | } | ||
53 | |||
54 | static int userspace_do_request(struct log_c *lc, const char *uuid, | ||
55 | int request_type, char *data, size_t data_size, | ||
56 | char *rdata, size_t *rdata_size) | ||
57 | { | ||
58 | int r; | ||
59 | |||
60 | /* | ||
61 | * If the server isn't there, -ESRCH is returned, | ||
62 | * and we must keep trying until the server is | ||
63 | * restored. | ||
64 | */ | ||
65 | retry: | ||
66 | r = dm_consult_userspace(uuid, request_type, data, | ||
67 | data_size, rdata, rdata_size); | ||
68 | |||
69 | if (r != -ESRCH) | ||
70 | return r; | ||
71 | |||
72 | DMERR(" Userspace log server not found."); | ||
73 | while (1) { | ||
74 | set_current_state(TASK_INTERRUPTIBLE); | ||
75 | schedule_timeout(2*HZ); | ||
76 | DMWARN("Attempting to contact userspace log server..."); | ||
77 | r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, | ||
78 | strlen(lc->usr_argv_str) + 1, | ||
79 | NULL, NULL); | ||
80 | if (!r) | ||
81 | break; | ||
82 | } | ||
83 | DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); | ||
84 | r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, | ||
85 | 0, NULL, NULL); | ||
86 | if (!r) | ||
87 | goto retry; | ||
88 | |||
89 | DMERR("Error trying to resume userspace log: %d", r); | ||
90 | |||
91 | return -ESRCH; | ||
92 | } | ||
93 | |||
94 | static int build_constructor_string(struct dm_target *ti, | ||
95 | unsigned argc, char **argv, | ||
96 | char **ctr_str) | ||
97 | { | ||
98 | int i, str_size; | ||
99 | char *str = NULL; | ||
100 | |||
101 | *ctr_str = NULL; | ||
102 | |||
103 | for (i = 0, str_size = 0; i < argc; i++) | ||
104 | str_size += strlen(argv[i]) + 1; /* +1 for space between args */ | ||
105 | |||
106 | str_size += 20; /* Max number of chars in a printed u64 number */ | ||
107 | |||
108 | str = kzalloc(str_size, GFP_KERNEL); | ||
109 | if (!str) { | ||
110 | DMWARN("Unable to allocate memory for constructor string"); | ||
111 | return -ENOMEM; | ||
112 | } | ||
113 | |||
114 | for (i = 0, str_size = 0; i < argc; i++) | ||
115 | str_size += sprintf(str + str_size, "%s ", argv[i]); | ||
116 | str_size += sprintf(str + str_size, "%llu", | ||
117 | (unsigned long long)ti->len); | ||
118 | |||
119 | *ctr_str = str; | ||
120 | return str_size; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * userspace_ctr | ||
125 | * | ||
126 | * argv contains: | ||
127 | * <UUID> <other args> | ||
128 | * Where 'other args' is the userspace implementation specific log | ||
129 | * arguments. An example might be: | ||
130 | * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] | ||
131 | * | ||
132 | * So, this module will strip off the <UUID> for identification purposes | ||
133 | * when communicating with userspace about a log; but will pass on everything | ||
134 | * else. | ||
135 | */ | ||
136 | static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, | ||
137 | unsigned argc, char **argv) | ||
138 | { | ||
139 | int r = 0; | ||
140 | int str_size; | ||
141 | char *ctr_str = NULL; | ||
142 | struct log_c *lc = NULL; | ||
143 | uint64_t rdata; | ||
144 | size_t rdata_size = sizeof(rdata); | ||
145 | |||
146 | if (argc < 3) { | ||
147 | DMWARN("Too few arguments to userspace dirty log"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | lc = kmalloc(sizeof(*lc), GFP_KERNEL); | ||
152 | if (!lc) { | ||
153 | DMWARN("Unable to allocate userspace log context."); | ||
154 | return -ENOMEM; | ||
155 | } | ||
156 | |||
157 | lc->ti = ti; | ||
158 | |||
159 | if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { | ||
160 | DMWARN("UUID argument too long."); | ||
161 | kfree(lc); | ||
162 | return -EINVAL; | ||
163 | } | ||
164 | |||
165 | strncpy(lc->uuid, argv[0], DM_UUID_LEN); | ||
166 | spin_lock_init(&lc->flush_lock); | ||
167 | INIT_LIST_HEAD(&lc->flush_list); | ||
168 | |||
169 | str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); | ||
170 | if (str_size < 0) { | ||
171 | kfree(lc); | ||
172 | return str_size; | ||
173 | } | ||
174 | |||
175 | /* Send table string */ | ||
176 | r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, | ||
177 | ctr_str, str_size, NULL, NULL); | ||
178 | |||
179 | if (r == -ESRCH) { | ||
180 | DMERR("Userspace log server not found"); | ||
181 | goto out; | ||
182 | } | ||
183 | |||
184 | /* Since the region size does not change, get it now */ | ||
185 | rdata_size = sizeof(rdata); | ||
186 | r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, | ||
187 | NULL, 0, (char *)&rdata, &rdata_size); | ||
188 | |||
189 | if (r) { | ||
190 | DMERR("Failed to get region size of dirty log"); | ||
191 | goto out; | ||
192 | } | ||
193 | |||
194 | lc->region_size = (uint32_t)rdata; | ||
195 | lc->region_count = dm_sector_div_up(ti->len, lc->region_size); | ||
196 | |||
197 | out: | ||
198 | if (r) { | ||
199 | kfree(lc); | ||
200 | kfree(ctr_str); | ||
201 | } else { | ||
202 | lc->usr_argv_str = ctr_str; | ||
203 | lc->usr_argc = argc; | ||
204 | log->context = lc; | ||
205 | } | ||
206 | |||
207 | return r; | ||
208 | } | ||
209 | |||
210 | static void userspace_dtr(struct dm_dirty_log *log) | ||
211 | { | ||
212 | int r; | ||
213 | struct log_c *lc = log->context; | ||
214 | |||
215 | r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, | ||
216 | NULL, 0, | ||
217 | NULL, NULL); | ||
218 | |||
219 | kfree(lc->usr_argv_str); | ||
220 | kfree(lc); | ||
221 | |||
222 | return; | ||
223 | } | ||
224 | |||
225 | static int userspace_presuspend(struct dm_dirty_log *log) | ||
226 | { | ||
227 | int r; | ||
228 | struct log_c *lc = log->context; | ||
229 | |||
230 | r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, | ||
231 | NULL, 0, | ||
232 | NULL, NULL); | ||
233 | |||
234 | return r; | ||
235 | } | ||
236 | |||
237 | static int userspace_postsuspend(struct dm_dirty_log *log) | ||
238 | { | ||
239 | int r; | ||
240 | struct log_c *lc = log->context; | ||
241 | |||
242 | r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, | ||
243 | NULL, 0, | ||
244 | NULL, NULL); | ||
245 | |||
246 | return r; | ||
247 | } | ||
248 | |||
249 | static int userspace_resume(struct dm_dirty_log *log) | ||
250 | { | ||
251 | int r; | ||
252 | struct log_c *lc = log->context; | ||
253 | |||
254 | lc->in_sync_hint = 0; | ||
255 | r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, | ||
256 | NULL, 0, | ||
257 | NULL, NULL); | ||
258 | |||
259 | return r; | ||
260 | } | ||
261 | |||
262 | static uint32_t userspace_get_region_size(struct dm_dirty_log *log) | ||
263 | { | ||
264 | struct log_c *lc = log->context; | ||
265 | |||
266 | return lc->region_size; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * userspace_is_clean | ||
271 | * | ||
272 | * Check whether a region is clean. If there is any sort of | ||
273 | * failure when consulting the server, we return not clean. | ||
274 | * | ||
275 | * Returns: 1 if clean, 0 otherwise | ||
276 | */ | ||
277 | static int userspace_is_clean(struct dm_dirty_log *log, region_t region) | ||
278 | { | ||
279 | int r; | ||
280 | uint64_t region64 = (uint64_t)region; | ||
281 | int64_t is_clean; | ||
282 | size_t rdata_size; | ||
283 | struct log_c *lc = log->context; | ||
284 | |||
285 | rdata_size = sizeof(is_clean); | ||
286 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, | ||
287 | (char *)®ion64, sizeof(region64), | ||
288 | (char *)&is_clean, &rdata_size); | ||
289 | |||
290 | return (r) ? 0 : (int)is_clean; | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * userspace_in_sync | ||
295 | * | ||
296 | * Check if the region is in-sync. If there is any sort | ||
297 | * of failure when consulting the server, we assume that | ||
298 | * the region is not in sync. | ||
299 | * | ||
300 | * If 'can_block' is set, return immediately | ||
301 | * | ||
302 | * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK | ||
303 | */ | ||
304 | static int userspace_in_sync(struct dm_dirty_log *log, region_t region, | ||
305 | int can_block) | ||
306 | { | ||
307 | int r; | ||
308 | uint64_t region64 = region; | ||
309 | int64_t in_sync; | ||
310 | size_t rdata_size; | ||
311 | struct log_c *lc = log->context; | ||
312 | |||
313 | /* | ||
314 | * We can never respond directly - even if in_sync_hint is | ||
315 | * set. This is because another machine could see a device | ||
316 | * failure and mark the region out-of-sync. If we don't go | ||
317 | * to userspace to ask, we might think the region is in-sync | ||
318 | * and allow a read to pick up data that is stale. (This is | ||
319 | * very unlikely if a device actually fails; but it is very | ||
320 | * likely if a connection to one device from one machine fails.) | ||
321 | * | ||
322 | * There still might be a problem if the mirror caches the region | ||
323 | * state as in-sync... but then this call would not be made. So, | ||
324 | * that is a mirror problem. | ||
325 | */ | ||
326 | if (!can_block) | ||
327 | return -EWOULDBLOCK; | ||
328 | |||
329 | rdata_size = sizeof(in_sync); | ||
330 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, | ||
331 | (char *)®ion64, sizeof(region64), | ||
332 | (char *)&in_sync, &rdata_size); | ||
333 | return (r) ? 0 : (int)in_sync; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * userspace_flush | ||
338 | * | ||
339 | * This function is ok to block. | ||
340 | * The flush happens in two stages. First, it sends all | ||
341 | * clear/mark requests that are on the list. Then it | ||
342 | * tells the server to commit them. This gives the | ||
343 | * server a chance to optimise the commit, instead of | ||
344 | * doing it for every request. | ||
345 | * | ||
346 | * Additionally, we could implement another thread that | ||
347 | * sends the requests up to the server - reducing the | ||
348 | * load on flush. Then the flush would have less in | ||
349 | * the list and be responsible for the finishing commit. | ||
350 | * | ||
351 | * Returns: 0 on success, < 0 on failure | ||
352 | */ | ||
353 | static int userspace_flush(struct dm_dirty_log *log) | ||
354 | { | ||
355 | int r = 0; | ||
356 | unsigned long flags; | ||
357 | struct log_c *lc = log->context; | ||
358 | LIST_HEAD(flush_list); | ||
359 | struct flush_entry *fe, *tmp_fe; | ||
360 | |||
361 | spin_lock_irqsave(&lc->flush_lock, flags); | ||
362 | list_splice_init(&lc->flush_list, &flush_list); | ||
363 | spin_unlock_irqrestore(&lc->flush_lock, flags); | ||
364 | |||
365 | if (list_empty(&flush_list)) | ||
366 | return 0; | ||
367 | |||
368 | /* | ||
369 | * FIXME: Count up requests, group request types, | ||
370 | * allocate memory to stick all requests in and | ||
371 | * send to server in one go. Failing the allocation, | ||
372 | * do it one by one. | ||
373 | */ | ||
374 | |||
375 | list_for_each_entry(fe, &flush_list, list) { | ||
376 | r = userspace_do_request(lc, lc->uuid, fe->type, | ||
377 | (char *)&fe->region, | ||
378 | sizeof(fe->region), | ||
379 | NULL, NULL); | ||
380 | if (r) | ||
381 | goto fail; | ||
382 | } | ||
383 | |||
384 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, | ||
385 | NULL, 0, NULL, NULL); | ||
386 | |||
387 | fail: | ||
388 | /* | ||
389 | * We can safely remove these entries, even if failure. | ||
390 | * Calling code will receive an error and will know that | ||
391 | * the log facility has failed. | ||
392 | */ | ||
393 | list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { | ||
394 | list_del(&fe->list); | ||
395 | mempool_free(fe, flush_entry_pool); | ||
396 | } | ||
397 | |||
398 | if (r) | ||
399 | dm_table_event(lc->ti->table); | ||
400 | |||
401 | return r; | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * userspace_mark_region | ||
406 | * | ||
407 | * This function should avoid blocking unless absolutely required. | ||
408 | * (Memory allocation is valid for blocking.) | ||
409 | */ | ||
410 | static void userspace_mark_region(struct dm_dirty_log *log, region_t region) | ||
411 | { | ||
412 | unsigned long flags; | ||
413 | struct log_c *lc = log->context; | ||
414 | struct flush_entry *fe; | ||
415 | |||
416 | /* Wait for an allocation, but _never_ fail */ | ||
417 | fe = mempool_alloc(flush_entry_pool, GFP_NOIO); | ||
418 | BUG_ON(!fe); | ||
419 | |||
420 | spin_lock_irqsave(&lc->flush_lock, flags); | ||
421 | fe->type = DM_ULOG_MARK_REGION; | ||
422 | fe->region = region; | ||
423 | list_add(&fe->list, &lc->flush_list); | ||
424 | spin_unlock_irqrestore(&lc->flush_lock, flags); | ||
425 | |||
426 | return; | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * userspace_clear_region | ||
431 | * | ||
432 | * This function must not block. | ||
433 | * So, the alloc can't block. In the worst case, it is ok to | ||
434 | * fail. It would simply mean we can't clear the region. | ||
435 | * Does nothing to current sync context, but does mean | ||
436 | * the region will be re-sync'ed on a reload of the mirror | ||
437 | * even though it is in-sync. | ||
438 | */ | ||
439 | static void userspace_clear_region(struct dm_dirty_log *log, region_t region) | ||
440 | { | ||
441 | unsigned long flags; | ||
442 | struct log_c *lc = log->context; | ||
443 | struct flush_entry *fe; | ||
444 | |||
445 | /* | ||
446 | * If we fail to allocate, we skip the clearing of | ||
447 | * the region. This doesn't hurt us in any way, except | ||
448 | * to cause the region to be resync'ed when the | ||
449 | * device is activated next time. | ||
450 | */ | ||
451 | fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); | ||
452 | if (!fe) { | ||
453 | DMERR("Failed to allocate memory to clear region."); | ||
454 | return; | ||
455 | } | ||
456 | |||
457 | spin_lock_irqsave(&lc->flush_lock, flags); | ||
458 | fe->type = DM_ULOG_CLEAR_REGION; | ||
459 | fe->region = region; | ||
460 | list_add(&fe->list, &lc->flush_list); | ||
461 | spin_unlock_irqrestore(&lc->flush_lock, flags); | ||
462 | |||
463 | return; | ||
464 | } | ||
465 | |||
466 | /* | ||
467 | * userspace_get_resync_work | ||
468 | * | ||
469 | * Get a region that needs recovery. It is valid to return | ||
470 | * an error for this function. | ||
471 | * | ||
472 | * Returns: 1 if region filled, 0 if no work, <0 on error | ||
473 | */ | ||
474 | static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) | ||
475 | { | ||
476 | int r; | ||
477 | size_t rdata_size; | ||
478 | struct log_c *lc = log->context; | ||
479 | struct { | ||
480 | int64_t i; /* 64-bit for mix arch compatibility */ | ||
481 | region_t r; | ||
482 | } pkg; | ||
483 | |||
484 | if (lc->in_sync_hint >= lc->region_count) | ||
485 | return 0; | ||
486 | |||
487 | rdata_size = sizeof(pkg); | ||
488 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, | ||
489 | NULL, 0, | ||
490 | (char *)&pkg, &rdata_size); | ||
491 | |||
492 | *region = pkg.r; | ||
493 | return (r) ? r : (int)pkg.i; | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * userspace_set_region_sync | ||
498 | * | ||
499 | * Set the sync status of a given region. This function | ||
500 | * must not fail. | ||
501 | */ | ||
502 | static void userspace_set_region_sync(struct dm_dirty_log *log, | ||
503 | region_t region, int in_sync) | ||
504 | { | ||
505 | int r; | ||
506 | struct log_c *lc = log->context; | ||
507 | struct { | ||
508 | region_t r; | ||
509 | int64_t i; | ||
510 | } pkg; | ||
511 | |||
512 | pkg.r = region; | ||
513 | pkg.i = (int64_t)in_sync; | ||
514 | |||
515 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, | ||
516 | (char *)&pkg, sizeof(pkg), | ||
517 | NULL, NULL); | ||
518 | |||
519 | /* | ||
520 | * It would be nice to be able to report failures. | ||
521 | * However, it is easy emough to detect and resolve. | ||
522 | */ | ||
523 | return; | ||
524 | } | ||
525 | |||
526 | /* | ||
527 | * userspace_get_sync_count | ||
528 | * | ||
529 | * If there is any sort of failure when consulting the server, | ||
530 | * we assume that the sync count is zero. | ||
531 | * | ||
532 | * Returns: sync count on success, 0 on failure | ||
533 | */ | ||
534 | static region_t userspace_get_sync_count(struct dm_dirty_log *log) | ||
535 | { | ||
536 | int r; | ||
537 | size_t rdata_size; | ||
538 | uint64_t sync_count; | ||
539 | struct log_c *lc = log->context; | ||
540 | |||
541 | rdata_size = sizeof(sync_count); | ||
542 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, | ||
543 | NULL, 0, | ||
544 | (char *)&sync_count, &rdata_size); | ||
545 | |||
546 | if (r) | ||
547 | return 0; | ||
548 | |||
549 | if (sync_count >= lc->region_count) | ||
550 | lc->in_sync_hint = lc->region_count; | ||
551 | |||
552 | return (region_t)sync_count; | ||
553 | } | ||
554 | |||
555 | /* | ||
556 | * userspace_status | ||
557 | * | ||
558 | * Returns: amount of space consumed | ||
559 | */ | ||
560 | static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, | ||
561 | char *result, unsigned maxlen) | ||
562 | { | ||
563 | int r = 0; | ||
564 | size_t sz = (size_t)maxlen; | ||
565 | struct log_c *lc = log->context; | ||
566 | |||
567 | switch (status_type) { | ||
568 | case STATUSTYPE_INFO: | ||
569 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, | ||
570 | NULL, 0, | ||
571 | result, &sz); | ||
572 | |||
573 | if (r) { | ||
574 | sz = 0; | ||
575 | DMEMIT("%s 1 COM_FAILURE", log->type->name); | ||
576 | } | ||
577 | break; | ||
578 | case STATUSTYPE_TABLE: | ||
579 | sz = 0; | ||
580 | DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1, | ||
581 | lc->uuid, lc->usr_argv_str); | ||
582 | break; | ||
583 | } | ||
584 | return (r) ? 0 : (int)sz; | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * userspace_is_remote_recovering | ||
589 | * | ||
590 | * Returns: 1 if region recovering, 0 otherwise | ||
591 | */ | ||
592 | static int userspace_is_remote_recovering(struct dm_dirty_log *log, | ||
593 | region_t region) | ||
594 | { | ||
595 | int r; | ||
596 | uint64_t region64 = region; | ||
597 | struct log_c *lc = log->context; | ||
598 | static unsigned long long limit; | ||
599 | struct { | ||
600 | int64_t is_recovering; | ||
601 | uint64_t in_sync_hint; | ||
602 | } pkg; | ||
603 | size_t rdata_size = sizeof(pkg); | ||
604 | |||
605 | /* | ||
606 | * Once the mirror has been reported to be in-sync, | ||
607 | * it will never again ask for recovery work. So, | ||
608 | * we can safely say there is not a remote machine | ||
609 | * recovering if the device is in-sync. (in_sync_hint | ||
610 | * must be reset at resume time.) | ||
611 | */ | ||
612 | if (region < lc->in_sync_hint) | ||
613 | return 0; | ||
614 | else if (jiffies < limit) | ||
615 | return 1; | ||
616 | |||
617 | limit = jiffies + (HZ / 4); | ||
618 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, | ||
619 | (char *)®ion64, sizeof(region64), | ||
620 | (char *)&pkg, &rdata_size); | ||
621 | if (r) | ||
622 | return 1; | ||
623 | |||
624 | lc->in_sync_hint = pkg.in_sync_hint; | ||
625 | |||
626 | return (int)pkg.is_recovering; | ||
627 | } | ||
628 | |||
629 | static struct dm_dirty_log_type _userspace_type = { | ||
630 | .name = "userspace", | ||
631 | .module = THIS_MODULE, | ||
632 | .ctr = userspace_ctr, | ||
633 | .dtr = userspace_dtr, | ||
634 | .presuspend = userspace_presuspend, | ||
635 | .postsuspend = userspace_postsuspend, | ||
636 | .resume = userspace_resume, | ||
637 | .get_region_size = userspace_get_region_size, | ||
638 | .is_clean = userspace_is_clean, | ||
639 | .in_sync = userspace_in_sync, | ||
640 | .flush = userspace_flush, | ||
641 | .mark_region = userspace_mark_region, | ||
642 | .clear_region = userspace_clear_region, | ||
643 | .get_resync_work = userspace_get_resync_work, | ||
644 | .set_region_sync = userspace_set_region_sync, | ||
645 | .get_sync_count = userspace_get_sync_count, | ||
646 | .status = userspace_status, | ||
647 | .is_remote_recovering = userspace_is_remote_recovering, | ||
648 | }; | ||
649 | |||
650 | static int __init userspace_dirty_log_init(void) | ||
651 | { | ||
652 | int r = 0; | ||
653 | |||
654 | flush_entry_pool = mempool_create(100, flush_entry_alloc, | ||
655 | flush_entry_free, NULL); | ||
656 | |||
657 | if (!flush_entry_pool) { | ||
658 | DMWARN("Unable to create flush_entry_pool: No memory."); | ||
659 | return -ENOMEM; | ||
660 | } | ||
661 | |||
662 | r = dm_ulog_tfr_init(); | ||
663 | if (r) { | ||
664 | DMWARN("Unable to initialize userspace log communications"); | ||
665 | mempool_destroy(flush_entry_pool); | ||
666 | return r; | ||
667 | } | ||
668 | |||
669 | r = dm_dirty_log_type_register(&_userspace_type); | ||
670 | if (r) { | ||
671 | DMWARN("Couldn't register userspace dirty log type"); | ||
672 | dm_ulog_tfr_exit(); | ||
673 | mempool_destroy(flush_entry_pool); | ||
674 | return r; | ||
675 | } | ||
676 | |||
677 | DMINFO("version 1.0.0 loaded"); | ||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | static void __exit userspace_dirty_log_exit(void) | ||
682 | { | ||
683 | dm_dirty_log_type_unregister(&_userspace_type); | ||
684 | dm_ulog_tfr_exit(); | ||
685 | mempool_destroy(flush_entry_pool); | ||
686 | |||
687 | DMINFO("version 1.0.0 unloaded"); | ||
688 | return; | ||
689 | } | ||
690 | |||
691 | module_init(userspace_dirty_log_init); | ||
692 | module_exit(userspace_dirty_log_exit); | ||
693 | |||
694 | MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); | ||
695 | MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); | ||
696 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c new file mode 100644 index 000000000000..0ca1ee768a1f --- /dev/null +++ b/drivers/md/dm-log-userspace-transfer.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <net/sock.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | #include <linux/connector.h> | ||
12 | #include <linux/device-mapper.h> | ||
13 | #include <linux/dm-log-userspace.h> | ||
14 | |||
15 | #include "dm-log-userspace-transfer.h" | ||
16 | |||
17 | static uint32_t dm_ulog_seq; | ||
18 | |||
19 | /* | ||
20 | * Netlink/Connector is an unreliable protocol. How long should | ||
21 | * we wait for a response before assuming it was lost and retrying? | ||
22 | * (If we do receive a response after this time, it will be discarded | ||
23 | * and the response to the resent request will be waited for. | ||
24 | */ | ||
25 | #define DM_ULOG_RETRY_TIMEOUT (15 * HZ) | ||
26 | |||
27 | /* | ||
28 | * Pre-allocated space for speed | ||
29 | */ | ||
30 | #define DM_ULOG_PREALLOCED_SIZE 512 | ||
31 | static struct cn_msg *prealloced_cn_msg; | ||
32 | static struct dm_ulog_request *prealloced_ulog_tfr; | ||
33 | |||
34 | static struct cb_id ulog_cn_id = { | ||
35 | .idx = CN_IDX_DM, | ||
36 | .val = CN_VAL_DM_USERSPACE_LOG | ||
37 | }; | ||
38 | |||
39 | static DEFINE_MUTEX(dm_ulog_lock); | ||
40 | |||
41 | struct receiving_pkg { | ||
42 | struct list_head list; | ||
43 | struct completion complete; | ||
44 | |||
45 | uint32_t seq; | ||
46 | |||
47 | int error; | ||
48 | size_t *data_size; | ||
49 | char *data; | ||
50 | }; | ||
51 | |||
52 | static DEFINE_SPINLOCK(receiving_list_lock); | ||
53 | static struct list_head receiving_list; | ||
54 | |||
55 | static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) | ||
56 | { | ||
57 | int r; | ||
58 | struct cn_msg *msg = prealloced_cn_msg; | ||
59 | |||
60 | memset(msg, 0, sizeof(struct cn_msg)); | ||
61 | |||
62 | msg->id.idx = ulog_cn_id.idx; | ||
63 | msg->id.val = ulog_cn_id.val; | ||
64 | msg->ack = 0; | ||
65 | msg->seq = tfr->seq; | ||
66 | msg->len = sizeof(struct dm_ulog_request) + tfr->data_size; | ||
67 | |||
68 | r = cn_netlink_send(msg, 0, gfp_any()); | ||
69 | |||
70 | return r; | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Parameters for this function can be either msg or tfr, but not | ||
75 | * both. This function fills in the reply for a waiting request. | ||
76 | * If just msg is given, then the reply is simply an ACK from userspace | ||
77 | * that the request was received. | ||
78 | * | ||
79 | * Returns: 0 on success, -ENOENT on failure | ||
80 | */ | ||
81 | static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) | ||
82 | { | ||
83 | uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; | ||
84 | struct receiving_pkg *pkg; | ||
85 | |||
86 | /* | ||
87 | * The 'receiving_pkg' entries in this list are statically | ||
88 | * allocated on the stack in 'dm_consult_userspace'. | ||
89 | * Each process that is waiting for a reply from the user | ||
90 | * space server will have an entry in this list. | ||
91 | * | ||
92 | * We are safe to do it this way because the stack space | ||
93 | * is unique to each process, but still addressable by | ||
94 | * other processes. | ||
95 | */ | ||
96 | list_for_each_entry(pkg, &receiving_list, list) { | ||
97 | if (rtn_seq != pkg->seq) | ||
98 | continue; | ||
99 | |||
100 | if (msg) { | ||
101 | pkg->error = -msg->ack; | ||
102 | /* | ||
103 | * If we are trying again, we will need to know our | ||
104 | * storage capacity. Otherwise, along with the | ||
105 | * error code, we make explicit that we have no data. | ||
106 | */ | ||
107 | if (pkg->error != -EAGAIN) | ||
108 | *(pkg->data_size) = 0; | ||
109 | } else if (tfr->data_size > *(pkg->data_size)) { | ||
110 | DMERR("Insufficient space to receive package [%u] " | ||
111 | "(%u vs %lu)", tfr->request_type, | ||
112 | tfr->data_size, *(pkg->data_size)); | ||
113 | |||
114 | *(pkg->data_size) = 0; | ||
115 | pkg->error = -ENOSPC; | ||
116 | } else { | ||
117 | pkg->error = tfr->error; | ||
118 | memcpy(pkg->data, tfr->data, tfr->data_size); | ||
119 | *(pkg->data_size) = tfr->data_size; | ||
120 | } | ||
121 | complete(&pkg->complete); | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | return -ENOENT; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * This is the connector callback that delivers data | ||
130 | * that was sent from userspace. | ||
131 | */ | ||
132 | static void cn_ulog_callback(void *data) | ||
133 | { | ||
134 | struct cn_msg *msg = (struct cn_msg *)data; | ||
135 | struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1); | ||
136 | |||
137 | spin_lock(&receiving_list_lock); | ||
138 | if (msg->len == 0) | ||
139 | fill_pkg(msg, NULL); | ||
140 | else if (msg->len < sizeof(*tfr)) | ||
141 | DMERR("Incomplete message received (expected %u, got %u): [%u]", | ||
142 | (unsigned)sizeof(*tfr), msg->len, msg->seq); | ||
143 | else | ||
144 | fill_pkg(NULL, tfr); | ||
145 | spin_unlock(&receiving_list_lock); | ||
146 | } | ||
147 | |||
148 | /** | ||
149 | * dm_consult_userspace | ||
150 | * @uuid: log's uuid (must be DM_UUID_LEN in size) | ||
151 | * @request_type: found in include/linux/dm-log-userspace.h | ||
152 | * @data: data to tx to the server | ||
153 | * @data_size: size of data in bytes | ||
154 | * @rdata: place to put return data from server | ||
155 | * @rdata_size: value-result (amount of space given/amount of space used) | ||
156 | * | ||
157 | * rdata_size is undefined on failure. | ||
158 | * | ||
159 | * Memory used to communicate with userspace is zero'ed | ||
160 | * before populating to ensure that no unwanted bits leak | ||
161 | * from kernel space to user-space. All userspace log communications | ||
162 | * between kernel and user space go through this function. | ||
163 | * | ||
164 | * Returns: 0 on success, -EXXX on failure | ||
165 | **/ | ||
166 | int dm_consult_userspace(const char *uuid, int request_type, | ||
167 | char *data, size_t data_size, | ||
168 | char *rdata, size_t *rdata_size) | ||
169 | { | ||
170 | int r = 0; | ||
171 | size_t dummy = 0; | ||
172 | int overhead_size = | ||
173 | sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg); | ||
174 | struct dm_ulog_request *tfr = prealloced_ulog_tfr; | ||
175 | struct receiving_pkg pkg; | ||
176 | |||
177 | if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) { | ||
178 | DMINFO("Size of tfr exceeds preallocated size"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | |||
182 | if (!rdata_size) | ||
183 | rdata_size = &dummy; | ||
184 | resend: | ||
185 | /* | ||
186 | * We serialize the sending of requests so we can | ||
187 | * use the preallocated space. | ||
188 | */ | ||
189 | mutex_lock(&dm_ulog_lock); | ||
190 | |||
191 | memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); | ||
192 | memcpy(tfr->uuid, uuid, DM_UUID_LEN); | ||
193 | tfr->seq = dm_ulog_seq++; | ||
194 | |||
195 | /* | ||
196 | * Must be valid request type (all other bits set to | ||
197 | * zero). This reserves other bits for possible future | ||
198 | * use. | ||
199 | */ | ||
200 | tfr->request_type = request_type & DM_ULOG_REQUEST_MASK; | ||
201 | |||
202 | tfr->data_size = data_size; | ||
203 | if (data && data_size) | ||
204 | memcpy(tfr->data, data, data_size); | ||
205 | |||
206 | memset(&pkg, 0, sizeof(pkg)); | ||
207 | init_completion(&pkg.complete); | ||
208 | pkg.seq = tfr->seq; | ||
209 | pkg.data_size = rdata_size; | ||
210 | pkg.data = rdata; | ||
211 | spin_lock(&receiving_list_lock); | ||
212 | list_add(&(pkg.list), &receiving_list); | ||
213 | spin_unlock(&receiving_list_lock); | ||
214 | |||
215 | r = dm_ulog_sendto_server(tfr); | ||
216 | |||
217 | mutex_unlock(&dm_ulog_lock); | ||
218 | |||
219 | if (r) { | ||
220 | DMERR("Unable to send log request [%u] to userspace: %d", | ||
221 | request_type, r); | ||
222 | spin_lock(&receiving_list_lock); | ||
223 | list_del_init(&(pkg.list)); | ||
224 | spin_unlock(&receiving_list_lock); | ||
225 | |||
226 | goto out; | ||
227 | } | ||
228 | |||
229 | r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT); | ||
230 | spin_lock(&receiving_list_lock); | ||
231 | list_del_init(&(pkg.list)); | ||
232 | spin_unlock(&receiving_list_lock); | ||
233 | if (!r) { | ||
234 | DMWARN("[%s] Request timed out: [%u/%u] - retrying", | ||
235 | (strlen(uuid) > 8) ? | ||
236 | (uuid + (strlen(uuid) - 8)) : (uuid), | ||
237 | request_type, pkg.seq); | ||
238 | goto resend; | ||
239 | } | ||
240 | |||
241 | r = pkg.error; | ||
242 | if (r == -EAGAIN) | ||
243 | goto resend; | ||
244 | |||
245 | out: | ||
246 | return r; | ||
247 | } | ||
248 | |||
249 | int dm_ulog_tfr_init(void) | ||
250 | { | ||
251 | int r; | ||
252 | void *prealloced; | ||
253 | |||
254 | INIT_LIST_HEAD(&receiving_list); | ||
255 | |||
256 | prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL); | ||
257 | if (!prealloced) | ||
258 | return -ENOMEM; | ||
259 | |||
260 | prealloced_cn_msg = prealloced; | ||
261 | prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg); | ||
262 | |||
263 | r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback); | ||
264 | if (r) { | ||
265 | cn_del_callback(&ulog_cn_id); | ||
266 | return r; | ||
267 | } | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | void dm_ulog_tfr_exit(void) | ||
273 | { | ||
274 | cn_del_callback(&ulog_cn_id); | ||
275 | kfree(prealloced_cn_msg); | ||
276 | } | ||
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h new file mode 100644 index 000000000000..c26d8e4e2710 --- /dev/null +++ b/drivers/md/dm-log-userspace-transfer.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef __DM_LOG_USERSPACE_TRANSFER_H__ | ||
8 | #define __DM_LOG_USERSPACE_TRANSFER_H__ | ||
9 | |||
10 | #define DM_MSG_PREFIX "dm-log-userspace" | ||
11 | |||
12 | int dm_ulog_tfr_init(void); | ||
13 | void dm_ulog_tfr_exit(void); | ||
14 | int dm_consult_userspace(const char *uuid, int request_type, | ||
15 | char *data, size_t data_size, | ||
16 | char *rdata, size_t *rdata_size); | ||
17 | |||
18 | #endif /* __DM_LOG_USERSPACE_TRANSFER_H__ */ | ||
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 03f22076381f..334a3593cdfd 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
@@ -57,6 +57,7 @@ header-y += dlmconstants.h | |||
57 | header-y += dlm_device.h | 57 | header-y += dlm_device.h |
58 | header-y += dlm_netlink.h | 58 | header-y += dlm_netlink.h |
59 | header-y += dm-ioctl.h | 59 | header-y += dm-ioctl.h |
60 | header-y += dm-log-userspace.h | ||
60 | header-y += dn.h | 61 | header-y += dn.h |
61 | header-y += dqblk_xfs.h | 62 | header-y += dqblk_xfs.h |
62 | header-y += efs_fs_sb.h | 63 | header-y += efs_fs_sb.h |
diff --git a/include/linux/connector.h b/include/linux/connector.h index b9966e64604e..b68d27850d51 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h | |||
@@ -41,8 +41,10 @@ | |||
41 | #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ | 41 | #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ |
42 | #define CN_DST_IDX 0x6 | 42 | #define CN_DST_IDX 0x6 |
43 | #define CN_DST_VAL 0x1 | 43 | #define CN_DST_VAL 0x1 |
44 | #define CN_IDX_DM 0x7 /* Device Mapper */ | ||
45 | #define CN_VAL_DM_USERSPACE_LOG 0x1 | ||
44 | 46 | ||
45 | #define CN_NETLINK_USERS 7 | 47 | #define CN_NETLINK_USERS 8 |
46 | 48 | ||
47 | /* | 49 | /* |
48 | * Maximum connector's message size. | 50 | * Maximum connector's message size. |
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h new file mode 100644 index 000000000000..642e3017b51f --- /dev/null +++ b/include/linux/dm-log-userspace.h | |||
@@ -0,0 +1,386 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef __DM_LOG_USERSPACE_H__ | ||
8 | #define __DM_LOG_USERSPACE_H__ | ||
9 | |||
10 | #include <linux/dm-ioctl.h> /* For DM_UUID_LEN */ | ||
11 | |||
12 | /* | ||
13 | * The device-mapper userspace log module consists of a kernel component and | ||
14 | * a user-space component. The kernel component implements the API defined | ||
15 | * in dm-dirty-log.h. Its purpose is simply to pass the parameters and | ||
16 | * return values of those API functions between kernel and user-space. | ||
17 | * | ||
18 | * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc. | ||
19 | * These request types represent the different functions in the device-mapper | ||
20 | * dirty log API. Each of these is described in more detail below. | ||
21 | * | ||
22 | * The user-space program must listen for requests from the kernel (representing | ||
23 | * the various API functions) and process them. | ||
24 | * | ||
25 | * User-space begins by setting up the communication link (error checking | ||
26 | * removed for clarity): | ||
27 | * fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); | ||
28 | * addr.nl_family = AF_NETLINK; | ||
29 | * addr.nl_groups = CN_IDX_DM; | ||
30 | * addr.nl_pid = 0; | ||
31 | * r = bind(fd, (struct sockaddr *) &addr, sizeof(addr)); | ||
32 | * opt = addr.nl_groups; | ||
33 | * setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt)); | ||
34 | * | ||
35 | * User-space will then wait to receive requests form the kernel, which it | ||
36 | * will process as described below. The requests are received in the form, | ||
37 | * ((struct dm_ulog_request) + (additional data)). Depending on the request | ||
38 | * type, there may or may not be 'additional data'. In the descriptions below, | ||
39 | * you will see 'Payload-to-userspace' and 'Payload-to-kernel'. The | ||
40 | * 'Payload-to-userspace' is what the kernel sends in 'additional data' as | ||
41 | * necessary parameters to complete the request. The 'Payload-to-kernel' is | ||
42 | * the 'additional data' returned to the kernel that contains the necessary | ||
43 | * results of the request. The 'data_size' field in the dm_ulog_request | ||
44 | * structure denotes the availability and amount of payload data. | ||
45 | */ | ||
46 | |||
47 | /* | ||
48 | * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h): | ||
49 | * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, | ||
50 | * unsigned argc, char **argv); | ||
51 | * | ||
52 | * Payload-to-userspace: | ||
53 | * A single string containing all the argv arguments separated by ' 's | ||
54 | * Payload-to-kernel: | ||
55 | * None. ('data_size' in the dm_ulog_request struct should be 0.) | ||
56 | * | ||
57 | * The UUID contained in the dm_ulog_request structure is the reference that | ||
58 | * will be used by all request types to a specific log. The constructor must | ||
59 | * record this assotiation with instance created. | ||
60 | * | ||
61 | * When the request has been processed, user-space must return the | ||
62 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
63 | * 'data_size' appropriately. | ||
64 | */ | ||
65 | #define DM_ULOG_CTR 1 | ||
66 | |||
67 | /* | ||
68 | * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h): | ||
69 | * void (*dtr)(struct dm_dirty_log *log); | ||
70 | * | ||
71 | * Payload-to-userspace: | ||
72 | * A single string containing all the argv arguments separated by ' 's | ||
73 | * Payload-to-kernel: | ||
74 | * None. ('data_size' in the dm_ulog_request struct should be 0.) | ||
75 | * | ||
76 | * The UUID contained in the dm_ulog_request structure is all that is | ||
77 | * necessary to identify the log instance being destroyed. There is no | ||
78 | * payload data. | ||
79 | * | ||
80 | * When the request has been processed, user-space must return the | ||
81 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
82 | * 'data_size' appropriately. | ||
83 | */ | ||
84 | #define DM_ULOG_DTR 2 | ||
85 | |||
86 | /* | ||
87 | * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h): | ||
88 | * int (*presuspend)(struct dm_dirty_log *log); | ||
89 | * | ||
90 | * Payload-to-userspace: | ||
91 | * None. | ||
92 | * Payload-to-kernel: | ||
93 | * None. | ||
94 | * | ||
95 | * The UUID contained in the dm_ulog_request structure is all that is | ||
96 | * necessary to identify the log instance being presuspended. There is no | ||
97 | * payload data. | ||
98 | * | ||
99 | * When the request has been processed, user-space must return the | ||
100 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
101 | * 'data_size' appropriately. | ||
102 | */ | ||
103 | #define DM_ULOG_PRESUSPEND 3 | ||
104 | |||
105 | /* | ||
106 | * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h): | ||
107 | * int (*postsuspend)(struct dm_dirty_log *log); | ||
108 | * | ||
109 | * Payload-to-userspace: | ||
110 | * None. | ||
111 | * Payload-to-kernel: | ||
112 | * None. | ||
113 | * | ||
114 | * The UUID contained in the dm_ulog_request structure is all that is | ||
115 | * necessary to identify the log instance being postsuspended. There is no | ||
116 | * payload data. | ||
117 | * | ||
118 | * When the request has been processed, user-space must return the | ||
119 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
120 | * 'data_size' appropriately. | ||
121 | */ | ||
122 | #define DM_ULOG_POSTSUSPEND 4 | ||
123 | |||
124 | /* | ||
125 | * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h): | ||
126 | * int (*resume)(struct dm_dirty_log *log); | ||
127 | * | ||
128 | * Payload-to-userspace: | ||
129 | * None. | ||
130 | * Payload-to-kernel: | ||
131 | * None. | ||
132 | * | ||
133 | * The UUID contained in the dm_ulog_request structure is all that is | ||
134 | * necessary to identify the log instance being resumed. There is no | ||
135 | * payload data. | ||
136 | * | ||
137 | * When the request has been processed, user-space must return the | ||
138 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
139 | * 'data_size' appropriately. | ||
140 | */ | ||
141 | #define DM_ULOG_RESUME 5 | ||
142 | |||
143 | /* | ||
144 | * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h): | ||
145 | * uint32_t (*get_region_size)(struct dm_dirty_log *log); | ||
146 | * | ||
147 | * Payload-to-userspace: | ||
148 | * None. | ||
149 | * Payload-to-kernel: | ||
150 | * uint64_t - contains the region size | ||
151 | * | ||
152 | * The region size is something that was determined at constructor time. | ||
153 | * It is returned in the payload area and 'data_size' is set to | ||
154 | * reflect this. | ||
155 | * | ||
156 | * When the request has been processed, user-space must return the | ||
157 | * dm_ulog_request to the kernel - setting the 'error' field appropriately. | ||
158 | */ | ||
159 | #define DM_ULOG_GET_REGION_SIZE 6 | ||
160 | |||
161 | /* | ||
162 | * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h): | ||
163 | * int (*is_clean)(struct dm_dirty_log *log, region_t region); | ||
164 | * | ||
165 | * Payload-to-userspace: | ||
166 | * uint64_t - the region to get clean status on | ||
167 | * Payload-to-kernel: | ||
168 | * int64_t - 1 if clean, 0 otherwise | ||
169 | * | ||
170 | * Payload is sizeof(uint64_t) and contains the region for which the clean | ||
171 | * status is being made. | ||
172 | * | ||
173 | * When the request has been processed, user-space must return the | ||
174 | * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or | ||
175 | * 1 (clean), setting 'data_size' and 'error' appropriately. | ||
176 | */ | ||
177 | #define DM_ULOG_IS_CLEAN 7 | ||
178 | |||
179 | /* | ||
180 | * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h): | ||
181 | * int (*in_sync)(struct dm_dirty_log *log, region_t region, | ||
182 | * int can_block); | ||
183 | * | ||
184 | * Payload-to-userspace: | ||
185 | * uint64_t - the region to get sync status on | ||
186 | * Payload-to-kernel: | ||
187 | * int64_t - 1 if in-sync, 0 otherwise | ||
188 | * | ||
189 | * Exactly the same as 'is_clean' above, except this time asking "has the | ||
190 | * region been recovered?" vs. "is the region not being modified?" | ||
191 | */ | ||
192 | #define DM_ULOG_IN_SYNC 8 | ||
193 | |||
194 | /* | ||
195 | * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h): | ||
196 | * int (*flush)(struct dm_dirty_log *log); | ||
197 | * | ||
198 | * Payload-to-userspace: | ||
199 | * None. | ||
200 | * Payload-to-kernel: | ||
201 | * None. | ||
202 | * | ||
203 | * No incoming or outgoing payload. Simply flush log state to disk. | ||
204 | * | ||
205 | * When the request has been processed, user-space must return the | ||
206 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
207 | * 'data_size' appropriately. | ||
208 | */ | ||
209 | #define DM_ULOG_FLUSH 9 | ||
210 | |||
211 | /* | ||
212 | * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h): | ||
213 | * void (*mark_region)(struct dm_dirty_log *log, region_t region); | ||
214 | * | ||
215 | * Payload-to-userspace: | ||
216 | * uint64_t [] - region(s) to mark | ||
217 | * Payload-to-kernel: | ||
218 | * None. | ||
219 | * | ||
220 | * Incoming payload contains the one or more regions to mark dirty. | ||
221 | * The number of regions contained in the payload can be determined from | ||
222 | * 'data_size/sizeof(uint64_t)'. | ||
223 | * | ||
224 | * When the request has been processed, user-space must return the | ||
225 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
226 | * 'data_size' appropriately. | ||
227 | */ | ||
228 | #define DM_ULOG_MARK_REGION 10 | ||
229 | |||
230 | /* | ||
231 | * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h): | ||
232 | * void (*clear_region)(struct dm_dirty_log *log, region_t region); | ||
233 | * | ||
234 | * Payload-to-userspace: | ||
235 | * uint64_t [] - region(s) to clear | ||
236 | * Payload-to-kernel: | ||
237 | * None. | ||
238 | * | ||
239 | * Incoming payload contains the one or more regions to mark clean. | ||
240 | * The number of regions contained in the payload can be determined from | ||
241 | * 'data_size/sizeof(uint64_t)'. | ||
242 | * | ||
243 | * When the request has been processed, user-space must return the | ||
244 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
245 | * 'data_size' appropriately. | ||
246 | */ | ||
247 | #define DM_ULOG_CLEAR_REGION 11 | ||
248 | |||
249 | /* | ||
250 | * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h): | ||
251 | * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); | ||
252 | * | ||
253 | * Payload-to-userspace: | ||
254 | * None. | ||
255 | * Payload-to-kernel: | ||
256 | * { | ||
257 | * int64_t i; -- 1 if recovery necessary, 0 otherwise | ||
258 | * uint64_t r; -- The region to recover if i=1 | ||
259 | * } | ||
260 | * 'data_size' should be set appropriately. | ||
261 | * | ||
262 | * When the request has been processed, user-space must return the | ||
263 | * dm_ulog_request to the kernel - setting the 'error' field appropriately. | ||
264 | */ | ||
265 | #define DM_ULOG_GET_RESYNC_WORK 12 | ||
266 | |||
267 | /* | ||
268 | * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h): | ||
269 | * void (*set_region_sync)(struct dm_dirty_log *log, | ||
270 | * region_t region, int in_sync); | ||
271 | * | ||
272 | * Payload-to-userspace: | ||
273 | * { | ||
274 | * uint64_t - region to set sync state on | ||
275 | * int64_t - 0 if not-in-sync, 1 if in-sync | ||
276 | * } | ||
277 | * Payload-to-kernel: | ||
278 | * None. | ||
279 | * | ||
280 | * When the request has been processed, user-space must return the | ||
281 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
282 | * 'data_size' appropriately. | ||
283 | */ | ||
284 | #define DM_ULOG_SET_REGION_SYNC 13 | ||
285 | |||
286 | /* | ||
287 | * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h): | ||
288 | * region_t (*get_sync_count)(struct dm_dirty_log *log); | ||
289 | * | ||
290 | * Payload-to-userspace: | ||
291 | * None. | ||
292 | * Payload-to-kernel: | ||
293 | * uint64_t - the number of in-sync regions | ||
294 | * | ||
295 | * No incoming payload. Kernel-bound payload contains the number of | ||
296 | * regions that are in-sync (in a size_t). | ||
297 | * | ||
298 | * When the request has been processed, user-space must return the | ||
299 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
300 | * 'data_size' appropriately. | ||
301 | */ | ||
302 | #define DM_ULOG_GET_SYNC_COUNT 14 | ||
303 | |||
304 | /* | ||
305 | * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h): | ||
306 | * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO, | ||
307 | * char *result, unsigned maxlen); | ||
308 | * | ||
309 | * Payload-to-userspace: | ||
310 | * None. | ||
311 | * Payload-to-kernel: | ||
312 | * Character string containing STATUSTYPE_INFO | ||
313 | * | ||
314 | * When the request has been processed, user-space must return the | ||
315 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
316 | * 'data_size' appropriately. | ||
317 | */ | ||
318 | #define DM_ULOG_STATUS_INFO 15 | ||
319 | |||
320 | /* | ||
321 | * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h): | ||
322 | * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE, | ||
323 | * char *result, unsigned maxlen); | ||
324 | * | ||
325 | * Payload-to-userspace: | ||
326 | * None. | ||
327 | * Payload-to-kernel: | ||
328 | * Character string containing STATUSTYPE_TABLE | ||
329 | * | ||
330 | * When the request has been processed, user-space must return the | ||
331 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
332 | * 'data_size' appropriately. | ||
333 | */ | ||
334 | #define DM_ULOG_STATUS_TABLE 16 | ||
335 | |||
336 | /* | ||
337 | * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h): | ||
338 | * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); | ||
339 | * | ||
340 | * Payload-to-userspace: | ||
341 | * uint64_t - region to determine recovery status on | ||
342 | * Payload-to-kernel: | ||
343 | * { | ||
344 | * int64_t is_recovering; -- 0 if no, 1 if yes | ||
345 | * uint64_t in_sync_hint; -- lowest region still needing resync | ||
346 | * } | ||
347 | * | ||
348 | * When the request has been processed, user-space must return the | ||
349 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
350 | * 'data_size' appropriately. | ||
351 | */ | ||
352 | #define DM_ULOG_IS_REMOTE_RECOVERING 17 | ||
353 | |||
354 | /* | ||
355 | * (DM_ULOG_REQUEST_MASK & request_type) to get the request type | ||
356 | * | ||
357 | * Payload-to-userspace: | ||
358 | * A single string containing all the argv arguments separated by ' 's | ||
359 | * Payload-to-kernel: | ||
360 | * None. ('data_size' in the dm_ulog_request struct should be 0.) | ||
361 | * | ||
362 | * We are reserving 8 bits of the 32-bit 'request_type' field for the | ||
363 | * various request types above. The remaining 24-bits are currently | ||
364 | * set to zero and are reserved for future use and compatibility concerns. | ||
365 | * | ||
366 | * User-space should always use DM_ULOG_REQUEST_TYPE to aquire the | ||
367 | * request type from the 'request_type' field to maintain forward compatibility. | ||
368 | */ | ||
369 | #define DM_ULOG_REQUEST_MASK 0xFF | ||
370 | #define DM_ULOG_REQUEST_TYPE(request_type) \ | ||
371 | (DM_ULOG_REQUEST_MASK & (request_type)) | ||
372 | |||
373 | struct dm_ulog_request { | ||
374 | char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */ | ||
375 | char padding[7]; /* Padding because DM_UUID_LEN = 129 */ | ||
376 | |||
377 | int32_t error; /* Used to report back processing errors */ | ||
378 | |||
379 | uint32_t seq; /* Sequence number for request */ | ||
380 | uint32_t request_type; /* DM_ULOG_* defined above */ | ||
381 | uint32_t data_size; /* How much data (not including this struct) */ | ||
382 | |||
383 | char data[0]; | ||
384 | }; | ||
385 | |||
386 | #endif /* __DM_LOG_USERSPACE_H__ */ | ||