diff options
author | Jonthan Brassow <jbrassow@redhat.com> | 2009-06-22 05:12:35 -0400 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2009-06-22 05:12:35 -0400 |
commit | f5db4af466e2dca0fe822019812d586ca910b00c (patch) | |
tree | 1bbaaa36509df9f7eecc19ccffa434048cf4b555 /include | |
parent | 754c5fc7ebb417b23601a6222a6005cc2e7f2913 (diff) |
dm raid1: add userspace log
This patch contains a device-mapper mirror log module that forwards
requests to userspace for processing.
The structures used for communication between kernel and userspace are
located in include/linux/dm-log-userspace.h. Due to the frequency,
diversity, and 2-way communication nature of the exchanges between
kernel and userspace, 'connector' was chosen as the interface for
communication.
The first log implementations written in userspace - "clustered-disk"
and "clustered-core" - support clustered shared storage. A userspace
daemon (in the LVM2 source code repository) uses openAIS/corosync to
process requests in an ordered fashion with the rest of the nodes in the
cluster so as to prevent log state corruption. Other implementations
with no association to LVM or openAIS/corosync, are certainly possible.
(Imagine if two machines are writing to the same region of a mirror.
They would both mark the region dirty, but you need a cluster-aware
entity that can handle properly marking the region clean when they are
done. Otherwise, you might clear the region when the first machine is
done, not the second.)
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/Kbuild | 1 | ||||
-rw-r--r-- | include/linux/connector.h | 4 | ||||
-rw-r--r-- | include/linux/dm-log-userspace.h | 386 |
3 files changed, 390 insertions, 1 deletions
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 03f22076381f..334a3593cdfd 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
@@ -57,6 +57,7 @@ header-y += dlmconstants.h | |||
57 | header-y += dlm_device.h | 57 | header-y += dlm_device.h |
58 | header-y += dlm_netlink.h | 58 | header-y += dlm_netlink.h |
59 | header-y += dm-ioctl.h | 59 | header-y += dm-ioctl.h |
60 | header-y += dm-log-userspace.h | ||
60 | header-y += dn.h | 61 | header-y += dn.h |
61 | header-y += dqblk_xfs.h | 62 | header-y += dqblk_xfs.h |
62 | header-y += efs_fs_sb.h | 63 | header-y += efs_fs_sb.h |
diff --git a/include/linux/connector.h b/include/linux/connector.h index b9966e64604e..b68d27850d51 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h | |||
@@ -41,8 +41,10 @@ | |||
41 | #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ | 41 | #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ |
42 | #define CN_DST_IDX 0x6 | 42 | #define CN_DST_IDX 0x6 |
43 | #define CN_DST_VAL 0x1 | 43 | #define CN_DST_VAL 0x1 |
44 | #define CN_IDX_DM 0x7 /* Device Mapper */ | ||
45 | #define CN_VAL_DM_USERSPACE_LOG 0x1 | ||
44 | 46 | ||
45 | #define CN_NETLINK_USERS 7 | 47 | #define CN_NETLINK_USERS 8 |
46 | 48 | ||
47 | /* | 49 | /* |
48 | * Maximum connector's message size. | 50 | * Maximum connector's message size. |
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h new file mode 100644 index 000000000000..642e3017b51f --- /dev/null +++ b/include/linux/dm-log-userspace.h | |||
@@ -0,0 +1,386 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef __DM_LOG_USERSPACE_H__ | ||
8 | #define __DM_LOG_USERSPACE_H__ | ||
9 | |||
10 | #include <linux/dm-ioctl.h> /* For DM_UUID_LEN */ | ||
11 | |||
12 | /* | ||
13 | * The device-mapper userspace log module consists of a kernel component and | ||
14 | * a user-space component. The kernel component implements the API defined | ||
15 | * in dm-dirty-log.h. Its purpose is simply to pass the parameters and | ||
16 | * return values of those API functions between kernel and user-space. | ||
17 | * | ||
18 | * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc. | ||
19 | * These request types represent the different functions in the device-mapper | ||
20 | * dirty log API. Each of these is described in more detail below. | ||
21 | * | ||
22 | * The user-space program must listen for requests from the kernel (representing | ||
23 | * the various API functions) and process them. | ||
24 | * | ||
25 | * User-space begins by setting up the communication link (error checking | ||
26 | * removed for clarity): | ||
27 | * fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); | ||
28 | * addr.nl_family = AF_NETLINK; | ||
29 | * addr.nl_groups = CN_IDX_DM; | ||
30 | * addr.nl_pid = 0; | ||
31 | * r = bind(fd, (struct sockaddr *) &addr, sizeof(addr)); | ||
32 | * opt = addr.nl_groups; | ||
33 | * setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt)); | ||
34 | * | ||
35 | * User-space will then wait to receive requests form the kernel, which it | ||
36 | * will process as described below. The requests are received in the form, | ||
37 | * ((struct dm_ulog_request) + (additional data)). Depending on the request | ||
38 | * type, there may or may not be 'additional data'. In the descriptions below, | ||
39 | * you will see 'Payload-to-userspace' and 'Payload-to-kernel'. The | ||
40 | * 'Payload-to-userspace' is what the kernel sends in 'additional data' as | ||
41 | * necessary parameters to complete the request. The 'Payload-to-kernel' is | ||
42 | * the 'additional data' returned to the kernel that contains the necessary | ||
43 | * results of the request. The 'data_size' field in the dm_ulog_request | ||
44 | * structure denotes the availability and amount of payload data. | ||
45 | */ | ||
46 | |||
47 | /* | ||
48 | * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h): | ||
49 | * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, | ||
50 | * unsigned argc, char **argv); | ||
51 | * | ||
52 | * Payload-to-userspace: | ||
53 | * A single string containing all the argv arguments separated by ' 's | ||
54 | * Payload-to-kernel: | ||
55 | * None. ('data_size' in the dm_ulog_request struct should be 0.) | ||
56 | * | ||
57 | * The UUID contained in the dm_ulog_request structure is the reference that | ||
58 | * will be used by all request types to a specific log. The constructor must | ||
59 | * record this assotiation with instance created. | ||
60 | * | ||
61 | * When the request has been processed, user-space must return the | ||
62 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
63 | * 'data_size' appropriately. | ||
64 | */ | ||
65 | #define DM_ULOG_CTR 1 | ||
66 | |||
67 | /* | ||
68 | * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h): | ||
69 | * void (*dtr)(struct dm_dirty_log *log); | ||
70 | * | ||
71 | * Payload-to-userspace: | ||
72 | * A single string containing all the argv arguments separated by ' 's | ||
73 | * Payload-to-kernel: | ||
74 | * None. ('data_size' in the dm_ulog_request struct should be 0.) | ||
75 | * | ||
76 | * The UUID contained in the dm_ulog_request structure is all that is | ||
77 | * necessary to identify the log instance being destroyed. There is no | ||
78 | * payload data. | ||
79 | * | ||
80 | * When the request has been processed, user-space must return the | ||
81 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
82 | * 'data_size' appropriately. | ||
83 | */ | ||
84 | #define DM_ULOG_DTR 2 | ||
85 | |||
86 | /* | ||
87 | * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h): | ||
88 | * int (*presuspend)(struct dm_dirty_log *log); | ||
89 | * | ||
90 | * Payload-to-userspace: | ||
91 | * None. | ||
92 | * Payload-to-kernel: | ||
93 | * None. | ||
94 | * | ||
95 | * The UUID contained in the dm_ulog_request structure is all that is | ||
96 | * necessary to identify the log instance being presuspended. There is no | ||
97 | * payload data. | ||
98 | * | ||
99 | * When the request has been processed, user-space must return the | ||
100 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
101 | * 'data_size' appropriately. | ||
102 | */ | ||
103 | #define DM_ULOG_PRESUSPEND 3 | ||
104 | |||
105 | /* | ||
106 | * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h): | ||
107 | * int (*postsuspend)(struct dm_dirty_log *log); | ||
108 | * | ||
109 | * Payload-to-userspace: | ||
110 | * None. | ||
111 | * Payload-to-kernel: | ||
112 | * None. | ||
113 | * | ||
114 | * The UUID contained in the dm_ulog_request structure is all that is | ||
115 | * necessary to identify the log instance being postsuspended. There is no | ||
116 | * payload data. | ||
117 | * | ||
118 | * When the request has been processed, user-space must return the | ||
119 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
120 | * 'data_size' appropriately. | ||
121 | */ | ||
122 | #define DM_ULOG_POSTSUSPEND 4 | ||
123 | |||
124 | /* | ||
125 | * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h): | ||
126 | * int (*resume)(struct dm_dirty_log *log); | ||
127 | * | ||
128 | * Payload-to-userspace: | ||
129 | * None. | ||
130 | * Payload-to-kernel: | ||
131 | * None. | ||
132 | * | ||
133 | * The UUID contained in the dm_ulog_request structure is all that is | ||
134 | * necessary to identify the log instance being resumed. There is no | ||
135 | * payload data. | ||
136 | * | ||
137 | * When the request has been processed, user-space must return the | ||
138 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
139 | * 'data_size' appropriately. | ||
140 | */ | ||
141 | #define DM_ULOG_RESUME 5 | ||
142 | |||
143 | /* | ||
144 | * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h): | ||
145 | * uint32_t (*get_region_size)(struct dm_dirty_log *log); | ||
146 | * | ||
147 | * Payload-to-userspace: | ||
148 | * None. | ||
149 | * Payload-to-kernel: | ||
150 | * uint64_t - contains the region size | ||
151 | * | ||
152 | * The region size is something that was determined at constructor time. | ||
153 | * It is returned in the payload area and 'data_size' is set to | ||
154 | * reflect this. | ||
155 | * | ||
156 | * When the request has been processed, user-space must return the | ||
157 | * dm_ulog_request to the kernel - setting the 'error' field appropriately. | ||
158 | */ | ||
159 | #define DM_ULOG_GET_REGION_SIZE 6 | ||
160 | |||
161 | /* | ||
162 | * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h): | ||
163 | * int (*is_clean)(struct dm_dirty_log *log, region_t region); | ||
164 | * | ||
165 | * Payload-to-userspace: | ||
166 | * uint64_t - the region to get clean status on | ||
167 | * Payload-to-kernel: | ||
168 | * int64_t - 1 if clean, 0 otherwise | ||
169 | * | ||
170 | * Payload is sizeof(uint64_t) and contains the region for which the clean | ||
171 | * status is being made. | ||
172 | * | ||
173 | * When the request has been processed, user-space must return the | ||
174 | * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or | ||
175 | * 1 (clean), setting 'data_size' and 'error' appropriately. | ||
176 | */ | ||
177 | #define DM_ULOG_IS_CLEAN 7 | ||
178 | |||
179 | /* | ||
180 | * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h): | ||
181 | * int (*in_sync)(struct dm_dirty_log *log, region_t region, | ||
182 | * int can_block); | ||
183 | * | ||
184 | * Payload-to-userspace: | ||
185 | * uint64_t - the region to get sync status on | ||
186 | * Payload-to-kernel: | ||
187 | * int64_t - 1 if in-sync, 0 otherwise | ||
188 | * | ||
189 | * Exactly the same as 'is_clean' above, except this time asking "has the | ||
190 | * region been recovered?" vs. "is the region not being modified?" | ||
191 | */ | ||
192 | #define DM_ULOG_IN_SYNC 8 | ||
193 | |||
194 | /* | ||
195 | * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h): | ||
196 | * int (*flush)(struct dm_dirty_log *log); | ||
197 | * | ||
198 | * Payload-to-userspace: | ||
199 | * None. | ||
200 | * Payload-to-kernel: | ||
201 | * None. | ||
202 | * | ||
203 | * No incoming or outgoing payload. Simply flush log state to disk. | ||
204 | * | ||
205 | * When the request has been processed, user-space must return the | ||
206 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
207 | * 'data_size' appropriately. | ||
208 | */ | ||
209 | #define DM_ULOG_FLUSH 9 | ||
210 | |||
211 | /* | ||
212 | * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h): | ||
213 | * void (*mark_region)(struct dm_dirty_log *log, region_t region); | ||
214 | * | ||
215 | * Payload-to-userspace: | ||
216 | * uint64_t [] - region(s) to mark | ||
217 | * Payload-to-kernel: | ||
218 | * None. | ||
219 | * | ||
220 | * Incoming payload contains the one or more regions to mark dirty. | ||
221 | * The number of regions contained in the payload can be determined from | ||
222 | * 'data_size/sizeof(uint64_t)'. | ||
223 | * | ||
224 | * When the request has been processed, user-space must return the | ||
225 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
226 | * 'data_size' appropriately. | ||
227 | */ | ||
228 | #define DM_ULOG_MARK_REGION 10 | ||
229 | |||
230 | /* | ||
231 | * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h): | ||
232 | * void (*clear_region)(struct dm_dirty_log *log, region_t region); | ||
233 | * | ||
234 | * Payload-to-userspace: | ||
235 | * uint64_t [] - region(s) to clear | ||
236 | * Payload-to-kernel: | ||
237 | * None. | ||
238 | * | ||
239 | * Incoming payload contains the one or more regions to mark clean. | ||
240 | * The number of regions contained in the payload can be determined from | ||
241 | * 'data_size/sizeof(uint64_t)'. | ||
242 | * | ||
243 | * When the request has been processed, user-space must return the | ||
244 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
245 | * 'data_size' appropriately. | ||
246 | */ | ||
247 | #define DM_ULOG_CLEAR_REGION 11 | ||
248 | |||
249 | /* | ||
250 | * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h): | ||
251 | * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); | ||
252 | * | ||
253 | * Payload-to-userspace: | ||
254 | * None. | ||
255 | * Payload-to-kernel: | ||
256 | * { | ||
257 | * int64_t i; -- 1 if recovery necessary, 0 otherwise | ||
258 | * uint64_t r; -- The region to recover if i=1 | ||
259 | * } | ||
260 | * 'data_size' should be set appropriately. | ||
261 | * | ||
262 | * When the request has been processed, user-space must return the | ||
263 | * dm_ulog_request to the kernel - setting the 'error' field appropriately. | ||
264 | */ | ||
265 | #define DM_ULOG_GET_RESYNC_WORK 12 | ||
266 | |||
267 | /* | ||
268 | * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h): | ||
269 | * void (*set_region_sync)(struct dm_dirty_log *log, | ||
270 | * region_t region, int in_sync); | ||
271 | * | ||
272 | * Payload-to-userspace: | ||
273 | * { | ||
274 | * uint64_t - region to set sync state on | ||
275 | * int64_t - 0 if not-in-sync, 1 if in-sync | ||
276 | * } | ||
277 | * Payload-to-kernel: | ||
278 | * None. | ||
279 | * | ||
280 | * When the request has been processed, user-space must return the | ||
281 | * dm_ulog_request to the kernel - setting the 'error' field and clearing | ||
282 | * 'data_size' appropriately. | ||
283 | */ | ||
284 | #define DM_ULOG_SET_REGION_SYNC 13 | ||
285 | |||
286 | /* | ||
287 | * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h): | ||
288 | * region_t (*get_sync_count)(struct dm_dirty_log *log); | ||
289 | * | ||
290 | * Payload-to-userspace: | ||
291 | * None. | ||
292 | * Payload-to-kernel: | ||
293 | * uint64_t - the number of in-sync regions | ||
294 | * | ||
295 | * No incoming payload. Kernel-bound payload contains the number of | ||
296 | * regions that are in-sync (in a size_t). | ||
297 | * | ||
298 | * When the request has been processed, user-space must return the | ||
299 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
300 | * 'data_size' appropriately. | ||
301 | */ | ||
302 | #define DM_ULOG_GET_SYNC_COUNT 14 | ||
303 | |||
304 | /* | ||
305 | * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h): | ||
306 | * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO, | ||
307 | * char *result, unsigned maxlen); | ||
308 | * | ||
309 | * Payload-to-userspace: | ||
310 | * None. | ||
311 | * Payload-to-kernel: | ||
312 | * Character string containing STATUSTYPE_INFO | ||
313 | * | ||
314 | * When the request has been processed, user-space must return the | ||
315 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
316 | * 'data_size' appropriately. | ||
317 | */ | ||
318 | #define DM_ULOG_STATUS_INFO 15 | ||
319 | |||
320 | /* | ||
321 | * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h): | ||
322 | * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE, | ||
323 | * char *result, unsigned maxlen); | ||
324 | * | ||
325 | * Payload-to-userspace: | ||
326 | * None. | ||
327 | * Payload-to-kernel: | ||
328 | * Character string containing STATUSTYPE_TABLE | ||
329 | * | ||
330 | * When the request has been processed, user-space must return the | ||
331 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
332 | * 'data_size' appropriately. | ||
333 | */ | ||
334 | #define DM_ULOG_STATUS_TABLE 16 | ||
335 | |||
336 | /* | ||
337 | * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h): | ||
338 | * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); | ||
339 | * | ||
340 | * Payload-to-userspace: | ||
341 | * uint64_t - region to determine recovery status on | ||
342 | * Payload-to-kernel: | ||
343 | * { | ||
344 | * int64_t is_recovering; -- 0 if no, 1 if yes | ||
345 | * uint64_t in_sync_hint; -- lowest region still needing resync | ||
346 | * } | ||
347 | * | ||
348 | * When the request has been processed, user-space must return the | ||
349 | * dm_ulog_request to the kernel - setting the 'error' field and | ||
350 | * 'data_size' appropriately. | ||
351 | */ | ||
352 | #define DM_ULOG_IS_REMOTE_RECOVERING 17 | ||
353 | |||
354 | /* | ||
355 | * (DM_ULOG_REQUEST_MASK & request_type) to get the request type | ||
356 | * | ||
357 | * Payload-to-userspace: | ||
358 | * A single string containing all the argv arguments separated by ' 's | ||
359 | * Payload-to-kernel: | ||
360 | * None. ('data_size' in the dm_ulog_request struct should be 0.) | ||
361 | * | ||
362 | * We are reserving 8 bits of the 32-bit 'request_type' field for the | ||
363 | * various request types above. The remaining 24-bits are currently | ||
364 | * set to zero and are reserved for future use and compatibility concerns. | ||
365 | * | ||
366 | * User-space should always use DM_ULOG_REQUEST_TYPE to aquire the | ||
367 | * request type from the 'request_type' field to maintain forward compatibility. | ||
368 | */ | ||
369 | #define DM_ULOG_REQUEST_MASK 0xFF | ||
370 | #define DM_ULOG_REQUEST_TYPE(request_type) \ | ||
371 | (DM_ULOG_REQUEST_MASK & (request_type)) | ||
372 | |||
373 | struct dm_ulog_request { | ||
374 | char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */ | ||
375 | char padding[7]; /* Padding because DM_UUID_LEN = 129 */ | ||
376 | |||
377 | int32_t error; /* Used to report back processing errors */ | ||
378 | |||
379 | uint32_t seq; /* Sequence number for request */ | ||
380 | uint32_t request_type; /* DM_ULOG_* defined above */ | ||
381 | uint32_t data_size; /* How much data (not including this struct) */ | ||
382 | |||
383 | char data[0]; | ||
384 | }; | ||
385 | |||
386 | #endif /* __DM_LOG_USERSPACE_H__ */ | ||