aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-log-userspace-transfer.c
diff options
context:
space:
mode:
authorJonthan Brassow <jbrassow@redhat.com>2009-06-22 05:12:35 -0400
committerAlasdair G Kergon <agk@redhat.com>2009-06-22 05:12:35 -0400
commitf5db4af466e2dca0fe822019812d586ca910b00c (patch)
tree1bbaaa36509df9f7eecc19ccffa434048cf4b555 /drivers/md/dm-log-userspace-transfer.c
parent754c5fc7ebb417b23601a6222a6005cc2e7f2913 (diff)
dm raid1: add userspace log
This patch contains a device-mapper mirror log module that forwards requests to userspace for processing. The structures used for communication between kernel and userspace are located in include/linux/dm-log-userspace.h. Due to the frequency, diversity, and 2-way communication nature of the exchanges between kernel and userspace, 'connector' was chosen as the interface for communication. The first log implementations written in userspace - "clustered-disk" and "clustered-core" - support clustered shared storage. A userspace daemon (in the LVM2 source code repository) uses openAIS/corosync to process requests in an ordered fashion with the rest of the nodes in the cluster so as to prevent log state corruption. Other implementations with no association to LVM or openAIS/corosync, are certainly possible. (Imagine if two machines are writing to the same region of a mirror. They would both mark the region dirty, but you need a cluster-aware entity that can handle properly marking the region clean when they are done. Otherwise, you might clear the region when the first machine is done, not the second.) Signed-off-by: Jonathan Brassow <jbrassow@redhat.com> Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md/dm-log-userspace-transfer.c')
-rw-r--r--drivers/md/dm-log-userspace-transfer.c276
1 files changed, 276 insertions, 0 deletions
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
new file mode 100644
index 000000000000..0ca1ee768a1f
--- /dev/null
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -0,0 +1,276 @@
1/*
2 * Copyright (C) 2006-2009 Red Hat, Inc.
3 *
4 * This file is released under the LGPL.
5 */
6
7#include <linux/kernel.h>
8#include <linux/module.h>
9#include <net/sock.h>
10#include <linux/workqueue.h>
11#include <linux/connector.h>
12#include <linux/device-mapper.h>
13#include <linux/dm-log-userspace.h>
14
15#include "dm-log-userspace-transfer.h"
16
17static uint32_t dm_ulog_seq;
18
19/*
20 * Netlink/Connector is an unreliable protocol. How long should
21 * we wait for a response before assuming it was lost and retrying?
22 * (If we do receive a response after this time, it will be discarded
23 * and the response to the resent request will be waited for.
24 */
25#define DM_ULOG_RETRY_TIMEOUT (15 * HZ)
26
27/*
28 * Pre-allocated space for speed
29 */
30#define DM_ULOG_PREALLOCED_SIZE 512
31static struct cn_msg *prealloced_cn_msg;
32static struct dm_ulog_request *prealloced_ulog_tfr;
33
34static struct cb_id ulog_cn_id = {
35 .idx = CN_IDX_DM,
36 .val = CN_VAL_DM_USERSPACE_LOG
37};
38
39static DEFINE_MUTEX(dm_ulog_lock);
40
41struct receiving_pkg {
42 struct list_head list;
43 struct completion complete;
44
45 uint32_t seq;
46
47 int error;
48 size_t *data_size;
49 char *data;
50};
51
52static DEFINE_SPINLOCK(receiving_list_lock);
53static struct list_head receiving_list;
54
55static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
56{
57 int r;
58 struct cn_msg *msg = prealloced_cn_msg;
59
60 memset(msg, 0, sizeof(struct cn_msg));
61
62 msg->id.idx = ulog_cn_id.idx;
63 msg->id.val = ulog_cn_id.val;
64 msg->ack = 0;
65 msg->seq = tfr->seq;
66 msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
67
68 r = cn_netlink_send(msg, 0, gfp_any());
69
70 return r;
71}
72
73/*
74 * Parameters for this function can be either msg or tfr, but not
75 * both. This function fills in the reply for a waiting request.
76 * If just msg is given, then the reply is simply an ACK from userspace
77 * that the request was received.
78 *
79 * Returns: 0 on success, -ENOENT on failure
80 */
81static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
82{
83 uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
84 struct receiving_pkg *pkg;
85
86 /*
87 * The 'receiving_pkg' entries in this list are statically
88 * allocated on the stack in 'dm_consult_userspace'.
89 * Each process that is waiting for a reply from the user
90 * space server will have an entry in this list.
91 *
92 * We are safe to do it this way because the stack space
93 * is unique to each process, but still addressable by
94 * other processes.
95 */
96 list_for_each_entry(pkg, &receiving_list, list) {
97 if (rtn_seq != pkg->seq)
98 continue;
99
100 if (msg) {
101 pkg->error = -msg->ack;
102 /*
103 * If we are trying again, we will need to know our
104 * storage capacity. Otherwise, along with the
105 * error code, we make explicit that we have no data.
106 */
107 if (pkg->error != -EAGAIN)
108 *(pkg->data_size) = 0;
109 } else if (tfr->data_size > *(pkg->data_size)) {
110 DMERR("Insufficient space to receive package [%u] "
111 "(%u vs %lu)", tfr->request_type,
112 tfr->data_size, *(pkg->data_size));
113
114 *(pkg->data_size) = 0;
115 pkg->error = -ENOSPC;
116 } else {
117 pkg->error = tfr->error;
118 memcpy(pkg->data, tfr->data, tfr->data_size);
119 *(pkg->data_size) = tfr->data_size;
120 }
121 complete(&pkg->complete);
122 return 0;
123 }
124
125 return -ENOENT;
126}
127
128/*
129 * This is the connector callback that delivers data
130 * that was sent from userspace.
131 */
132static void cn_ulog_callback(void *data)
133{
134 struct cn_msg *msg = (struct cn_msg *)data;
135 struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
136
137 spin_lock(&receiving_list_lock);
138 if (msg->len == 0)
139 fill_pkg(msg, NULL);
140 else if (msg->len < sizeof(*tfr))
141 DMERR("Incomplete message received (expected %u, got %u): [%u]",
142 (unsigned)sizeof(*tfr), msg->len, msg->seq);
143 else
144 fill_pkg(NULL, tfr);
145 spin_unlock(&receiving_list_lock);
146}
147
148/**
149 * dm_consult_userspace
150 * @uuid: log's uuid (must be DM_UUID_LEN in size)
151 * @request_type: found in include/linux/dm-log-userspace.h
152 * @data: data to tx to the server
153 * @data_size: size of data in bytes
154 * @rdata: place to put return data from server
155 * @rdata_size: value-result (amount of space given/amount of space used)
156 *
157 * rdata_size is undefined on failure.
158 *
159 * Memory used to communicate with userspace is zero'ed
160 * before populating to ensure that no unwanted bits leak
161 * from kernel space to user-space. All userspace log communications
162 * between kernel and user space go through this function.
163 *
164 * Returns: 0 on success, -EXXX on failure
165 **/
166int dm_consult_userspace(const char *uuid, int request_type,
167 char *data, size_t data_size,
168 char *rdata, size_t *rdata_size)
169{
170 int r = 0;
171 size_t dummy = 0;
172 int overhead_size =
173 sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg);
174 struct dm_ulog_request *tfr = prealloced_ulog_tfr;
175 struct receiving_pkg pkg;
176
177 if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) {
178 DMINFO("Size of tfr exceeds preallocated size");
179 return -EINVAL;
180 }
181
182 if (!rdata_size)
183 rdata_size = &dummy;
184resend:
185 /*
186 * We serialize the sending of requests so we can
187 * use the preallocated space.
188 */
189 mutex_lock(&dm_ulog_lock);
190
191 memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
192 memcpy(tfr->uuid, uuid, DM_UUID_LEN);
193 tfr->seq = dm_ulog_seq++;
194
195 /*
196 * Must be valid request type (all other bits set to
197 * zero). This reserves other bits for possible future
198 * use.
199 */
200 tfr->request_type = request_type & DM_ULOG_REQUEST_MASK;
201
202 tfr->data_size = data_size;
203 if (data && data_size)
204 memcpy(tfr->data, data, data_size);
205
206 memset(&pkg, 0, sizeof(pkg));
207 init_completion(&pkg.complete);
208 pkg.seq = tfr->seq;
209 pkg.data_size = rdata_size;
210 pkg.data = rdata;
211 spin_lock(&receiving_list_lock);
212 list_add(&(pkg.list), &receiving_list);
213 spin_unlock(&receiving_list_lock);
214
215 r = dm_ulog_sendto_server(tfr);
216
217 mutex_unlock(&dm_ulog_lock);
218
219 if (r) {
220 DMERR("Unable to send log request [%u] to userspace: %d",
221 request_type, r);
222 spin_lock(&receiving_list_lock);
223 list_del_init(&(pkg.list));
224 spin_unlock(&receiving_list_lock);
225
226 goto out;
227 }
228
229 r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT);
230 spin_lock(&receiving_list_lock);
231 list_del_init(&(pkg.list));
232 spin_unlock(&receiving_list_lock);
233 if (!r) {
234 DMWARN("[%s] Request timed out: [%u/%u] - retrying",
235 (strlen(uuid) > 8) ?
236 (uuid + (strlen(uuid) - 8)) : (uuid),
237 request_type, pkg.seq);
238 goto resend;
239 }
240
241 r = pkg.error;
242 if (r == -EAGAIN)
243 goto resend;
244
245out:
246 return r;
247}
248
249int dm_ulog_tfr_init(void)
250{
251 int r;
252 void *prealloced;
253
254 INIT_LIST_HEAD(&receiving_list);
255
256 prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL);
257 if (!prealloced)
258 return -ENOMEM;
259
260 prealloced_cn_msg = prealloced;
261 prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg);
262
263 r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback);
264 if (r) {
265 cn_del_callback(&ulog_cn_id);
266 return r;
267 }
268
269 return 0;
270}
271
272void dm_ulog_tfr_exit(void)
273{
274 cn_del_callback(&ulog_cn_id);
275 kfree(prealloced_cn_msg);
276}