-rw-r--r--  drivers/staging/Kconfig           |    2 -
-rw-r--r--  drivers/staging/Makefile          |    1 -
-rw-r--r--  drivers/staging/dst/Kconfig       |   67 --
-rw-r--r--  drivers/staging/dst/Makefile      |    3 -
-rw-r--r--  drivers/staging/dst/crypto.c      |  733 --
-rw-r--r--  drivers/staging/dst/dcore.c       |  968 --
-rw-r--r--  drivers/staging/dst/export.c      |  660 --
-rw-r--r--  drivers/staging/dst/state.c       |  844 --
-rw-r--r--  drivers/staging/dst/thread_pool.c |  348 --
-rw-r--r--  drivers/staging/dst/trans.c       |  337 --
-rw-r--r--  include/linux/dst.h               |  587 --
11 files changed, 0 insertions(+), 4550 deletions(-)
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index db0de940949e..94eb86319ff3 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -87,8 +87,6 @@ source "drivers/staging/frontier/Kconfig"
87 | 87 | ||
88 | source "drivers/staging/dream/Kconfig" | 88 | source "drivers/staging/dream/Kconfig" |
89 | 89 | ||
90 | source "drivers/staging/dst/Kconfig" | ||
91 | |||
92 | source "drivers/staging/pohmelfs/Kconfig" | 90 | source "drivers/staging/pohmelfs/Kconfig" |
93 | 91 | ||
94 | source "drivers/staging/b3dfg/Kconfig" | 92 | source "drivers/staging/b3dfg/Kconfig" |
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 73c6a71155e0..b5e67b889f60 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_RTL8192E)	+= rtl8192e/
26 | obj-$(CONFIG_INPUT_MIMIO) += mimio/ | 26 | obj-$(CONFIG_INPUT_MIMIO) += mimio/ |
27 | obj-$(CONFIG_TRANZPORT) += frontier/ | 27 | obj-$(CONFIG_TRANZPORT) += frontier/ |
28 | obj-$(CONFIG_DREAM) += dream/ | 28 | obj-$(CONFIG_DREAM) += dream/ |
29 | obj-$(CONFIG_DST) += dst/ | ||
30 | obj-$(CONFIG_POHMELFS) += pohmelfs/ | 29 | obj-$(CONFIG_POHMELFS) += pohmelfs/ |
31 | obj-$(CONFIG_B3DFG) += b3dfg/ | 30 | obj-$(CONFIG_B3DFG) += b3dfg/ |
32 | obj-$(CONFIG_IDE_PHISON) += phison/ | 31 | obj-$(CONFIG_IDE_PHISON) += phison/ |
diff --git a/drivers/staging/dst/Kconfig b/drivers/staging/dst/Kconfig
deleted file mode 100644
index 448d342ac2a2..000000000000
--- a/drivers/staging/dst/Kconfig
+++ /dev/null
@@ -1,67 +0,0 @@
1 | config DST | ||
2 | tristate "Distributed storage" | ||
3 | depends on NET && CRYPTO && SYSFS && BLK_DEV | ||
4 | select CONNECTOR | ||
5 | ---help--- | ||
6 | DST is network block device storage, which organizes storage | ||
7 | exported by remote nodes into a local block device. | ||
8 | |||
9 | DST works on top of any network media and protocol; it is just a matter | ||
10 | of teaching the configuration utility the correct addresses. The most | ||
11 | common example is TCP over IP, which makes it possible to pass through | ||
12 | firewalls and create remote backup storage in a different datacenter. DST | ||
13 | requires a single port to be enabled on the exporting node and outgoing | ||
14 | connections on the local node. | ||
15 | |||
16 | DST works with an in-kernel client and server, which improves performance | ||
17 | by eliminating unneeded data copies and by not depending on the version | ||
18 | of external IO components. It does require a userspace configuration | ||
19 | utility, though. | ||
20 | |||
21 | DST uses a transaction model, in which each store has to be explicitly | ||
22 | acked by the remote node to be considered successfully written. There | ||
23 | may be many in-flight transactions. When the remote host does not ack a | ||
24 | transaction, it is resent a predefined number of times with a specified | ||
25 | timeout between attempts. All of those parameters are configurable. | ||
26 | Transactions are marked as failed only after all resends complete | ||
27 | unsuccessfully; a long enough resend timeout and/or a large number of | ||
28 | resends makes it possible not to return an error to the higher (usually | ||
29 | filesystem) layer during short network problems or remote node outages. | ||
30 | In a network RAID setup this means the storage will not degrade until | ||
31 | transactions are marked as failed, and thus will not force checksum | ||
32 | recalculation and data rebuild. On connection failure DST automatically | ||
33 | tries to reconnect; it sends ping commands at idle time to detect whether the remote node is alive. | ||
34 | |||
35 | Because of the transactional model it is possible to use zero-copy sending | ||
36 | without worrying about data corruption (which would in any case be detected | ||
37 | by the strong checksums). | ||
38 | |||
39 | DST may fully encrypt the data channel when the channel is untrusted and | ||
40 | compute a strong checksum of the transferred data. It is possible to configure | ||
41 | the algorithms and crypto keys; they have to match on both sides of the | ||
42 | network channel. Crypto processing does not introduce noticeable performance | ||
43 | overhead, since DST uses a configurable pool of threads to perform it. | ||
44 | |||
45 | DST uses memory pools for all of its transaction allocations (the only | ||
46 | additional allocation on the client) and for server allocations (bio pools, | ||
47 | while pages are allocated from the slab allocator). | ||
48 | |||
49 | At startup DST performs a simple negotiation with the export node to determine | ||
50 | access permissions and the size of the exported storage. This can be extended | ||
51 | if new parameters need to be autonegotiated. | ||
52 | |||
53 | DST carries block IO flags in the protocol, which makes it possible to | ||
54 | transparently implement barriers and sync/flush operations. Those flags are | ||
55 | used on the export node, where IO against the local storage is performed, so | ||
56 | a sync write will be sync on the remote node too; this in turn improves data | ||
57 | integrity and resistance to errors and data corruption during power outages or storage damage. | ||
58 | |||
59 | Homepage: http://www.ioremap.net/projects/dst | ||
60 | Userspace configuration utility and the latest releases: http://www.ioremap.net/archive/dst/ | ||
61 | |||
62 | config DST_DEBUG | ||
63 | bool "DST debug" | ||
64 | depends on DST | ||
65 | ---help--- | ||
66 | This option will enable HEAVY debugging of the DST. | ||
67 | Turn it on ONLY if you have to debug some really obscure problem. | ||
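The resend/ack behaviour described in the help text above is easy to picture in isolation. Below is a minimal, self-contained C sketch of that bookkeeping only; it is not DST's actual transaction code (trans.c is part of this removal but not shown here), and every name in it is hypothetical.

#include <stdbool.h>
#include <time.h>

/* Hypothetical per-transaction state mirroring the Kconfig description:
 * every write must be acked by the remote node; an unacked write is
 * resent a configurable number of times before an error is propagated
 * to the upper (usually filesystem) layer. */
struct demo_trans {
	unsigned long long gen;      /* generation number used as an id */
	time_t sent_at;              /* when the last (re)send happened */
	unsigned int retries;        /* resends performed so far */
	bool acked;                  /* set once the remote node acks */
};

struct demo_limits {
	unsigned int resend_timeout; /* seconds between resends (configurable) */
	unsigned int max_retries;    /* resends allowed before failing */
};

/* Returns 1 if the transaction should be resent now, -1 if it has to be
 * failed (all resends exhausted), and 0 if nothing needs to happen yet. */
static int demo_trans_scan(struct demo_trans *t,
			   const struct demo_limits *lim, time_t now)
{
	if (t->acked)
		return 0;
	if ((unsigned long)(now - t->sent_at) < lim->resend_timeout)
		return 0;
	if (t->retries >= lim->max_retries)
		return -1;	/* only now does the caller see an IO error */
	t->retries++;
	t->sent_at = now;
	return 1;		/* resend over the (possibly reconnected) socket */
}

With a long resend_timeout and/or a large max_retries, short network outages never surface as IO errors, which is the property the help text relies on for network RAID setups.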
diff --git a/drivers/staging/dst/Makefile b/drivers/staging/dst/Makefile
deleted file mode 100644
index 3a8b0cf9643e..000000000000
--- a/drivers/staging/dst/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
1 | obj-$(CONFIG_DST) += nst.o | ||
2 | |||
3 | nst-y := dcore.o state.o export.o thread_pool.o crypto.o trans.o | ||
diff --git a/drivers/staging/dst/crypto.c b/drivers/staging/dst/crypto.c
deleted file mode 100644
index 351295c97a4b..000000000000
--- a/drivers/staging/dst/crypto.c
+++ /dev/null
@@ -1,733 +0,0 @@
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/bio.h> | ||
17 | #include <linux/crypto.h> | ||
18 | #include <linux/dst.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/scatterlist.h> | ||
21 | #include <linux/slab.h> | ||
22 | |||
23 | /* | ||
24 | * Tricky bastard, but IV can be more complex with time... | ||
25 | */ | ||
26 | static inline u64 dst_gen_iv(struct dst_trans *t) | ||
27 | { | ||
28 | return t->gen; | ||
29 | } | ||
30 | |||
31 | /* | ||
32 | * Crypto machinery: hash/cipher support for the given crypto controls. | ||
33 | */ | ||
34 | static struct crypto_hash *dst_init_hash(struct dst_crypto_ctl *ctl, u8 *key) | ||
35 | { | ||
36 | int err; | ||
37 | struct crypto_hash *hash; | ||
38 | |||
39 | hash = crypto_alloc_hash(ctl->hash_algo, 0, CRYPTO_ALG_ASYNC); | ||
40 | if (IS_ERR(hash)) { | ||
41 | err = PTR_ERR(hash); | ||
42 | dprintk("%s: failed to allocate hash '%s', err: %d.\n", | ||
43 | __func__, ctl->hash_algo, err); | ||
44 | goto err_out_exit; | ||
45 | } | ||
46 | |||
47 | ctl->crypto_attached_size = crypto_hash_digestsize(hash); | ||
48 | |||
49 | if (!ctl->hash_keysize) | ||
50 | return hash; | ||
51 | |||
52 | err = crypto_hash_setkey(hash, key, ctl->hash_keysize); | ||
53 | if (err) { | ||
54 | dprintk("%s: failed to set key for hash '%s', err: %d.\n", | ||
55 | __func__, ctl->hash_algo, err); | ||
56 | goto err_out_free; | ||
57 | } | ||
58 | |||
59 | return hash; | ||
60 | |||
61 | err_out_free: | ||
62 | crypto_free_hash(hash); | ||
63 | err_out_exit: | ||
64 | return ERR_PTR(err); | ||
65 | } | ||
66 | |||
67 | static struct crypto_ablkcipher *dst_init_cipher(struct dst_crypto_ctl *ctl, | ||
68 | u8 *key) | ||
69 | { | ||
70 | int err = -EINVAL; | ||
71 | struct crypto_ablkcipher *cipher; | ||
72 | |||
73 | if (!ctl->cipher_keysize) | ||
74 | goto err_out_exit; | ||
75 | |||
76 | cipher = crypto_alloc_ablkcipher(ctl->cipher_algo, 0, 0); | ||
77 | if (IS_ERR(cipher)) { | ||
78 | err = PTR_ERR(cipher); | ||
79 | dprintk("%s: failed to allocate cipher '%s', err: %d.\n", | ||
80 | __func__, ctl->cipher_algo, err); | ||
81 | goto err_out_exit; | ||
82 | } | ||
83 | |||
84 | crypto_ablkcipher_clear_flags(cipher, ~0); | ||
85 | |||
86 | err = crypto_ablkcipher_setkey(cipher, key, ctl->cipher_keysize); | ||
87 | if (err) { | ||
88 | dprintk("%s: failed to set key for cipher '%s', err: %d.\n", | ||
89 | __func__, ctl->cipher_algo, err); | ||
90 | goto err_out_free; | ||
91 | } | ||
92 | |||
93 | return cipher; | ||
94 | |||
95 | err_out_free: | ||
96 | crypto_free_ablkcipher(cipher); | ||
97 | err_out_exit: | ||
98 | return ERR_PTR(err); | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * Crypto engine has a pool of pages to encrypt data into before sending | ||
103 | * it over the network. This pool is freed/allocated here. | ||
104 | */ | ||
105 | static void dst_crypto_pages_free(struct dst_crypto_engine *e) | ||
106 | { | ||
107 | unsigned int i; | ||
108 | |||
109 | for (i = 0; i < e->page_num; ++i) | ||
110 | __free_page(e->pages[i]); | ||
111 | kfree(e->pages); | ||
112 | } | ||
113 | |||
114 | static int dst_crypto_pages_alloc(struct dst_crypto_engine *e, int num) | ||
115 | { | ||
116 | int i; | ||
117 | |||
118 | e->pages = kmalloc(num * sizeof(struct page **), GFP_KERNEL); | ||
119 | if (!e->pages) | ||
120 | return -ENOMEM; | ||
121 | |||
122 | for (i = 0; i < num; ++i) { | ||
123 | e->pages[i] = alloc_page(GFP_KERNEL); | ||
124 | if (!e->pages[i]) | ||
125 | goto err_out_free_pages; | ||
126 | } | ||
127 | |||
128 | e->page_num = num; | ||
129 | return 0; | ||
130 | |||
131 | err_out_free_pages: | ||
132 | while (--i >= 0) | ||
133 | __free_page(e->pages[i]); | ||
134 | |||
135 | kfree(e->pages); | ||
136 | return -ENOMEM; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Initialize crypto engine for given node. | ||
141 | * Setup cipher/hash, keys, pool of threads and private data. | ||
142 | */ | ||
143 | static int dst_crypto_engine_init(struct dst_crypto_engine *e, | ||
144 | struct dst_node *n) | ||
145 | { | ||
146 | int err; | ||
147 | struct dst_crypto_ctl *ctl = &n->crypto; | ||
148 | |||
149 | err = dst_crypto_pages_alloc(e, n->max_pages); | ||
150 | if (err) | ||
151 | goto err_out_exit; | ||
152 | |||
153 | e->size = PAGE_SIZE; | ||
154 | e->data = kmalloc(e->size, GFP_KERNEL); | ||
155 | if (!e->data) { | ||
156 | err = -ENOMEM; | ||
157 | goto err_out_free_pages; | ||
158 | } | ||
159 | |||
160 | if (ctl->hash_algo[0]) { | ||
161 | e->hash = dst_init_hash(ctl, n->hash_key); | ||
162 | if (IS_ERR(e->hash)) { | ||
163 | err = PTR_ERR(e->hash); | ||
164 | e->hash = NULL; | ||
165 | goto err_out_free; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | if (ctl->cipher_algo[0]) { | ||
170 | e->cipher = dst_init_cipher(ctl, n->cipher_key); | ||
171 | if (IS_ERR(e->cipher)) { | ||
172 | err = PTR_ERR(e->cipher); | ||
173 | e->cipher = NULL; | ||
174 | goto err_out_free_hash; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | return 0; | ||
179 | |||
180 | err_out_free_hash: | ||
181 | crypto_free_hash(e->hash); | ||
182 | err_out_free: | ||
183 | kfree(e->data); | ||
184 | err_out_free_pages: | ||
185 | dst_crypto_pages_free(e); | ||
186 | err_out_exit: | ||
187 | return err; | ||
188 | } | ||
189 | |||
190 | static void dst_crypto_engine_exit(struct dst_crypto_engine *e) | ||
191 | { | ||
192 | if (e->hash) | ||
193 | crypto_free_hash(e->hash); | ||
194 | if (e->cipher) | ||
195 | crypto_free_ablkcipher(e->cipher); | ||
196 | dst_crypto_pages_free(e); | ||
197 | kfree(e->data); | ||
198 | } | ||
199 | |||
200 | /* | ||
201 | * Waiting for cipher processing to be completed. | ||
202 | */ | ||
203 | struct dst_crypto_completion { | ||
204 | struct completion complete; | ||
205 | int error; | ||
206 | }; | ||
207 | |||
208 | static void dst_crypto_complete(struct crypto_async_request *req, int err) | ||
209 | { | ||
210 | struct dst_crypto_completion *c = req->data; | ||
211 | |||
212 | if (err == -EINPROGRESS) | ||
213 | return; | ||
214 | |||
215 | dprintk("%s: req: %p, err: %d.\n", __func__, req, err); | ||
216 | c->error = err; | ||
217 | complete(&c->complete); | ||
218 | } | ||
219 | |||
220 | static int dst_crypto_process(struct ablkcipher_request *req, | ||
221 | struct scatterlist *sg_dst, struct scatterlist *sg_src, | ||
222 | void *iv, int enc, unsigned long timeout) | ||
223 | { | ||
224 | struct dst_crypto_completion c; | ||
225 | int err; | ||
226 | |||
227 | init_completion(&c.complete); | ||
228 | c.error = -EINPROGRESS; | ||
229 | |||
230 | ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, | ||
231 | dst_crypto_complete, &c); | ||
232 | |||
233 | ablkcipher_request_set_crypt(req, sg_src, sg_dst, sg_src->length, iv); | ||
234 | |||
235 | if (enc) | ||
236 | err = crypto_ablkcipher_encrypt(req); | ||
237 | else | ||
238 | err = crypto_ablkcipher_decrypt(req); | ||
239 | |||
240 | switch (err) { | ||
241 | case -EINPROGRESS: | ||
242 | case -EBUSY: | ||
243 | err = wait_for_completion_interruptible_timeout(&c.complete, | ||
244 | timeout); | ||
245 | if (!err) | ||
246 | err = -ETIMEDOUT; | ||
247 | else | ||
248 | err = c.error; | ||
249 | break; | ||
250 | default: | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | return err; | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * DST uses a generic iteration approach for data crypto processing. | ||
259 | * A single block IO request is turned into an array of scatterlists, | ||
260 | * which are submitted to the crypto processing iterator. | ||
261 | * | ||
262 | * Input and output iterator initialization differ, since in the | ||
263 | * output case we cannot encrypt data in place and need temporary | ||
264 | * storage, which is then sent to the remote peer. | ||
265 | */ | ||
266 | static int dst_trans_iter_out(struct bio *bio, struct dst_crypto_engine *e, | ||
267 | int (*iterator) (struct dst_crypto_engine *e, | ||
268 | struct scatterlist *dst, | ||
269 | struct scatterlist *src)) | ||
270 | { | ||
271 | struct bio_vec *bv; | ||
272 | int err, i; | ||
273 | |||
274 | sg_init_table(e->src, bio->bi_vcnt); | ||
275 | sg_init_table(e->dst, bio->bi_vcnt); | ||
276 | |||
277 | bio_for_each_segment(bv, bio, i) { | ||
278 | sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset); | ||
279 | sg_set_page(&e->dst[i], e->pages[i], bv->bv_len, bv->bv_offset); | ||
280 | |||
281 | err = iterator(e, &e->dst[i], &e->src[i]); | ||
282 | if (err) | ||
283 | return err; | ||
284 | } | ||
285 | |||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | static int dst_trans_iter_in(struct bio *bio, struct dst_crypto_engine *e, | ||
290 | int (*iterator) (struct dst_crypto_engine *e, | ||
291 | struct scatterlist *dst, | ||
292 | struct scatterlist *src)) | ||
293 | { | ||
294 | struct bio_vec *bv; | ||
295 | int err, i; | ||
296 | |||
297 | sg_init_table(e->src, bio->bi_vcnt); | ||
298 | sg_init_table(e->dst, bio->bi_vcnt); | ||
299 | |||
300 | bio_for_each_segment(bv, bio, i) { | ||
301 | sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset); | ||
302 | sg_set_page(&e->dst[i], bv->bv_page, bv->bv_len, bv->bv_offset); | ||
303 | |||
304 | err = iterator(e, &e->dst[i], &e->src[i]); | ||
305 | if (err) | ||
306 | return err; | ||
307 | } | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int dst_crypt_iterator(struct dst_crypto_engine *e, | ||
313 | struct scatterlist *sg_dst, struct scatterlist *sg_src) | ||
314 | { | ||
315 | struct ablkcipher_request *req = e->data; | ||
316 | u8 iv[32]; | ||
317 | |||
318 | memset(iv, 0, sizeof(iv)); | ||
319 | |||
320 | memcpy(iv, &e->iv, sizeof(e->iv)); | ||
321 | |||
322 | return dst_crypto_process(req, sg_dst, sg_src, iv, e->enc, e->timeout); | ||
323 | } | ||
324 | |||
325 | static int dst_crypt(struct dst_crypto_engine *e, struct bio *bio) | ||
326 | { | ||
327 | struct ablkcipher_request *req = e->data; | ||
328 | |||
329 | memset(req, 0, sizeof(struct ablkcipher_request)); | ||
330 | ablkcipher_request_set_tfm(req, e->cipher); | ||
331 | |||
332 | if (e->enc) | ||
333 | return dst_trans_iter_out(bio, e, dst_crypt_iterator); | ||
334 | else | ||
335 | return dst_trans_iter_in(bio, e, dst_crypt_iterator); | ||
336 | } | ||
337 | |||
338 | static int dst_hash_iterator(struct dst_crypto_engine *e, | ||
339 | struct scatterlist *sg_dst, struct scatterlist *sg_src) | ||
340 | { | ||
341 | return crypto_hash_update(e->data, sg_src, sg_src->length); | ||
342 | } | ||
343 | |||
344 | static int dst_hash(struct dst_crypto_engine *e, struct bio *bio, void *dst) | ||
345 | { | ||
346 | struct hash_desc *desc = e->data; | ||
347 | int err; | ||
348 | |||
349 | desc->tfm = e->hash; | ||
350 | desc->flags = 0; | ||
351 | |||
352 | err = crypto_hash_init(desc); | ||
353 | if (err) | ||
354 | return err; | ||
355 | |||
356 | err = dst_trans_iter_in(bio, e, dst_hash_iterator); | ||
357 | if (err) | ||
358 | return err; | ||
359 | |||
360 | err = crypto_hash_final(desc, dst); | ||
361 | if (err) | ||
362 | return err; | ||
363 | |||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * Initialize/clean up a crypto thread. The only things it should | ||
369 | * do are to allocate a pool of pages as temporary storage | ||
370 | * and to set up the cipher and/or hash. | ||
371 | */ | ||
372 | static void *dst_crypto_thread_init(void *data) | ||
373 | { | ||
374 | struct dst_node *n = data; | ||
375 | struct dst_crypto_engine *e; | ||
376 | int err = -ENOMEM; | ||
377 | |||
378 | e = kzalloc(sizeof(struct dst_crypto_engine), GFP_KERNEL); | ||
379 | if (!e) | ||
380 | goto err_out_exit; | ||
381 | e->src = kcalloc(2 * n->max_pages, sizeof(struct scatterlist), | ||
382 | GFP_KERNEL); | ||
383 | if (!e->src) | ||
384 | goto err_out_free; | ||
385 | |||
386 | e->dst = e->src + n->max_pages; | ||
387 | |||
388 | err = dst_crypto_engine_init(e, n); | ||
389 | if (err) | ||
390 | goto err_out_free_all; | ||
391 | |||
392 | return e; | ||
393 | |||
394 | err_out_free_all: | ||
395 | kfree(e->src); | ||
396 | err_out_free: | ||
397 | kfree(e); | ||
398 | err_out_exit: | ||
399 | return ERR_PTR(err); | ||
400 | } | ||
401 | |||
402 | static void dst_crypto_thread_cleanup(void *private) | ||
403 | { | ||
404 | struct dst_crypto_engine *e = private; | ||
405 | |||
406 | dst_crypto_engine_exit(e); | ||
407 | kfree(e->src); | ||
408 | kfree(e); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * Initialize crypto engine for given node: store keys, create pool | ||
413 | * of threads, initialize each one. | ||
414 | * | ||
415 | * Each thread has a unique ID, but 0 and 1 are reserved for the receiving and | ||
416 | * accepting threads (on an export node), so IDs could start from 2; starting | ||
417 | * them from 10 makes it easy to see what a given thread is for. | ||
418 | */ | ||
419 | int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl) | ||
420 | { | ||
421 | void *key = (ctl + 1); | ||
422 | int err = -ENOMEM, i; | ||
423 | char name[32]; | ||
424 | |||
425 | if (ctl->hash_keysize) { | ||
426 | n->hash_key = kmalloc(ctl->hash_keysize, GFP_KERNEL); | ||
427 | if (!n->hash_key) | ||
428 | goto err_out_exit; | ||
429 | memcpy(n->hash_key, key, ctl->hash_keysize); | ||
430 | } | ||
431 | |||
432 | if (ctl->cipher_keysize) { | ||
433 | n->cipher_key = kmalloc(ctl->cipher_keysize, GFP_KERNEL); | ||
434 | if (!n->cipher_key) | ||
435 | goto err_out_free_hash; | ||
436 | memcpy(n->cipher_key, key, ctl->cipher_keysize); | ||
437 | } | ||
438 | memcpy(&n->crypto, ctl, sizeof(struct dst_crypto_ctl)); | ||
439 | |||
440 | for (i = 0; i < ctl->thread_num; ++i) { | ||
441 | snprintf(name, sizeof(name), "%s-crypto-%d", n->name, i); | ||
442 | /* Unique ids... */ | ||
443 | err = thread_pool_add_worker(n->pool, name, i + 10, | ||
444 | dst_crypto_thread_init, dst_crypto_thread_cleanup, n); | ||
445 | if (err) | ||
446 | goto err_out_free_threads; | ||
447 | } | ||
448 | |||
449 | return 0; | ||
450 | |||
451 | err_out_free_threads: | ||
452 | while (--i >= 0) | ||
453 | thread_pool_del_worker_id(n->pool, i+10); | ||
454 | |||
455 | if (ctl->cipher_keysize) | ||
456 | kfree(n->cipher_key); | ||
457 | ctl->cipher_keysize = 0; | ||
458 | err_out_free_hash: | ||
459 | if (ctl->hash_keysize) | ||
460 | kfree(n->hash_key); | ||
461 | ctl->hash_keysize = 0; | ||
462 | err_out_exit: | ||
463 | return err; | ||
464 | } | ||
465 | |||
466 | void dst_node_crypto_exit(struct dst_node *n) | ||
467 | { | ||
468 | struct dst_crypto_ctl *ctl = &n->crypto; | ||
469 | |||
470 | if (ctl->cipher_algo[0] || ctl->hash_algo[0]) { | ||
471 | kfree(n->hash_key); | ||
472 | kfree(n->cipher_key); | ||
473 | } | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Thread pool setup callback. Just stores a transaction in private data. | ||
478 | */ | ||
479 | static int dst_trans_crypto_setup(void *crypto_engine, void *trans) | ||
480 | { | ||
481 | struct dst_crypto_engine *e = crypto_engine; | ||
482 | |||
483 | e->private = trans; | ||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | #if 0 | ||
488 | static void dst_dump_bio(struct bio *bio) | ||
489 | { | ||
490 | u8 *p; | ||
491 | struct bio_vec *bv; | ||
492 | int i; | ||
493 | |||
494 | bio_for_each_segment(bv, bio, i) { | ||
495 | dprintk("%s: %llu/%u: size: %u, offset: %u, data: ", | ||
496 | __func__, bio->bi_sector, bio->bi_size, | ||
497 | bv->bv_len, bv->bv_offset); | ||
498 | |||
499 | p = kmap(bv->bv_page) + bv->bv_offset; | ||
500 | for (i = 0; i < bv->bv_len; ++i) | ||
501 | printk(KERN_DEBUG "%02x ", p[i]); | ||
502 | kunmap(bv->bv_page); | ||
503 | printk("\n"); | ||
504 | } | ||
505 | } | ||
506 | #endif | ||
507 | |||
508 | /* | ||
509 | * Encrypt/hash data and send it to the network. | ||
510 | */ | ||
511 | static int dst_crypto_process_sending(struct dst_crypto_engine *e, | ||
512 | struct bio *bio, u8 *hash) | ||
513 | { | ||
514 | int err; | ||
515 | |||
516 | if (e->cipher) { | ||
517 | err = dst_crypt(e, bio); | ||
518 | if (err) | ||
519 | goto err_out_exit; | ||
520 | } | ||
521 | |||
522 | if (e->hash) { | ||
523 | err = dst_hash(e, bio, hash); | ||
524 | if (err) | ||
525 | goto err_out_exit; | ||
526 | |||
527 | #ifdef CONFIG_DST_DEBUG | ||
528 | { | ||
529 | unsigned int i; | ||
530 | |||
531 | /* dst_dump_bio(bio); */ | ||
532 | |||
533 | printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash: ", | ||
534 | __func__, (u64)bio->bi_sector, | ||
535 | bio->bi_size, bio_data_dir(bio)); | ||
536 | for (i = 0; i < crypto_hash_digestsize(e->hash); ++i) | ||
537 | printk("%02x ", hash[i]); | ||
538 | printk("\n"); | ||
539 | } | ||
540 | #endif | ||
541 | } | ||
542 | |||
543 | return 0; | ||
544 | |||
545 | err_out_exit: | ||
546 | return err; | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * Check if received data is valid. Decipher if it is. | ||
551 | */ | ||
552 | static int dst_crypto_process_receiving(struct dst_crypto_engine *e, | ||
553 | struct bio *bio, u8 *hash, u8 *recv_hash) | ||
554 | { | ||
555 | int err; | ||
556 | |||
557 | if (e->hash) { | ||
558 | int mismatch; | ||
559 | |||
560 | err = dst_hash(e, bio, hash); | ||
561 | if (err) | ||
562 | goto err_out_exit; | ||
563 | |||
564 | mismatch = !!memcmp(recv_hash, hash, | ||
565 | crypto_hash_digestsize(e->hash)); | ||
566 | #ifdef CONFIG_DST_DEBUG | ||
567 | /* dst_dump_bio(bio); */ | ||
568 | |||
569 | printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash mismatch: %d", | ||
570 | __func__, (u64)bio->bi_sector, bio->bi_size, | ||
571 | bio_data_dir(bio), mismatch); | ||
572 | if (mismatch) { | ||
573 | unsigned int i; | ||
574 | |||
575 | printk(", recv/calc: "); | ||
576 | for (i = 0; i < crypto_hash_digestsize(e->hash); ++i) | ||
577 | printk("%02x/%02x ", recv_hash[i], hash[i]); | ||
578 | |||
579 | } | ||
580 | printk("\n"); | ||
581 | #endif | ||
582 | err = -1; | ||
583 | if (mismatch) | ||
584 | goto err_out_exit; | ||
585 | } | ||
586 | |||
587 | if (e->cipher) { | ||
588 | err = dst_crypt(e, bio); | ||
589 | if (err) | ||
590 | goto err_out_exit; | ||
591 | } | ||
592 | |||
593 | return 0; | ||
594 | |||
595 | err_out_exit: | ||
596 | return err; | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Thread pool callback to encrypt data and send it to the network. | ||
601 | */ | ||
602 | static int dst_trans_crypto_action(void *crypto_engine, void *schedule_data) | ||
603 | { | ||
604 | struct dst_crypto_engine *e = crypto_engine; | ||
605 | struct dst_trans *t = schedule_data; | ||
606 | struct bio *bio = t->bio; | ||
607 | int err; | ||
608 | |||
609 | dprintk("%s: t: %p, gen: %llu, cipher: %p, hash: %p.\n", | ||
610 | __func__, t, t->gen, e->cipher, e->hash); | ||
611 | |||
612 | e->enc = t->enc; | ||
613 | e->iv = dst_gen_iv(t); | ||
614 | |||
615 | if (bio_data_dir(bio) == WRITE) { | ||
616 | err = dst_crypto_process_sending(e, bio, t->cmd.hash); | ||
617 | if (err) | ||
618 | goto err_out_exit; | ||
619 | |||
620 | if (e->hash) { | ||
621 | t->cmd.csize = crypto_hash_digestsize(e->hash); | ||
622 | t->cmd.size += t->cmd.csize; | ||
623 | } | ||
624 | |||
625 | return dst_trans_send(t); | ||
626 | } else { | ||
627 | u8 *hash = e->data + e->size/2; | ||
628 | |||
629 | err = dst_crypto_process_receiving(e, bio, hash, t->cmd.hash); | ||
630 | if (err) | ||
631 | goto err_out_exit; | ||
632 | |||
633 | dst_trans_remove(t); | ||
634 | dst_trans_put(t); | ||
635 | } | ||
636 | |||
637 | return 0; | ||
638 | |||
639 | err_out_exit: | ||
640 | t->error = err; | ||
641 | dst_trans_put(t); | ||
642 | return err; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Schedule crypto processing for given transaction. | ||
647 | */ | ||
648 | int dst_trans_crypto(struct dst_trans *t) | ||
649 | { | ||
650 | struct dst_node *n = t->n; | ||
651 | int err; | ||
652 | |||
653 | err = thread_pool_schedule(n->pool, | ||
654 | dst_trans_crypto_setup, dst_trans_crypto_action, | ||
655 | t, MAX_SCHEDULE_TIMEOUT); | ||
656 | if (err) | ||
657 | goto err_out_exit; | ||
658 | |||
659 | return 0; | ||
660 | |||
661 | err_out_exit: | ||
662 | dst_trans_put(t); | ||
663 | return err; | ||
664 | } | ||
665 | |||
666 | /* | ||
667 | * Crypto machinery for the export node. | ||
668 | */ | ||
669 | static int dst_export_crypto_setup(void *crypto_engine, void *bio) | ||
670 | { | ||
671 | struct dst_crypto_engine *e = crypto_engine; | ||
672 | |||
673 | e->private = bio; | ||
674 | return 0; | ||
675 | } | ||
676 | |||
677 | static int dst_export_crypto_action(void *crypto_engine, void *schedule_data) | ||
678 | { | ||
679 | struct dst_crypto_engine *e = crypto_engine; | ||
680 | struct bio *bio = schedule_data; | ||
681 | struct dst_export_priv *p = bio->bi_private; | ||
682 | int err; | ||
683 | |||
684 | dprintk("%s: e: %p, data: %p, bio: %llu/%u, dir: %lu.\n", | ||
685 | __func__, e, e->data, (u64)bio->bi_sector, | ||
686 | bio->bi_size, bio_data_dir(bio)); | ||
687 | |||
688 | e->enc = (bio_data_dir(bio) == READ); | ||
689 | e->iv = p->cmd.id; | ||
690 | |||
691 | if (bio_data_dir(bio) == WRITE) { | ||
692 | u8 *hash = e->data + e->size/2; | ||
693 | |||
694 | err = dst_crypto_process_receiving(e, bio, hash, p->cmd.hash); | ||
695 | if (err) | ||
696 | goto err_out_exit; | ||
697 | |||
698 | generic_make_request(bio); | ||
699 | } else { | ||
700 | err = dst_crypto_process_sending(e, bio, p->cmd.hash); | ||
701 | if (err) | ||
702 | goto err_out_exit; | ||
703 | |||
704 | if (e->hash) { | ||
705 | p->cmd.csize = crypto_hash_digestsize(e->hash); | ||
706 | p->cmd.size += p->cmd.csize; | ||
707 | } | ||
708 | |||
709 | err = dst_export_send_bio(bio); | ||
710 | } | ||
711 | return 0; | ||
712 | |||
713 | err_out_exit: | ||
714 | bio_put(bio); | ||
715 | return err; | ||
716 | } | ||
717 | |||
718 | int dst_export_crypto(struct dst_node *n, struct bio *bio) | ||
719 | { | ||
720 | int err; | ||
721 | |||
722 | err = thread_pool_schedule(n->pool, | ||
723 | dst_export_crypto_setup, dst_export_crypto_action, | ||
724 | bio, MAX_SCHEDULE_TIMEOUT); | ||
725 | if (err) | ||
726 | goto err_out_exit; | ||
727 | |||
728 | return 0; | ||
729 | |||
730 | err_out_exit: | ||
731 | bio_put(bio); | ||
732 | return err; | ||
733 | } | ||
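crypto.c drives all of its work through the generic thread pool that is removed alongside it (thread_pool.c appears in the diffstat above, but its body is not shown here). As a reading aid for dcore.c below, here is a rough sketch of that interface reconstructed purely from the call sites above; treat the exact prototypes as an approximation, not the authoritative API.

struct thread_pool;

/* Per-worker private data is produced by init() when a worker is added
 * (dst_crypto_thread_init() allocates a dst_crypto_engine there) and is
 * torn down by cleanup() when the worker is removed. */
int thread_pool_add_worker(struct thread_pool *p, char *name,
			   unsigned int id,
			   void *(*init)(void *data),
			   void (*cleanup)(void *private),
			   void *data);
void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);

/* Scheduling hands one unit of work to an idle worker: setup() binds the
 * scheduled data (a dst_trans or a bio) to the worker's private engine,
 * then action() performs the crypto processing in worker context. */
int thread_pool_schedule(struct thread_pool *p,
			 int (*setup)(void *private, void *data),
			 int (*action)(void *private, void *data),
			 void *data, long timeout);

dcore.c below additionally calls thread_pool_create() and thread_pool_destroy() to set up and tear down the pool itself.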
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
deleted file mode 100644
index c83ca7e3d048..000000000000
--- a/drivers/staging/dst/dcore.c
+++ /dev/null
@@ -1,968 +0,0 @@
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/blkdev.h> | ||
19 | #include <linux/bio.h> | ||
20 | #include <linux/buffer_head.h> | ||
21 | #include <linux/connector.h> | ||
22 | #include <linux/dst.h> | ||
23 | #include <linux/device.h> | ||
24 | #include <linux/jhash.h> | ||
25 | #include <linux/idr.h> | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/namei.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/socket.h> | ||
30 | |||
31 | #include <linux/in.h> | ||
32 | #include <linux/in6.h> | ||
33 | |||
34 | #include <net/sock.h> | ||
35 | |||
36 | static int dst_major; | ||
37 | |||
38 | static DEFINE_MUTEX(dst_hash_lock); | ||
39 | static struct list_head *dst_hashtable; | ||
40 | static unsigned int dst_hashtable_size = 128; | ||
41 | module_param(dst_hashtable_size, uint, 0644); | ||
42 | |||
43 | static char dst_name[] = "Dementianting goldfish"; | ||
44 | |||
45 | static DEFINE_IDR(dst_index_idr); | ||
46 | static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL }; | ||
47 | |||
48 | /* | ||
49 | * DST sysfs tree for device called 'storage': | ||
50 | * | ||
51 | * /sys/bus/dst/devices/storage/ | ||
52 | * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025 | ||
53 | * /sys/bus/dst/devices/storage/size : 800 | ||
54 | * /sys/bus/dst/devices/storage/name : storage | ||
55 | */ | ||
56 | |||
57 | static int dst_dev_match(struct device *dev, struct device_driver *drv) | ||
58 | { | ||
59 | return 1; | ||
60 | } | ||
61 | |||
62 | static struct bus_type dst_dev_bus_type = { | ||
63 | .name = "dst", | ||
64 | .match = &dst_dev_match, | ||
65 | }; | ||
66 | |||
67 | static void dst_node_release(struct device *dev) | ||
68 | { | ||
69 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
70 | |||
71 | kfree(info); | ||
72 | } | ||
73 | |||
74 | static struct device dst_node_dev = { | ||
75 | .bus = &dst_dev_bus_type, | ||
76 | .release = &dst_node_release | ||
77 | }; | ||
78 | |||
79 | /* | ||
80 | * Setting size of the node after it was changed. | ||
81 | */ | ||
82 | static void dst_node_set_size(struct dst_node *n) | ||
83 | { | ||
84 | struct block_device *bdev; | ||
85 | |||
86 | set_capacity(n->disk, n->size >> 9); | ||
87 | |||
88 | bdev = bdget_disk(n->disk, 0); | ||
89 | if (bdev) { | ||
90 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
91 | i_size_write(bdev->bd_inode, n->size); | ||
92 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
93 | bdput(bdev); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Distributed storage request processing function. | ||
99 | */ | ||
100 | static int dst_request(struct request_queue *q, struct bio *bio) | ||
101 | { | ||
102 | struct dst_node *n = q->queuedata; | ||
103 | int err = -EIO; | ||
104 | |||
105 | if (bio_empty_barrier(bio) && !blk_queue_discard(q)) { | ||
106 | /* | ||
107 | * This is a dirty^Wnice hack, but if we complete this | ||
108 | * operation with -EOPNOTSUPP as intended, XFS | ||
109 | * will get stuck and freeze the machine. This may not be | ||
110 | * a problem particular to XFS, but it is the | ||
111 | * only FS I have worked with that sends an empty barrier | ||
112 | * at umount time. | ||
113 | * | ||
114 | * Empty barriers are not allowed anyway, see 51fd77bd9f512 | ||
115 | * for example, although later it was changed to | ||
116 | * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not | ||
117 | * work in this case. | ||
118 | */ | ||
119 | /* err = -EOPNOTSUPP; */ | ||
120 | err = 0; | ||
121 | goto end_io; | ||
122 | } | ||
123 | |||
124 | bio_get(bio); | ||
125 | |||
126 | return dst_process_bio(n, bio); | ||
127 | |||
128 | end_io: | ||
129 | bio_endio(bio, err); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Open/close callbacks for appropriate block device. | ||
135 | */ | ||
136 | static int dst_bdev_open(struct block_device *bdev, fmode_t mode) | ||
137 | { | ||
138 | struct dst_node *n = bdev->bd_disk->private_data; | ||
139 | |||
140 | dst_node_get(n); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | static int dst_bdev_release(struct gendisk *disk, fmode_t mode) | ||
145 | { | ||
146 | struct dst_node *n = disk->private_data; | ||
147 | |||
148 | dst_node_put(n); | ||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | static struct block_device_operations dst_blk_ops = { | ||
153 | .open = dst_bdev_open, | ||
154 | .release = dst_bdev_release, | ||
155 | .owner = THIS_MODULE, | ||
156 | }; | ||
157 | |||
158 | /* | ||
159 | * Block layer binding - disk is created when array is fully configured | ||
160 | * by userspace request. | ||
161 | */ | ||
162 | static int dst_node_create_disk(struct dst_node *n) | ||
163 | { | ||
164 | int err = -ENOMEM; | ||
165 | u32 index = 0; | ||
166 | |||
167 | n->queue = blk_init_queue(NULL, NULL); | ||
168 | if (!n->queue) | ||
169 | goto err_out_exit; | ||
170 | |||
171 | n->queue->queuedata = n; | ||
172 | blk_queue_make_request(n->queue, dst_request); | ||
173 | blk_queue_max_phys_segments(n->queue, n->max_pages); | ||
174 | blk_queue_max_hw_segments(n->queue, n->max_pages); | ||
175 | |||
176 | err = -ENOMEM; | ||
177 | n->disk = alloc_disk(1); | ||
178 | if (!n->disk) | ||
179 | goto err_out_free_queue; | ||
180 | |||
181 | if (!(n->state->permissions & DST_PERM_WRITE)) { | ||
182 | printk(KERN_INFO "DST node %s attached read-only.\n", n->name); | ||
183 | set_disk_ro(n->disk, 1); | ||
184 | } | ||
185 | |||
186 | if (!idr_pre_get(&dst_index_idr, GFP_KERNEL)) | ||
187 | goto err_out_put; | ||
188 | |||
189 | mutex_lock(&dst_hash_lock); | ||
190 | err = idr_get_new(&dst_index_idr, NULL, &index); | ||
191 | mutex_unlock(&dst_hash_lock); | ||
192 | if (err) | ||
193 | goto err_out_put; | ||
194 | |||
195 | n->disk->major = dst_major; | ||
196 | n->disk->first_minor = index; | ||
197 | n->disk->fops = &dst_blk_ops; | ||
198 | n->disk->queue = n->queue; | ||
199 | n->disk->private_data = n; | ||
200 | snprintf(n->disk->disk_name, sizeof(n->disk->disk_name), | ||
201 | "dst-%s", n->name); | ||
202 | |||
203 | return 0; | ||
204 | |||
205 | err_out_put: | ||
206 | put_disk(n->disk); | ||
207 | err_out_free_queue: | ||
208 | blk_cleanup_queue(n->queue); | ||
209 | err_out_exit: | ||
210 | return err; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Sysfs machinery: show device's size. | ||
215 | */ | ||
216 | static ssize_t dst_show_size(struct device *dev, | ||
217 | struct device_attribute *attr, char *buf) | ||
218 | { | ||
219 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
220 | |||
221 | return sprintf(buf, "%llu\n", info->size); | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * Show local exported device. | ||
226 | */ | ||
227 | static ssize_t dst_show_local(struct device *dev, | ||
228 | struct device_attribute *attr, char *buf) | ||
229 | { | ||
230 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
231 | |||
232 | return sprintf(buf, "%s\n", info->local); | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Shows type of the remote node - device major/minor number | ||
237 | * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes. | ||
238 | */ | ||
239 | static ssize_t dst_show_type(struct device *dev, | ||
240 | struct device_attribute *attr, char *buf) | ||
241 | { | ||
242 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
243 | int family = info->net.addr.sa_family; | ||
244 | |||
245 | if (family == AF_INET) { | ||
246 | struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr; | ||
247 | return sprintf(buf, "%u.%u.%u.%u:%d\n", | ||
248 | NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); | ||
249 | } else if (family == AF_INET6) { | ||
250 | struct sockaddr_in6 *sin = (struct sockaddr_in6 *) | ||
251 | &info->net.addr; | ||
252 | return sprintf(buf, | ||
253 | "%pi6:%d\n", | ||
254 | &sin->sin6_addr, ntohs(sin->sin6_port)); | ||
255 | } else { | ||
256 | int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */ | ||
257 | int size, addrlen = info->net.addr.sa_data_len; | ||
258 | unsigned char *a = (unsigned char *)&info->net.addr.sa_data; | ||
259 | char *buf_orig = buf; | ||
260 | |||
261 | size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ", | ||
262 | family, addrlen); | ||
263 | sz -= size; | ||
264 | buf += size; | ||
265 | |||
266 | for (i = 0; i < addrlen; ++i) { | ||
267 | if (sz < 3) | ||
268 | break; | ||
269 | |||
270 | size = snprintf(buf, sz, "%02x ", a[i]); | ||
271 | sz -= size; | ||
272 | buf += size; | ||
273 | } | ||
274 | buf += sprintf(buf, "\n"); | ||
275 | |||
276 | return buf - buf_orig; | ||
277 | } | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | static struct device_attribute dst_node_attrs[] = { | ||
282 | __ATTR(size, 0444, dst_show_size, NULL), | ||
283 | __ATTR(type, 0444, dst_show_type, NULL), | ||
284 | __ATTR(local, 0444, dst_show_local, NULL), | ||
285 | }; | ||
286 | |||
287 | static int dst_create_node_attributes(struct dst_node *n) | ||
288 | { | ||
289 | int err, i; | ||
290 | |||
291 | for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) { | ||
292 | err = device_create_file(&n->info->device, | ||
293 | &dst_node_attrs[i]); | ||
294 | if (err) | ||
295 | goto err_out_remove_all; | ||
296 | } | ||
297 | return 0; | ||
298 | |||
299 | err_out_remove_all: | ||
300 | while (--i >= 0) | ||
301 | device_remove_file(&n->info->device, | ||
302 | &dst_node_attrs[i]); | ||
303 | |||
304 | return err; | ||
305 | } | ||
306 | |||
307 | static void dst_remove_node_attributes(struct dst_node *n) | ||
308 | { | ||
309 | int i; | ||
310 | |||
311 | for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) | ||
312 | device_remove_file(&n->info->device, | ||
313 | &dst_node_attrs[i]); | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * Sysfs cleanup and initialization. | ||
318 | * Shows number of useful parameters. | ||
319 | */ | ||
320 | static void dst_node_sysfs_exit(struct dst_node *n) | ||
321 | { | ||
322 | if (n->info) { | ||
323 | dst_remove_node_attributes(n); | ||
324 | device_unregister(&n->info->device); | ||
325 | n->info = NULL; | ||
326 | } | ||
327 | } | ||
328 | |||
329 | static int dst_node_sysfs_init(struct dst_node *n) | ||
330 | { | ||
331 | int err; | ||
332 | |||
333 | n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL); | ||
334 | if (!n->info) | ||
335 | return -ENOMEM; | ||
336 | |||
337 | memcpy(&n->info->device, &dst_node_dev, sizeof(struct device)); | ||
338 | n->info->size = n->size; | ||
339 | |||
340 | dev_set_name(&n->info->device, "dst-%s", n->name); | ||
341 | err = device_register(&n->info->device); | ||
342 | if (err) { | ||
343 | dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n", | ||
344 | n->name, err); | ||
345 | goto err_out_exit; | ||
346 | } | ||
347 | |||
348 | dst_create_node_attributes(n); | ||
349 | |||
350 | return 0; | ||
351 | |||
352 | err_out_exit: | ||
353 | kfree(n->info); | ||
354 | n->info = NULL; | ||
355 | return err; | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * DST node hash tables machinery. | ||
360 | */ | ||
361 | static inline unsigned int dst_hash(char *str, unsigned int size) | ||
362 | { | ||
363 | return jhash(str, size, 0) % dst_hashtable_size; | ||
364 | } | ||
365 | |||
366 | static void dst_node_remove(struct dst_node *n) | ||
367 | { | ||
368 | mutex_lock(&dst_hash_lock); | ||
369 | list_del_init(&n->node_entry); | ||
370 | mutex_unlock(&dst_hash_lock); | ||
371 | } | ||
372 | |||
373 | static void dst_node_add(struct dst_node *n) | ||
374 | { | ||
375 | unsigned hash = dst_hash(n->name, sizeof(n->name)); | ||
376 | |||
377 | mutex_lock(&dst_hash_lock); | ||
378 | list_add_tail(&n->node_entry, &dst_hashtable[hash]); | ||
379 | mutex_unlock(&dst_hash_lock); | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Cleaning node when it is about to be freed. | ||
384 | * There are still users of the socket though, | ||
385 | * so connection cleanup should be protected. | ||
386 | */ | ||
387 | static void dst_node_cleanup(struct dst_node *n) | ||
388 | { | ||
389 | struct dst_state *st = n->state; | ||
390 | |||
391 | if (!st) | ||
392 | return; | ||
393 | |||
394 | if (n->queue) { | ||
395 | blk_cleanup_queue(n->queue); | ||
396 | |||
397 | mutex_lock(&dst_hash_lock); | ||
398 | idr_remove(&dst_index_idr, n->disk->first_minor); | ||
399 | mutex_unlock(&dst_hash_lock); | ||
400 | |||
401 | put_disk(n->disk); | ||
402 | } | ||
403 | |||
404 | if (n->bdev) { | ||
405 | sync_blockdev(n->bdev); | ||
406 | close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE); | ||
407 | } | ||
408 | |||
409 | dst_state_lock(st); | ||
410 | st->need_exit = 1; | ||
411 | dst_state_exit_connected(st); | ||
412 | dst_state_unlock(st); | ||
413 | |||
414 | wake_up(&st->thread_wait); | ||
415 | |||
416 | dst_state_put(st); | ||
417 | n->state = NULL; | ||
418 | } | ||
419 | |||
420 | /* | ||
421 | * Free security attributes attached to given node. | ||
422 | */ | ||
423 | static void dst_security_exit(struct dst_node *n) | ||
424 | { | ||
425 | struct dst_secure *s, *tmp; | ||
426 | |||
427 | list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) { | ||
428 | list_del(&s->sec_entry); | ||
429 | kfree(s); | ||
430 | } | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * Free the node when there are no more users. | ||
435 | * The node actually has to be freed on behalf of a userspace process, | ||
436 | * since a number of threads are embedded in the | ||
437 | * node, so they cannot exit and free the node from there; that is | ||
438 | * why there is a wakeup if the reference counter is not equal to zero. | ||
439 | */ | ||
440 | void dst_node_put(struct dst_node *n) | ||
441 | { | ||
442 | if (unlikely(!n)) | ||
443 | return; | ||
444 | |||
445 | dprintk("%s: n: %p, refcnt: %d.\n", | ||
446 | __func__, n, atomic_read(&n->refcnt)); | ||
447 | |||
448 | if (atomic_dec_and_test(&n->refcnt)) { | ||
449 | dst_node_remove(n); | ||
450 | n->trans_scan_timeout = 0; | ||
451 | dst_node_cleanup(n); | ||
452 | thread_pool_destroy(n->pool); | ||
453 | dst_node_sysfs_exit(n); | ||
454 | dst_node_crypto_exit(n); | ||
455 | dst_security_exit(n); | ||
456 | dst_node_trans_exit(n); | ||
457 | |||
458 | kfree(n); | ||
459 | |||
460 | dprintk("%s: freed n: %p.\n", __func__, n); | ||
461 | } else { | ||
462 | wake_up(&n->wait); | ||
463 | } | ||
464 | } | ||
465 | |||
466 | /* | ||
467 | * Setting up export device: lookup by the name, get its size | ||
468 | * and setup listening socket, which will accept clients, which | ||
469 | * will submit IO for given storage. | ||
470 | */ | ||
471 | static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl, | ||
472 | struct dst_export_ctl *le) | ||
473 | { | ||
474 | int err; | ||
475 | |||
476 | snprintf(n->info->local, sizeof(n->info->local), "%s", le->device); | ||
477 | |||
478 | n->bdev = open_bdev_exclusive(le->device, FMODE_READ|FMODE_WRITE, NULL); | ||
479 | if (IS_ERR(n->bdev)) | ||
480 | return PTR_ERR(n->bdev); | ||
481 | |||
482 | if (n->size != 0) | ||
483 | n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size); | ||
484 | else | ||
485 | n->size = n->bdev->bd_inode->i_size; | ||
486 | |||
487 | n->info->size = n->size; | ||
488 | err = dst_node_init_listened(n, le); | ||
489 | if (err) | ||
490 | goto err_out_cleanup; | ||
491 | |||
492 | return 0; | ||
493 | |||
494 | err_out_cleanup: | ||
495 | close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE); | ||
496 | n->bdev = NULL; | ||
497 | |||
498 | return err; | ||
499 | } | ||
500 | |||
501 | /* Empty thread pool callbacks for the network processing threads. */ | ||
502 | static inline void *dst_thread_network_init(void *data) | ||
503 | { | ||
504 | dprintk("%s: data: %p.\n", __func__, data); | ||
505 | return data; | ||
506 | } | ||
507 | |||
508 | static inline void dst_thread_network_cleanup(void *data) | ||
509 | { | ||
510 | dprintk("%s: data: %p.\n", __func__, data); | ||
511 | } | ||
512 | |||
513 | /* | ||
514 | * Allocate DST node and initialize some of its parameters. | ||
515 | */ | ||
516 | static struct dst_node *dst_alloc_node(struct dst_ctl *ctl, | ||
517 | int (*start)(struct dst_node *), | ||
518 | int num) | ||
519 | { | ||
520 | struct dst_node *n; | ||
521 | int err; | ||
522 | |||
523 | n = kzalloc(sizeof(struct dst_node), GFP_KERNEL); | ||
524 | if (!n) | ||
525 | return NULL; | ||
526 | |||
527 | INIT_LIST_HEAD(&n->node_entry); | ||
528 | |||
529 | INIT_LIST_HEAD(&n->security_list); | ||
530 | mutex_init(&n->security_lock); | ||
531 | |||
532 | init_waitqueue_head(&n->wait); | ||
533 | |||
534 | n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout); | ||
535 | if (!n->trans_scan_timeout) | ||
536 | n->trans_scan_timeout = HZ; | ||
537 | |||
538 | n->trans_max_retries = ctl->trans_max_retries; | ||
539 | if (!n->trans_max_retries) | ||
540 | n->trans_max_retries = 10; | ||
541 | |||
542 | /* | ||
543 | * Pretty much arbitrary default numbers. | ||
544 | * 32 matches maximum number of pages in bio originated from ext3 (31). | ||
545 | */ | ||
546 | n->max_pages = ctl->max_pages; | ||
547 | if (!n->max_pages) | ||
548 | n->max_pages = 32; | ||
549 | |||
550 | if (n->max_pages > 1024) | ||
551 | n->max_pages = 1024; | ||
552 | |||
553 | n->start = start; | ||
554 | n->size = ctl->size; | ||
555 | |||
556 | atomic_set(&n->refcnt, 1); | ||
557 | atomic_long_set(&n->gen, 0); | ||
558 | snprintf(n->name, sizeof(n->name), "%s", ctl->name); | ||
559 | |||
560 | err = dst_node_sysfs_init(n); | ||
561 | if (err) | ||
562 | goto err_out_free; | ||
563 | |||
564 | n->pool = thread_pool_create(num, n->name, dst_thread_network_init, | ||
565 | dst_thread_network_cleanup, n); | ||
566 | if (IS_ERR(n->pool)) { | ||
567 | err = PTR_ERR(n->pool); | ||
568 | goto err_out_sysfs_exit; | ||
569 | } | ||
570 | |||
571 | dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name); | ||
572 | |||
573 | return n; | ||
574 | |||
575 | err_out_sysfs_exit: | ||
576 | dst_node_sysfs_exit(n); | ||
577 | err_out_free: | ||
578 | kfree(n); | ||
579 | return NULL; | ||
580 | } | ||
581 | |||
582 | /* | ||
583 | * Starting a node, connected to the remote server: | ||
584 | * register block device and initialize transaction mechanism. | ||
585 | * In reverse order, though. | ||
586 | * | ||
587 | * It will autonegotiate some parameters with the remote node | ||
588 | * and update local if needed. | ||
589 | * | ||
590 | * Transaction initialization should be the last thing before | ||
591 | * starting the node, since transaction should include not only | ||
592 | * block IO, but also crypto related data (if any), which are | ||
593 | * initialized separately. | ||
594 | */ | ||
595 | static int dst_start_remote(struct dst_node *n) | ||
596 | { | ||
597 | int err; | ||
598 | |||
599 | err = dst_node_trans_init(n, sizeof(struct dst_trans)); | ||
600 | if (err) | ||
601 | return err; | ||
602 | |||
603 | err = dst_node_create_disk(n); | ||
604 | if (err) | ||
605 | return err; | ||
606 | |||
607 | dst_node_set_size(n); | ||
608 | add_disk(n->disk); | ||
609 | |||
610 | dprintk("DST: started remote node '%s', minor: %d.\n", | ||
611 | n->name, n->disk->first_minor); | ||
612 | |||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * Add a remote node and initialize the connection. | ||
618 | */ | ||
619 | static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl, | ||
620 | void *data, unsigned int size) | ||
621 | { | ||
622 | int err; | ||
623 | struct dst_network_ctl *rctl = data; | ||
624 | |||
625 | if (n) | ||
626 | return -EEXIST; | ||
627 | |||
628 | if (size != sizeof(struct dst_network_ctl)) | ||
629 | return -EINVAL; | ||
630 | |||
631 | n = dst_alloc_node(ctl, dst_start_remote, 1); | ||
632 | if (!n) | ||
633 | return -ENOMEM; | ||
634 | |||
635 | memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl)); | ||
636 | err = dst_node_init_connected(n, rctl); | ||
637 | if (err) | ||
638 | goto err_out_free; | ||
639 | |||
640 | dst_node_add(n); | ||
641 | |||
642 | return 0; | ||
643 | |||
644 | err_out_free: | ||
645 | dst_node_put(n); | ||
646 | return err; | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | * Adding export node: initializing block device and listening socket. | ||
651 | */ | ||
652 | static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl, | ||
653 | void *data, unsigned int size) | ||
654 | { | ||
655 | int err; | ||
656 | struct dst_export_ctl *le = data; | ||
657 | |||
658 | if (n) | ||
659 | return -EEXIST; | ||
660 | |||
661 | if (size != sizeof(struct dst_export_ctl)) | ||
662 | return -EINVAL; | ||
663 | |||
664 | n = dst_alloc_node(ctl, dst_start_export, 2); | ||
665 | if (!n) | ||
666 | return -EINVAL; | ||
667 | |||
668 | err = dst_setup_export(n, ctl, le); | ||
669 | if (err) | ||
670 | goto err_out_free; | ||
671 | |||
672 | dst_node_add(n); | ||
673 | |||
674 | return 0; | ||
675 | |||
676 | err_out_free: | ||
677 | dst_node_put(n); | ||
678 | return err; | ||
679 | } | ||
680 | |||
681 | static int dst_node_remove_unload(struct dst_node *n) | ||
682 | { | ||
683 | printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n", | ||
684 | n->name, n->size); | ||
685 | |||
686 | if (n->disk) | ||
687 | del_gendisk(n->disk); | ||
688 | |||
689 | dst_node_remove(n); | ||
690 | dst_node_sysfs_exit(n); | ||
691 | |||
692 | /* | ||
693 | * This is not a hack. Really. | ||
694 | * The node's reference counter makes it possible to implement fine-grained | ||
695 | * node freeing, but since all transactions (which hold the node's | ||
696 | * reference counter) are processed in a dedicated thread, | ||
697 | * it is possible that the reference will hit zero in that thread, | ||
698 | * so we would not be able to exit the thread and clean up the node. | ||
699 | * | ||
700 | * So we remove the disk, so that no new activity is possible, and | ||
701 | * wait until all pending transactions are completed (either | ||
702 | * in the receiving thread or by timeout in the workqueue). In this | ||
703 | * case the reference counter will be less than or equal to 2 (taken once in | ||
704 | * dst_alloc_node() and then in the connector message parser; | ||
705 | * or, when we force module unloading and the connector message | ||
706 | * parser does not hold a reference, the reference | ||
707 | * counter will be equal to 1), | ||
708 | * and subsequent dst_node_put() calls will free the node. | ||
709 | */ | ||
710 | dprintk("%s: going to sleep with %d refcnt.\n", | ||
711 | __func__, atomic_read(&n->refcnt)); | ||
712 | wait_event(n->wait, atomic_read(&n->refcnt) <= 2); | ||
713 | |||
714 | dst_node_put(n); | ||
715 | return 0; | ||
716 | } | ||
717 | |||
718 | /* | ||
719 | * Remove node from the hash table. | ||
720 | */ | ||
721 | static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl, | ||
722 | void *data, unsigned int size) | ||
723 | { | ||
724 | if (!n) | ||
725 | return -ENODEV; | ||
726 | |||
727 | return dst_node_remove_unload(n); | ||
728 | } | ||
729 | |||
730 | /* | ||
731 | * Initialize crypto processing for given node. | ||
732 | */ | ||
733 | static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl, | ||
734 | void *data, unsigned int size) | ||
735 | { | ||
736 | struct dst_crypto_ctl *crypto = data; | ||
737 | |||
738 | if (!n) | ||
739 | return -ENODEV; | ||
740 | |||
741 | if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize + | ||
742 | crypto->cipher_keysize) | ||
743 | return -EINVAL; | ||
744 | |||
745 | if (n->trans_cache) | ||
746 | return -EEXIST; | ||
747 | |||
748 | return dst_node_crypto_init(n, crypto); | ||
749 | } | ||
750 | |||
751 | /* | ||
752 | * Security attributes for given node. | ||
753 | */ | ||
754 | static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl, | ||
755 | void *data, unsigned int size) | ||
756 | { | ||
757 | struct dst_secure *s; | ||
758 | |||
759 | if (!n) | ||
760 | return -ENODEV; | ||
761 | |||
762 | if (size != sizeof(struct dst_secure_user)) | ||
763 | return -EINVAL; | ||
764 | |||
765 | s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL); | ||
766 | if (!s) | ||
767 | return -ENOMEM; | ||
768 | |||
769 | memcpy(&s->sec, data, size); | ||
770 | |||
771 | mutex_lock(&n->security_lock); | ||
772 | list_add_tail(&s->sec_entry, &n->security_list); | ||
773 | mutex_unlock(&n->security_lock); | ||
774 | |||
775 | return 0; | ||
776 | } | ||
777 | |||
778 | /* | ||
779 | * Kill'em all! | ||
780 | */ | ||
781 | static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl, | ||
782 | void *data, unsigned int size) | ||
783 | { | ||
784 | int err; | ||
785 | |||
786 | if (!n) | ||
787 | return -ENODEV; | ||
788 | |||
789 | if (n->trans_cache) | ||
790 | return 0; | ||
791 | |||
792 | err = n->start(n); | ||
793 | if (err) | ||
794 | return err; | ||
795 | |||
796 | printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size); | ||
797 | return 0; | ||
798 | } | ||
799 | |||
800 | typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl, | ||
801 | void *data, unsigned int size); | ||
802 | |||
803 | /* | ||
804 | * List of userspace commands. | ||
805 | */ | ||
806 | static dst_command_func dst_commands[] = { | ||
807 | [DST_ADD_REMOTE] = &dst_add_remote, | ||
808 | [DST_ADD_EXPORT] = &dst_add_export, | ||
809 | [DST_DEL_NODE] = &dst_del_node, | ||
810 | [DST_CRYPTO] = &dst_crypto_init, | ||
811 | [DST_SECURITY] = &dst_security_init, | ||
812 | [DST_START] = &dst_start_node, | ||
813 | }; | ||
814 | |||
815 | /* | ||
816 | * Configuration parser. | ||
817 | */ | ||
818 | static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) | ||
819 | { | ||
820 | struct dst_ctl *ctl; | ||
821 | int err; | ||
822 | struct dst_ctl_ack ack; | ||
823 | struct dst_node *n = NULL, *tmp; | ||
824 | unsigned int hash; | ||
825 | |||
826 | if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) { | ||
827 | err = -EPERM; | ||
828 | goto out; | ||
829 | } | ||
830 | |||
831 | if (msg->len < sizeof(struct dst_ctl)) { | ||
832 | err = -EBADMSG; | ||
833 | goto out; | ||
834 | } | ||
835 | |||
836 | ctl = (struct dst_ctl *)msg->data; | ||
837 | |||
838 | if (ctl->cmd >= DST_CMD_MAX) { | ||
839 | err = -EINVAL; | ||
840 | goto out; | ||
841 | } | ||
842 | hash = dst_hash(ctl->name, sizeof(ctl->name)); | ||
843 | |||
844 | mutex_lock(&dst_hash_lock); | ||
845 | list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) { | ||
846 | if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) { | ||
847 | n = tmp; | ||
848 | dst_node_get(n); | ||
849 | break; | ||
850 | } | ||
851 | } | ||
852 | mutex_unlock(&dst_hash_lock); | ||
853 | |||
854 | err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl), | ||
855 | msg->len - sizeof(struct dst_ctl)); | ||
856 | |||
857 | dst_node_put(n); | ||
858 | out: | ||
859 | memcpy(&ack.msg, msg, sizeof(struct cn_msg)); | ||
860 | |||
861 | ack.msg.ack = msg->ack + 1; | ||
862 | ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg); | ||
863 | |||
864 | ack.error = err; | ||
865 | |||
866 | cn_netlink_send(&ack.msg, 0, GFP_KERNEL); | ||
867 | } | ||
868 | |||
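For orientation, every message handled by cn_dst_callback() above begins with a struct dst_ctl header (at minimum a node name and a command index below DST_CMD_MAX), followed by command-specific data whose length each handler validates for itself. The C sketch below, written as if it were part of the userspace configuration utility, only illustrates that framing; the struct layout, field sizes and enum values are assumptions standing in for the real definitions in include/linux/dst.h, not the actual ABI.

#include <stddef.h>
#include <string.h>

/* Hypothetical stand-ins for the dst.h definitions (illustration only). */
enum dst_sketch_cmd {
	SK_DST_ADD_REMOTE = 0,		/* mirrors the dst_commands[] table above */
	SK_DST_ADD_EXPORT,
	SK_DST_DEL_NODE,
	SK_DST_CRYPTO,
	SK_DST_SECURITY,
	SK_DST_START,
	SK_DST_CMD_MAX,
};

struct dst_ctl_sketch {
	char		name[32];	/* node name, hashed and memcmp()'d above */
	unsigned int	cmd;		/* one of the commands above */
};

/*
 * Build the payload for a "start node" request: header only, since
 * dst_start_node() expects no command-specific data after struct dst_ctl.
 */
static size_t build_start_payload(void *buf, size_t len, const char *node_name)
{
	struct dst_ctl_sketch *ctl = buf;

	if (len < sizeof(*ctl))
		return 0;

	memset(ctl, 0, sizeof(*ctl));
	strncpy(ctl->name, node_name, sizeof(ctl->name) - 1);
	ctl->cmd = SK_DST_START;

	return sizeof(*ctl);	/* becomes the cn_msg payload sent over the connector */
}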
869 | /* | ||
870 | * Global initialization: sysfs, hash table, block device registration, | ||
871 | * connector and various caches. | ||
872 | */ | ||
873 | static int __init dst_sysfs_init(void) | ||
874 | { | ||
875 | return bus_register(&dst_dev_bus_type); | ||
876 | } | ||
877 | |||
878 | static void dst_sysfs_exit(void) | ||
879 | { | ||
880 | bus_unregister(&dst_dev_bus_type); | ||
881 | } | ||
882 | |||
883 | static int __init dst_hashtable_init(void) | ||
884 | { | ||
885 | unsigned int i; | ||
886 | |||
887 | dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head), | ||
888 | GFP_KERNEL); | ||
889 | if (!dst_hashtable) | ||
890 | return -ENOMEM; | ||
891 | |||
892 | for (i = 0; i < dst_hashtable_size; ++i) | ||
893 | INIT_LIST_HEAD(&dst_hashtable[i]); | ||
894 | |||
895 | return 0; | ||
896 | } | ||
897 | |||
898 | static void dst_hashtable_exit(void) | ||
899 | { | ||
900 | unsigned int i; | ||
901 | struct dst_node *n, *tmp; | ||
902 | |||
903 | for (i = 0; i < dst_hashtable_size; ++i) { | ||
904 | list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) { | ||
905 | dst_node_remove_unload(n); | ||
906 | } | ||
907 | } | ||
908 | |||
909 | kfree(dst_hashtable); | ||
910 | } | ||
911 | |||
912 | static int __init dst_sys_init(void) | ||
913 | { | ||
914 | int err = -ENOMEM; | ||
915 | |||
916 | err = dst_hashtable_init(); | ||
917 | if (err) | ||
918 | goto err_out_exit; | ||
919 | |||
920 | err = dst_export_init(); | ||
921 | if (err) | ||
922 | goto err_out_hashtable_exit; | ||
923 | |||
924 | err = register_blkdev(dst_major, DST_NAME); | ||
925 | if (err < 0) | ||
926 | goto err_out_export_exit; | ||
927 | if (err) | ||
928 | dst_major = err; | ||
929 | |||
930 | err = dst_sysfs_init(); | ||
931 | if (err) | ||
932 | goto err_out_unregister; | ||
933 | |||
934 | err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback); | ||
935 | if (err) | ||
936 | goto err_out_sysfs_exit; | ||
937 | |||
938 | printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name); | ||
939 | |||
940 | return 0; | ||
941 | |||
942 | err_out_sysfs_exit: | ||
943 | dst_sysfs_exit(); | ||
944 | err_out_unregister: | ||
945 | unregister_blkdev(dst_major, DST_NAME); | ||
946 | err_out_export_exit: | ||
947 | dst_export_exit(); | ||
948 | err_out_hashtable_exit: | ||
949 | dst_hashtable_exit(); | ||
950 | err_out_exit: | ||
951 | return err; | ||
952 | } | ||
953 | |||
954 | static void __exit dst_sys_exit(void) | ||
955 | { | ||
956 | cn_del_callback(&cn_dst_id); | ||
957 | unregister_blkdev(dst_major, DST_NAME); | ||
958 | dst_hashtable_exit(); | ||
959 | dst_sysfs_exit(); | ||
960 | dst_export_exit(); | ||
961 | } | ||
962 | |||
963 | module_init(dst_sys_init); | ||
964 | module_exit(dst_sys_exit); | ||
965 | |||
966 | MODULE_DESCRIPTION("Distributed storage"); | ||
967 | MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); | ||
968 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/staging/dst/export.c b/drivers/staging/dst/export.c deleted file mode 100644 index c324230e8b60..000000000000 --- a/drivers/staging/dst/export.c +++ /dev/null | |||
@@ -1,660 +0,0 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/blkdev.h> | ||
17 | #include <linux/bio.h> | ||
18 | #include <linux/dst.h> | ||
19 | #include <linux/in.h> | ||
20 | #include <linux/in6.h> | ||
21 | #include <linux/poll.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/socket.h> | ||
24 | |||
25 | #include <net/sock.h> | ||
26 | |||
27 | /* | ||
28 | * Export bioset is used for server block IO requests. | ||
29 | */ | ||
30 | static struct bio_set *dst_bio_set; | ||
31 | |||
32 | int __init dst_export_init(void) | ||
33 | { | ||
34 | int err = -ENOMEM; | ||
35 | |||
36 | dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv)); | ||
37 | if (!dst_bio_set) | ||
38 | goto err_out_exit; | ||
39 | |||
40 | return 0; | ||
41 | |||
42 | err_out_exit: | ||
43 | return err; | ||
44 | } | ||
45 | |||
46 | void dst_export_exit(void) | ||
47 | { | ||
48 | bioset_free(dst_bio_set); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * When a client connects and autonegotiates with the server node, | ||
53 | * its permissions are checked against the security attributes and | ||
54 | * sent back. | ||
55 | */ | ||
56 | static unsigned int dst_check_permissions(struct dst_state *main, | ||
57 | struct dst_state *st) | ||
58 | { | ||
59 | struct dst_node *n = main->node; | ||
60 | struct dst_secure *sentry; | ||
61 | struct dst_secure_user *s; | ||
62 | struct saddr *sa = &st->ctl.addr; | ||
63 | unsigned int perm = 0; | ||
64 | |||
65 | mutex_lock(&n->security_lock); | ||
66 | list_for_each_entry(sentry, &n->security_list, sec_entry) { | ||
67 | s = &sentry->sec; | ||
68 | |||
69 | if (s->addr.sa_family != sa->sa_family) | ||
70 | continue; | ||
71 | |||
72 | if (s->addr.sa_data_len != sa->sa_data_len) | ||
73 | continue; | ||
74 | |||
75 | /* | ||
76 | * The '2' below skips the port field. This may be very wrong | ||
77 | * for other families (atalk, for example). If the protocol ever | ||
78 | * needs to be extended to something else, per-family helpers | ||
79 | * can be used instead of this memcmp. | ||
80 | */ | ||
81 | if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2, | ||
82 | sa->sa_data_len - 2)) | ||
83 | continue; | ||
84 | |||
85 | perm = s->permissions; | ||
86 | } | ||
87 | mutex_unlock(&n->security_lock); | ||
88 | |||
89 | return perm; | ||
90 | } | ||
91 | |||
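The matching rule applied above is: the address family and length must be equal, and the raw address bytes are compared with the first two bytes of sa_data skipped, which for AF_INET is the port, so an allowed host matches regardless of its source port. The standalone restatement below mirrors that rule; struct saddr_sketch is an assumed stand-in for the real struct saddr from include/linux/dst.h.

#include <string.h>

/* Assumed layout; the real type is struct saddr from linux/dst.h. */
struct saddr_sketch {
	unsigned short	sa_family;
	char		sa_data[126];	/* large enough for any sockaddr here */
	unsigned int	sa_data_len;	/* number of valid bytes in sa_data */
};

/*
 * Mirrors the check in dst_check_permissions(): family and length must
 * match, and the address bytes are compared with the first two of them
 * (the AF_INET port) skipped.
 */
static int dst_addr_matches(const struct saddr_sketch *allowed,
			    const struct saddr_sketch *peer)
{
	if (allowed->sa_family != peer->sa_family)
		return 0;
	if (allowed->sa_data_len != peer->sa_data_len)
		return 0;
	return !memcmp(allowed->sa_data + 2, peer->sa_data + 2,
		       peer->sa_data_len - 2);
}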
92 | /* | ||
93 | * Accept new client: allocate appropriate network state and check permissions. | ||
94 | */ | ||
95 | static struct dst_state *dst_accept_client(struct dst_state *st) | ||
96 | { | ||
97 | unsigned int revents = 0; | ||
98 | unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP; | ||
99 | unsigned int mask = err_mask | POLLIN; | ||
100 | struct dst_node *n = st->node; | ||
101 | int err = 0; | ||
102 | struct socket *sock = NULL; | ||
103 | struct dst_state *new; | ||
104 | |||
105 | while (!err && !sock) { | ||
106 | revents = dst_state_poll(st); | ||
107 | |||
108 | if (!(revents & mask)) { | ||
109 | DEFINE_WAIT(wait); | ||
110 | |||
111 | for (;;) { | ||
112 | prepare_to_wait(&st->thread_wait, | ||
113 | &wait, TASK_INTERRUPTIBLE); | ||
114 | if (!n->trans_scan_timeout || st->need_exit) | ||
115 | break; | ||
116 | |||
117 | revents = dst_state_poll(st); | ||
118 | |||
119 | if (revents & mask) | ||
120 | break; | ||
121 | |||
122 | if (signal_pending(current)) | ||
123 | break; | ||
124 | |||
125 | /* | ||
126 | * Magic HZ? Polling check above is not safe in | ||
127 | * all cases (like socket reset in BH context), | ||
128 | * so it is simpler just to postpone it to the | ||
129 | * process context instead of implementing | ||
130 | * special locking there. | ||
131 | */ | ||
132 | schedule_timeout(HZ); | ||
133 | } | ||
134 | finish_wait(&st->thread_wait, &wait); | ||
135 | } | ||
136 | |||
137 | err = -ECONNRESET; | ||
138 | dst_state_lock(st); | ||
139 | |||
140 | dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n", | ||
141 | __func__, st, revents, revents & err_mask, | ||
142 | revents & POLLIN); | ||
143 | |||
144 | if (revents & err_mask) { | ||
145 | dprintk("%s: revents: %x, socket: %p, err: %d.\n", | ||
146 | __func__, revents, st->socket, err); | ||
147 | err = -ECONNRESET; | ||
148 | } | ||
149 | |||
150 | if (!n->trans_scan_timeout || st->need_exit) | ||
151 | err = -ENODEV; | ||
152 | |||
153 | if (st->socket && (revents & POLLIN)) | ||
154 | err = kernel_accept(st->socket, &sock, 0); | ||
155 | |||
156 | dst_state_unlock(st); | ||
157 | } | ||
158 | |||
159 | if (err) | ||
160 | goto err_out_exit; | ||
161 | |||
162 | new = dst_state_alloc(st->node); | ||
163 | if (IS_ERR(new)) { | ||
164 | err = -ENOMEM; | ||
165 | goto err_out_release; | ||
166 | } | ||
167 | new->socket = sock; | ||
168 | |||
169 | new->ctl.addr.sa_data_len = sizeof(struct sockaddr); | ||
170 | err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr, | ||
171 | (int *)&new->ctl.addr.sa_data_len); | ||
172 | if (err) | ||
173 | goto err_out_put; | ||
174 | |||
175 | new->permissions = dst_check_permissions(st, new); | ||
176 | if (new->permissions == 0) { | ||
177 | err = -EPERM; | ||
178 | dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr, | ||
179 | "Client is not allowed to connect"); | ||
180 | goto err_out_put; | ||
181 | } | ||
182 | |||
183 | err = dst_poll_init(new); | ||
184 | if (err) | ||
185 | goto err_out_put; | ||
186 | |||
187 | dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr, | ||
188 | "Connected client"); | ||
189 | |||
190 | return new; | ||
191 | |||
192 | err_out_put: | ||
193 | dst_state_put(new); | ||
194 | err_out_release: | ||
195 | sock_release(sock); | ||
196 | err_out_exit: | ||
197 | return ERR_PTR(err); | ||
198 | } | ||
199 | |||
200 | /* | ||
201 | * Each block request on the server eventually finishes. | ||
202 | * Usually this happens in the hard-irq context of the appropriate | ||
203 | * controller, so to play well in all cases we just put the BIO into | ||
204 | * a queue and wake up the processing thread, which takes the completed | ||
205 | * request and either sends it back to the client (encrypting if needed, | ||
206 | * for a read request) or sends a reply that the write completed. | ||
207 | */ | ||
208 | static int dst_export_process_request_queue(struct dst_state *st) | ||
209 | { | ||
210 | unsigned long flags; | ||
211 | struct dst_export_priv *p = NULL; | ||
212 | struct bio *bio; | ||
213 | int err = 0; | ||
214 | |||
215 | while (!list_empty(&st->request_list)) { | ||
216 | spin_lock_irqsave(&st->request_lock, flags); | ||
217 | if (!list_empty(&st->request_list)) { | ||
218 | p = list_first_entry(&st->request_list, | ||
219 | struct dst_export_priv, request_entry); | ||
220 | list_del(&p->request_entry); | ||
221 | } | ||
222 | spin_unlock_irqrestore(&st->request_lock, flags); | ||
223 | |||
224 | if (!p) | ||
225 | break; | ||
226 | |||
227 | bio = p->bio; | ||
228 | |||
229 | if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ)) | ||
230 | err = dst_export_crypto(st->node, bio); | ||
231 | else | ||
232 | err = dst_export_send_bio(bio); | ||
233 | |||
234 | if (err) | ||
235 | break; | ||
236 | } | ||
237 | |||
238 | return err; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * Clean up the export state. | ||
243 | * It has to wait until all requests have finished, | ||
244 | * and then free them all. | ||
245 | */ | ||
246 | static void dst_state_cleanup_export(struct dst_state *st) | ||
247 | { | ||
248 | struct dst_export_priv *p; | ||
249 | unsigned long flags; | ||
250 | |||
251 | /* | ||
252 | * This loop waits for all pending bios to be completed and freed. | ||
253 | */ | ||
254 | while (atomic_read(&st->refcnt) > 1) { | ||
255 | dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n", | ||
256 | __func__, st, atomic_read(&st->refcnt), | ||
257 | list_empty(&st->request_list)); | ||
258 | wait_event_timeout(st->thread_wait, | ||
259 | (atomic_read(&st->refcnt) == 1) || | ||
260 | !list_empty(&st->request_list), | ||
261 | HZ/2); | ||
262 | |||
263 | while (!list_empty(&st->request_list)) { | ||
264 | p = NULL; | ||
265 | spin_lock_irqsave(&st->request_lock, flags); | ||
266 | if (!list_empty(&st->request_list)) { | ||
267 | p = list_first_entry(&st->request_list, | ||
268 | struct dst_export_priv, request_entry); | ||
269 | list_del(&p->request_entry); | ||
270 | } | ||
271 | spin_unlock_irqrestore(&st->request_lock, flags); | ||
272 | |||
273 | if (p) | ||
274 | bio_put(p->bio); | ||
275 | |||
276 | dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: " | ||
277 | "%p.\n", __func__, st, atomic_read(&st->refcnt), | ||
278 | list_empty(&st->request_list), p); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | dst_state_put(st); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Client accepting thread. | ||
287 | * It not only accepts new connections, but also schedules the receiving | ||
288 | * thread and performs the request completion described above. | ||
289 | */ | ||
290 | static int dst_accept(void *init_data, void *schedule_data) | ||
291 | { | ||
292 | struct dst_state *main_st = schedule_data; | ||
293 | struct dst_node *n = init_data; | ||
294 | struct dst_state *st; | ||
295 | int err; | ||
296 | |||
297 | while (n->trans_scan_timeout && !main_st->need_exit) { | ||
298 | dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n); | ||
299 | st = dst_accept_client(main_st); | ||
300 | if (IS_ERR(st)) | ||
301 | continue; | ||
302 | |||
303 | err = dst_state_schedule_receiver(st); | ||
304 | if (!err) { | ||
305 | while (n->trans_scan_timeout) { | ||
306 | err = wait_event_interruptible_timeout(st->thread_wait, | ||
307 | !list_empty(&st->request_list) || | ||
308 | !n->trans_scan_timeout || | ||
309 | st->need_exit, | ||
310 | HZ); | ||
311 | |||
312 | if (!n->trans_scan_timeout || st->need_exit) | ||
313 | break; | ||
314 | |||
315 | if (list_empty(&st->request_list)) | ||
316 | continue; | ||
317 | |||
318 | err = dst_export_process_request_queue(st); | ||
319 | if (err) | ||
320 | break; | ||
321 | } | ||
322 | |||
323 | st->need_exit = 1; | ||
324 | wake_up(&st->thread_wait); | ||
325 | } | ||
326 | |||
327 | dst_state_cleanup_export(st); | ||
328 | } | ||
329 | |||
330 | dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st); | ||
331 | |||
332 | dst_state_lock(main_st); | ||
333 | dst_poll_exit(main_st); | ||
334 | dst_state_socket_release(main_st); | ||
335 | dst_state_unlock(main_st); | ||
336 | dst_state_put(main_st); | ||
337 | dprintk("%s: freed listening socket st: %p.\n", __func__, main_st); | ||
338 | |||
339 | return 0; | ||
340 | } | ||
341 | |||
342 | int dst_start_export(struct dst_node *n) | ||
343 | { | ||
344 | if (list_empty(&n->security_list)) { | ||
345 | printk(KERN_ERR "You are trying to export node '%s' " | ||
346 | "without security attributes.\nNo clients will " | ||
347 | "be allowed to connect. Exiting.\n", n->name); | ||
348 | return -EINVAL; | ||
349 | } | ||
350 | return dst_node_trans_init(n, sizeof(struct dst_export_priv)); | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Initialize listening state and schedule accepting thread. | ||
355 | */ | ||
356 | int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le) | ||
357 | { | ||
358 | struct dst_state *st; | ||
359 | int err = -ENOMEM; | ||
360 | struct dst_network_ctl *ctl = &le->ctl; | ||
361 | |||
362 | memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl)); | ||
363 | |||
364 | st = dst_state_alloc(n); | ||
365 | if (IS_ERR(st)) { | ||
366 | err = PTR_ERR(st); | ||
367 | goto err_out_exit; | ||
368 | } | ||
369 | memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl)); | ||
370 | |||
371 | err = dst_state_socket_create(st); | ||
372 | if (err) | ||
373 | goto err_out_put; | ||
374 | |||
375 | st->socket->sk->sk_reuse = 1; | ||
376 | |||
377 | err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr, | ||
378 | ctl->addr.sa_data_len); | ||
379 | if (err) | ||
380 | goto err_out_socket_release; | ||
381 | |||
382 | err = kernel_listen(st->socket, 1024); | ||
383 | if (err) | ||
384 | goto err_out_socket_release; | ||
385 | n->state = st; | ||
386 | |||
387 | err = dst_poll_init(st); | ||
388 | if (err) | ||
389 | goto err_out_socket_release; | ||
390 | |||
391 | dst_state_get(st); | ||
392 | |||
393 | err = thread_pool_schedule(n->pool, dst_thread_setup, | ||
394 | dst_accept, st, MAX_SCHEDULE_TIMEOUT); | ||
395 | if (err) | ||
396 | goto err_out_poll_exit; | ||
397 | |||
398 | return 0; | ||
399 | |||
400 | err_out_poll_exit: | ||
401 | dst_poll_exit(st); | ||
402 | err_out_socket_release: | ||
403 | dst_state_socket_release(st); | ||
404 | err_out_put: | ||
405 | dst_state_put(st); | ||
406 | err_out_exit: | ||
407 | n->state = NULL; | ||
408 | return err; | ||
409 | } | ||
410 | |||
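The listener above is driven entirely by the struct dst_network_ctl it is given: sock_create() takes the address family, socket type and protocol from it, and the embedded address is passed straight to kernel_bind(). Below is a hedged sketch of filling such a control block for a plain TCP/IPv4 listener; the struct layout shown is an assumption standing in for the real definition in include/linux/dst.h.

#include <asm/byteorder.h>
#include <linux/in.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/types.h>

/* Assumed stand-in for struct dst_network_ctl and its struct saddr member. */
struct dst_network_ctl_sketch {
	struct {
		unsigned short	sa_family;
		char		sa_data[126];
		unsigned int	sa_data_len;	/* length handed to kernel_bind() */
	} addr;
	unsigned int	type;			/* SOCK_STREAM */
	unsigned int	proto;			/* IPPROTO_TCP */
};

/* Fill the control block for a TCP/IPv4 listener on the given port. */
static void fill_tcp_listener(struct dst_network_ctl_sketch *ctl, u16 port)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)&ctl->addr;

	memset(ctl, 0, sizeof(*ctl));
	ctl->type = SOCK_STREAM;
	ctl->proto = IPPROTO_TCP;

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = htonl(INADDR_ANY);
	sin->sin_port = htons(port);

	ctl->addr.sa_data_len = sizeof(struct sockaddr_in);
}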
411 | /* | ||
412 | * Free bio and related private data. | ||
413 | * Also drop a reference on the appropriate state, which may be | ||
414 | * waiting until there are no more block IOs in flight. | ||
415 | */ | ||
416 | static void dst_bio_destructor(struct bio *bio) | ||
417 | { | ||
418 | struct bio_vec *bv; | ||
419 | struct dst_export_priv *priv = bio->bi_private; | ||
420 | int i; | ||
421 | |||
422 | bio_for_each_segment(bv, bio, i) { | ||
423 | if (!bv->bv_page) | ||
424 | break; | ||
425 | |||
426 | __free_page(bv->bv_page); | ||
427 | } | ||
428 | |||
429 | if (priv) | ||
430 | dst_state_put(priv->state); | ||
431 | bio_free(bio, dst_bio_set); | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * Block IO completion. Queue request to be sent back to | ||
436 | * the client (or just confirmation). | ||
437 | */ | ||
438 | static void dst_bio_end_io(struct bio *bio, int err) | ||
439 | { | ||
440 | struct dst_export_priv *p = bio->bi_private; | ||
441 | struct dst_state *st = p->state; | ||
442 | unsigned long flags; | ||
443 | |||
444 | spin_lock_irqsave(&st->request_lock, flags); | ||
445 | list_add_tail(&p->request_entry, &st->request_list); | ||
446 | spin_unlock_irqrestore(&st->request_lock, flags); | ||
447 | |||
448 | wake_up(&st->thread_wait); | ||
449 | } | ||
450 | |||
451 | /* | ||
452 | * Allocate read request for the server. | ||
453 | */ | ||
454 | static int dst_export_read_request(struct bio *bio, unsigned int total_size) | ||
455 | { | ||
456 | unsigned int size; | ||
457 | struct page *page; | ||
458 | int err; | ||
459 | |||
460 | while (total_size) { | ||
461 | err = -ENOMEM; | ||
462 | page = alloc_page(GFP_KERNEL); | ||
463 | if (!page) | ||
464 | goto err_out_exit; | ||
465 | |||
466 | size = min_t(unsigned int, PAGE_SIZE, total_size); | ||
467 | |||
468 | err = bio_add_page(bio, page, size, 0); | ||
469 | dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n", | ||
470 | __func__, (u64)bio->bi_sector, bio->bi_size, | ||
471 | size, err); | ||
472 | if (err <= 0) | ||
473 | goto err_out_free_page; | ||
474 | |||
475 | total_size -= size; | ||
476 | } | ||
477 | |||
478 | return 0; | ||
479 | |||
480 | err_out_free_page: | ||
481 | __free_page(page); | ||
482 | err_out_exit: | ||
483 | return err; | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * Allocate write request for the server. | ||
488 | * It must not only allocate pages, but also read the data from the network. | ||
489 | */ | ||
490 | static int dst_export_write_request(struct dst_state *st, | ||
491 | struct bio *bio, unsigned int total_size) | ||
492 | { | ||
493 | unsigned int size; | ||
494 | struct page *page; | ||
495 | void *data; | ||
496 | int err; | ||
497 | |||
498 | while (total_size) { | ||
499 | err = -ENOMEM; | ||
500 | page = alloc_page(GFP_KERNEL); | ||
501 | if (!page) | ||
502 | goto err_out_exit; | ||
503 | |||
504 | data = kmap(page); | ||
505 | if (!data) | ||
506 | goto err_out_free_page; | ||
507 | |||
508 | size = min_t(unsigned int, PAGE_SIZE, total_size); | ||
509 | |||
510 | err = dst_data_recv(st, data, size); | ||
511 | if (err) | ||
512 | goto err_out_unmap_page; | ||
513 | |||
514 | err = bio_add_page(bio, page, size, 0); | ||
515 | if (err <= 0) | ||
516 | goto err_out_unmap_page; | ||
517 | |||
518 | kunmap(page); | ||
519 | |||
520 | total_size -= size; | ||
521 | } | ||
522 | |||
523 | return 0; | ||
524 | |||
525 | err_out_unmap_page: | ||
526 | kunmap(page); | ||
527 | err_out_free_page: | ||
528 | __free_page(page); | ||
529 | err_out_exit: | ||
530 | return err; | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * Groovy, we've gotten an IO request from the client. | ||
535 | * Allocate BIO from the bioset, private data from the mempool | ||
536 | * and lots of pages for IO. | ||
537 | */ | ||
538 | int dst_process_io(struct dst_state *st) | ||
539 | { | ||
540 | struct dst_node *n = st->node; | ||
541 | struct dst_cmd *cmd = st->data; | ||
542 | struct bio *bio; | ||
543 | struct dst_export_priv *priv; | ||
544 | int err = -ENOMEM; | ||
545 | |||
546 | if (unlikely(!n->bdev)) { | ||
547 | err = -EINVAL; | ||
548 | goto err_out_exit; | ||
549 | } | ||
550 | |||
551 | bio = bio_alloc_bioset(GFP_KERNEL, | ||
552 | PAGE_ALIGN(cmd->size) >> PAGE_SHIFT, | ||
553 | dst_bio_set); | ||
554 | if (!bio) | ||
555 | goto err_out_exit; | ||
556 | |||
557 | priv = (struct dst_export_priv *)(((void *)bio) - | ||
558 | sizeof (struct dst_export_priv)); | ||
559 | |||
560 | priv->state = dst_state_get(st); | ||
561 | priv->bio = bio; | ||
562 | |||
563 | bio->bi_private = priv; | ||
564 | bio->bi_end_io = dst_bio_end_io; | ||
565 | bio->bi_destructor = dst_bio_destructor; | ||
566 | bio->bi_bdev = n->bdev; | ||
567 | |||
568 | /* | ||
569 | * The server side is only interested in the two low bits: | ||
570 | * uptodate (actually set by the server itself) and the rw bit. | ||
571 | */ | ||
572 | bio->bi_flags |= cmd->flags & 3; | ||
573 | |||
574 | bio->bi_rw = cmd->rw; | ||
575 | bio->bi_size = 0; | ||
576 | bio->bi_sector = cmd->sector; | ||
577 | |||
578 | dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id); | ||
579 | |||
580 | priv->cmd.flags = 0; | ||
581 | priv->cmd.size = cmd->size; | ||
582 | |||
583 | if (bio_data_dir(bio) == WRITE) { | ||
584 | err = dst_recv_cdata(st, priv->cmd.hash); | ||
585 | if (err) | ||
586 | goto err_out_free; | ||
587 | |||
588 | err = dst_export_write_request(st, bio, cmd->size); | ||
589 | if (err) | ||
590 | goto err_out_free; | ||
591 | |||
592 | if (dst_need_crypto(n)) | ||
593 | return dst_export_crypto(n, bio); | ||
594 | } else { | ||
595 | err = dst_export_read_request(bio, cmd->size); | ||
596 | if (err) | ||
597 | goto err_out_free; | ||
598 | } | ||
599 | |||
600 | dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n", | ||
601 | __func__, (u64)bio->bi_sector, bio->bi_size, | ||
602 | bio->bi_rw, bio_data_dir(bio), | ||
603 | bio->bi_flags, bio->bi_phys_segments); | ||
604 | |||
605 | generic_make_request(bio); | ||
606 | |||
607 | return 0; | ||
608 | |||
609 | err_out_free: | ||
610 | bio_put(bio); | ||
611 | err_out_exit: | ||
612 | return err; | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * Ok, block IO is ready, let's send it back to the client... | ||
617 | */ | ||
618 | int dst_export_send_bio(struct bio *bio) | ||
619 | { | ||
620 | struct dst_export_priv *p = bio->bi_private; | ||
621 | struct dst_state *st = p->state; | ||
622 | struct dst_cmd *cmd = &p->cmd; | ||
623 | int err; | ||
624 | |||
625 | dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n", | ||
626 | __func__, cmd->id, (u64)bio->bi_sector, bio->bi_size, | ||
627 | cmd->csize, bio->bi_flags, bio->bi_rw); | ||
628 | |||
629 | dst_convert_cmd(cmd); | ||
630 | |||
631 | dst_state_lock(st); | ||
632 | if (!st->socket) { | ||
633 | err = -ECONNRESET; | ||
634 | goto err_out_unlock; | ||
635 | } | ||
636 | |||
637 | if (bio_data_dir(bio) == WRITE) { | ||
638 | /* ... or just confirmation that writing has completed. */ | ||
639 | cmd->size = cmd->csize = 0; | ||
640 | err = dst_data_send_header(st->socket, cmd, | ||
641 | sizeof(struct dst_cmd), 0); | ||
642 | if (err) | ||
643 | goto err_out_unlock; | ||
644 | } else { | ||
645 | err = dst_send_bio(st, cmd, bio); | ||
646 | if (err) | ||
647 | goto err_out_unlock; | ||
648 | } | ||
649 | |||
650 | dst_state_unlock(st); | ||
651 | |||
652 | bio_put(bio); | ||
653 | return 0; | ||
654 | |||
655 | err_out_unlock: | ||
656 | dst_state_unlock(st); | ||
657 | |||
658 | bio_put(bio); | ||
659 | return err; | ||
660 | } | ||
diff --git a/drivers/staging/dst/state.c b/drivers/staging/dst/state.c deleted file mode 100644 index 02a05e6c48c3..000000000000 --- a/drivers/staging/dst/state.c +++ /dev/null | |||
@@ -1,844 +0,0 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/buffer_head.h> | ||
17 | #include <linux/blkdev.h> | ||
18 | #include <linux/bio.h> | ||
19 | #include <linux/connector.h> | ||
20 | #include <linux/dst.h> | ||
21 | #include <linux/device.h> | ||
22 | #include <linux/in.h> | ||
23 | #include <linux/in6.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/slab.h> | ||
26 | |||
27 | #include <net/sock.h> | ||
28 | |||
29 | /* | ||
30 | * Polling machinery. | ||
31 | */ | ||
32 | |||
33 | struct dst_poll_helper { | ||
34 | poll_table pt; | ||
35 | struct dst_state *st; | ||
36 | }; | ||
37 | |||
38 | static int dst_queue_wake(wait_queue_t *wait, unsigned mode, | ||
39 | int sync, void *key) | ||
40 | { | ||
41 | struct dst_state *st = container_of(wait, struct dst_state, wait); | ||
42 | |||
43 | wake_up(&st->thread_wait); | ||
44 | return 1; | ||
45 | } | ||
46 | |||
47 | static void dst_queue_func(struct file *file, wait_queue_head_t *whead, | ||
48 | poll_table *pt) | ||
49 | { | ||
50 | struct dst_state *st = container_of(pt, struct dst_poll_helper, pt)->st; | ||
51 | |||
52 | st->whead = whead; | ||
53 | init_waitqueue_func_entry(&st->wait, dst_queue_wake); | ||
54 | add_wait_queue(whead, &st->wait); | ||
55 | } | ||
56 | |||
57 | void dst_poll_exit(struct dst_state *st) | ||
58 | { | ||
59 | if (st->whead) { | ||
60 | remove_wait_queue(st->whead, &st->wait); | ||
61 | st->whead = NULL; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | int dst_poll_init(struct dst_state *st) | ||
66 | { | ||
67 | struct dst_poll_helper ph; | ||
68 | |||
69 | ph.st = st; | ||
70 | init_poll_funcptr(&ph.pt, &dst_queue_func); | ||
71 | |||
72 | st->socket->ops->poll(NULL, st->socket, &ph.pt); | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Header receiving function - may block. | ||
78 | */ | ||
79 | static int dst_data_recv_header(struct socket *sock, | ||
80 | void *data, unsigned int size, int block) | ||
81 | { | ||
82 | struct msghdr msg; | ||
83 | struct kvec iov; | ||
84 | int err; | ||
85 | |||
86 | iov.iov_base = data; | ||
87 | iov.iov_len = size; | ||
88 | |||
89 | msg.msg_iov = (struct iovec *)&iov; | ||
90 | msg.msg_iovlen = 1; | ||
91 | msg.msg_name = NULL; | ||
92 | msg.msg_namelen = 0; | ||
93 | msg.msg_control = NULL; | ||
94 | msg.msg_controllen = 0; | ||
95 | msg.msg_flags = (block) ? MSG_WAITALL : MSG_DONTWAIT; | ||
96 | |||
97 | err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len, | ||
98 | msg.msg_flags); | ||
99 | if (err != size) | ||
100 | return -1; | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Header sending function - may block. | ||
107 | */ | ||
108 | int dst_data_send_header(struct socket *sock, | ||
109 | void *data, unsigned int size, int more) | ||
110 | { | ||
111 | struct msghdr msg; | ||
112 | struct kvec iov; | ||
113 | int err; | ||
114 | |||
115 | iov.iov_base = data; | ||
116 | iov.iov_len = size; | ||
117 | |||
118 | msg.msg_iov = (struct iovec *)&iov; | ||
119 | msg.msg_iovlen = 1; | ||
120 | msg.msg_name = NULL; | ||
121 | msg.msg_namelen = 0; | ||
122 | msg.msg_control = NULL; | ||
123 | msg.msg_controllen = 0; | ||
124 | msg.msg_flags = MSG_WAITALL | (more ? MSG_MORE : 0); | ||
125 | |||
126 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
127 | if (err != size) { | ||
128 | dprintk("%s: size: %u, more: %d, err: %d.\n", | ||
129 | __func__, size, more, err); | ||
130 | return -1; | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * Block autoconfiguration: request size of the storage and permissions. | ||
138 | */ | ||
139 | static int dst_request_remote_config(struct dst_state *st) | ||
140 | { | ||
141 | struct dst_node *n = st->node; | ||
142 | int err = -EINVAL; | ||
143 | struct dst_cmd *cmd = st->data; | ||
144 | |||
145 | memset(cmd, 0, sizeof(struct dst_cmd)); | ||
146 | cmd->cmd = DST_CFG; | ||
147 | |||
148 | dst_convert_cmd(cmd); | ||
149 | |||
150 | err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0); | ||
151 | if (err) | ||
152 | goto out; | ||
153 | |||
154 | err = dst_data_recv_header(st->socket, cmd, sizeof(struct dst_cmd), 1); | ||
155 | if (err) | ||
156 | goto out; | ||
157 | |||
158 | dst_convert_cmd(cmd); | ||
159 | |||
160 | if (cmd->cmd != DST_CFG) { | ||
161 | err = -EINVAL; | ||
162 | dprintk("%s: checking result: cmd: %d, size reported: %llu.\n", | ||
163 | __func__, cmd->cmd, cmd->sector); | ||
164 | goto out; | ||
165 | } | ||
166 | |||
167 | if (n->size != 0) | ||
168 | n->size = min_t(loff_t, n->size, cmd->sector); | ||
169 | else | ||
170 | n->size = cmd->sector; | ||
171 | |||
172 | n->info->size = n->size; | ||
173 | st->permissions = cmd->rw; | ||
174 | |||
175 | out: | ||
176 | dprintk("%s: n: %p, err: %d, size: %llu, permission: %x.\n", | ||
177 | __func__, n, err, n->size, st->permissions); | ||
178 | return err; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * Socket machinery. | ||
183 | */ | ||
184 | |||
185 | #define DST_DEFAULT_TIMEO 20000 | ||
186 | |||
187 | int dst_state_socket_create(struct dst_state *st) | ||
188 | { | ||
189 | int err; | ||
190 | struct socket *sock; | ||
191 | struct dst_network_ctl *ctl = &st->ctl; | ||
192 | |||
193 | err = sock_create(ctl->addr.sa_family, ctl->type, ctl->proto, &sock); | ||
194 | if (err < 0) | ||
195 | return err; | ||
196 | |||
197 | sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo = | ||
198 | msecs_to_jiffies(DST_DEFAULT_TIMEO); | ||
199 | sock->sk->sk_allocation = GFP_NOIO; | ||
200 | |||
201 | st->socket = st->read_socket = sock; | ||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | void dst_state_socket_release(struct dst_state *st) | ||
206 | { | ||
207 | dprintk("%s: st: %p, socket: %p, n: %p.\n", | ||
208 | __func__, st, st->socket, st->node); | ||
209 | if (st->socket) { | ||
210 | sock_release(st->socket); | ||
211 | st->socket = NULL; | ||
212 | st->read_socket = NULL; | ||
213 | } | ||
214 | } | ||
215 | |||
216 | void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str) | ||
217 | { | ||
218 | if (sk->ops->family == AF_INET) { | ||
219 | struct sockaddr_in *sin = (struct sockaddr_in *)sa; | ||
220 | printk(KERN_INFO "%s %u.%u.%u.%u:%d.\n", str, | ||
221 | NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); | ||
222 | } else if (sk->ops->family == AF_INET6) { | ||
223 | struct sockaddr_in6 *sin = (struct sockaddr_in6 *)sa; | ||
224 | printk(KERN_INFO "%s %pi6:%d", | ||
225 | str, &sin->sin6_addr, ntohs(sin->sin6_port)); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | void dst_state_exit_connected(struct dst_state *st) | ||
230 | { | ||
231 | if (st->socket) { | ||
232 | dst_poll_exit(st); | ||
233 | st->socket->ops->shutdown(st->socket, 2); | ||
234 | |||
235 | dst_dump_addr(st->socket, (struct sockaddr *)&st->ctl.addr, | ||
236 | "Disconnected peer"); | ||
237 | dst_state_socket_release(st); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | static int dst_state_init_connected(struct dst_state *st) | ||
242 | { | ||
243 | int err; | ||
244 | struct dst_network_ctl *ctl = &st->ctl; | ||
245 | |||
246 | err = dst_state_socket_create(st); | ||
247 | if (err) | ||
248 | goto err_out_exit; | ||
249 | |||
250 | err = kernel_connect(st->socket, (struct sockaddr *)&st->ctl.addr, | ||
251 | st->ctl.addr.sa_data_len, 0); | ||
252 | if (err) | ||
253 | goto err_out_release; | ||
254 | |||
255 | err = dst_poll_init(st); | ||
256 | if (err) | ||
257 | goto err_out_release; | ||
258 | |||
259 | dst_dump_addr(st->socket, (struct sockaddr *)&ctl->addr, | ||
260 | "Connected to peer"); | ||
261 | |||
262 | return 0; | ||
263 | |||
264 | err_out_release: | ||
265 | dst_state_socket_release(st); | ||
266 | err_out_exit: | ||
267 | return err; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * State reset is used to reconnect to the remote peer. | ||
272 | * May fail, but who cares, we will try again later. | ||
273 | */ | ||
274 | static inline void dst_state_reset_nolock(struct dst_state *st) | ||
275 | { | ||
276 | dst_state_exit_connected(st); | ||
277 | dst_state_init_connected(st); | ||
278 | } | ||
279 | |||
280 | static inline void dst_state_reset(struct dst_state *st) | ||
281 | { | ||
282 | dst_state_lock(st); | ||
283 | dst_state_reset_nolock(st); | ||
284 | dst_state_unlock(st); | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * Basic network sending/receiving functions. | ||
289 | * Blocking mode is used. | ||
290 | */ | ||
291 | static int dst_data_recv_raw(struct dst_state *st, void *buf, u64 size) | ||
292 | { | ||
293 | struct msghdr msg; | ||
294 | struct kvec iov; | ||
295 | int err; | ||
296 | |||
297 | BUG_ON(!size); | ||
298 | |||
299 | iov.iov_base = buf; | ||
300 | iov.iov_len = size; | ||
301 | |||
302 | msg.msg_iov = (struct iovec *)&iov; | ||
303 | msg.msg_iovlen = 1; | ||
304 | msg.msg_name = NULL; | ||
305 | msg.msg_namelen = 0; | ||
306 | msg.msg_control = NULL; | ||
307 | msg.msg_controllen = 0; | ||
308 | msg.msg_flags = MSG_DONTWAIT; | ||
309 | |||
310 | err = kernel_recvmsg(st->socket, &msg, &iov, 1, iov.iov_len, | ||
311 | msg.msg_flags); | ||
312 | if (err <= 0) { | ||
313 | dprintk("%s: failed to recv data: size: %llu, err: %d.\n", | ||
314 | __func__, size, err); | ||
315 | if (err == 0) | ||
316 | err = -ECONNRESET; | ||
317 | |||
318 | dst_state_exit_connected(st); | ||
319 | } | ||
320 | |||
321 | return err; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Ping command to detect failed nodes early. | ||
326 | */ | ||
327 | static int dst_send_ping(struct dst_state *st) | ||
328 | { | ||
329 | struct dst_cmd *cmd = st->data; | ||
330 | int err = -ECONNRESET; | ||
331 | |||
332 | dst_state_lock(st); | ||
333 | if (st->socket) { | ||
334 | memset(cmd, 0, sizeof(struct dst_cmd)); | ||
335 | |||
336 | cmd->cmd = __cpu_to_be32(DST_PING); | ||
337 | |||
338 | err = dst_data_send_header(st->socket, cmd, | ||
339 | sizeof(struct dst_cmd), 0); | ||
340 | } | ||
341 | dprintk("%s: st: %p, socket: %p, err: %d.\n", __func__, | ||
342 | st, st->socket, err); | ||
343 | dst_state_unlock(st); | ||
344 | |||
345 | return err; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * Receiving function, which should either return an error or read the | ||
350 | * whole block request. If there has been no traffic for one second, | ||
351 | * send a ping, since the remote node may have died. | ||
352 | */ | ||
353 | int dst_data_recv(struct dst_state *st, void *data, unsigned int size) | ||
354 | { | ||
355 | unsigned int revents = 0; | ||
356 | unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP; | ||
357 | unsigned int mask = err_mask | POLLIN; | ||
358 | struct dst_node *n = st->node; | ||
359 | int err = 0; | ||
360 | |||
361 | while (size && !err) { | ||
362 | revents = dst_state_poll(st); | ||
363 | |||
364 | if (!(revents & mask)) { | ||
365 | DEFINE_WAIT(wait); | ||
366 | |||
367 | for (;;) { | ||
368 | prepare_to_wait(&st->thread_wait, &wait, | ||
369 | TASK_INTERRUPTIBLE); | ||
370 | if (!n->trans_scan_timeout || st->need_exit) | ||
371 | break; | ||
372 | |||
373 | revents = dst_state_poll(st); | ||
374 | |||
375 | if (revents & mask) | ||
376 | break; | ||
377 | |||
378 | if (signal_pending(current)) | ||
379 | break; | ||
380 | |||
381 | if (!schedule_timeout(HZ)) { | ||
382 | err = dst_send_ping(st); | ||
383 | if (err) | ||
384 | return err; | ||
385 | } | ||
386 | |||
387 | continue; | ||
388 | } | ||
389 | finish_wait(&st->thread_wait, &wait); | ||
390 | } | ||
391 | |||
392 | err = -ECONNRESET; | ||
393 | dst_state_lock(st); | ||
394 | |||
395 | if (st->socket && (st->read_socket == st->socket) && | ||
396 | (revents & POLLIN)) { | ||
397 | err = dst_data_recv_raw(st, data, size); | ||
398 | if (err > 0) { | ||
399 | data += err; | ||
400 | size -= err; | ||
401 | err = 0; | ||
402 | } | ||
403 | } | ||
404 | |||
405 | if (revents & err_mask || !st->socket) { | ||
406 | dprintk("%s: revents: %x, socket: %p, size: %u, " | ||
407 | "err: %d.\n", __func__, revents, | ||
408 | st->socket, size, err); | ||
409 | err = -ECONNRESET; | ||
410 | } | ||
411 | |||
412 | dst_state_unlock(st); | ||
413 | |||
414 | if (!n->trans_scan_timeout) | ||
415 | err = -ENODEV; | ||
416 | } | ||
417 | |||
418 | return err; | ||
419 | } | ||
420 | |||
421 | /* | ||
422 | * Send block autoconf reply. | ||
423 | */ | ||
424 | static int dst_process_cfg(struct dst_state *st) | ||
425 | { | ||
426 | struct dst_node *n = st->node; | ||
427 | struct dst_cmd *cmd = st->data; | ||
428 | int err; | ||
429 | |||
430 | cmd->sector = n->size; | ||
431 | cmd->rw = st->permissions; | ||
432 | |||
433 | dst_convert_cmd(cmd); | ||
434 | |||
435 | dst_state_lock(st); | ||
436 | err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0); | ||
437 | dst_state_unlock(st); | ||
438 | |||
439 | return err; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * Receive block IO from the network. | ||
444 | */ | ||
445 | static int dst_recv_bio(struct dst_state *st, struct bio *bio, | ||
446 | unsigned int total_size) | ||
447 | { | ||
448 | struct bio_vec *bv; | ||
449 | int i, err; | ||
450 | void *data; | ||
451 | unsigned int sz; | ||
452 | |||
453 | bio_for_each_segment(bv, bio, i) { | ||
454 | sz = min(total_size, bv->bv_len); | ||
455 | |||
456 | dprintk("%s: bio: %llu/%u, total: %u, len: %u, sz: %u, " | ||
457 | "off: %u.\n", __func__, (u64)bio->bi_sector, | ||
458 | bio->bi_size, total_size, bv->bv_len, sz, | ||
459 | bv->bv_offset); | ||
460 | |||
461 | data = kmap(bv->bv_page) + bv->bv_offset; | ||
462 | err = dst_data_recv(st, data, sz); | ||
463 | kunmap(bv->bv_page); | ||
464 | |||
465 | bv->bv_len = sz; | ||
466 | |||
467 | if (err) | ||
468 | return err; | ||
469 | |||
470 | total_size -= sz; | ||
471 | if (total_size == 0) | ||
472 | break; | ||
473 | } | ||
474 | |||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | /* | ||
479 | * Our block IO has just completed and arrived: get it. | ||
480 | */ | ||
481 | static int dst_process_io_response(struct dst_state *st) | ||
482 | { | ||
483 | struct dst_node *n = st->node; | ||
484 | struct dst_cmd *cmd = st->data; | ||
485 | struct dst_trans *t; | ||
486 | int err = 0; | ||
487 | struct bio *bio; | ||
488 | |||
489 | mutex_lock(&n->trans_lock); | ||
490 | t = dst_trans_search(n, cmd->id); | ||
491 | mutex_unlock(&n->trans_lock); | ||
492 | |||
493 | if (!t) | ||
494 | goto err_out_exit; | ||
495 | |||
496 | bio = t->bio; | ||
497 | |||
498 | dprintk("%s: bio: %llu/%u, cmd_size: %u, csize: %u, dir: %lu.\n", | ||
499 | __func__, (u64)bio->bi_sector, bio->bi_size, cmd->size, | ||
500 | cmd->csize, bio_data_dir(bio)); | ||
501 | |||
502 | if (bio_data_dir(bio) == READ) { | ||
503 | if (bio->bi_size != cmd->size - cmd->csize) | ||
504 | goto err_out_exit; | ||
505 | |||
506 | if (dst_need_crypto(n)) { | ||
507 | err = dst_recv_cdata(st, t->cmd.hash); | ||
508 | if (err) | ||
509 | goto err_out_exit; | ||
510 | } | ||
511 | |||
512 | err = dst_recv_bio(st, t->bio, bio->bi_size); | ||
513 | if (err) | ||
514 | goto err_out_exit; | ||
515 | |||
516 | if (dst_need_crypto(n)) | ||
517 | return dst_trans_crypto(t); | ||
518 | } else { | ||
519 | err = -EBADMSG; | ||
520 | if (cmd->size || cmd->csize) | ||
521 | goto err_out_exit; | ||
522 | } | ||
523 | |||
524 | dst_trans_remove(t); | ||
525 | dst_trans_put(t); | ||
526 | |||
527 | return 0; | ||
528 | |||
529 | err_out_exit: | ||
530 | return err; | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * Receive crypto data. | ||
535 | */ | ||
536 | int dst_recv_cdata(struct dst_state *st, void *cdata) | ||
537 | { | ||
538 | struct dst_cmd *cmd = st->data; | ||
539 | struct dst_node *n = st->node; | ||
540 | struct dst_crypto_ctl *c = &n->crypto; | ||
541 | int err; | ||
542 | |||
543 | if (cmd->csize != c->crypto_attached_size) { | ||
544 | dprintk("%s: cmd: cmd: %u, sector: %llu, size: %u, " | ||
545 | "csize: %u != digest size %u.\n", | ||
546 | __func__, cmd->cmd, cmd->sector, cmd->size, | ||
547 | cmd->csize, c->crypto_attached_size); | ||
548 | err = -EINVAL; | ||
549 | goto err_out_exit; | ||
550 | } | ||
551 | |||
552 | err = dst_data_recv(st, cdata, cmd->csize); | ||
553 | if (err) | ||
554 | goto err_out_exit; | ||
555 | |||
556 | cmd->size -= cmd->csize; | ||
557 | return 0; | ||
558 | |||
559 | err_out_exit: | ||
560 | return err; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Receive the command and start its processing. | ||
565 | */ | ||
566 | static int dst_recv_processing(struct dst_state *st) | ||
567 | { | ||
568 | int err = -EINTR; | ||
569 | struct dst_cmd *cmd = st->data; | ||
570 | |||
571 | /* | ||
572 | * If the socket is reset after this statement, then | ||
573 | * dst_data_recv() will simply fail and the loop will | ||
574 | * start again, so this can be done without any locks. | ||
575 | * | ||
576 | * st->read_socket is needed to prevent the state machine from | ||
577 | * breaking between this read and a subsequent one in the | ||
578 | * protocol-specific functions during a connection reset. | ||
579 | * After a reset we have to read the next command and must | ||
580 | * not expect data for the old command to magically appear | ||
581 | * on the new connection. | ||
582 | */ | ||
583 | st->read_socket = st->socket; | ||
584 | err = dst_data_recv(st, cmd, sizeof(struct dst_cmd)); | ||
585 | if (err) | ||
586 | goto out_exit; | ||
587 | |||
588 | dst_convert_cmd(cmd); | ||
589 | |||
590 | dprintk("%s: cmd: %u, size: %u, csize: %u, id: %llu, " | ||
591 | "sector: %llu, flags: %llx, rw: %llx.\n", | ||
592 | __func__, cmd->cmd, cmd->size, | ||
593 | cmd->csize, cmd->id, cmd->sector, | ||
594 | cmd->flags, cmd->rw); | ||
595 | |||
596 | /* | ||
597 | * This should catch protocol breakage and random garbage | ||
598 | * instead of commands. | ||
599 | */ | ||
600 | if (unlikely(cmd->csize > st->size - sizeof(struct dst_cmd))) { | ||
601 | err = -EBADMSG; | ||
602 | goto out_exit; | ||
603 | } | ||
604 | |||
605 | err = -EPROTO; | ||
606 | switch (cmd->cmd) { | ||
607 | case DST_IO_RESPONSE: | ||
608 | err = dst_process_io_response(st); | ||
609 | break; | ||
610 | case DST_IO: | ||
611 | err = dst_process_io(st); | ||
612 | break; | ||
613 | case DST_CFG: | ||
614 | err = dst_process_cfg(st); | ||
615 | break; | ||
616 | case DST_PING: | ||
617 | err = 0; | ||
618 | break; | ||
619 | default: | ||
620 | break; | ||
621 | } | ||
622 | |||
623 | out_exit: | ||
624 | return err; | ||
625 | } | ||
626 | |||
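For reference, every exchange on the wire begins with the struct dst_cmd header that dst_recv_processing() reads and dispatches on. The sketch below is only a rough picture of that header, reconstructed from how its fields are used in this file; the real layout, field order and exact types live in include/linux/dst.h and may well differ (the header is apparently carried in network byte order and fixed up in place by dst_convert_cmd()).

#include <linux/types.h>

/* Assumed shape of the protocol header; illustration only, not the real ABI. */
struct dst_cmd_sketch {
	u32	cmd;	/* DST_IO, DST_IO_RESPONSE, DST_CFG, DST_PING, ... */
	u32	size;	/* bytes of payload following the header */
	u32	csize;	/* bytes of attached crypto digest within 'size' */
	u64	id;	/* transaction id, looked up via dst_trans_search() */
	u64	sector;	/* starting sector; the device size in DST_CFG replies */
	u64	flags;	/* low bits of bio->bi_flags for DST_IO requests */
	u64	rw;	/* bio->bi_rw; the permission bits in DST_CFG replies */
	u8	hash[];	/* csize bytes of digest, followed by the data pages */
};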
627 | /* | ||
628 | * Receiving thread. For the client node we should try to reconnect, | ||
629 | * for an accepted client we just drop the state and expect it to reconnect. | ||
630 | */ | ||
631 | static int dst_recv(void *init_data, void *schedule_data) | ||
632 | { | ||
633 | struct dst_state *st = schedule_data; | ||
634 | struct dst_node *n = init_data; | ||
635 | int err = 0; | ||
636 | |||
637 | dprintk("%s: start st: %p, n: %p, scan: %lu, need_exit: %d.\n", | ||
638 | __func__, st, n, n->trans_scan_timeout, st->need_exit); | ||
639 | |||
640 | while (n->trans_scan_timeout && !st->need_exit) { | ||
641 | err = dst_recv_processing(st); | ||
642 | if (err < 0) { | ||
643 | if (!st->ctl.type) | ||
644 | break; | ||
645 | |||
646 | if (!n->trans_scan_timeout || st->need_exit) | ||
647 | break; | ||
648 | |||
649 | dst_state_reset(st); | ||
650 | msleep(1000); | ||
651 | } | ||
652 | } | ||
653 | |||
654 | st->need_exit = 1; | ||
655 | wake_up(&st->thread_wait); | ||
656 | |||
657 | dprintk("%s: freeing receiving socket st: %p.\n", __func__, st); | ||
658 | dst_state_lock(st); | ||
659 | dst_state_exit_connected(st); | ||
660 | dst_state_unlock(st); | ||
661 | dst_state_put(st); | ||
662 | |||
663 | dprintk("%s: freed receiving socket st: %p.\n", __func__, st); | ||
664 | |||
665 | return err; | ||
666 | } | ||
667 | |||
668 | /* | ||
669 | * Network state dies here and is born a couple of lines below. | ||
670 | * This object is the main network-state processing engine: | ||
671 | * sending, receiving, reconnection; all network-related | ||
672 | * tasks are handled on behalf of the state. | ||
673 | */ | ||
674 | static void dst_state_free(struct dst_state *st) | ||
675 | { | ||
676 | dprintk("%s: st: %p.\n", __func__, st); | ||
677 | if (st->cleanup) | ||
678 | st->cleanup(st); | ||
679 | kfree(st->data); | ||
680 | kfree(st); | ||
681 | } | ||
682 | |||
683 | struct dst_state *dst_state_alloc(struct dst_node *n) | ||
684 | { | ||
685 | struct dst_state *st; | ||
686 | int err = -ENOMEM; | ||
687 | |||
688 | st = kzalloc(sizeof(struct dst_state), GFP_KERNEL); | ||
689 | if (!st) | ||
690 | goto err_out_exit; | ||
691 | |||
692 | st->node = n; | ||
693 | st->need_exit = 0; | ||
694 | |||
695 | st->size = PAGE_SIZE; | ||
696 | st->data = kmalloc(st->size, GFP_KERNEL); | ||
697 | if (!st->data) | ||
698 | goto err_out_free; | ||
699 | |||
700 | spin_lock_init(&st->request_lock); | ||
701 | INIT_LIST_HEAD(&st->request_list); | ||
702 | |||
703 | mutex_init(&st->state_lock); | ||
704 | init_waitqueue_head(&st->thread_wait); | ||
705 | |||
706 | /* | ||
707 | * One for processing thread, another one for node itself. | ||
708 | */ | ||
709 | atomic_set(&st->refcnt, 2); | ||
710 | |||
711 | dprintk("%s: st: %p, n: %p.\n", __func__, st, st->node); | ||
712 | |||
713 | return st; | ||
714 | |||
715 | err_out_free: | ||
716 | kfree(st); | ||
717 | err_out_exit: | ||
718 | return ERR_PTR(err); | ||
719 | } | ||
720 | |||
721 | int dst_state_schedule_receiver(struct dst_state *st) | ||
722 | { | ||
723 | return thread_pool_schedule_private(st->node->pool, dst_thread_setup, | ||
724 | dst_recv, st, MAX_SCHEDULE_TIMEOUT, st->node); | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Initialize client's connection to the remote peer: allocate state, | ||
729 | * connect and perform block IO autoconfiguration. | ||
730 | */ | ||
731 | int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r) | ||
732 | { | ||
733 | struct dst_state *st; | ||
734 | int err = -ENOMEM; | ||
735 | |||
736 | st = dst_state_alloc(n); | ||
737 | if (IS_ERR(st)) { | ||
738 | err = PTR_ERR(st); | ||
739 | goto err_out_exit; | ||
740 | } | ||
741 | memcpy(&st->ctl, r, sizeof(struct dst_network_ctl)); | ||
742 | |||
743 | err = dst_state_init_connected(st); | ||
744 | if (err) | ||
745 | goto err_out_free_data; | ||
746 | |||
747 | err = dst_request_remote_config(st); | ||
748 | if (err) | ||
749 | goto err_out_exit_connected; | ||
750 | n->state = st; | ||
751 | |||
752 | err = dst_state_schedule_receiver(st); | ||
753 | if (err) | ||
754 | goto err_out_exit_connected; | ||
755 | |||
756 | return 0; | ||
757 | |||
758 | err_out_exit_connected: | ||
759 | dst_state_exit_connected(st); | ||
760 | err_out_free_data: | ||
761 | dst_state_free(st); | ||
762 | err_out_exit: | ||
763 | n->state = NULL; | ||
764 | return err; | ||
765 | } | ||
766 | |||
767 | void dst_state_put(struct dst_state *st) | ||
768 | { | ||
769 | dprintk("%s: st: %p, refcnt: %d.\n", | ||
770 | __func__, st, atomic_read(&st->refcnt)); | ||
771 | if (atomic_dec_and_test(&st->refcnt)) | ||
772 | dst_state_free(st); | ||
773 | } | ||
774 | |||
775 | /* | ||
776 | * Send block IO pages to the network one by one using zero-copy ->sendpage(). | ||
777 | */ | ||
778 | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio) | ||
779 | { | ||
780 | struct bio_vec *bv; | ||
781 | struct dst_crypto_ctl *c = &st->node->crypto; | ||
782 | int err, i = 0; | ||
783 | int flags = MSG_WAITALL; | ||
784 | |||
785 | err = dst_data_send_header(st->socket, cmd, | ||
786 | sizeof(struct dst_cmd) + c->crypto_attached_size, bio->bi_vcnt); | ||
787 | if (err) | ||
788 | goto err_out_exit; | ||
789 | |||
790 | bio_for_each_segment(bv, bio, i) { | ||
791 | if (i < bio->bi_vcnt - 1) | ||
792 | flags |= MSG_MORE; | ||
793 | |||
794 | err = kernel_sendpage(st->socket, bv->bv_page, bv->bv_offset, | ||
795 | bv->bv_len, flags); | ||
796 | if (err <= 0) | ||
797 | goto err_out_exit; | ||
798 | } | ||
799 | |||
800 | return 0; | ||
801 | |||
802 | err_out_exit: | ||
803 | dprintk("%s: %d/%d, flags: %x, err: %d.\n", | ||
804 | __func__, i, bio->bi_vcnt, flags, err); | ||
805 | return err; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Send transaction to the remote peer. | ||
810 | */ | ||
811 | int dst_trans_send(struct dst_trans *t) | ||
812 | { | ||
813 | int err; | ||
814 | struct dst_state *st = t->n->state; | ||
815 | struct bio *bio = t->bio; | ||
816 | |||
817 | dst_convert_cmd(&t->cmd); | ||
818 | |||
819 | dst_state_lock(st); | ||
820 | if (!st->socket) { | ||
821 | err = dst_state_init_connected(st); | ||
822 | if (err) | ||
823 | goto err_out_unlock; | ||
824 | } | ||
825 | |||
826 | if (bio_data_dir(bio) == WRITE) { | ||
827 | err = dst_send_bio(st, &t->cmd, t->bio); | ||
828 | } else { | ||
829 | err = dst_data_send_header(st->socket, &t->cmd, | ||
830 | sizeof(struct dst_cmd), 0); | ||
831 | } | ||
832 | if (err) | ||
833 | goto err_out_reset; | ||
834 | |||
835 | dst_state_unlock(st); | ||
836 | return 0; | ||
837 | |||
838 | err_out_reset: | ||
839 | dst_state_reset_nolock(st); | ||
840 | err_out_unlock: | ||
841 | dst_state_unlock(st); | ||
842 | |||
843 | return err; | ||
844 | } | ||
diff --git a/drivers/staging/dst/thread_pool.c b/drivers/staging/dst/thread_pool.c deleted file mode 100644 index 29a82b2602f3..000000000000 --- a/drivers/staging/dst/thread_pool.c +++ /dev/null | |||
@@ -1,348 +0,0 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/dst.h> | ||
18 | #include <linux/kthread.h> | ||
19 | #include <linux/slab.h> | ||
20 | |||
21 | /* | ||
22 | * The thread pool abstraction allows work to be scheduled on behalf | ||
23 | * of a kernel thread. One does not operate on the threads themselves; | ||
24 | * instead the user provides setup and cleanup callbacks for the pool | ||
25 | * itself, and action and cleanup callbacks for each submitted work item. | ||
26 | * | ||
27 | * Each worker has private data initialized at creation time plus data | ||
28 | * provided by the user at scheduling time. | ||
29 | * | ||
30 | * While an action is being performed, the thread cannot be used by other | ||
31 | * users; they will sleep until a free thread picks up their work. | ||
32 | */ | ||
33 | struct thread_pool_worker { | ||
34 | struct list_head worker_entry; | ||
35 | |||
36 | struct task_struct *thread; | ||
37 | |||
38 | struct thread_pool *pool; | ||
39 | |||
40 | int error; | ||
41 | int has_data; | ||
42 | int need_exit; | ||
43 | unsigned int id; | ||
44 | |||
45 | wait_queue_head_t wait; | ||
46 | |||
47 | void *private; | ||
48 | void *schedule_data; | ||
49 | |||
50 | int (*action)(void *private, void *schedule_data); | ||
51 | void (*cleanup)(void *private); | ||
52 | }; | ||
53 | |||
54 | static void thread_pool_exit_worker(struct thread_pool_worker *w) | ||
55 | { | ||
56 | kthread_stop(w->thread); | ||
57 | |||
58 | w->cleanup(w->private); | ||
59 | kfree(w); | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Called to mark thread as ready and allow users to schedule new work. | ||
64 | */ | ||
65 | static void thread_pool_worker_make_ready(struct thread_pool_worker *w) | ||
66 | { | ||
67 | struct thread_pool *p = w->pool; | ||
68 | |||
69 | mutex_lock(&p->thread_lock); | ||
70 | |||
71 | if (!w->need_exit) { | ||
72 | list_move_tail(&w->worker_entry, &p->ready_list); | ||
73 | w->has_data = 0; | ||
74 | mutex_unlock(&p->thread_lock); | ||
75 | |||
76 | wake_up(&p->wait); | ||
77 | } else { | ||
78 | p->thread_num--; | ||
79 | list_del(&w->worker_entry); | ||
80 | mutex_unlock(&p->thread_lock); | ||
81 | |||
82 | thread_pool_exit_worker(w); | ||
83 | } | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Thread action loop: waits until there is new work. | ||
88 | */ | ||
89 | static int thread_pool_worker_func(void *data) | ||
90 | { | ||
91 | struct thread_pool_worker *w = data; | ||
92 | |||
93 | while (!kthread_should_stop()) { | ||
94 | wait_event_interruptible(w->wait, | ||
95 | kthread_should_stop() || w->has_data); | ||
96 | |||
97 | if (kthread_should_stop()) | ||
98 | break; | ||
99 | |||
100 | if (!w->has_data) | ||
101 | continue; | ||
102 | |||
103 | w->action(w->private, w->schedule_data); | ||
104 | thread_pool_worker_make_ready(w); | ||
105 | } | ||
106 | |||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Remove single worker without specifying which one. | ||
112 | */ | ||
113 | void thread_pool_del_worker(struct thread_pool *p) | ||
114 | { | ||
115 | struct thread_pool_worker *w = NULL; | ||
116 | |||
117 | while (!w && p->thread_num) { | ||
118 | wait_event(p->wait, !list_empty(&p->ready_list) || | ||
119 | !p->thread_num); | ||
120 | |||
121 | dprintk("%s: locking list_empty: %d, thread_num: %d.\n", | ||
122 | __func__, list_empty(&p->ready_list), | ||
123 | p->thread_num); | ||
124 | |||
125 | mutex_lock(&p->thread_lock); | ||
126 | if (!list_empty(&p->ready_list)) { | ||
127 | w = list_first_entry(&p->ready_list, | ||
128 | struct thread_pool_worker, | ||
129 | worker_entry); | ||
130 | |||
131 | dprintk("%s: deleting w: %p, thread_num: %d, " | ||
132 | "list: %p [%p.%p].\n", __func__, | ||
133 | w, p->thread_num, &p->ready_list, | ||
134 | p->ready_list.prev, p->ready_list.next); | ||
135 | |||
136 | p->thread_num--; | ||
137 | list_del(&w->worker_entry); | ||
138 | } | ||
139 | mutex_unlock(&p->thread_lock); | ||
140 | } | ||
141 | |||
142 | if (w) | ||
143 | thread_pool_exit_worker(w); | ||
144 | dprintk("%s: deleted w: %p, thread_num: %d.\n", | ||
145 | __func__, w, p->thread_num); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Remove a worker with given ID. | ||
150 | */ | ||
151 | void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id) | ||
152 | { | ||
153 | struct thread_pool_worker *w; | ||
154 | int found = 0; | ||
155 | |||
156 | mutex_lock(&p->thread_lock); | ||
157 | list_for_each_entry(w, &p->ready_list, worker_entry) { | ||
158 | if (w->id == id) { | ||
159 | found = 1; | ||
160 | p->thread_num--; | ||
161 | list_del(&w->worker_entry); | ||
162 | break; | ||
163 | } | ||
164 | } | ||
165 | |||
166 | if (!found) { | ||
167 | list_for_each_entry(w, &p->active_list, worker_entry) { | ||
168 | if (w->id == id) { | ||
169 | w->need_exit = 1; | ||
170 | break; | ||
171 | } | ||
172 | } | ||
173 | } | ||
174 | mutex_unlock(&p->thread_lock); | ||
175 | |||
176 | if (found) | ||
177 | thread_pool_exit_worker(w); | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * Add new worker thread with given parameters. | ||
182 | * If initialization callback fails, return error. | ||
183 | */ | ||
184 | int thread_pool_add_worker(struct thread_pool *p, | ||
185 | char *name, | ||
186 | unsigned int id, | ||
187 | void *(*init)(void *private), | ||
188 | void (*cleanup)(void *private), | ||
189 | void *private) | ||
190 | { | ||
191 | struct thread_pool_worker *w; | ||
192 | int err = -ENOMEM; | ||
193 | |||
194 | w = kzalloc(sizeof(struct thread_pool_worker), GFP_KERNEL); | ||
195 | if (!w) | ||
196 | goto err_out_exit; | ||
197 | |||
198 | w->pool = p; | ||
199 | init_waitqueue_head(&w->wait); | ||
200 | w->cleanup = cleanup; | ||
201 | w->id = id; | ||
202 | |||
203 | w->thread = kthread_run(thread_pool_worker_func, w, "%s", name); | ||
204 | if (IS_ERR(w->thread)) { | ||
205 | err = PTR_ERR(w->thread); | ||
206 | goto err_out_free; | ||
207 | } | ||
208 | |||
209 | w->private = init(private); | ||
210 | if (IS_ERR(w->private)) { | ||
211 | err = PTR_ERR(w->private); | ||
212 | goto err_out_stop_thread; | ||
213 | } | ||
214 | |||
215 | mutex_lock(&p->thread_lock); | ||
216 | list_add_tail(&w->worker_entry, &p->ready_list); | ||
217 | p->thread_num++; | ||
218 | mutex_unlock(&p->thread_lock); | ||
219 | |||
220 | return 0; | ||
221 | |||
222 | err_out_stop_thread: | ||
223 | kthread_stop(w->thread); | ||
224 | err_out_free: | ||
225 | kfree(w); | ||
226 | err_out_exit: | ||
227 | return err; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * Destroy the whole pool. | ||
232 | */ | ||
233 | void thread_pool_destroy(struct thread_pool *p) | ||
234 | { | ||
235 | while (p->thread_num) { | ||
236 | dprintk("%s: num: %d.\n", __func__, p->thread_num); | ||
237 | thread_pool_del_worker(p); | ||
238 | } | ||
239 | |||
240 | kfree(p); | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Create a pool with the given number of threads. | ||
245 | * They will have sequential IDs starting from zero. | ||
246 | */ | ||
247 | struct thread_pool *thread_pool_create(int num, char *name, | ||
248 | void *(*init)(void *private), | ||
249 | void (*cleanup)(void *private), | ||
250 | void *private) | ||
251 | { | ||
252 | struct thread_pool_worker *w, *tmp; | ||
253 | struct thread_pool *p; | ||
254 | int err = -ENOMEM; | ||
255 | int i; | ||
256 | |||
257 | p = kzalloc(sizeof(struct thread_pool), GFP_KERNEL); | ||
258 | if (!p) | ||
259 | goto err_out_exit; | ||
260 | |||
261 | init_waitqueue_head(&p->wait); | ||
262 | mutex_init(&p->thread_lock); | ||
263 | INIT_LIST_HEAD(&p->ready_list); | ||
264 | INIT_LIST_HEAD(&p->active_list); | ||
265 | p->thread_num = 0; | ||
266 | |||
267 | for (i = 0; i < num; ++i) { | ||
268 | err = thread_pool_add_worker(p, name, i, init, | ||
269 | cleanup, private); | ||
270 | if (err) | ||
271 | goto err_out_free_all; | ||
272 | } | ||
273 | |||
274 | return p; | ||
275 | |||
276 | err_out_free_all: | ||
277 | list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) { | ||
278 | list_del(&w->worker_entry); | ||
279 | thread_pool_exit_worker(w); | ||
280 | } | ||
281 | kfree(p); | ||
282 | err_out_exit: | ||
283 | return ERR_PTR(err); | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * Schedule execution of the action on a specific thread: the | ||
288 | * provided ID pointer has to match that worker's previously | ||
289 | * stored private data. | ||
290 | */ | ||
291 | int thread_pool_schedule_private(struct thread_pool *p, | ||
292 | int (*setup)(void *private, void *data), | ||
293 | int (*action)(void *private, void *data), | ||
294 | void *data, long timeout, void *id) | ||
295 | { | ||
296 | struct thread_pool_worker *w, *tmp, *worker = NULL; | ||
297 | int err = 0; | ||
298 | |||
299 | while (!worker && !err) { | ||
300 | timeout = wait_event_interruptible_timeout(p->wait, | ||
301 | !list_empty(&p->ready_list), | ||
302 | timeout); | ||
303 | |||
304 | if (!timeout) { | ||
305 | err = -ETIMEDOUT; | ||
306 | break; | ||
307 | } | ||
308 | |||
309 | worker = NULL; | ||
310 | mutex_lock(&p->thread_lock); | ||
311 | list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) { | ||
312 | if (id && id != w->private) | ||
313 | continue; | ||
314 | |||
315 | worker = w; | ||
316 | |||
317 | list_move_tail(&w->worker_entry, &p->active_list); | ||
318 | |||
319 | err = setup(w->private, data); | ||
320 | if (!err) { | ||
321 | w->schedule_data = data; | ||
322 | w->action = action; | ||
323 | w->has_data = 1; | ||
324 | wake_up(&w->wait); | ||
325 | } else { | ||
326 | list_move_tail(&w->worker_entry, | ||
327 | &p->ready_list); | ||
328 | } | ||
329 | |||
330 | break; | ||
331 | } | ||
332 | mutex_unlock(&p->thread_lock); | ||
333 | } | ||
334 | |||
335 | return err; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Schedule execution on arbitrary thread from the pool. | ||
340 | */ | ||
341 | int thread_pool_schedule(struct thread_pool *p, | ||
342 | int (*setup)(void *private, void *data), | ||
343 | int (*action)(void *private, void *data), | ||
344 | void *data, long timeout) | ||
345 | { | ||
346 | return thread_pool_schedule_private(p, setup, | ||
347 | action, data, timeout, NULL); | ||
348 | } | ||
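
A hedged usage sketch, not part of the driver: it only illustrates the callback contract of this pool (per-worker init/cleanup, per-job setup/action). The "demo" names and the MAX_SCHEDULE_TIMEOUT choice are illustrative assumptions; DST's real callers live elsewhere in the removed code (e.g. crypto.c).

static void *demo_init(void *private)
{
        /* Per-worker private data; returning an ERR_PTR() aborts worker creation. */
        return private;
}

static void demo_cleanup(void *private)
{
        /* Undo whatever demo_init() set up; nothing to do in this sketch. */
}

static int demo_setup(void *private, void *data)
{
        /* Runs under the pool lock before the chosen worker is woken. */
        return 0;
}

static int demo_action(void *private, void *data)
{
        /* Runs in the worker thread; the worker rejoins the ready list afterwards. */
        return 0;
}

static int demo_run(void)
{
        struct thread_pool *p;
        int err;

        p = thread_pool_create(2, "demo", demo_init, demo_cleanup, NULL);
        if (IS_ERR(p))
                return PTR_ERR(p);

        err = thread_pool_schedule(p, demo_setup, demo_action,
                        NULL, MAX_SCHEDULE_TIMEOUT);

        thread_pool_destroy(p);
        return err;
}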
diff --git a/drivers/staging/dst/trans.c b/drivers/staging/dst/trans.c deleted file mode 100644 index 1c36a6bc31d5..000000000000 --- a/drivers/staging/dst/trans.c +++ /dev/null | |||
@@ -1,337 +0,0 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/bio.h> | ||
17 | #include <linux/dst.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/mempool.h> | ||
20 | |||
21 | /* | ||
22 | * Transaction memory pool size. | ||
23 | */ | ||
24 | static int dst_mempool_num = 32; | ||
25 | module_param(dst_mempool_num, int, 0644); | ||
26 | |||
27 | /* | ||
28 | * Transaction tree management. | ||
29 | */ | ||
30 | static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new) | ||
31 | { | ||
32 | if (gen < new) | ||
33 | return 1; | ||
34 | if (gen > new) | ||
35 | return -1; | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen) | ||
40 | { | ||
41 | struct rb_root *root = &node->trans_root; | ||
42 | struct rb_node *n = root->rb_node; | ||
43 | struct dst_trans *t, *ret = NULL; | ||
44 | int cmp; | ||
45 | |||
46 | while (n) { | ||
47 | t = rb_entry(n, struct dst_trans, trans_entry); | ||
48 | |||
49 | cmp = dst_trans_cmp(t->gen, gen); | ||
50 | if (cmp < 0) | ||
51 | n = n->rb_left; | ||
52 | else if (cmp > 0) | ||
53 | n = n->rb_right; | ||
54 | else { | ||
55 | ret = t; | ||
56 | break; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | dprintk("%s: %s transaction: id: %llu.\n", __func__, | ||
61 | (ret) ? "found" : "not found", gen); | ||
62 | |||
63 | return ret; | ||
64 | } | ||
65 | |||
66 | static int dst_trans_insert(struct dst_trans *new) | ||
67 | { | ||
68 | struct rb_root *root = &new->n->trans_root; | ||
69 | struct rb_node **n = &root->rb_node, *parent = NULL; | ||
70 | struct dst_trans *ret = NULL, *t; | ||
71 | int cmp; | ||
72 | |||
73 | while (*n) { | ||
74 | parent = *n; | ||
75 | |||
76 | t = rb_entry(parent, struct dst_trans, trans_entry); | ||
77 | |||
78 | cmp = dst_trans_cmp(t->gen, new->gen); | ||
79 | if (cmp < 0) | ||
80 | n = &parent->rb_left; | ||
81 | else if (cmp > 0) | ||
82 | n = &parent->rb_right; | ||
83 | else { | ||
84 | ret = t; | ||
85 | break; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | new->send_time = jiffies; | ||
90 | if (ret) { | ||
91 | printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, " | ||
92 | "send_time: %lu, new: gen: %llu, bio: %llu/%u, " | ||
93 | "send_time: %lu.\n", __func__, | ||
94 | ret->gen, (u64)ret->bio->bi_sector, | ||
95 | ret->bio->bi_size, ret->send_time, | ||
96 | new->gen, (u64)new->bio->bi_sector, | ||
97 | new->bio->bi_size, new->send_time); | ||
98 | return -EEXIST; | ||
99 | } | ||
100 | |||
101 | rb_link_node(&new->trans_entry, parent, n); | ||
102 | rb_insert_color(&new->trans_entry, root); | ||
103 | |||
104 | dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n", | ||
105 | __func__, new->gen, (u64)new->bio->bi_sector, | ||
106 | new->bio->bi_size, new->send_time); | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | int dst_trans_remove_nolock(struct dst_trans *t) | ||
112 | { | ||
113 | struct dst_node *n = t->n; | ||
114 | |||
115 | if (t->trans_entry.rb_parent_color) { | ||
116 | rb_erase(&t->trans_entry, &n->trans_root); | ||
117 | t->trans_entry.rb_parent_color = 0; | ||
118 | } | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | int dst_trans_remove(struct dst_trans *t) | ||
123 | { | ||
124 | int ret; | ||
125 | struct dst_node *n = t->n; | ||
126 | |||
127 | mutex_lock(&n->trans_lock); | ||
128 | ret = dst_trans_remove_nolock(t); | ||
129 | mutex_unlock(&n->trans_lock); | ||
130 | |||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * When the transaction is completed and there are no more users, | ||
136 | * we complete the appropriate block IO request with the given error status. | ||
137 | */ | ||
138 | void dst_trans_put(struct dst_trans *t) | ||
139 | { | ||
140 | if (atomic_dec_and_test(&t->refcnt)) { | ||
141 | struct bio *bio = t->bio; | ||
142 | |||
143 | dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n", | ||
144 | __func__, t, t->gen, bio); | ||
145 | |||
146 | bio_endio(bio, t->error); | ||
147 | bio_put(bio); | ||
148 | |||
149 | dst_node_put(t->n); | ||
150 | mempool_free(t, t->n->trans_pool); | ||
151 | } | ||
152 | } | ||
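
A hedged sketch of the acknowledgement side (the real response handling is outside this file): an IO response carries the generation number, which is looked up in the tree; unlinking the transaction and dropping its base reference completes the original bio. dst_sketch_complete() is a hypothetical helper, not a DST function.

static void dst_sketch_complete(struct dst_node *n, dst_gen_t gen, int error)
{
        struct dst_trans *t;

        mutex_lock(&n->trans_lock);
        t = dst_trans_search(n, gen);
        if (t) {
                t->error = error;
                dst_trans_remove_nolock(t);
        }
        mutex_unlock(&n->trans_lock);

        /* Dropping the creation reference ends the bio via dst_trans_put(). */
        if (t)
                dst_trans_put(t);
}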
153 | |||
154 | /* | ||
155 | * Process the given block IO request: allocate a transaction, insert it into the | ||
156 | * tree and then send it or schedule crypto processing. | ||
157 | */ | ||
158 | int dst_process_bio(struct dst_node *n, struct bio *bio) | ||
159 | { | ||
160 | struct dst_trans *t; | ||
161 | int err = -ENOMEM; | ||
162 | |||
163 | t = mempool_alloc(n->trans_pool, GFP_NOFS); | ||
164 | if (!t) | ||
165 | goto err_out_exit; | ||
166 | |||
167 | t->n = dst_node_get(n); | ||
168 | t->bio = bio; | ||
169 | t->error = 0; | ||
170 | t->retries = 0; | ||
171 | atomic_set(&t->refcnt, 1); | ||
172 | t->gen = atomic_long_inc_return(&n->gen); | ||
173 | |||
174 | t->enc = bio_data_dir(bio); | ||
175 | dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen); | ||
176 | |||
177 | mutex_lock(&n->trans_lock); | ||
178 | err = dst_trans_insert(t); | ||
179 | mutex_unlock(&n->trans_lock); | ||
180 | if (err) | ||
181 | goto err_out_free; | ||
182 | |||
183 | dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n", | ||
184 | __func__, t->gen, (u64)bio->bi_sector, | ||
185 | bio->bi_size, t->enc, dst_need_crypto(n)); | ||
186 | |||
187 | if (dst_need_crypto(n) && t->enc) | ||
188 | dst_trans_crypto(t); | ||
189 | else | ||
190 | dst_trans_send(t); | ||
191 | |||
192 | return 0; | ||
193 | |||
194 | err_out_free: | ||
195 | dst_node_put(n); | ||
196 | mempool_free(t, n->trans_pool); | ||
197 | err_out_exit: | ||
198 | bio_endio(bio, err); | ||
199 | bio_put(bio); | ||
200 | return err; | ||
201 | } | ||
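
A minimal sketch of how the block layer hands bios to this function; the driver's real ->make_request_fn lives in dcore.c (not shown) and this function name is hypothetical. The bio is pinned with bio_get() because both the completion path (dst_trans_put) and the error path above call bio_put().

static int dst_sketch_make_request(struct request_queue *q, struct bio *bio)
{
        struct dst_node *n = q->queuedata;

        bio_get(bio);
        return dst_process_bio(n, bio);
}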
202 | |||
203 | /* | ||
204 | * Scan for timed-out/stale transactions. | ||
205 | * Each transaction is resent multiple times before being completed with an error. | ||
206 | */ | ||
207 | static void dst_trans_scan(struct work_struct *work) | ||
208 | { | ||
209 | struct dst_node *n = container_of(work, struct dst_node, | ||
210 | trans_work.work); | ||
211 | struct rb_node *rb_node; | ||
212 | struct dst_trans *t; | ||
213 | unsigned long timeout = n->trans_scan_timeout; | ||
214 | int num = 10 * n->trans_max_retries; | ||
215 | |||
216 | mutex_lock(&n->trans_lock); | ||
217 | |||
218 | for (rb_node = rb_first(&n->trans_root); rb_node; ) { | ||
219 | t = rb_entry(rb_node, struct dst_trans, trans_entry); | ||
220 | |||
221 | if (timeout && time_after(t->send_time + timeout, jiffies) | ||
222 | && t->retries == 0) | ||
223 | break; | ||
224 | #if 0 | ||
225 | dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n", | ||
226 | __func__, t, t->gen, n->name, | ||
227 | t->retries, n->trans_max_retries); | ||
228 | #endif | ||
229 | if (--num == 0) | ||
230 | break; | ||
231 | |||
232 | dst_trans_get(t); | ||
233 | |||
234 | rb_node = rb_next(rb_node); | ||
235 | |||
236 | if (timeout && (++t->retries < n->trans_max_retries)) { | ||
237 | dst_trans_send(t); | ||
238 | } else { | ||
239 | t->error = -ETIMEDOUT; | ||
240 | dst_trans_remove_nolock(t); | ||
241 | dst_trans_put(t); | ||
242 | } | ||
243 | |||
244 | dst_trans_put(t); | ||
245 | } | ||
246 | |||
247 | mutex_unlock(&n->trans_lock); | ||
248 | |||
249 | /* | ||
250 | * If no timeout is specified, the node is in the middle of the exit | ||
251 | * process, so there is no need to reschedule the scanning work. | ||
252 | */ | ||
253 | if (timeout) { | ||
254 | if (!num) | ||
255 | timeout = HZ; | ||
256 | schedule_delayed_work(&n->trans_work, timeout); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Flush all transactions and mark them as timed out. | ||
262 | * Destroy transaction pools. | ||
263 | */ | ||
264 | void dst_node_trans_exit(struct dst_node *n) | ||
265 | { | ||
266 | struct dst_trans *t; | ||
267 | struct rb_node *rb_node; | ||
268 | |||
269 | if (!n->trans_cache) | ||
270 | return; | ||
271 | |||
272 | dprintk("%s: n: %p, cancelling the work.\n", __func__, n); | ||
273 | cancel_delayed_work_sync(&n->trans_work); | ||
274 | flush_scheduled_work(); | ||
275 | dprintk("%s: n: %p, work has been cancelled.\n", __func__, n); | ||
276 | |||
277 | for (rb_node = rb_first(&n->trans_root); rb_node; ) { | ||
278 | t = rb_entry(rb_node, struct dst_trans, trans_entry); | ||
279 | |||
280 | dprintk("%s: t: %p, gen: %llu, n: %s.\n", | ||
281 | __func__, t, t->gen, n->name); | ||
282 | |||
283 | rb_node = rb_next(rb_node); | ||
284 | |||
285 | t->error = -ETIMEDOUT; | ||
286 | dst_trans_remove_nolock(t); | ||
287 | dst_trans_put(t); | ||
288 | } | ||
289 | |||
290 | mempool_destroy(n->trans_pool); | ||
291 | kmem_cache_destroy(n->trans_cache); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Initialize transaction storage for the given node. | ||
296 | * A transaction stores not only control information, | ||
297 | * but also the network command and crypto data (if needed) | ||
298 | * to reduce the number of allocations. Thus the transaction | ||
299 | * size differs from node to node. | ||
300 | */ | ||
301 | int dst_node_trans_init(struct dst_node *n, unsigned int size) | ||
302 | { | ||
303 | /* | ||
304 | * We need this, since a node with the given name can be dropped from the | ||
305 | * hash table but still be alive, so a subsequent creation of a node | ||
306 | * with the same name may collide with the existing cache name. | ||
307 | */ | ||
308 | |||
309 | snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n); | ||
310 | |||
311 | n->trans_cache = kmem_cache_create(n->cache_name, | ||
312 | size + n->crypto.crypto_attached_size, | ||
313 | 0, 0, NULL); | ||
314 | if (!n->trans_cache) | ||
315 | goto err_out_exit; | ||
316 | |||
317 | n->trans_pool = mempool_create_slab_pool(dst_mempool_num, | ||
318 | n->trans_cache); | ||
319 | if (!n->trans_pool) | ||
320 | goto err_out_cache_destroy; | ||
321 | |||
322 | mutex_init(&n->trans_lock); | ||
323 | n->trans_root = RB_ROOT; | ||
324 | |||
325 | INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan); | ||
326 | schedule_delayed_work(&n->trans_work, n->trans_scan_timeout); | ||
327 | |||
328 | dprintk("%s: n: %p, size: %u, crypto: %u.\n", | ||
329 | __func__, n, size, n->crypto.crypto_attached_size); | ||
330 | |||
331 | return 0; | ||
332 | |||
333 | err_out_cache_destroy: | ||
334 | kmem_cache_destroy(n->trans_cache); | ||
335 | err_out_exit: | ||
336 | return -ENOMEM; | ||
337 | } | ||
diff --git a/include/linux/dst.h b/include/linux/dst.h deleted file mode 100644 index e26fed84b1aa..000000000000 --- a/include/linux/dst.h +++ /dev/null | |||
@@ -1,587 +0,0 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef __DST_H | ||
17 | #define __DST_H | ||
18 | |||
19 | #include <linux/types.h> | ||
20 | #include <linux/connector.h> | ||
21 | |||
22 | #define DST_NAMELEN 32 | ||
23 | #define DST_NAME "dst" | ||
24 | |||
25 | enum { | ||
26 | /* Remove node with given id from storage */ | ||
27 | DST_DEL_NODE = 0, | ||
28 | /* Add remote node with given id to the storage */ | ||
29 | DST_ADD_REMOTE, | ||
30 | /* Add local node with given id to the storage to be exported and used by remote peers */ | ||
31 | DST_ADD_EXPORT, | ||
32 | /* Crypto initialization command (hash/cipher used to protect the connection) */ | ||
33 | DST_CRYPTO, | ||
34 | /* Security attributes for given connection (permissions for example) */ | ||
35 | DST_SECURITY, | ||
36 | /* Register given node in the block layer subsystem */ | ||
37 | DST_START, | ||
38 | DST_CMD_MAX | ||
39 | }; | ||
40 | |||
41 | struct dst_ctl | ||
42 | { | ||
43 | /* Storage name */ | ||
44 | char name[DST_NAMELEN]; | ||
45 | /* Command flags */ | ||
46 | __u32 flags; | ||
47 | /* Command itself (see above) */ | ||
48 | __u32 cmd; | ||
49 | /* Maximum number of pages per single request in this device */ | ||
50 | __u32 max_pages; | ||
51 | /* Stale/error transaction scanning timeout in milliseconds */ | ||
52 | __u32 trans_scan_timeout; | ||
53 | /* Maximum number of retry sends before completing transaction as broken */ | ||
54 | __u32 trans_max_retries; | ||
55 | /* Storage size */ | ||
56 | __u64 size; | ||
57 | }; | ||
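
A hedged illustration of how this control structure might be filled in by a configuration tool before being handed down through the connector; the name and numeric values are illustrative assumptions only.

static const struct dst_ctl example_ctl = {
        .name               = "dst-example",    /* hypothetical storage name */
        .cmd                = DST_ADD_REMOTE,
        .flags              = 0,
        .max_pages          = 32,               /* pages per request, illustrative */
        .trans_scan_timeout = 5000,             /* milliseconds */
        .trans_max_retries  = 10,
};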
58 | |||
59 | /* Reply command carries completion status */ | ||
60 | struct dst_ctl_ack | ||
61 | { | ||
62 | struct cn_msg msg; | ||
63 | int error; | ||
64 | int unused[3]; | ||
65 | }; | ||
66 | |||
67 | /* | ||
68 | * Unfortunately, the socket address structure is not exported to userspace | ||
69 | * and so is redefined here. | ||
70 | */ | ||
71 | #define SADDR_MAX_DATA 128 | ||
72 | |||
73 | struct saddr { | ||
74 | /* address family, AF_xxx */ | ||
75 | unsigned short sa_family; | ||
76 | /* 14 bytes of protocol address */ | ||
77 | char sa_data[SADDR_MAX_DATA]; | ||
78 | /* Number of bytes used in sa_data */ | ||
79 | unsigned short sa_data_len; | ||
80 | }; | ||
81 | |||
82 | /* Address structure */ | ||
83 | struct dst_network_ctl | ||
84 | { | ||
85 | /* Socket type: datagram, stream...*/ | ||
86 | unsigned int type; | ||
87 | /* Let me guess, is it a Jupiter diameter? */ | ||
88 | unsigned int proto; | ||
89 | /* Peer's address */ | ||
90 | struct saddr addr; | ||
91 | }; | ||
92 | |||
93 | struct dst_crypto_ctl | ||
94 | { | ||
95 | /* Cipher and hash names */ | ||
96 | char cipher_algo[DST_NAMELEN]; | ||
97 | char hash_algo[DST_NAMELEN]; | ||
98 | |||
99 | /* Key sizes. Either can be zero, e.g. for an unkeyed digest */ | ||
100 | unsigned int cipher_keysize, hash_keysize; | ||
101 | /* Size of the attached crypto data. Calculated by the DST itself. */ | ||
102 | unsigned int crypto_attached_size; | ||
103 | /* Number of threads to perform crypto operations */ | ||
104 | int thread_num; | ||
105 | }; | ||
106 | |||
107 | /* Export security attributes have these bits checked when a client connects */ | ||
108 | #define DST_PERM_READ (1<<0) | ||
109 | #define DST_PERM_WRITE (1<<1) | ||
110 | |||
111 | /* | ||
112 | * Right now it is a simple model, where each remote address | ||
113 | * is assigned a set of permissions it is allowed to perform. | ||
114 | * In the real world a block device does not know anything but | ||
115 | * reading and writing, so this should be more than enough. | ||
116 | */ | ||
117 | struct dst_secure_user | ||
118 | { | ||
119 | unsigned int permissions; | ||
120 | struct saddr addr; | ||
121 | }; | ||
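
A hedged sketch of how an exporting node could translate these bits into an IO-direction check; the driver's real check lives outside this header, and dst_io_permitted() is a hypothetical helper, not part of DST.

static inline int dst_io_permitted(unsigned int permissions, int write)
{
        unsigned int need = write ? DST_PERM_WRITE : DST_PERM_READ;

        return !!(permissions & need);
}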
122 | |||
123 | /* | ||
124 | * Export control command: the device to export and the network address on | ||
125 | * which to accept clients working with that device | ||
126 | */ | ||
127 | struct dst_export_ctl | ||
128 | { | ||
129 | char device[DST_NAMELEN]; | ||
130 | struct dst_network_ctl ctl; | ||
131 | }; | ||
132 | |||
133 | enum { | ||
134 | DST_CFG = 1, /* Request remote configuration */ | ||
135 | DST_IO, /* IO command */ | ||
136 | DST_IO_RESPONSE, /* IO response */ | ||
137 | DST_PING, /* Keepalive message */ | ||
138 | DST_NCMD_MAX, | ||
139 | }; | ||
140 | |||
141 | struct dst_cmd | ||
142 | { | ||
143 | /* Network command itself, see above */ | ||
144 | __u32 cmd; | ||
145 | /* | ||
146 | * Size of the attached data | ||
147 | * (in most cases; for a READ command it means how many bytes were requested) | ||
148 | */ | ||
149 | __u32 size; | ||
150 | /* Crypto size: number of attached bytes with digest/hmac */ | ||
151 | __u32 csize; | ||
152 | /* Here we can carry secret data */ | ||
153 | __u32 reserved; | ||
154 | /* Read/write bits, see how they are encoded in bio structure */ | ||
155 | __u64 rw; | ||
156 | /* BIO flags */ | ||
157 | __u64 flags; | ||
158 | /* Unique command id (like transaction ID) */ | ||
159 | __u64 id; | ||
160 | /* Sector to start IO from */ | ||
161 | __u64 sector; | ||
162 | /* Hash data is placed after this header */ | ||
163 | __u8 hash[0]; | ||
164 | }; | ||
165 | |||
166 | /* | ||
167 | * Convert command to/from network byte order. | ||
168 | * We do not use hton*() functions, since there is | ||
169 | * no 64-bit implementation. | ||
170 | */ | ||
171 | static inline void dst_convert_cmd(struct dst_cmd *c) | ||
172 | { | ||
173 | c->cmd = __cpu_to_be32(c->cmd); | ||
174 | c->csize = __cpu_to_be32(c->csize); | ||
175 | c->size = __cpu_to_be32(c->size); | ||
176 | c->sector = __cpu_to_be64(c->sector); | ||
177 | c->id = __cpu_to_be64(c->id); | ||
178 | c->flags = __cpu_to_be64(c->flags); | ||
179 | c->rw = __cpu_to_be64(c->rw); | ||
180 | } | ||
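
Since the conversion above is a plain byte swap on little-endian hosts and a no-op on big-endian ones, the same helper restores host byte order on the receive side. The sketch below is hedged: the real receive logic is outside this header, dst_sketch_recv_cmd() is hypothetical, and it relies on the struct dst_state and dst_data_recv() declarations further below.

static int dst_sketch_recv_cmd(struct dst_state *st)
{
        int err;

        err = dst_data_recv(st, &st->cmd, sizeof(struct dst_cmd));
        if (err)
                return err;

        /* Same helper as on the send side: brings the command back to host order. */
        dst_convert_cmd(&st->cmd);
        return 0;
}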
181 | |||
182 | /* Transaction id */ | ||
183 | typedef __u64 dst_gen_t; | ||
184 | |||
185 | #ifdef __KERNEL__ | ||
186 | |||
187 | #include <linux/blkdev.h> | ||
188 | #include <linux/bio.h> | ||
189 | #include <linux/device.h> | ||
190 | #include <linux/mempool.h> | ||
191 | #include <linux/net.h> | ||
192 | #include <linux/poll.h> | ||
193 | #include <linux/rbtree.h> | ||
194 | |||
195 | #ifdef CONFIG_DST_DEBUG | ||
196 | #define dprintk(f, a...) printk(KERN_NOTICE f, ##a) | ||
197 | #else | ||
198 | static inline void __attribute__ ((format (printf, 1, 2))) | ||
199 | dprintk(const char *fmt, ...) {} | ||
200 | #endif | ||
201 | |||
202 | struct dst_node; | ||
203 | |||
204 | struct dst_trans | ||
205 | { | ||
206 | /* DST node we are working with */ | ||
207 | struct dst_node *n; | ||
208 | |||
209 | /* Entry inside transaction tree */ | ||
210 | struct rb_node trans_entry; | ||
211 | |||
212 | /* Merlin kills this transaction when this memory cell equals zero */ | ||
213 | atomic_t refcnt; | ||
214 | |||
215 | /* How this transaction should be processed by crypto engine */ | ||
216 | short enc; | ||
217 | /* How many times this transaction was resent */ | ||
218 | short retries; | ||
219 | /* Completion status */ | ||
220 | int error; | ||
221 | |||
222 | /* When did we send it to the remote peer */ | ||
223 | long send_time; | ||
224 | |||
225 | /* My name is... | ||
226 | * Well, computers do not speak; they have a unique id instead */ | ||
227 | dst_gen_t gen; | ||
228 | |||
229 | /* Block IO we are working with */ | ||
230 | struct bio *bio; | ||
231 | |||
232 | /* Network command for above block IO request */ | ||
233 | struct dst_cmd cmd; | ||
234 | }; | ||
235 | |||
236 | struct dst_crypto_engine | ||
237 | { | ||
238 | /* What should we do with all block requests */ | ||
239 | struct crypto_hash *hash; | ||
240 | struct crypto_ablkcipher *cipher; | ||
241 | |||
242 | /* Pool of pages that data is encrypted into before sending */ | ||
243 | int page_num; | ||
244 | struct page **pages; | ||
245 | |||
246 | /* What to do with current request */ | ||
247 | int enc; | ||
248 | /* Who we are and where do we go */ | ||
249 | struct scatterlist *src, *dst; | ||
250 | |||
251 | /* Maximum timeout waiting for encryption to be completed */ | ||
252 | long timeout; | ||
253 | /* IV is a 64-bit sequential counter */ | ||
254 | u64 iv; | ||
255 | |||
256 | /* Secret data */ | ||
257 | void *private; | ||
258 | |||
259 | /* Cached temporary data lives here */ | ||
260 | int size; | ||
261 | void *data; | ||
262 | }; | ||
263 | |||
264 | struct dst_state | ||
265 | { | ||
266 | /* The main state protection */ | ||
267 | struct mutex state_lock; | ||
268 | |||
269 | /* Polling machinery for sockets */ | ||
270 | wait_queue_t wait; | ||
271 | wait_queue_head_t *whead; | ||
272 | /* Most events are waited for here */ | ||
273 | wait_queue_head_t thread_wait; | ||
274 | |||
275 | /* Who owns this? */ | ||
276 | struct dst_node *node; | ||
277 | |||
278 | /* Network address for this state */ | ||
279 | struct dst_network_ctl ctl; | ||
280 | |||
281 | /* Permissions to work with: read-only or rw connection */ | ||
282 | u32 permissions; | ||
283 | |||
284 | /* Called when we need to clean private data */ | ||
285 | void (* cleanup)(struct dst_state *st); | ||
286 | |||
287 | /* Used by the server: BIO completion queues BIOs here */ | ||
288 | struct list_head request_list; | ||
289 | spinlock_t request_lock; | ||
290 | |||
291 | /* Guess what? No, it is not number of planets */ | ||
292 | atomic_t refcnt; | ||
293 | |||
294 | /* This flag is set when the connection should be dropped */ | ||
295 | int need_exit; | ||
296 | |||
297 | /* | ||
298 | * Socket to work with. The second pointer is used for a | ||
299 | * lockless check of whether the socket was changed before performing | ||
300 | * the next action (like working with a cached polling result) | ||
301 | */ | ||
302 | struct socket *socket, *read_socket; | ||
303 | |||
304 | /* Cached preallocated data */ | ||
305 | void *data; | ||
306 | unsigned int size; | ||
307 | |||
308 | /* Currently processed command */ | ||
309 | struct dst_cmd cmd; | ||
310 | }; | ||
311 | |||
312 | struct dst_info | ||
313 | { | ||
314 | /* Device size */ | ||
315 | u64 size; | ||
316 | |||
317 | /* Local device name for export devices */ | ||
318 | char local[DST_NAMELEN]; | ||
319 | |||
320 | /* Network setup */ | ||
321 | struct dst_network_ctl net; | ||
322 | |||
323 | /* Sysfs bits use this */ | ||
324 | struct device device; | ||
325 | }; | ||
326 | |||
327 | struct dst_node | ||
328 | { | ||
329 | struct list_head node_entry; | ||
330 | |||
331 | /* Hi, my name is stored here */ | ||
332 | char name[DST_NAMELEN]; | ||
333 | /* My cache name is stored here */ | ||
334 | char cache_name[DST_NAMELEN]; | ||
335 | |||
336 | /* Block device attached to given node. | ||
337 | * Only valid for exporting nodes */ | ||
338 | struct block_device *bdev; | ||
339 | /* Network state machine for given peer */ | ||
340 | struct dst_state *state; | ||
341 | |||
342 | /* Block IO machinery */ | ||
343 | struct request_queue *queue; | ||
344 | struct gendisk *disk; | ||
345 | |||
346 | /* Number of threads in processing pool */ | ||
347 | int thread_num; | ||
348 | /* Maximum number of pages in single IO */ | ||
349 | int max_pages; | ||
350 | |||
351 | /* I'm that big in bytes */ | ||
352 | loff_t size; | ||
353 | |||
354 | /* Node information exported to userspace */ | ||
355 | struct dst_info *info; | ||
356 | |||
357 | /* | ||
358 | * Security attribute list. | ||
359 | * Used only by exporting node currently. | ||
360 | */ | ||
361 | struct list_head security_list; | ||
362 | struct mutex security_lock; | ||
363 | |||
364 | /* | ||
365 | * When this underflows below zero, the universe collapses. | ||
366 | * But this will not happen, since the node will be freed | ||
367 | * when the reference counter reaches zero. | ||
368 | */ | ||
369 | atomic_t refcnt; | ||
370 | |||
371 | /* How precisely should I be started? */ | ||
372 | int (*start)(struct dst_node *); | ||
373 | |||
374 | /* Crypto capabilities */ | ||
375 | struct dst_crypto_ctl crypto; | ||
376 | u8 *hash_key; | ||
377 | u8 *cipher_key; | ||
378 | |||
379 | /* Pool of processing threads */ | ||
380 | struct thread_pool *pool; | ||
381 | |||
382 | /* Transaction IDs live here */ | ||
383 | atomic_long_t gen; | ||
384 | |||
385 | /* | ||
386 | * How frequently and how many times the transaction | ||
387 | * tree should be scanned to drop stale objects. | ||
388 | */ | ||
389 | long trans_scan_timeout; | ||
390 | int trans_max_retries; | ||
391 | |||
392 | /* Small gnomes live here */ | ||
393 | struct rb_root trans_root; | ||
394 | struct mutex trans_lock; | ||
395 | |||
396 | /* | ||
397 | * Transaction cache/memory pool. | ||
398 | * It is big enough to contain not only the transaction | ||
399 | * itself, but also additional crypto data (digest/hmac). | ||
400 | */ | ||
401 | struct kmem_cache *trans_cache; | ||
402 | mempool_t *trans_pool; | ||
403 | |||
404 | /* This entity scans transaction tree */ | ||
405 | struct delayed_work trans_work; | ||
406 | |||
407 | wait_queue_head_t wait; | ||
408 | }; | ||
409 | |||
410 | /* Kernel representation of the security attribute */ | ||
411 | struct dst_secure | ||
412 | { | ||
413 | struct list_head sec_entry; | ||
414 | struct dst_secure_user sec; | ||
415 | }; | ||
416 | |||
417 | int dst_process_bio(struct dst_node *n, struct bio *bio); | ||
418 | |||
419 | int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); | ||
420 | int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); | ||
421 | |||
422 | static inline struct dst_state *dst_state_get(struct dst_state *st) | ||
423 | { | ||
424 | BUG_ON(atomic_read(&st->refcnt) == 0); | ||
425 | atomic_inc(&st->refcnt); | ||
426 | return st; | ||
427 | } | ||
428 | |||
429 | void dst_state_put(struct dst_state *st); | ||
430 | |||
431 | struct dst_state *dst_state_alloc(struct dst_node *n); | ||
432 | int dst_state_socket_create(struct dst_state *st); | ||
433 | void dst_state_socket_release(struct dst_state *st); | ||
434 | |||
435 | void dst_state_exit_connected(struct dst_state *st); | ||
436 | |||
437 | int dst_state_schedule_receiver(struct dst_state *st); | ||
438 | |||
439 | void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); | ||
440 | |||
441 | static inline void dst_state_lock(struct dst_state *st) | ||
442 | { | ||
443 | mutex_lock(&st->state_lock); | ||
444 | } | ||
445 | |||
446 | static inline void dst_state_unlock(struct dst_state *st) | ||
447 | { | ||
448 | mutex_unlock(&st->state_lock); | ||
449 | } | ||
450 | |||
451 | void dst_poll_exit(struct dst_state *st); | ||
452 | int dst_poll_init(struct dst_state *st); | ||
453 | |||
454 | static inline unsigned int dst_state_poll(struct dst_state *st) | ||
455 | { | ||
456 | unsigned int revents = POLLHUP | POLLERR; | ||
457 | |||
458 | dst_state_lock(st); | ||
459 | if (st->socket) | ||
460 | revents = st->socket->ops->poll(NULL, st->socket, NULL); | ||
461 | dst_state_unlock(st); | ||
462 | |||
463 | return revents; | ||
464 | } | ||
465 | |||
466 | static inline int dst_thread_setup(void *private, void *data) | ||
467 | { | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | void dst_node_put(struct dst_node *n); | ||
472 | |||
473 | static inline struct dst_node *dst_node_get(struct dst_node *n) | ||
474 | { | ||
475 | atomic_inc(&n->refcnt); | ||
476 | return n; | ||
477 | } | ||
478 | |||
479 | int dst_data_recv(struct dst_state *st, void *data, unsigned int size); | ||
480 | int dst_recv_cdata(struct dst_state *st, void *cdata); | ||
481 | int dst_data_send_header(struct socket *sock, | ||
482 | void *data, unsigned int size, int more); | ||
483 | |||
484 | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); | ||
485 | |||
486 | int dst_process_io(struct dst_state *st); | ||
487 | int dst_export_crypto(struct dst_node *n, struct bio *bio); | ||
488 | int dst_export_send_bio(struct bio *bio); | ||
489 | int dst_start_export(struct dst_node *n); | ||
490 | |||
491 | int __init dst_export_init(void); | ||
492 | void dst_export_exit(void); | ||
493 | |||
494 | /* Private structure for export block IO requests */ | ||
495 | struct dst_export_priv | ||
496 | { | ||
497 | struct list_head request_entry; | ||
498 | struct dst_state *state; | ||
499 | struct bio *bio; | ||
500 | struct dst_cmd cmd; | ||
501 | }; | ||
502 | |||
503 | static inline void dst_trans_get(struct dst_trans *t) | ||
504 | { | ||
505 | atomic_inc(&t->refcnt); | ||
506 | } | ||
507 | |||
508 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); | ||
509 | int dst_trans_remove(struct dst_trans *t); | ||
510 | int dst_trans_remove_nolock(struct dst_trans *t); | ||
511 | void dst_trans_put(struct dst_trans *t); | ||
512 | |||
513 | /* | ||
514 | * Convert bio into network command. | ||
515 | */ | ||
516 | static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, | ||
517 | u32 command, u64 id) | ||
518 | { | ||
519 | cmd->cmd = command; | ||
520 | cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; | ||
521 | cmd->rw = bio->bi_rw; | ||
522 | cmd->size = bio->bi_size; | ||
523 | cmd->csize = 0; | ||
524 | cmd->id = id; | ||
525 | cmd->sector = bio->bi_sector; | ||
526 | }; | ||
527 | |||
528 | int dst_trans_send(struct dst_trans *t); | ||
529 | int dst_trans_crypto(struct dst_trans *t); | ||
530 | |||
531 | int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); | ||
532 | void dst_node_crypto_exit(struct dst_node *n); | ||
533 | |||
534 | static inline int dst_need_crypto(struct dst_node *n) | ||
535 | { | ||
536 | struct dst_crypto_ctl *c = &n->crypto; | ||
537 | /* | ||
538 | * Logical OR (||) would be the natural choice here, but the bitwise | ||
539 | * one avoids a branch and produces better code, so it is used instead. | ||
540 | */ | ||
541 | return (c->hash_algo[0] | c->cipher_algo[0]); | ||
542 | } | ||
543 | |||
544 | int dst_node_trans_init(struct dst_node *n, unsigned int size); | ||
545 | void dst_node_trans_exit(struct dst_node *n); | ||
546 | |||
547 | /* | ||
548 | * Pool of threads. | ||
549 | * The ready list contains threads currently free to be used, while the | ||
550 | * active one contains threads with work scheduled on them. | ||
551 | * Callers can wait on the given queue until a thread becomes ready. | ||
552 | */ | ||
553 | struct thread_pool | ||
554 | { | ||
555 | int thread_num; | ||
556 | struct mutex thread_lock; | ||
557 | struct list_head ready_list, active_list; | ||
558 | |||
559 | wait_queue_head_t wait; | ||
560 | }; | ||
561 | |||
562 | void thread_pool_del_worker(struct thread_pool *p); | ||
563 | void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); | ||
564 | int thread_pool_add_worker(struct thread_pool *p, | ||
565 | char *name, | ||
566 | unsigned int id, | ||
567 | void *(* init)(void *data), | ||
568 | void (* cleanup)(void *data), | ||
569 | void *data); | ||
570 | |||
571 | void thread_pool_destroy(struct thread_pool *p); | ||
572 | struct thread_pool *thread_pool_create(int num, char *name, | ||
573 | void *(* init)(void *data), | ||
574 | void (* cleanup)(void *data), | ||
575 | void *data); | ||
576 | |||
577 | int thread_pool_schedule(struct thread_pool *p, | ||
578 | int (* setup)(void *stored_private, void *setup_data), | ||
579 | int (* action)(void *stored_private, void *setup_data), | ||
580 | void *setup_data, long timeout); | ||
581 | int thread_pool_schedule_private(struct thread_pool *p, | ||
582 | int (* setup)(void *private, void *data), | ||
583 | int (* action)(void *private, void *data), | ||
584 | void *data, long timeout, void *id); | ||
585 | |||
586 | #endif /* __KERNEL__ */ | ||
587 | #endif /* __DST_H */ | ||