aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/staging/Kconfig2
-rw-r--r--drivers/staging/Makefile1
-rw-r--r--drivers/staging/dst/Kconfig67
-rw-r--r--drivers/staging/dst/Makefile3
-rw-r--r--drivers/staging/dst/crypto.c733
-rw-r--r--drivers/staging/dst/dcore.c968
-rw-r--r--drivers/staging/dst/export.c660
-rw-r--r--drivers/staging/dst/state.c844
-rw-r--r--drivers/staging/dst/thread_pool.c348
-rw-r--r--drivers/staging/dst/trans.c337
-rw-r--r--include/linux/dst.h587
11 files changed, 0 insertions, 4550 deletions
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index db0de940949e..94eb86319ff3 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -87,8 +87,6 @@ source "drivers/staging/frontier/Kconfig"
87 87
88source "drivers/staging/dream/Kconfig" 88source "drivers/staging/dream/Kconfig"
89 89
90source "drivers/staging/dst/Kconfig"
91
92source "drivers/staging/pohmelfs/Kconfig" 90source "drivers/staging/pohmelfs/Kconfig"
93 91
94source "drivers/staging/b3dfg/Kconfig" 92source "drivers/staging/b3dfg/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 73c6a71155e0..b5e67b889f60 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_RTL8192E) += rtl8192e/
26obj-$(CONFIG_INPUT_MIMIO) += mimio/ 26obj-$(CONFIG_INPUT_MIMIO) += mimio/
27obj-$(CONFIG_TRANZPORT) += frontier/ 27obj-$(CONFIG_TRANZPORT) += frontier/
28obj-$(CONFIG_DREAM) += dream/ 28obj-$(CONFIG_DREAM) += dream/
29obj-$(CONFIG_DST) += dst/
30obj-$(CONFIG_POHMELFS) += pohmelfs/ 29obj-$(CONFIG_POHMELFS) += pohmelfs/
31obj-$(CONFIG_B3DFG) += b3dfg/ 30obj-$(CONFIG_B3DFG) += b3dfg/
32obj-$(CONFIG_IDE_PHISON) += phison/ 31obj-$(CONFIG_IDE_PHISON) += phison/
diff --git a/drivers/staging/dst/Kconfig b/drivers/staging/dst/Kconfig
deleted file mode 100644
index 448d342ac2a2..000000000000
--- a/drivers/staging/dst/Kconfig
+++ /dev/null
@@ -1,67 +0,0 @@
1config DST
2 tristate "Distributed storage"
3 depends on NET && CRYPTO && SYSFS && BLK_DEV
4 select CONNECTOR
5 ---help---
6 DST is a network block device storage, which can be used to organize
7 exported storage on the remote nodes into the local block device.
8
9 DST works on top of any network media and protocol; it is just a matter
10 of configuration utility to understand the correct addresses. The most
11 common example is TCP over IP, which allows to pass through firewalls and
12 create remote backup storage in a different datacenter. DST requires
13 single port to be enabled on the exporting node and outgoing connections
14 on the local node.
15
16 DST works with in-kernel client and server, which improves performance by
17 eliminating unneeded data copies and by not depending on the version
18 of the external IO components. It requires userspace configuration utility
19 though.
20
21 DST uses transaction model, when each store has to be explicitly acked
22 from the remote node to be considered as successfully written. There
23 may be lots of in-flight transactions. When remote host does not ack
24 the transaction it will be resent predefined number of times with specified
25 timeouts between them. All those parameters are configurable. Transactions
26 are marked as failed after all resends complete unsuccessfully; having
27 long enough resend timeout and/or large number of resends allows not to
28 return error to the higher (FS usually) layer in case of short network
29 problems or remote node outages. In case of network RAID setup this means
30 that storage will not degrade until transactions are marked as failed, and
31 thus will not force checksum recalculation and data rebuild. In case of
32 connection failure DST will try to reconnect to the remote node automatically.
33 DST sends ping commands at idle time to detect if remote node is alive.
34
35 Because of transactional model it is possible to use zero-copy sending
36 without worry of data corruption (which in turn could be detected by the
37 strong checksums though).
38
39 DST may fully encrypt the data channel in case of untrusted channel and implement
40 strong checksum of the transferred data. It is possible to configure algorithms
41 and crypto keys; they should match on both sides of the network channel.
42 Crypto processing does not introduce noticeable performance overhead, since DST
43 uses configurable pool of threads to perform crypto processing.
44
45 DST utilizes memory pool model of all its transaction allocations (it is the
46 only additional allocation on the client) and server allocations (bio pools,
47 while pages are allocated from the slab).
48
49 At startup DST performs a simple negotiation with the export node to determine
50 access permissions and size of the exported storage. It can be extended if
51 new parameters should be autonegotiated.
52
53 DST carries block IO flags in the protocol, which allows to transparently implement
54 barriers and sync/flush operations. Those flags are used in the export node where
55 IO against the local storage is performed, which means that sync write will be sync
56 on the remote node too, which in turn improves data integrity and resistance
57 to errors and data corruption during power outages or storage damages.
58
59 Homepage: http://www.ioremap.net/projects/dst
60 Userspace configuration utility and the latest releases: http://www.ioremap.net/archive/dst/
61
62config DST_DEBUG
63 bool "DST debug"
64 depends on DST
65 ---help---
66 This option will enable HEAVY debugging of the DST.
67 Turn it on ONLY if you have to debug some really obscure problem.
diff --git a/drivers/staging/dst/Makefile b/drivers/staging/dst/Makefile
deleted file mode 100644
index 3a8b0cf9643e..000000000000
--- a/drivers/staging/dst/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
1obj-$(CONFIG_DST) += nst.o
2
3nst-y := dcore.o state.o export.o thread_pool.o crypto.o trans.o
diff --git a/drivers/staging/dst/crypto.c b/drivers/staging/dst/crypto.c
deleted file mode 100644
index 351295c97a4b..000000000000
--- a/drivers/staging/dst/crypto.c
+++ /dev/null
@@ -1,733 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/bio.h>
17#include <linux/crypto.h>
18#include <linux/dst.h>
19#include <linux/kernel.h>
20#include <linux/scatterlist.h>
21#include <linux/slab.h>
22
23/*
24 * Tricky bastard, but IV can be more complex with time...
25 */
26static inline u64 dst_gen_iv(struct dst_trans *t)
27{
28 return t->gen;
29}
30
31/*
32 * Crypto machinery: hash/cipher support for the given crypto controls.
33 */
34static struct crypto_hash *dst_init_hash(struct dst_crypto_ctl *ctl, u8 *key)
35{
36 int err;
37 struct crypto_hash *hash;
38
39 hash = crypto_alloc_hash(ctl->hash_algo, 0, CRYPTO_ALG_ASYNC);
40 if (IS_ERR(hash)) {
41 err = PTR_ERR(hash);
42 dprintk("%s: failed to allocate hash '%s', err: %d.\n",
43 __func__, ctl->hash_algo, err);
44 goto err_out_exit;
45 }
46
47 ctl->crypto_attached_size = crypto_hash_digestsize(hash);
48
49 if (!ctl->hash_keysize)
50 return hash;
51
52 err = crypto_hash_setkey(hash, key, ctl->hash_keysize);
53 if (err) {
54 dprintk("%s: failed to set key for hash '%s', err: %d.\n",
55 __func__, ctl->hash_algo, err);
56 goto err_out_free;
57 }
58
59 return hash;
60
61err_out_free:
62 crypto_free_hash(hash);
63err_out_exit:
64 return ERR_PTR(err);
65}
66
67static struct crypto_ablkcipher *dst_init_cipher(struct dst_crypto_ctl *ctl,
68 u8 *key)
69{
70 int err = -EINVAL;
71 struct crypto_ablkcipher *cipher;
72
73 if (!ctl->cipher_keysize)
74 goto err_out_exit;
75
76 cipher = crypto_alloc_ablkcipher(ctl->cipher_algo, 0, 0);
77 if (IS_ERR(cipher)) {
78 err = PTR_ERR(cipher);
79 dprintk("%s: failed to allocate cipher '%s', err: %d.\n",
80 __func__, ctl->cipher_algo, err);
81 goto err_out_exit;
82 }
83
84 crypto_ablkcipher_clear_flags(cipher, ~0);
85
86 err = crypto_ablkcipher_setkey(cipher, key, ctl->cipher_keysize);
87 if (err) {
88 dprintk("%s: failed to set key for cipher '%s', err: %d.\n",
89 __func__, ctl->cipher_algo, err);
90 goto err_out_free;
91 }
92
93 return cipher;
94
95err_out_free:
96 crypto_free_ablkcipher(cipher);
97err_out_exit:
98 return ERR_PTR(err);
99}
100
101/*
102 * Crypto engine has a pool of pages to encrypt data into before sending
103 * it over the network. This pool is freed/allocated here.
104 */
105static void dst_crypto_pages_free(struct dst_crypto_engine *e)
106{
107 unsigned int i;
108
109 for (i = 0; i < e->page_num; ++i)
110 __free_page(e->pages[i]);
111 kfree(e->pages);
112}
113
114static int dst_crypto_pages_alloc(struct dst_crypto_engine *e, int num)
115{
116 int i;
117
118 e->pages = kmalloc(num * sizeof(struct page **), GFP_KERNEL);
119 if (!e->pages)
120 return -ENOMEM;
121
122 for (i = 0; i < num; ++i) {
123 e->pages[i] = alloc_page(GFP_KERNEL);
124 if (!e->pages[i])
125 goto err_out_free_pages;
126 }
127
128 e->page_num = num;
129 return 0;
130
131err_out_free_pages:
132 while (--i >= 0)
133 __free_page(e->pages[i]);
134
135 kfree(e->pages);
136 return -ENOMEM;
137}
138
139/*
140 * Initialize crypto engine for given node.
141 * Setup cipher/hash, keys, pool of threads and private data.
142 */
143static int dst_crypto_engine_init(struct dst_crypto_engine *e,
144 struct dst_node *n)
145{
146 int err;
147 struct dst_crypto_ctl *ctl = &n->crypto;
148
149 err = dst_crypto_pages_alloc(e, n->max_pages);
150 if (err)
151 goto err_out_exit;
152
153 e->size = PAGE_SIZE;
154 e->data = kmalloc(e->size, GFP_KERNEL);
155 if (!e->data) {
156 err = -ENOMEM;
157 goto err_out_free_pages;
158 }
159
160 if (ctl->hash_algo[0]) {
161 e->hash = dst_init_hash(ctl, n->hash_key);
162 if (IS_ERR(e->hash)) {
163 err = PTR_ERR(e->hash);
164 e->hash = NULL;
165 goto err_out_free;
166 }
167 }
168
169 if (ctl->cipher_algo[0]) {
170 e->cipher = dst_init_cipher(ctl, n->cipher_key);
171 if (IS_ERR(e->cipher)) {
172 err = PTR_ERR(e->cipher);
173 e->cipher = NULL;
174 goto err_out_free_hash;
175 }
176 }
177
178 return 0;
179
180err_out_free_hash:
181 crypto_free_hash(e->hash);
182err_out_free:
183 kfree(e->data);
184err_out_free_pages:
185 dst_crypto_pages_free(e);
186err_out_exit:
187 return err;
188}
189
190static void dst_crypto_engine_exit(struct dst_crypto_engine *e)
191{
192 if (e->hash)
193 crypto_free_hash(e->hash);
194 if (e->cipher)
195 crypto_free_ablkcipher(e->cipher);
196 dst_crypto_pages_free(e);
197 kfree(e->data);
198}
199
200/*
201 * Waiting for cipher processing to be completed.
202 */
203struct dst_crypto_completion {
204 struct completion complete;
205 int error;
206};
207
208static void dst_crypto_complete(struct crypto_async_request *req, int err)
209{
210 struct dst_crypto_completion *c = req->data;
211
212 if (err == -EINPROGRESS)
213 return;
214
215 dprintk("%s: req: %p, err: %d.\n", __func__, req, err);
216 c->error = err;
217 complete(&c->complete);
218}
219
220static int dst_crypto_process(struct ablkcipher_request *req,
221 struct scatterlist *sg_dst, struct scatterlist *sg_src,
222 void *iv, int enc, unsigned long timeout)
223{
224 struct dst_crypto_completion c;
225 int err;
226
227 init_completion(&c.complete);
228 c.error = -EINPROGRESS;
229
230 ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
231 dst_crypto_complete, &c);
232
233 ablkcipher_request_set_crypt(req, sg_src, sg_dst, sg_src->length, iv);
234
235 if (enc)
236 err = crypto_ablkcipher_encrypt(req);
237 else
238 err = crypto_ablkcipher_decrypt(req);
239
240 switch (err) {
241 case -EINPROGRESS:
242 case -EBUSY:
243 err = wait_for_completion_interruptible_timeout(&c.complete,
244 timeout);
245 if (!err)
246 err = -ETIMEDOUT;
247 else
248 err = c.error;
249 break;
250 default:
251 break;
252 }
253
254 return err;
255}
256
257/*
258 * DST uses generic iteration approach for data crypto processing.
259 * Single block IO request is switched into array of scatterlists,
260 * which are submitted to the crypto processing iterator.
261 *
262 * Input and output iterator initialization are different, since
263 * in output case we can not encrypt data in-place and need a
264 * temporary storage, which is then being sent to the remote peer.
265 */
266static int dst_trans_iter_out(struct bio *bio, struct dst_crypto_engine *e,
267 int (*iterator) (struct dst_crypto_engine *e,
268 struct scatterlist *dst,
269 struct scatterlist *src))
270{
271 struct bio_vec *bv;
272 int err, i;
273
274 sg_init_table(e->src, bio->bi_vcnt);
275 sg_init_table(e->dst, bio->bi_vcnt);
276
277 bio_for_each_segment(bv, bio, i) {
278 sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
279 sg_set_page(&e->dst[i], e->pages[i], bv->bv_len, bv->bv_offset);
280
281 err = iterator(e, &e->dst[i], &e->src[i]);
282 if (err)
283 return err;
284 }
285
286 return 0;
287}
288
289static int dst_trans_iter_in(struct bio *bio, struct dst_crypto_engine *e,
290 int (*iterator) (struct dst_crypto_engine *e,
291 struct scatterlist *dst,
292 struct scatterlist *src))
293{
294 struct bio_vec *bv;
295 int err, i;
296
297 sg_init_table(e->src, bio->bi_vcnt);
298 sg_init_table(e->dst, bio->bi_vcnt);
299
300 bio_for_each_segment(bv, bio, i) {
301 sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
302 sg_set_page(&e->dst[i], bv->bv_page, bv->bv_len, bv->bv_offset);
303
304 err = iterator(e, &e->dst[i], &e->src[i]);
305 if (err)
306 return err;
307 }
308
309 return 0;
310}
311
312static int dst_crypt_iterator(struct dst_crypto_engine *e,
313 struct scatterlist *sg_dst, struct scatterlist *sg_src)
314{
315 struct ablkcipher_request *req = e->data;
316 u8 iv[32];
317
318 memset(iv, 0, sizeof(iv));
319
320 memcpy(iv, &e->iv, sizeof(e->iv));
321
322 return dst_crypto_process(req, sg_dst, sg_src, iv, e->enc, e->timeout);
323}
324
325static int dst_crypt(struct dst_crypto_engine *e, struct bio *bio)
326{
327 struct ablkcipher_request *req = e->data;
328
329 memset(req, 0, sizeof(struct ablkcipher_request));
330 ablkcipher_request_set_tfm(req, e->cipher);
331
332 if (e->enc)
333 return dst_trans_iter_out(bio, e, dst_crypt_iterator);
334 else
335 return dst_trans_iter_in(bio, e, dst_crypt_iterator);
336}
337
338static int dst_hash_iterator(struct dst_crypto_engine *e,
339 struct scatterlist *sg_dst, struct scatterlist *sg_src)
340{
341 return crypto_hash_update(e->data, sg_src, sg_src->length);
342}
343
344static int dst_hash(struct dst_crypto_engine *e, struct bio *bio, void *dst)
345{
346 struct hash_desc *desc = e->data;
347 int err;
348
349 desc->tfm = e->hash;
350 desc->flags = 0;
351
352 err = crypto_hash_init(desc);
353 if (err)
354 return err;
355
356 err = dst_trans_iter_in(bio, e, dst_hash_iterator);
357 if (err)
358 return err;
359
360 err = crypto_hash_final(desc, dst);
361 if (err)
362 return err;
363
364 return 0;
365}
366
367/*
368 * Initialize/cleanup a crypto thread. The only thing it should
369 * do is to allocate a pool of pages as temporary storage.
370 * And to setup cipher and/or hash.
371 */
372static void *dst_crypto_thread_init(void *data)
373{
374 struct dst_node *n = data;
375 struct dst_crypto_engine *e;
376 int err = -ENOMEM;
377
378 e = kzalloc(sizeof(struct dst_crypto_engine), GFP_KERNEL);
379 if (!e)
380 goto err_out_exit;
381 e->src = kcalloc(2 * n->max_pages, sizeof(struct scatterlist),
382 GFP_KERNEL);
383 if (!e->src)
384 goto err_out_free;
385
386 e->dst = e->src + n->max_pages;
387
388 err = dst_crypto_engine_init(e, n);
389 if (err)
390 goto err_out_free_all;
391
392 return e;
393
394err_out_free_all:
395 kfree(e->src);
396err_out_free:
397 kfree(e);
398err_out_exit:
399 return ERR_PTR(err);
400}
401
402static void dst_crypto_thread_cleanup(void *private)
403{
404 struct dst_crypto_engine *e = private;
405
406 dst_crypto_engine_exit(e);
407 kfree(e->src);
408 kfree(e);
409}
410
411/*
412 * Initialize crypto engine for given node: store keys, create pool
413 * of threads, initialize each one.
414 *
415 * Each thread has unique ID, but 0 and 1 are reserved for receiving and
416 * accepting threads (if export node), so IDs could start from 2, but starting
417 * them from 10 allows easily understand what this thread is for.
418 */
419int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl)
420{
421 void *key = (ctl + 1);
422 int err = -ENOMEM, i;
423 char name[32];
424
425 if (ctl->hash_keysize) {
426 n->hash_key = kmalloc(ctl->hash_keysize, GFP_KERNEL);
427 if (!n->hash_key)
428 goto err_out_exit;
429 memcpy(n->hash_key, key, ctl->hash_keysize);
430 }
431
432 if (ctl->cipher_keysize) {
433 n->cipher_key = kmalloc(ctl->cipher_keysize, GFP_KERNEL);
434 if (!n->cipher_key)
435 goto err_out_free_hash;
436 memcpy(n->cipher_key, key, ctl->cipher_keysize);
437 }
438 memcpy(&n->crypto, ctl, sizeof(struct dst_crypto_ctl));
439
440 for (i = 0; i < ctl->thread_num; ++i) {
441 snprintf(name, sizeof(name), "%s-crypto-%d", n->name, i);
442 /* Unique ids... */
443 err = thread_pool_add_worker(n->pool, name, i + 10,
444 dst_crypto_thread_init, dst_crypto_thread_cleanup, n);
445 if (err)
446 goto err_out_free_threads;
447 }
448
449 return 0;
450
451err_out_free_threads:
452 while (--i >= 0)
453 thread_pool_del_worker_id(n->pool, i+10);
454
455 if (ctl->cipher_keysize)
456 kfree(n->cipher_key);
457 ctl->cipher_keysize = 0;
458err_out_free_hash:
459 if (ctl->hash_keysize)
460 kfree(n->hash_key);
461 ctl->hash_keysize = 0;
462err_out_exit:
463 return err;
464}
465
466void dst_node_crypto_exit(struct dst_node *n)
467{
468 struct dst_crypto_ctl *ctl = &n->crypto;
469
470 if (ctl->cipher_algo[0] || ctl->hash_algo[0]) {
471 kfree(n->hash_key);
472 kfree(n->cipher_key);
473 }
474}
475
476/*
477 * Thrad pool setup callback. Just stores a transaction in private data.
478 */
479static int dst_trans_crypto_setup(void *crypto_engine, void *trans)
480{
481 struct dst_crypto_engine *e = crypto_engine;
482
483 e->private = trans;
484 return 0;
485}
486
#if 0
/*
 * Debug helper (compiled out): dump every byte of every segment of @bio.
 */
static void dst_dump_bio(struct bio *bio)
{
	u8 *p;
	struct bio_vec *bv;
	unsigned int j;
	int i;

	bio_for_each_segment(bv, bio, i) {
		dprintk("%s: %llu/%u: size: %u, offset: %u, data: ",
				__func__, (u64)bio->bi_sector, bio->bi_size,
				bv->bv_len, bv->bv_offset);

		p = kmap(bv->bv_page) + bv->bv_offset;
		/*
		 * Use a separate index for the bytes: reusing 'i' here would
		 * clobber the segment index driving the outer loop.
		 */
		for (j = 0; j < bv->bv_len; ++j)
			printk(KERN_DEBUG "%02x ", p[j]);
		kunmap(bv->bv_page);
		printk("\n");
	}
}
#endif
507
508/*
509 * Encrypt/hash data and send it to the network.
510 */
511static int dst_crypto_process_sending(struct dst_crypto_engine *e,
512 struct bio *bio, u8 *hash)
513{
514 int err;
515
516 if (e->cipher) {
517 err = dst_crypt(e, bio);
518 if (err)
519 goto err_out_exit;
520 }
521
522 if (e->hash) {
523 err = dst_hash(e, bio, hash);
524 if (err)
525 goto err_out_exit;
526
527#ifdef CONFIG_DST_DEBUG
528 {
529 unsigned int i;
530
531 /* dst_dump_bio(bio); */
532
533 printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash: ",
534 __func__, (u64)bio->bi_sector,
535 bio->bi_size, bio_data_dir(bio));
536 for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
537 printk("%02x ", hash[i]);
538 printk("\n");
539 }
540#endif
541 }
542
543 return 0;
544
545err_out_exit:
546 return err;
547}
548
549/*
550 * Check if received data is valid. Decipher if it is.
551 */
552static int dst_crypto_process_receiving(struct dst_crypto_engine *e,
553 struct bio *bio, u8 *hash, u8 *recv_hash)
554{
555 int err;
556
557 if (e->hash) {
558 int mismatch;
559
560 err = dst_hash(e, bio, hash);
561 if (err)
562 goto err_out_exit;
563
564 mismatch = !!memcmp(recv_hash, hash,
565 crypto_hash_digestsize(e->hash));
566#ifdef CONFIG_DST_DEBUG
567 /* dst_dump_bio(bio); */
568
569 printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash mismatch: %d",
570 __func__, (u64)bio->bi_sector, bio->bi_size,
571 bio_data_dir(bio), mismatch);
572 if (mismatch) {
573 unsigned int i;
574
575 printk(", recv/calc: ");
576 for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
577 printk("%02x/%02x ", recv_hash[i], hash[i]);
578
579 }
580 printk("\n");
581#endif
582 err = -1;
583 if (mismatch)
584 goto err_out_exit;
585 }
586
587 if (e->cipher) {
588 err = dst_crypt(e, bio);
589 if (err)
590 goto err_out_exit;
591 }
592
593 return 0;
594
595err_out_exit:
596 return err;
597}
598
599/*
600 * Thread pool callback to encrypt data and send it to the netowork.
601 */
602static int dst_trans_crypto_action(void *crypto_engine, void *schedule_data)
603{
604 struct dst_crypto_engine *e = crypto_engine;
605 struct dst_trans *t = schedule_data;
606 struct bio *bio = t->bio;
607 int err;
608
609 dprintk("%s: t: %p, gen: %llu, cipher: %p, hash: %p.\n",
610 __func__, t, t->gen, e->cipher, e->hash);
611
612 e->enc = t->enc;
613 e->iv = dst_gen_iv(t);
614
615 if (bio_data_dir(bio) == WRITE) {
616 err = dst_crypto_process_sending(e, bio, t->cmd.hash);
617 if (err)
618 goto err_out_exit;
619
620 if (e->hash) {
621 t->cmd.csize = crypto_hash_digestsize(e->hash);
622 t->cmd.size += t->cmd.csize;
623 }
624
625 return dst_trans_send(t);
626 } else {
627 u8 *hash = e->data + e->size/2;
628
629 err = dst_crypto_process_receiving(e, bio, hash, t->cmd.hash);
630 if (err)
631 goto err_out_exit;
632
633 dst_trans_remove(t);
634 dst_trans_put(t);
635 }
636
637 return 0;
638
639err_out_exit:
640 t->error = err;
641 dst_trans_put(t);
642 return err;
643}
644
645/*
646 * Schedule crypto processing for given transaction.
647 */
648int dst_trans_crypto(struct dst_trans *t)
649{
650 struct dst_node *n = t->n;
651 int err;
652
653 err = thread_pool_schedule(n->pool,
654 dst_trans_crypto_setup, dst_trans_crypto_action,
655 t, MAX_SCHEDULE_TIMEOUT);
656 if (err)
657 goto err_out_exit;
658
659 return 0;
660
661err_out_exit:
662 dst_trans_put(t);
663 return err;
664}
665
666/*
667 * Crypto machinery for the export node.
668 */
669static int dst_export_crypto_setup(void *crypto_engine, void *bio)
670{
671 struct dst_crypto_engine *e = crypto_engine;
672
673 e->private = bio;
674 return 0;
675}
676
677static int dst_export_crypto_action(void *crypto_engine, void *schedule_data)
678{
679 struct dst_crypto_engine *e = crypto_engine;
680 struct bio *bio = schedule_data;
681 struct dst_export_priv *p = bio->bi_private;
682 int err;
683
684 dprintk("%s: e: %p, data: %p, bio: %llu/%u, dir: %lu.\n",
685 __func__, e, e->data, (u64)bio->bi_sector,
686 bio->bi_size, bio_data_dir(bio));
687
688 e->enc = (bio_data_dir(bio) == READ);
689 e->iv = p->cmd.id;
690
691 if (bio_data_dir(bio) == WRITE) {
692 u8 *hash = e->data + e->size/2;
693
694 err = dst_crypto_process_receiving(e, bio, hash, p->cmd.hash);
695 if (err)
696 goto err_out_exit;
697
698 generic_make_request(bio);
699 } else {
700 err = dst_crypto_process_sending(e, bio, p->cmd.hash);
701 if (err)
702 goto err_out_exit;
703
704 if (e->hash) {
705 p->cmd.csize = crypto_hash_digestsize(e->hash);
706 p->cmd.size += p->cmd.csize;
707 }
708
709 err = dst_export_send_bio(bio);
710 }
711 return 0;
712
713err_out_exit:
714 bio_put(bio);
715 return err;
716}
717
718int dst_export_crypto(struct dst_node *n, struct bio *bio)
719{
720 int err;
721
722 err = thread_pool_schedule(n->pool,
723 dst_export_crypto_setup, dst_export_crypto_action,
724 bio, MAX_SCHEDULE_TIMEOUT);
725 if (err)
726 goto err_out_exit;
727
728 return 0;
729
730err_out_exit:
731 bio_put(bio);
732 return err;
733}
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
deleted file mode 100644
index c83ca7e3d048..000000000000
--- a/drivers/staging/dst/dcore.c
+++ /dev/null
@@ -1,968 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/module.h>
17#include <linux/kernel.h>
18#include <linux/blkdev.h>
19#include <linux/bio.h>
20#include <linux/buffer_head.h>
21#include <linux/connector.h>
22#include <linux/dst.h>
23#include <linux/device.h>
24#include <linux/jhash.h>
25#include <linux/idr.h>
26#include <linux/init.h>
27#include <linux/namei.h>
28#include <linux/slab.h>
29#include <linux/socket.h>
30
31#include <linux/in.h>
32#include <linux/in6.h>
33
34#include <net/sock.h>
35
36static int dst_major;
37
38static DEFINE_MUTEX(dst_hash_lock);
39static struct list_head *dst_hashtable;
40static unsigned int dst_hashtable_size = 128;
41module_param(dst_hashtable_size, uint, 0644);
42
43static char dst_name[] = "Dementianting goldfish";
44
45static DEFINE_IDR(dst_index_idr);
46static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL };
47
/*
 * DST sysfs tree for device called 'storage':
 *
 * /sys/bus/dst/devices/storage/
 * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025
 * /sys/bus/dst/devices/storage/size : 800
 * /sys/bus/dst/devices/storage/name : storage
 */

/* Bus match callback: every device matches every driver. */
static int dst_dev_match(struct device *dev, struct device_driver *drv)
{
	return 1;
}
61
62static struct bus_type dst_dev_bus_type = {
63 .name = "dst",
64 .match = &dst_dev_match,
65};
66
67static void dst_node_release(struct device *dev)
68{
69 struct dst_info *info = container_of(dev, struct dst_info, device);
70
71 kfree(info);
72}
73
74static struct device dst_node_dev = {
75 .bus = &dst_dev_bus_type,
76 .release = &dst_node_release
77};
78
79/*
80 * Setting size of the node after it was changed.
81 */
82static void dst_node_set_size(struct dst_node *n)
83{
84 struct block_device *bdev;
85
86 set_capacity(n->disk, n->size >> 9);
87
88 bdev = bdget_disk(n->disk, 0);
89 if (bdev) {
90 mutex_lock(&bdev->bd_inode->i_mutex);
91 i_size_write(bdev->bd_inode, n->size);
92 mutex_unlock(&bdev->bd_inode->i_mutex);
93 bdput(bdev);
94 }
95}
96
97/*
98 * Distributed storage request processing function.
99 */
100static int dst_request(struct request_queue *q, struct bio *bio)
101{
102 struct dst_node *n = q->queuedata;
103 int err = -EIO;
104
105 if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
106 /*
107 * This is a dirty^Wnice hack, but if we complete this
108 * operation with -EOPNOTSUPP like intended, XFS
109 * will stuck and freeze the machine. This may be
110 * not particulary XFS problem though, but it is the
111 * only FS which sends empty barrier at umount time
112 * I worked with.
113 *
114 * Empty barriers are not allowed anyway, see 51fd77bd9f512
115 * for example, although later it was changed to
116 * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
117 * work in this case.
118 */
119 /* err = -EOPNOTSUPP; */
120 err = 0;
121 goto end_io;
122 }
123
124 bio_get(bio);
125
126 return dst_process_bio(n, bio);
127
128end_io:
129 bio_endio(bio, err);
130 return err;
131}
132
133/*
134 * Open/close callbacks for appropriate block device.
135 */
136static int dst_bdev_open(struct block_device *bdev, fmode_t mode)
137{
138 struct dst_node *n = bdev->bd_disk->private_data;
139
140 dst_node_get(n);
141 return 0;
142}
143
144static int dst_bdev_release(struct gendisk *disk, fmode_t mode)
145{
146 struct dst_node *n = disk->private_data;
147
148 dst_node_put(n);
149 return 0;
150}
151
152static struct block_device_operations dst_blk_ops = {
153 .open = dst_bdev_open,
154 .release = dst_bdev_release,
155 .owner = THIS_MODULE,
156};
157
158/*
159 * Block layer binding - disk is created when array is fully configured
160 * by userspace request.
161 */
162static int dst_node_create_disk(struct dst_node *n)
163{
164 int err = -ENOMEM;
165 u32 index = 0;
166
167 n->queue = blk_init_queue(NULL, NULL);
168 if (!n->queue)
169 goto err_out_exit;
170
171 n->queue->queuedata = n;
172 blk_queue_make_request(n->queue, dst_request);
173 blk_queue_max_phys_segments(n->queue, n->max_pages);
174 blk_queue_max_hw_segments(n->queue, n->max_pages);
175
176 err = -ENOMEM;
177 n->disk = alloc_disk(1);
178 if (!n->disk)
179 goto err_out_free_queue;
180
181 if (!(n->state->permissions & DST_PERM_WRITE)) {
182 printk(KERN_INFO "DST node %s attached read-only.\n", n->name);
183 set_disk_ro(n->disk, 1);
184 }
185
186 if (!idr_pre_get(&dst_index_idr, GFP_KERNEL))
187 goto err_out_put;
188
189 mutex_lock(&dst_hash_lock);
190 err = idr_get_new(&dst_index_idr, NULL, &index);
191 mutex_unlock(&dst_hash_lock);
192 if (err)
193 goto err_out_put;
194
195 n->disk->major = dst_major;
196 n->disk->first_minor = index;
197 n->disk->fops = &dst_blk_ops;
198 n->disk->queue = n->queue;
199 n->disk->private_data = n;
200 snprintf(n->disk->disk_name, sizeof(n->disk->disk_name),
201 "dst-%s", n->name);
202
203 return 0;
204
205err_out_put:
206 put_disk(n->disk);
207err_out_free_queue:
208 blk_cleanup_queue(n->queue);
209err_out_exit:
210 return err;
211}
212
213/*
214 * Sysfs machinery: show device's size.
215 */
216static ssize_t dst_show_size(struct device *dev,
217 struct device_attribute *attr, char *buf)
218{
219 struct dst_info *info = container_of(dev, struct dst_info, device);
220
221 return sprintf(buf, "%llu\n", info->size);
222}
223
224/*
225 * Show local exported device.
226 */
227static ssize_t dst_show_local(struct device *dev,
228 struct device_attribute *attr, char *buf)
229{
230 struct dst_info *info = container_of(dev, struct dst_info, device);
231
232 return sprintf(buf, "%s\n", info->local);
233}
234
235/*
236 * Shows type of the remote node - device major/minor number
237 * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes.
238 */
239static ssize_t dst_show_type(struct device *dev,
240 struct device_attribute *attr, char *buf)
241{
242 struct dst_info *info = container_of(dev, struct dst_info, device);
243 int family = info->net.addr.sa_family;
244
245 if (family == AF_INET) {
246 struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr;
247 return sprintf(buf, "%u.%u.%u.%u:%d\n",
248 NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
249 } else if (family == AF_INET6) {
250 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)
251 &info->net.addr;
252 return sprintf(buf,
253 "%pi6:%d\n",
254 &sin->sin6_addr, ntohs(sin->sin6_port));
255 } else {
256 int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */
257 int size, addrlen = info->net.addr.sa_data_len;
258 unsigned char *a = (unsigned char *)&info->net.addr.sa_data;
259 char *buf_orig = buf;
260
261 size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ",
262 family, addrlen);
263 sz -= size;
264 buf += size;
265
266 for (i = 0; i < addrlen; ++i) {
267 if (sz < 3)
268 break;
269
270 size = snprintf(buf, sz, "%02x ", a[i]);
271 sz -= size;
272 buf += size;
273 }
274 buf += sprintf(buf, "\n");
275
276 return buf - buf_orig;
277 }
278 return 0;
279}
280
281static struct device_attribute dst_node_attrs[] = {
282 __ATTR(size, 0444, dst_show_size, NULL),
283 __ATTR(type, 0444, dst_show_type, NULL),
284 __ATTR(local, 0444, dst_show_local, NULL),
285};
286
287static int dst_create_node_attributes(struct dst_node *n)
288{
289 int err, i;
290
291 for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) {
292 err = device_create_file(&n->info->device,
293 &dst_node_attrs[i]);
294 if (err)
295 goto err_out_remove_all;
296 }
297 return 0;
298
299err_out_remove_all:
300 while (--i >= 0)
301 device_remove_file(&n->info->device,
302 &dst_node_attrs[i]);
303
304 return err;
305}
306
307static void dst_remove_node_attributes(struct dst_node *n)
308{
309 int i;
310
311 for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i)
312 device_remove_file(&n->info->device,
313 &dst_node_attrs[i]);
314}
315
316/*
317 * Sysfs cleanup and initialization.
318 * Shows number of useful parameters.
319 */
320static void dst_node_sysfs_exit(struct dst_node *n)
321{
322 if (n->info) {
323 dst_remove_node_attributes(n);
324 device_unregister(&n->info->device);
325 n->info = NULL;
326 }
327}
328
329static int dst_node_sysfs_init(struct dst_node *n)
330{
331 int err;
332
333 n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL);
334 if (!n->info)
335 return -ENOMEM;
336
337 memcpy(&n->info->device, &dst_node_dev, sizeof(struct device));
338 n->info->size = n->size;
339
340 dev_set_name(&n->info->device, "dst-%s", n->name);
341 err = device_register(&n->info->device);
342 if (err) {
343 dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n",
344 n->name, err);
345 goto err_out_exit;
346 }
347
348 dst_create_node_attributes(n);
349
350 return 0;
351
352err_out_exit:
353 kfree(n->info);
354 n->info = NULL;
355 return err;
356}
357
358/*
359 * DST node hash tables machinery.
360 */
361static inline unsigned int dst_hash(char *str, unsigned int size)
362{
363 return jhash(str, size, 0) % dst_hashtable_size;
364}
365
366static void dst_node_remove(struct dst_node *n)
367{
368 mutex_lock(&dst_hash_lock);
369 list_del_init(&n->node_entry);
370 mutex_unlock(&dst_hash_lock);
371}
372
373static void dst_node_add(struct dst_node *n)
374{
375 unsigned hash = dst_hash(n->name, sizeof(n->name));
376
377 mutex_lock(&dst_hash_lock);
378 list_add_tail(&n->node_entry, &dst_hashtable[hash]);
379 mutex_unlock(&dst_hash_lock);
380}
381
382/*
383 * Cleaning node when it is about to be freed.
384 * There are still users of the socket though,
385 * so connection cleanup should be protected.
386 */
387static void dst_node_cleanup(struct dst_node *n)
388{
389 struct dst_state *st = n->state;
390
391 if (!st)
392 return;
393
394 if (n->queue) {
395 blk_cleanup_queue(n->queue);
396
397 mutex_lock(&dst_hash_lock);
398 idr_remove(&dst_index_idr, n->disk->first_minor);
399 mutex_unlock(&dst_hash_lock);
400
401 put_disk(n->disk);
402 }
403
404 if (n->bdev) {
405 sync_blockdev(n->bdev);
406 close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
407 }
408
409 dst_state_lock(st);
410 st->need_exit = 1;
411 dst_state_exit_connected(st);
412 dst_state_unlock(st);
413
414 wake_up(&st->thread_wait);
415
416 dst_state_put(st);
417 n->state = NULL;
418}
419
420/*
421 * Free security attributes attached to given node.
422 */
423static void dst_security_exit(struct dst_node *n)
424{
425 struct dst_secure *s, *tmp;
426
427 list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) {
428 list_del(&s->sec_entry);
429 kfree(s);
430 }
431}
432
433/*
434 * Free node when there are no more users.
435 * Actually node has to be freed on behalf od userspace process,
436 * since there are number of threads, which are embedded in the
437 * node, so they can not exit and free node from there, that is
438 * why there is a wakeup if reference counter is not equal to zero.
439 */
440void dst_node_put(struct dst_node *n)
441{
442 if (unlikely(!n))
443 return;
444
445 dprintk("%s: n: %p, refcnt: %d.\n",
446 __func__, n, atomic_read(&n->refcnt));
447
448 if (atomic_dec_and_test(&n->refcnt)) {
449 dst_node_remove(n);
450 n->trans_scan_timeout = 0;
451 dst_node_cleanup(n);
452 thread_pool_destroy(n->pool);
453 dst_node_sysfs_exit(n);
454 dst_node_crypto_exit(n);
455 dst_security_exit(n);
456 dst_node_trans_exit(n);
457
458 kfree(n);
459
460 dprintk("%s: freed n: %p.\n", __func__, n);
461 } else {
462 wake_up(&n->wait);
463 }
464}
465
466/*
467 * Setting up export device: lookup by the name, get its size
468 * and setup listening socket, which will accept clients, which
469 * will submit IO for given storage.
470 */
471static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl,
472 struct dst_export_ctl *le)
473{
474 int err;
475
476 snprintf(n->info->local, sizeof(n->info->local), "%s", le->device);
477
478 n->bdev = open_bdev_exclusive(le->device, FMODE_READ|FMODE_WRITE, NULL);
479 if (IS_ERR(n->bdev))
480 return PTR_ERR(n->bdev);
481
482 if (n->size != 0)
483 n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size);
484 else
485 n->size = n->bdev->bd_inode->i_size;
486
487 n->info->size = n->size;
488 err = dst_node_init_listened(n, le);
489 if (err)
490 goto err_out_cleanup;
491
492 return 0;
493
494err_out_cleanup:
495 close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
496 n->bdev = NULL;
497
498 return err;
499}
500
/*
 * Thread pool init callback for the network processing threads.
 * There is nothing to set up: the private data is passed through as is.
 */
static inline void *dst_thread_network_init(void *data)
{
	void *priv = data;

	dprintk("%s: data: %p.\n", __func__, data);
	return priv;
}
507
/*
 * Thread pool cleanup callback for the network processing threads.
 * Only logs the private data pointer, nothing is released here.
 */
static inline void dst_thread_network_cleanup(void *data)
{
	dprintk("%s: data: %p.\n", __func__, data);
}
512
513/*
514 * Allocate DST node and initialize some of its parameters.
515 */
516static struct dst_node *dst_alloc_node(struct dst_ctl *ctl,
517 int (*start)(struct dst_node *),
518 int num)
519{
520 struct dst_node *n;
521 int err;
522
523 n = kzalloc(sizeof(struct dst_node), GFP_KERNEL);
524 if (!n)
525 return NULL;
526
527 INIT_LIST_HEAD(&n->node_entry);
528
529 INIT_LIST_HEAD(&n->security_list);
530 mutex_init(&n->security_lock);
531
532 init_waitqueue_head(&n->wait);
533
534 n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout);
535 if (!n->trans_scan_timeout)
536 n->trans_scan_timeout = HZ;
537
538 n->trans_max_retries = ctl->trans_max_retries;
539 if (!n->trans_max_retries)
540 n->trans_max_retries = 10;
541
542 /*
543 * Pretty much arbitrary default numbers.
544 * 32 matches maximum number of pages in bio originated from ext3 (31).
545 */
546 n->max_pages = ctl->max_pages;
547 if (!n->max_pages)
548 n->max_pages = 32;
549
550 if (n->max_pages > 1024)
551 n->max_pages = 1024;
552
553 n->start = start;
554 n->size = ctl->size;
555
556 atomic_set(&n->refcnt, 1);
557 atomic_long_set(&n->gen, 0);
558 snprintf(n->name, sizeof(n->name), "%s", ctl->name);
559
560 err = dst_node_sysfs_init(n);
561 if (err)
562 goto err_out_free;
563
564 n->pool = thread_pool_create(num, n->name, dst_thread_network_init,
565 dst_thread_network_cleanup, n);
566 if (IS_ERR(n->pool)) {
567 err = PTR_ERR(n->pool);
568 goto err_out_sysfs_exit;
569 }
570
571 dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name);
572
573 return n;
574
575err_out_sysfs_exit:
576 dst_node_sysfs_exit(n);
577err_out_free:
578 kfree(n);
579 return NULL;
580}
581
582/*
583 * Starting a node, connected to the remote server:
584 * register block device and initialize transaction mechanism.
585 * In revers order though.
586 *
587 * It will autonegotiate some parameters with the remote node
588 * and update local if needed.
589 *
590 * Transaction initialization should be the last thing before
591 * starting the node, since transaction should include not only
592 * block IO, but also crypto related data (if any), which are
593 * initialized separately.
594 */
595static int dst_start_remote(struct dst_node *n)
596{
597 int err;
598
599 err = dst_node_trans_init(n, sizeof(struct dst_trans));
600 if (err)
601 return err;
602
603 err = dst_node_create_disk(n);
604 if (err)
605 return err;
606
607 dst_node_set_size(n);
608 add_disk(n->disk);
609
610 dprintk("DST: started remote node '%s', minor: %d.\n",
611 n->name, n->disk->first_minor);
612
613 return 0;
614}
615
616/*
617 * Adding remote node and initialize connection.
618 */
619static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl,
620 void *data, unsigned int size)
621{
622 int err;
623 struct dst_network_ctl *rctl = data;
624
625 if (n)
626 return -EEXIST;
627
628 if (size != sizeof(struct dst_network_ctl))
629 return -EINVAL;
630
631 n = dst_alloc_node(ctl, dst_start_remote, 1);
632 if (!n)
633 return -ENOMEM;
634
635 memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl));
636 err = dst_node_init_connected(n, rctl);
637 if (err)
638 goto err_out_free;
639
640 dst_node_add(n);
641
642 return 0;
643
644err_out_free:
645 dst_node_put(n);
646 return err;
647}
648
649/*
650 * Adding export node: initializing block device and listening socket.
651 */
652static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl,
653 void *data, unsigned int size)
654{
655 int err;
656 struct dst_export_ctl *le = data;
657
658 if (n)
659 return -EEXIST;
660
661 if (size != sizeof(struct dst_export_ctl))
662 return -EINVAL;
663
664 n = dst_alloc_node(ctl, dst_start_export, 2);
665 if (!n)
666 return -EINVAL;
667
668 err = dst_setup_export(n, ctl, le);
669 if (err)
670 goto err_out_free;
671
672 dst_node_add(n);
673
674 return 0;
675
676err_out_free:
677 dst_node_put(n);
678 return err;
679}
680
681static int dst_node_remove_unload(struct dst_node *n)
682{
683 printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n",
684 n->name, n->size);
685
686 if (n->disk)
687 del_gendisk(n->disk);
688
689 dst_node_remove(n);
690 dst_node_sysfs_exit(n);
691
692 /*
693 * This is not a hack. Really.
694 * Node's reference counter allows to implement fine grained
695 * node freeing, but since all transactions (which hold node's
696 * reference counter) are processed in the dedicated thread,
697 * it is possible that reference will hit zero in that thread,
698 * so we will not be able to exit thread and cleanup the node.
699 *
700 * So, we remove disk, so no new activity is possible, and
701 * wait until all pending transaction are completed (either
702 * in receiving thread or by timeout in workqueue), in this
703 * case reference counter will be less or equal to 2 (once set in
704 * dst_alloc_node() and then in connector message parser;
705 * or when we force module unloading, and connector message
706 * parser does not hold a reference, in this case reference
707 * counter will be equal to 1),
708 * and subsequent dst_node_put() calls will free the node.
709 */
710 dprintk("%s: going to sleep with %d refcnt.\n",
711 __func__, atomic_read(&n->refcnt));
712 wait_event(n->wait, atomic_read(&n->refcnt) <= 2);
713
714 dst_node_put(n);
715 return 0;
716}
717
718/*
719 * Remove node from the hash table.
720 */
721static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl,
722 void *data, unsigned int size)
723{
724 if (!n)
725 return -ENODEV;
726
727 return dst_node_remove_unload(n);
728}
729
730/*
731 * Initialize crypto processing for given node.
732 */
733static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl,
734 void *data, unsigned int size)
735{
736 struct dst_crypto_ctl *crypto = data;
737
738 if (!n)
739 return -ENODEV;
740
741 if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize +
742 crypto->cipher_keysize)
743 return -EINVAL;
744
745 if (n->trans_cache)
746 return -EEXIST;
747
748 return dst_node_crypto_init(n, crypto);
749}
750
751/*
752 * Security attributes for given node.
753 */
754static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl,
755 void *data, unsigned int size)
756{
757 struct dst_secure *s;
758
759 if (!n)
760 return -ENODEV;
761
762 if (size != sizeof(struct dst_secure_user))
763 return -EINVAL;
764
765 s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL);
766 if (!s)
767 return -ENOMEM;
768
769 memcpy(&s->sec, data, size);
770
771 mutex_lock(&n->security_lock);
772 list_add_tail(&s->sec_entry, &n->security_list);
773 mutex_unlock(&n->security_lock);
774
775 return 0;
776}
777
778/*
779 * Kill'em all!
780 */
781static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl,
782 void *data, unsigned int size)
783{
784 int err;
785
786 if (!n)
787 return -ENODEV;
788
789 if (n->trans_cache)
790 return 0;
791
792 err = n->start(n);
793 if (err)
794 return err;
795
796 printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size);
797 return 0;
798}
799
800typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl,
801 void *data, unsigned int size);
802
803/*
804 * List of userspace commands.
805 */
806static dst_command_func dst_commands[] = {
807 [DST_ADD_REMOTE] = &dst_add_remote,
808 [DST_ADD_EXPORT] = &dst_add_export,
809 [DST_DEL_NODE] = &dst_del_node,
810 [DST_CRYPTO] = &dst_crypto_init,
811 [DST_SECURITY] = &dst_security_init,
812 [DST_START] = &dst_start_node,
813};
814
815/*
816 * Configuration parser.
817 */
818static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
819{
820 struct dst_ctl *ctl;
821 int err;
822 struct dst_ctl_ack ack;
823 struct dst_node *n = NULL, *tmp;
824 unsigned int hash;
825
826 if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
827 err = -EPERM;
828 goto out;
829 }
830
831 if (msg->len < sizeof(struct dst_ctl)) {
832 err = -EBADMSG;
833 goto out;
834 }
835
836 ctl = (struct dst_ctl *)msg->data;
837
838 if (ctl->cmd >= DST_CMD_MAX) {
839 err = -EINVAL;
840 goto out;
841 }
842 hash = dst_hash(ctl->name, sizeof(ctl->name));
843
844 mutex_lock(&dst_hash_lock);
845 list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) {
846 if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) {
847 n = tmp;
848 dst_node_get(n);
849 break;
850 }
851 }
852 mutex_unlock(&dst_hash_lock);
853
854 err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl),
855 msg->len - sizeof(struct dst_ctl));
856
857 dst_node_put(n);
858out:
859 memcpy(&ack.msg, msg, sizeof(struct cn_msg));
860
861 ack.msg.ack = msg->ack + 1;
862 ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg);
863
864 ack.error = err;
865
866 cn_netlink_send(&ack.msg, 0, GFP_KERNEL);
867}
868
869/*
870 * Global initialization: sysfs, hash table, block device registration,
871 * connector and various caches.
872 */
873static int __init dst_sysfs_init(void)
874{
875 return bus_register(&dst_dev_bus_type);
876}
877
878static void dst_sysfs_exit(void)
879{
880 bus_unregister(&dst_dev_bus_type);
881}
882
883static int __init dst_hashtable_init(void)
884{
885 unsigned int i;
886
887 dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head),
888 GFP_KERNEL);
889 if (!dst_hashtable)
890 return -ENOMEM;
891
892 for (i = 0; i < dst_hashtable_size; ++i)
893 INIT_LIST_HEAD(&dst_hashtable[i]);
894
895 return 0;
896}
897
898static void dst_hashtable_exit(void)
899{
900 unsigned int i;
901 struct dst_node *n, *tmp;
902
903 for (i = 0; i < dst_hashtable_size; ++i) {
904 list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) {
905 dst_node_remove_unload(n);
906 }
907 }
908
909 kfree(dst_hashtable);
910}
911
912static int __init dst_sys_init(void)
913{
914 int err = -ENOMEM;
915
916 err = dst_hashtable_init();
917 if (err)
918 goto err_out_exit;
919
920 err = dst_export_init();
921 if (err)
922 goto err_out_hashtable_exit;
923
924 err = register_blkdev(dst_major, DST_NAME);
925 if (err < 0)
926 goto err_out_export_exit;
927 if (err)
928 dst_major = err;
929
930 err = dst_sysfs_init();
931 if (err)
932 goto err_out_unregister;
933
934 err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback);
935 if (err)
936 goto err_out_sysfs_exit;
937
938 printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name);
939
940 return 0;
941
942err_out_sysfs_exit:
943 dst_sysfs_exit();
944err_out_unregister:
945 unregister_blkdev(dst_major, DST_NAME);
946err_out_export_exit:
947 dst_export_exit();
948err_out_hashtable_exit:
949 dst_hashtable_exit();
950err_out_exit:
951 return err;
952}
953
954static void __exit dst_sys_exit(void)
955{
956 cn_del_callback(&cn_dst_id);
957 unregister_blkdev(dst_major, DST_NAME);
958 dst_hashtable_exit();
959 dst_sysfs_exit();
960 dst_export_exit();
961}
962
963module_init(dst_sys_init);
964module_exit(dst_sys_exit);
965
966MODULE_DESCRIPTION("Distributed storage");
967MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
968MODULE_LICENSE("GPL");
diff --git a/drivers/staging/dst/export.c b/drivers/staging/dst/export.c
deleted file mode 100644
index c324230e8b60..000000000000
--- a/drivers/staging/dst/export.c
+++ /dev/null
@@ -1,660 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/blkdev.h>
17#include <linux/bio.h>
18#include <linux/dst.h>
19#include <linux/in.h>
20#include <linux/in6.h>
21#include <linux/poll.h>
22#include <linux/slab.h>
23#include <linux/socket.h>
24
25#include <net/sock.h>
26
27/*
28 * Export bioset is used for server block IO requests.
29 */
30static struct bio_set *dst_bio_set;
31
32int __init dst_export_init(void)
33{
34 int err = -ENOMEM;
35
36 dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv));
37 if (!dst_bio_set)
38 goto err_out_exit;
39
40 return 0;
41
42err_out_exit:
43 return err;
44}
45
46void dst_export_exit(void)
47{
48 bioset_free(dst_bio_set);
49}
50
51/*
52 * When client connects and autonegotiates with the server node,
53 * its permissions are checked in a security attributes and sent
54 * back.
55 */
56static unsigned int dst_check_permissions(struct dst_state *main,
57 struct dst_state *st)
58{
59 struct dst_node *n = main->node;
60 struct dst_secure *sentry;
61 struct dst_secure_user *s;
62 struct saddr *sa = &st->ctl.addr;
63 unsigned int perm = 0;
64
65 mutex_lock(&n->security_lock);
66 list_for_each_entry(sentry, &n->security_list, sec_entry) {
67 s = &sentry->sec;
68
69 if (s->addr.sa_family != sa->sa_family)
70 continue;
71
72 if (s->addr.sa_data_len != sa->sa_data_len)
73 continue;
74
75 /*
76 * This '2' below is a port field. This may be very wrong to do
77 * in atalk for example though. If there will be any need
78 * to extent protocol to something else, I can create
79 * per-family helpers and use them instead of this memcmp.
80 */
81 if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2,
82 sa->sa_data_len - 2))
83 continue;
84
85 perm = s->permissions;
86 }
87 mutex_unlock(&n->security_lock);
88
89 return perm;
90}
91
92/*
93 * Accept new client: allocate appropriate network state and check permissions.
94 */
95static struct dst_state *dst_accept_client(struct dst_state *st)
96{
97 unsigned int revents = 0;
98 unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
99 unsigned int mask = err_mask | POLLIN;
100 struct dst_node *n = st->node;
101 int err = 0;
102 struct socket *sock = NULL;
103 struct dst_state *new;
104
105 while (!err && !sock) {
106 revents = dst_state_poll(st);
107
108 if (!(revents & mask)) {
109 DEFINE_WAIT(wait);
110
111 for (;;) {
112 prepare_to_wait(&st->thread_wait,
113 &wait, TASK_INTERRUPTIBLE);
114 if (!n->trans_scan_timeout || st->need_exit)
115 break;
116
117 revents = dst_state_poll(st);
118
119 if (revents & mask)
120 break;
121
122 if (signal_pending(current))
123 break;
124
125 /*
126 * Magic HZ? Polling check above is not safe in
127 * all cases (like socket reset in BH context),
128 * so it is simpler just to postpone it to the
129 * process context instead of implementing
130 * special locking there.
131 */
132 schedule_timeout(HZ);
133 }
134 finish_wait(&st->thread_wait, &wait);
135 }
136
137 err = -ECONNRESET;
138 dst_state_lock(st);
139
140 dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
141 __func__, st, revents, revents & err_mask,
142 revents & POLLIN);
143
144 if (revents & err_mask) {
145 dprintk("%s: revents: %x, socket: %p, err: %d.\n",
146 __func__, revents, st->socket, err);
147 err = -ECONNRESET;
148 }
149
150 if (!n->trans_scan_timeout || st->need_exit)
151 err = -ENODEV;
152
153 if (st->socket && (revents & POLLIN))
154 err = kernel_accept(st->socket, &sock, 0);
155
156 dst_state_unlock(st);
157 }
158
159 if (err)
160 goto err_out_exit;
161
162 new = dst_state_alloc(st->node);
163 if (IS_ERR(new)) {
164 err = -ENOMEM;
165 goto err_out_release;
166 }
167 new->socket = sock;
168
169 new->ctl.addr.sa_data_len = sizeof(struct sockaddr);
170 err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr,
171 (int *)&new->ctl.addr.sa_data_len);
172 if (err)
173 goto err_out_put;
174
175 new->permissions = dst_check_permissions(st, new);
176 if (new->permissions == 0) {
177 err = -EPERM;
178 dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
179 "Client is not allowed to connect");
180 goto err_out_put;
181 }
182
183 err = dst_poll_init(new);
184 if (err)
185 goto err_out_put;
186
187 dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
188 "Connected client");
189
190 return new;
191
192err_out_put:
193 dst_state_put(new);
194err_out_release:
195 sock_release(sock);
196err_out_exit:
197 return ERR_PTR(err);
198}
199
200/*
201 * Each server's block request sometime finishes.
202 * Usually it happens in hard irq context of the appropriate controller,
203 * so to play good with all cases we just queue BIO into the queue
204 * and wake up processing thread, which gets completed request and
205 * send (encrypting if needed) it back to the client (if it was a read
206 * request), or sends back reply that writing successfully completed.
207 */
208static int dst_export_process_request_queue(struct dst_state *st)
209{
210 unsigned long flags;
211 struct dst_export_priv *p = NULL;
212 struct bio *bio;
213 int err = 0;
214
215 while (!list_empty(&st->request_list)) {
216 spin_lock_irqsave(&st->request_lock, flags);
217 if (!list_empty(&st->request_list)) {
218 p = list_first_entry(&st->request_list,
219 struct dst_export_priv, request_entry);
220 list_del(&p->request_entry);
221 }
222 spin_unlock_irqrestore(&st->request_lock, flags);
223
224 if (!p)
225 break;
226
227 bio = p->bio;
228
229 if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ))
230 err = dst_export_crypto(st->node, bio);
231 else
232 err = dst_export_send_bio(bio);
233
234 if (err)
235 break;
236 }
237
238 return err;
239}
240
241/*
242 * Cleanup export state.
243 * It has to wait until all requests are finished,
244 * and then free them all.
245 */
246static void dst_state_cleanup_export(struct dst_state *st)
247{
248 struct dst_export_priv *p;
249 unsigned long flags;
250
251 /*
252 * This loop waits for all pending bios to be completed and freed.
253 */
254 while (atomic_read(&st->refcnt) > 1) {
255 dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
256 __func__, st, atomic_read(&st->refcnt),
257 list_empty(&st->request_list));
258 wait_event_timeout(st->thread_wait,
259 (atomic_read(&st->refcnt) == 1) ||
260 !list_empty(&st->request_list),
261 HZ/2);
262
263 while (!list_empty(&st->request_list)) {
264 p = NULL;
265 spin_lock_irqsave(&st->request_lock, flags);
266 if (!list_empty(&st->request_list)) {
267 p = list_first_entry(&st->request_list,
268 struct dst_export_priv, request_entry);
269 list_del(&p->request_entry);
270 }
271 spin_unlock_irqrestore(&st->request_lock, flags);
272
273 if (p)
274 bio_put(p->bio);
275
276 dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: "
277 "%p.\n", __func__, st, atomic_read(&st->refcnt),
278 list_empty(&st->request_list), p);
279 }
280 }
281
282 dst_state_put(st);
283}
284
285/*
286 * Client accepting thread.
287 * Not only accepts new connection, but also schedules receiving thread
288 * and performs request completion described above.
289 */
290static int dst_accept(void *init_data, void *schedule_data)
291{
292 struct dst_state *main_st = schedule_data;
293 struct dst_node *n = init_data;
294 struct dst_state *st;
295 int err;
296
297 while (n->trans_scan_timeout && !main_st->need_exit) {
298 dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n);
299 st = dst_accept_client(main_st);
300 if (IS_ERR(st))
301 continue;
302
303 err = dst_state_schedule_receiver(st);
304 if (!err) {
305 while (n->trans_scan_timeout) {
306 err = wait_event_interruptible_timeout(st->thread_wait,
307 !list_empty(&st->request_list) ||
308 !n->trans_scan_timeout ||
309 st->need_exit,
310 HZ);
311
312 if (!n->trans_scan_timeout || st->need_exit)
313 break;
314
315 if (list_empty(&st->request_list))
316 continue;
317
318 err = dst_export_process_request_queue(st);
319 if (err)
320 break;
321 }
322
323 st->need_exit = 1;
324 wake_up(&st->thread_wait);
325 }
326
327 dst_state_cleanup_export(st);
328 }
329
330 dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st);
331
332 dst_state_lock(main_st);
333 dst_poll_exit(main_st);
334 dst_state_socket_release(main_st);
335 dst_state_unlock(main_st);
336 dst_state_put(main_st);
337 dprintk("%s: freed listening socket st: %p.\n", __func__, main_st);
338
339 return 0;
340}
341
342int dst_start_export(struct dst_node *n)
343{
344 if (list_empty(&n->security_list)) {
345 printk(KERN_ERR "You are trying to export node '%s' "
346 "without security attributes.\nNo clients will "
347 "be allowed to connect. Exiting.\n", n->name);
348 return -EINVAL;
349 }
350 return dst_node_trans_init(n, sizeof(struct dst_export_priv));
351}
352
353/*
354 * Initialize listening state and schedule accepting thread.
355 */
356int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le)
357{
358 struct dst_state *st;
359 int err = -ENOMEM;
360 struct dst_network_ctl *ctl = &le->ctl;
361
362 memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl));
363
364 st = dst_state_alloc(n);
365 if (IS_ERR(st)) {
366 err = PTR_ERR(st);
367 goto err_out_exit;
368 }
369 memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl));
370
371 err = dst_state_socket_create(st);
372 if (err)
373 goto err_out_put;
374
375 st->socket->sk->sk_reuse = 1;
376
377 err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr,
378 ctl->addr.sa_data_len);
379 if (err)
380 goto err_out_socket_release;
381
382 err = kernel_listen(st->socket, 1024);
383 if (err)
384 goto err_out_socket_release;
385 n->state = st;
386
387 err = dst_poll_init(st);
388 if (err)
389 goto err_out_socket_release;
390
391 dst_state_get(st);
392
393 err = thread_pool_schedule(n->pool, dst_thread_setup,
394 dst_accept, st, MAX_SCHEDULE_TIMEOUT);
395 if (err)
396 goto err_out_poll_exit;
397
398 return 0;
399
400err_out_poll_exit:
401 dst_poll_exit(st);
402err_out_socket_release:
403 dst_state_socket_release(st);
404err_out_put:
405 dst_state_put(st);
406err_out_exit:
407 n->state = NULL;
408 return err;
409}
410
411/*
412 * Free bio and related private data.
413 * Also drop a reference counter for appropriate state,
414 * which waits when there are no more block IOs in-flight.
415 */
416static void dst_bio_destructor(struct bio *bio)
417{
418 struct bio_vec *bv;
419 struct dst_export_priv *priv = bio->bi_private;
420 int i;
421
422 bio_for_each_segment(bv, bio, i) {
423 if (!bv->bv_page)
424 break;
425
426 __free_page(bv->bv_page);
427 }
428
429 if (priv)
430 dst_state_put(priv->state);
431 bio_free(bio, dst_bio_set);
432}
433
434/*
435 * Block IO completion. Queue request to be sent back to
436 * the client (or just confirmation).
437 */
438static void dst_bio_end_io(struct bio *bio, int err)
439{
440 struct dst_export_priv *p = bio->bi_private;
441 struct dst_state *st = p->state;
442 unsigned long flags;
443
444 spin_lock_irqsave(&st->request_lock, flags);
445 list_add_tail(&p->request_entry, &st->request_list);
446 spin_unlock_irqrestore(&st->request_lock, flags);
447
448 wake_up(&st->thread_wait);
449}
450
451/*
452 * Allocate read request for the server.
453 */
454static int dst_export_read_request(struct bio *bio, unsigned int total_size)
455{
456 unsigned int size;
457 struct page *page;
458 int err;
459
460 while (total_size) {
461 err = -ENOMEM;
462 page = alloc_page(GFP_KERNEL);
463 if (!page)
464 goto err_out_exit;
465
466 size = min_t(unsigned int, PAGE_SIZE, total_size);
467
468 err = bio_add_page(bio, page, size, 0);
469 dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
470 __func__, (u64)bio->bi_sector, bio->bi_size,
471 size, err);
472 if (err <= 0)
473 goto err_out_free_page;
474
475 total_size -= size;
476 }
477
478 return 0;
479
480err_out_free_page:
481 __free_page(page);
482err_out_exit:
483 return err;
484}
485
486/*
487 * Allocate write request for the server.
488 * Should not only get pages, but also read data from the network.
489 */
490static int dst_export_write_request(struct dst_state *st,
491 struct bio *bio, unsigned int total_size)
492{
493 unsigned int size;
494 struct page *page;
495 void *data;
496 int err;
497
498 while (total_size) {
499 err = -ENOMEM;
500 page = alloc_page(GFP_KERNEL);
501 if (!page)
502 goto err_out_exit;
503
504 data = kmap(page);
505 if (!data)
506 goto err_out_free_page;
507
508 size = min_t(unsigned int, PAGE_SIZE, total_size);
509
510 err = dst_data_recv(st, data, size);
511 if (err)
512 goto err_out_unmap_page;
513
514 err = bio_add_page(bio, page, size, 0);
515 if (err <= 0)
516 goto err_out_unmap_page;
517
518 kunmap(page);
519
520 total_size -= size;
521 }
522
523 return 0;
524
525err_out_unmap_page:
526 kunmap(page);
527err_out_free_page:
528 __free_page(page);
529err_out_exit:
530 return err;
531}
532
533/*
534 * Groovy, we've gotten an IO request from the client.
535 * Allocate BIO from the bioset, private data from the mempool
536 * and lots of pages for IO.
537 */
538int dst_process_io(struct dst_state *st)
539{
540 struct dst_node *n = st->node;
541 struct dst_cmd *cmd = st->data;
542 struct bio *bio;
543 struct dst_export_priv *priv;
544 int err = -ENOMEM;
545
546 if (unlikely(!n->bdev)) {
547 err = -EINVAL;
548 goto err_out_exit;
549 }
550
551 bio = bio_alloc_bioset(GFP_KERNEL,
552 PAGE_ALIGN(cmd->size) >> PAGE_SHIFT,
553 dst_bio_set);
554 if (!bio)
555 goto err_out_exit;
556
557 priv = (struct dst_export_priv *)(((void *)bio) -
558 sizeof (struct dst_export_priv));
559
560 priv->state = dst_state_get(st);
561 priv->bio = bio;
562
563 bio->bi_private = priv;
564 bio->bi_end_io = dst_bio_end_io;
565 bio->bi_destructor = dst_bio_destructor;
566 bio->bi_bdev = n->bdev;
567
568 /*
569 * Server side is only interested in two low bits:
570 * uptodate (set by itself actually) and rw block
571 */
572 bio->bi_flags |= cmd->flags & 3;
573
574 bio->bi_rw = cmd->rw;
575 bio->bi_size = 0;
576 bio->bi_sector = cmd->sector;
577
578 dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id);
579
580 priv->cmd.flags = 0;
581 priv->cmd.size = cmd->size;
582
583 if (bio_data_dir(bio) == WRITE) {
584 err = dst_recv_cdata(st, priv->cmd.hash);
585 if (err)
586 goto err_out_free;
587
588 err = dst_export_write_request(st, bio, cmd->size);
589 if (err)
590 goto err_out_free;
591
592 if (dst_need_crypto(n))
593 return dst_export_crypto(n, bio);
594 } else {
595 err = dst_export_read_request(bio, cmd->size);
596 if (err)
597 goto err_out_free;
598 }
599
600 dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
601 __func__, (u64)bio->bi_sector, bio->bi_size,
602 bio->bi_rw, bio_data_dir(bio),
603 bio->bi_flags, bio->bi_phys_segments);
604
605 generic_make_request(bio);
606
607 return 0;
608
609err_out_free:
610 bio_put(bio);
611err_out_exit:
612 return err;
613}
614
615/*
616 * Ok, block IO is ready, let's send it back to the client...
617 */
618int dst_export_send_bio(struct bio *bio)
619{
620 struct dst_export_priv *p = bio->bi_private;
621 struct dst_state *st = p->state;
622 struct dst_cmd *cmd = &p->cmd;
623 int err;
624
625 dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
626 __func__, cmd->id, (u64)bio->bi_sector, bio->bi_size,
627 cmd->csize, bio->bi_flags, bio->bi_rw);
628
629 dst_convert_cmd(cmd);
630
631 dst_state_lock(st);
632 if (!st->socket) {
633 err = -ECONNRESET;
634 goto err_out_unlock;
635 }
636
637 if (bio_data_dir(bio) == WRITE) {
638 /* ... or just confirmation that writing has completed. */
639 cmd->size = cmd->csize = 0;
640 err = dst_data_send_header(st->socket, cmd,
641 sizeof(struct dst_cmd), 0);
642 if (err)
643 goto err_out_unlock;
644 } else {
645 err = dst_send_bio(st, cmd, bio);
646 if (err)
647 goto err_out_unlock;
648 }
649
650 dst_state_unlock(st);
651
652 bio_put(bio);
653 return 0;
654
655err_out_unlock:
656 dst_state_unlock(st);
657
658 bio_put(bio);
659 return err;
660}
diff --git a/drivers/staging/dst/state.c b/drivers/staging/dst/state.c
deleted file mode 100644
index 02a05e6c48c3..000000000000
--- a/drivers/staging/dst/state.c
+++ /dev/null
@@ -1,844 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/buffer_head.h>
17#include <linux/blkdev.h>
18#include <linux/bio.h>
19#include <linux/connector.h>
20#include <linux/dst.h>
21#include <linux/device.h>
22#include <linux/in.h>
23#include <linux/in6.h>
24#include <linux/socket.h>
25#include <linux/slab.h>
26
27#include <net/sock.h>
28
29/*
30 * Polling machinery.
31 */
32
/*
 * Wraps the poll_table handed to the socket's ->poll() method together
 * with the owning state, so that dst_queue_func() can get back to the
 * state with container_of() on the poll_table pointer.
 */
33struct dst_poll_helper {
34 poll_table pt;	/* passed to ->poll() by dst_poll_init() */
35 struct dst_state *st;	/* state whose wait entry gets queued */
36};
37
38static int dst_queue_wake(wait_queue_t *wait, unsigned mode,
39 int sync, void *key)
40{
41 struct dst_state *st = container_of(wait, struct dst_state, wait);
42
43 wake_up(&st->thread_wait);
44 return 1;
45}
46
47static void dst_queue_func(struct file *file, wait_queue_head_t *whead,
48 poll_table *pt)
49{
50 struct dst_state *st = container_of(pt, struct dst_poll_helper, pt)->st;
51
52 st->whead = whead;
53 init_waitqueue_func_entry(&st->wait, dst_queue_wake);
54 add_wait_queue(whead, &st->wait);
55}
56
57void dst_poll_exit(struct dst_state *st)
58{
59 if (st->whead) {
60 remove_wait_queue(st->whead, &st->wait);
61 st->whead = NULL;
62 }
63}
64
65int dst_poll_init(struct dst_state *st)
66{
67 struct dst_poll_helper ph;
68
69 ph.st = st;
70 init_poll_funcptr(&ph.pt, &dst_queue_func);
71
72 st->socket->ops->poll(NULL, st->socket, &ph.pt);
73 return 0;
74}
75
76/*
77 * Header receiving function - may block.
78 */
79static int dst_data_recv_header(struct socket *sock,
80 void *data, unsigned int size, int block)
81{
82 struct msghdr msg;
83 struct kvec iov;
84 int err;
85
86 iov.iov_base = data;
87 iov.iov_len = size;
88
89 msg.msg_iov = (struct iovec *)&iov;
90 msg.msg_iovlen = 1;
91 msg.msg_name = NULL;
92 msg.msg_namelen = 0;
93 msg.msg_control = NULL;
94 msg.msg_controllen = 0;
95 msg.msg_flags = (block) ? MSG_WAITALL : MSG_DONTWAIT;
96
97 err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len,
98 msg.msg_flags);
99 if (err != size)
100 return -1;
101
102 return 0;
103}
104
105/*
106 * Header sending function - may block.
107 */
108int dst_data_send_header(struct socket *sock,
109 void *data, unsigned int size, int more)
110{
111 struct msghdr msg;
112 struct kvec iov;
113 int err;
114
115 iov.iov_base = data;
116 iov.iov_len = size;
117
118 msg.msg_iov = (struct iovec *)&iov;
119 msg.msg_iovlen = 1;
120 msg.msg_name = NULL;
121 msg.msg_namelen = 0;
122 msg.msg_control = NULL;
123 msg.msg_controllen = 0;
124 msg.msg_flags = MSG_WAITALL | (more ? MSG_MORE : 0);
125
126 err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
127 if (err != size) {
128 dprintk("%s: size: %u, more: %d, err: %d.\n",
129 __func__, size, more, err);
130 return -1;
131 }
132
133 return 0;
134}
135
136/*
137 * Block autoconfiguration: request size of the storage and permissions.
138 */
139static int dst_request_remote_config(struct dst_state *st)
140{
141 struct dst_node *n = st->node;
142 int err = -EINVAL;
143 struct dst_cmd *cmd = st->data;
144
145 memset(cmd, 0, sizeof(struct dst_cmd));
146 cmd->cmd = DST_CFG;
147
148 dst_convert_cmd(cmd);
149
150 err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
151 if (err)
152 goto out;
153
154 err = dst_data_recv_header(st->socket, cmd, sizeof(struct dst_cmd), 1);
155 if (err)
156 goto out;
157
158 dst_convert_cmd(cmd);
159
160 if (cmd->cmd != DST_CFG) {
161 err = -EINVAL;
162 dprintk("%s: checking result: cmd: %d, size reported: %llu.\n",
163 __func__, cmd->cmd, cmd->sector);
164 goto out;
165 }
166
167 if (n->size != 0)
168 n->size = min_t(loff_t, n->size, cmd->sector);
169 else
170 n->size = cmd->sector;
171
172 n->info->size = n->size;
173 st->permissions = cmd->rw;
174
175out:
176 dprintk("%s: n: %p, err: %d, size: %llu, permission: %x.\n",
177 __func__, n, err, n->size, st->permissions);
178 return err;
179}
180
181/*
182 * Socket machinery.
183 */
184
185#define DST_DEFAULT_TIMEO 20000
186
187int dst_state_socket_create(struct dst_state *st)
188{
189 int err;
190 struct socket *sock;
191 struct dst_network_ctl *ctl = &st->ctl;
192
193 err = sock_create(ctl->addr.sa_family, ctl->type, ctl->proto, &sock);
194 if (err < 0)
195 return err;
196
197 sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo =
198 msecs_to_jiffies(DST_DEFAULT_TIMEO);
199 sock->sk->sk_allocation = GFP_NOIO;
200
201 st->socket = st->read_socket = sock;
202 return 0;
203}
204
205void dst_state_socket_release(struct dst_state *st)
206{
207 dprintk("%s: st: %p, socket: %p, n: %p.\n",
208 __func__, st, st->socket, st->node);
209 if (st->socket) {
210 sock_release(st->socket);
211 st->socket = NULL;
212 st->read_socket = NULL;
213 }
214}
215
216void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str)
217{
218 if (sk->ops->family == AF_INET) {
219 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
220 printk(KERN_INFO "%s %u.%u.%u.%u:%d.\n", str,
221 NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
222 } else if (sk->ops->family == AF_INET6) {
223 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)sa;
224 printk(KERN_INFO "%s %pi6:%d",
225 str, &sin->sin6_addr, ntohs(sin->sin6_port));
226 }
227}
228
229void dst_state_exit_connected(struct dst_state *st)
230{
231 if (st->socket) {
232 dst_poll_exit(st);
233 st->socket->ops->shutdown(st->socket, 2);
234
235 dst_dump_addr(st->socket, (struct sockaddr *)&st->ctl.addr,
236 "Disconnected peer");
237 dst_state_socket_release(st);
238 }
239}
240
241static int dst_state_init_connected(struct dst_state *st)
242{
243 int err;
244 struct dst_network_ctl *ctl = &st->ctl;
245
246 err = dst_state_socket_create(st);
247 if (err)
248 goto err_out_exit;
249
250 err = kernel_connect(st->socket, (struct sockaddr *)&st->ctl.addr,
251 st->ctl.addr.sa_data_len, 0);
252 if (err)
253 goto err_out_release;
254
255 err = dst_poll_init(st);
256 if (err)
257 goto err_out_release;
258
259 dst_dump_addr(st->socket, (struct sockaddr *)&ctl->addr,
260 "Connected to peer");
261
262 return 0;
263
264err_out_release:
265 dst_state_socket_release(st);
266err_out_exit:
267 return err;
268}
269
/*
 * State reset: drop the current connection and try to connect to the
 * remote peer again.  The reconnect result is deliberately ignored -
 * a failed attempt is simply retried later.
 */
static inline void dst_state_reset_nolock(struct dst_state *st)
{
	dst_state_exit_connected(st);

	/* Failure is tolerated here, see above. */
	(void)dst_state_init_connected(st);
}
279
/*
 * Same as dst_state_reset_nolock(), but takes and releases the state
 * lock around the reset.
 */
static inline void dst_state_reset(struct dst_state *st)
{
	dst_state_lock(st);

	dst_state_reset_nolock(st);

	dst_state_unlock(st);
}
286
287/*
288 * Basic network sending/receiving functions.
289 * Blocked mode is used.
290 */
291static int dst_data_recv_raw(struct dst_state *st, void *buf, u64 size)
292{
293 struct msghdr msg;
294 struct kvec iov;
295 int err;
296
297 BUG_ON(!size);
298
299 iov.iov_base = buf;
300 iov.iov_len = size;
301
302 msg.msg_iov = (struct iovec *)&iov;
303 msg.msg_iovlen = 1;
304 msg.msg_name = NULL;
305 msg.msg_namelen = 0;
306 msg.msg_control = NULL;
307 msg.msg_controllen = 0;
308 msg.msg_flags = MSG_DONTWAIT;
309
310 err = kernel_recvmsg(st->socket, &msg, &iov, 1, iov.iov_len,
311 msg.msg_flags);
312 if (err <= 0) {
313 dprintk("%s: failed to recv data: size: %llu, err: %d.\n",
314 __func__, size, err);
315 if (err == 0)
316 err = -ECONNRESET;
317
318 dst_state_exit_connected(st);
319 }
320
321 return err;
322}
323
324/*
325 * Ping command to early detect failed nodes.
326 */
327static int dst_send_ping(struct dst_state *st)
328{
329 struct dst_cmd *cmd = st->data;
330 int err = -ECONNRESET;
331
332 dst_state_lock(st);
333 if (st->socket) {
334 memset(cmd, 0, sizeof(struct dst_cmd));
335
336 cmd->cmd = __cpu_to_be32(DST_PING);
337
338 err = dst_data_send_header(st->socket, cmd,
339 sizeof(struct dst_cmd), 0);
340 }
341 dprintk("%s: st: %p, socket: %p, err: %d.\n", __func__,
342 st, st->socket, err);
343 dst_state_unlock(st);
344
345 return err;
346}
347
348/*
349 * Receiving function, which should either return error or read
350 * whole block request. If there was no traffic for a one second,
351 * send a ping, since remote node may die.
352 */
353int dst_data_recv(struct dst_state *st, void *data, unsigned int size)
354{
355 unsigned int revents = 0;
356 unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
357 unsigned int mask = err_mask | POLLIN;
358 struct dst_node *n = st->node;
359 int err = 0;
360
361 while (size && !err) {
362 revents = dst_state_poll(st);
363
364 if (!(revents & mask)) {
365 DEFINE_WAIT(wait);
366
367 for (;;) {
368 prepare_to_wait(&st->thread_wait, &wait,
369 TASK_INTERRUPTIBLE);
370 if (!n->trans_scan_timeout || st->need_exit)
371 break;
372
373 revents = dst_state_poll(st);
374
375 if (revents & mask)
376 break;
377
378 if (signal_pending(current))
379 break;
380
381 if (!schedule_timeout(HZ)) {
382 err = dst_send_ping(st);
383 if (err)
384 return err;
385 }
386
387 continue;
388 }
389 finish_wait(&st->thread_wait, &wait);
390 }
391
392 err = -ECONNRESET;
393 dst_state_lock(st);
394
395 if (st->socket && (st->read_socket == st->socket) &&
396 (revents & POLLIN)) {
397 err = dst_data_recv_raw(st, data, size);
398 if (err > 0) {
399 data += err;
400 size -= err;
401 err = 0;
402 }
403 }
404
405 if (revents & err_mask || !st->socket) {
406 dprintk("%s: revents: %x, socket: %p, size: %u, "
407 "err: %d.\n", __func__, revents,
408 st->socket, size, err);
409 err = -ECONNRESET;
410 }
411
412 dst_state_unlock(st);
413
414 if (!n->trans_scan_timeout)
415 err = -ENODEV;
416 }
417
418 return err;
419}
420
421/*
422 * Send block autoconf reply.
423 */
424static int dst_process_cfg(struct dst_state *st)
425{
426 struct dst_node *n = st->node;
427 struct dst_cmd *cmd = st->data;
428 int err;
429
430 cmd->sector = n->size;
431 cmd->rw = st->permissions;
432
433 dst_convert_cmd(cmd);
434
435 dst_state_lock(st);
436 err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
437 dst_state_unlock(st);
438
439 return err;
440}
441
442/*
443 * Receive block IO from the network.
444 */
445static int dst_recv_bio(struct dst_state *st, struct bio *bio,
446 unsigned int total_size)
447{
448 struct bio_vec *bv;
449 int i, err;
450 void *data;
451 unsigned int sz;
452
453 bio_for_each_segment(bv, bio, i) {
454 sz = min(total_size, bv->bv_len);
455
456 dprintk("%s: bio: %llu/%u, total: %u, len: %u, sz: %u, "
457 "off: %u.\n", __func__, (u64)bio->bi_sector,
458 bio->bi_size, total_size, bv->bv_len, sz,
459 bv->bv_offset);
460
461 data = kmap(bv->bv_page) + bv->bv_offset;
462 err = dst_data_recv(st, data, sz);
463 kunmap(bv->bv_page);
464
465 bv->bv_len = sz;
466
467 if (err)
468 return err;
469
470 total_size -= sz;
471 if (total_size == 0)
472 break;
473 }
474
475 return 0;
476}
477
478/*
479 * Our block IO has just completed and arrived: get it.
480 */
481static int dst_process_io_response(struct dst_state *st)
482{
483 struct dst_node *n = st->node;
484 struct dst_cmd *cmd = st->data;
485 struct dst_trans *t;
486 int err = 0;
487 struct bio *bio;
488
489 mutex_lock(&n->trans_lock);
490 t = dst_trans_search(n, cmd->id);
491 mutex_unlock(&n->trans_lock);
492
493 if (!t)
494 goto err_out_exit;
495
496 bio = t->bio;
497
498 dprintk("%s: bio: %llu/%u, cmd_size: %u, csize: %u, dir: %lu.\n",
499 __func__, (u64)bio->bi_sector, bio->bi_size, cmd->size,
500 cmd->csize, bio_data_dir(bio));
501
502 if (bio_data_dir(bio) == READ) {
503 if (bio->bi_size != cmd->size - cmd->csize)
504 goto err_out_exit;
505
506 if (dst_need_crypto(n)) {
507 err = dst_recv_cdata(st, t->cmd.hash);
508 if (err)
509 goto err_out_exit;
510 }
511
512 err = dst_recv_bio(st, t->bio, bio->bi_size);
513 if (err)
514 goto err_out_exit;
515
516 if (dst_need_crypto(n))
517 return dst_trans_crypto(t);
518 } else {
519 err = -EBADMSG;
520 if (cmd->size || cmd->csize)
521 goto err_out_exit;
522 }
523
524 dst_trans_remove(t);
525 dst_trans_put(t);
526
527 return 0;
528
529err_out_exit:
530 return err;
531}
532
533/*
534 * Receive crypto data.
535 */
536int dst_recv_cdata(struct dst_state *st, void *cdata)
537{
538 struct dst_cmd *cmd = st->data;
539 struct dst_node *n = st->node;
540 struct dst_crypto_ctl *c = &n->crypto;
541 int err;
542
543 if (cmd->csize != c->crypto_attached_size) {
544 dprintk("%s: cmd: cmd: %u, sector: %llu, size: %u, "
545 "csize: %u != digest size %u.\n",
546 __func__, cmd->cmd, cmd->sector, cmd->size,
547 cmd->csize, c->crypto_attached_size);
548 err = -EINVAL;
549 goto err_out_exit;
550 }
551
552 err = dst_data_recv(st, cdata, cmd->csize);
553 if (err)
554 goto err_out_exit;
555
556 cmd->size -= cmd->csize;
557 return 0;
558
559err_out_exit:
560 return err;
561}
562
563/*
564 * Receive the command and start its processing.
565 */
566static int dst_recv_processing(struct dst_state *st)
567{
568 int err = -EINTR;
569 struct dst_cmd *cmd = st->data;
570
571 /*
572 * If socket will be reset after this statement, then
573 * dst_data_recv() will just fail and loop will
574 * start again, so it can be done without any locks.
575 *
576 * st->read_socket is needed to prevents state machine
577 * breaking between this data reading and subsequent one
578 * in protocol specific functions during connection reset.
579 * In case of reset we have to read next command and do
580 * not expect data for old command to magically appear in
581 * new connection.
582 */
583 st->read_socket = st->socket;
584 err = dst_data_recv(st, cmd, sizeof(struct dst_cmd));
585 if (err)
586 goto out_exit;
587
588 dst_convert_cmd(cmd);
589
590 dprintk("%s: cmd: %u, size: %u, csize: %u, id: %llu, "
591 "sector: %llu, flags: %llx, rw: %llx.\n",
592 __func__, cmd->cmd, cmd->size,
593 cmd->csize, cmd->id, cmd->sector,
594 cmd->flags, cmd->rw);
595
596 /*
597 * This should catch protocol breakage and random garbage
598 * instead of commands.
599 */
600 if (unlikely(cmd->csize > st->size - sizeof(struct dst_cmd))) {
601 err = -EBADMSG;
602 goto out_exit;
603 }
604
605 err = -EPROTO;
606 switch (cmd->cmd) {
607 case DST_IO_RESPONSE:
608 err = dst_process_io_response(st);
609 break;
610 case DST_IO:
611 err = dst_process_io(st);
612 break;
613 case DST_CFG:
614 err = dst_process_cfg(st);
615 break;
616 case DST_PING:
617 err = 0;
618 break;
619 default:
620 break;
621 }
622
623out_exit:
624 return err;
625}
626
627/*
628 * Receiving thread. For the client node we should try to reconnect,
629 * for accepted client we just drop the state and expect it to reconnect.
630 */
631static int dst_recv(void *init_data, void *schedule_data)
632{
633 struct dst_state *st = schedule_data;
634 struct dst_node *n = init_data;
635 int err = 0;
636
637 dprintk("%s: start st: %p, n: %p, scan: %lu, need_exit: %d.\n",
638 __func__, st, n, n->trans_scan_timeout, st->need_exit);
639
640 while (n->trans_scan_timeout && !st->need_exit) {
641 err = dst_recv_processing(st);
642 if (err < 0) {
643 if (!st->ctl.type)
644 break;
645
646 if (!n->trans_scan_timeout || st->need_exit)
647 break;
648
649 dst_state_reset(st);
650 msleep(1000);
651 }
652 }
653
654 st->need_exit = 1;
655 wake_up(&st->thread_wait);
656
657 dprintk("%s: freeing receiving socket st: %p.\n", __func__, st);
658 dst_state_lock(st);
659 dst_state_exit_connected(st);
660 dst_state_unlock(st);
661 dst_state_put(st);
662
663 dprintk("%s: freed receiving socket st: %p.\n", __func__, st);
664
665 return err;
666}
667
668/*
669 * Network state dies here and borns couple of lines below.
670 * This object is the main network state processing engine:
671 * sending, receiving, reconnections, all network related
672 * tasks are handled on behalf of the state.
673 */
674static void dst_state_free(struct dst_state *st)
675{
676 dprintk("%s: st: %p.\n", __func__, st);
677 if (st->cleanup)
678 st->cleanup(st);
679 kfree(st->data);
680 kfree(st);
681}
682
683struct dst_state *dst_state_alloc(struct dst_node *n)
684{
685 struct dst_state *st;
686 int err = -ENOMEM;
687
688 st = kzalloc(sizeof(struct dst_state), GFP_KERNEL);
689 if (!st)
690 goto err_out_exit;
691
692 st->node = n;
693 st->need_exit = 0;
694
695 st->size = PAGE_SIZE;
696 st->data = kmalloc(st->size, GFP_KERNEL);
697 if (!st->data)
698 goto err_out_free;
699
700 spin_lock_init(&st->request_lock);
701 INIT_LIST_HEAD(&st->request_list);
702
703 mutex_init(&st->state_lock);
704 init_waitqueue_head(&st->thread_wait);
705
706 /*
707 * One for processing thread, another one for node itself.
708 */
709 atomic_set(&st->refcnt, 2);
710
711 dprintk("%s: st: %p, n: %p.\n", __func__, st, st->node);
712
713 return st;
714
715err_out_free:
716 kfree(st);
717err_out_exit:
718 return ERR_PTR(err);
719}
720
721int dst_state_schedule_receiver(struct dst_state *st)
722{
723 return thread_pool_schedule_private(st->node->pool, dst_thread_setup,
724 dst_recv, st, MAX_SCHEDULE_TIMEOUT, st->node);
725}
726
727/*
728 * Initialize client's connection to the remote peer: allocate state,
729 * connect and perform block IO autoconfiguration.
730 */
731int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r)
732{
733 struct dst_state *st;
734 int err = -ENOMEM;
735
736 st = dst_state_alloc(n);
737 if (IS_ERR(st)) {
738 err = PTR_ERR(st);
739 goto err_out_exit;
740 }
741 memcpy(&st->ctl, r, sizeof(struct dst_network_ctl));
742
743 err = dst_state_init_connected(st);
744 if (err)
745 goto err_out_free_data;
746
747 err = dst_request_remote_config(st);
748 if (err)
749 goto err_out_exit_connected;
750 n->state = st;
751
752 err = dst_state_schedule_receiver(st);
753 if (err)
754 goto err_out_exit_connected;
755
756 return 0;
757
758err_out_exit_connected:
759 dst_state_exit_connected(st);
760err_out_free_data:
761 dst_state_free(st);
762err_out_exit:
763 n->state = NULL;
764 return err;
765}
766
767void dst_state_put(struct dst_state *st)
768{
769 dprintk("%s: st: %p, refcnt: %d.\n",
770 __func__, st, atomic_read(&st->refcnt));
771 if (atomic_dec_and_test(&st->refcnt))
772 dst_state_free(st);
773}
774
775/*
776 * Send block IO to the network one by one using zero-copy ->sendpage().
777 */
778int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio)
779{
780 struct bio_vec *bv;
781 struct dst_crypto_ctl *c = &st->node->crypto;
782 int err, i = 0;
783 int flags = MSG_WAITALL;
784
785 err = dst_data_send_header(st->socket, cmd,
786 sizeof(struct dst_cmd) + c->crypto_attached_size, bio->bi_vcnt);
787 if (err)
788 goto err_out_exit;
789
790 bio_for_each_segment(bv, bio, i) {
791 if (i < bio->bi_vcnt - 1)
792 flags |= MSG_MORE;
793
794 err = kernel_sendpage(st->socket, bv->bv_page, bv->bv_offset,
795 bv->bv_len, flags);
796 if (err <= 0)
797 goto err_out_exit;
798 }
799
800 return 0;
801
802err_out_exit:
803 dprintk("%s: %d/%d, flags: %x, err: %d.\n",
804 __func__, i, bio->bi_vcnt, flags, err);
805 return err;
806}
807
808/*
809 * Send transaction to the remote peer.
810 */
811int dst_trans_send(struct dst_trans *t)
812{
813 int err;
814 struct dst_state *st = t->n->state;
815 struct bio *bio = t->bio;
816
817 dst_convert_cmd(&t->cmd);
818
819 dst_state_lock(st);
820 if (!st->socket) {
821 err = dst_state_init_connected(st);
822 if (err)
823 goto err_out_unlock;
824 }
825
826 if (bio_data_dir(bio) == WRITE) {
827 err = dst_send_bio(st, &t->cmd, t->bio);
828 } else {
829 err = dst_data_send_header(st->socket, &t->cmd,
830 sizeof(struct dst_cmd), 0);
831 }
832 if (err)
833 goto err_out_reset;
834
835 dst_state_unlock(st);
836 return 0;
837
838err_out_reset:
839 dst_state_reset_nolock(st);
840err_out_unlock:
841 dst_state_unlock(st);
842
843 return err;
844}
diff --git a/drivers/staging/dst/thread_pool.c b/drivers/staging/dst/thread_pool.c
deleted file mode 100644
index 29a82b2602f3..000000000000
--- a/drivers/staging/dst/thread_pool.c
+++ /dev/null
@@ -1,348 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/kernel.h>
17#include <linux/dst.h>
18#include <linux/kthread.h>
19#include <linux/slab.h>
20
21/*
22 * Thread pool abstraction allows to schedule a work to be performed
23 * on behalf of kernel thread. One does not operate with threads itself,
24 * instead user provides setup and cleanup callbacks for thread pool itself,
25 * and action and cleanup callbacks for each submitted work.
26 *
27 * Each worker has private data initialized at creation time and data,
28 * provided by user at scheduling time.
29 *
30 * When action is being performed, thread can not be used by other users,
31 * instead they will sleep until there is free thread to pick their work.
32 */
33struct thread_pool_worker {
34 struct list_head worker_entry;	/* link in the pool's ready_list or active_list */
35
36 struct task_struct *thread;	/* kthread running thread_pool_worker_func() */
37
38 struct thread_pool *pool;	/* owning pool */
39
40 int error;	/* NOTE(review): not referenced by the code visible in this file */
41 int has_data;	/* set when work is scheduled, cleared when the worker becomes ready again */
42 int need_exit;	/* set by thread_pool_del_worker_id() for a busy worker; acted on after its action */
43 unsigned int id;	/* identifier given at thread_pool_add_worker() time */
44
45 wait_queue_head_t wait;	/* the worker thread sleeps here waiting for work */
46
47 void *private;	/* result of the init() callback */
48 void *schedule_data;	/* data supplied by the user at scheduling time */
49
50 int (*action)(void *private, void *schedule_data);	/* work to run, set at scheduling time */
51 void (*cleanup)(void *private);	/* called with ->private when the worker is destroyed */
52};
53
54static void thread_pool_exit_worker(struct thread_pool_worker *w)
55{
56 kthread_stop(w->thread);
57
58 w->cleanup(w->private);
59 kfree(w);
60}
61
62/*
63 * Called to mark thread as ready and allow users to schedule new work.
64 */
65static void thread_pool_worker_make_ready(struct thread_pool_worker *w)
66{
67 struct thread_pool *p = w->pool;
68
69 mutex_lock(&p->thread_lock);
70
71 if (!w->need_exit) {
72 list_move_tail(&w->worker_entry, &p->ready_list);
73 w->has_data = 0;
74 mutex_unlock(&p->thread_lock);
75
76 wake_up(&p->wait);
77 } else {
78 p->thread_num--;
79 list_del(&w->worker_entry);
80 mutex_unlock(&p->thread_lock);
81
82 thread_pool_exit_worker(w);
83 }
84}
85
86/*
87 * Thread action loop: waits until there is new work.
88 */
89static int thread_pool_worker_func(void *data)
90{
91 struct thread_pool_worker *w = data;
92
93 while (!kthread_should_stop()) {
94 wait_event_interruptible(w->wait,
95 kthread_should_stop() || w->has_data);
96
97 if (kthread_should_stop())
98 break;
99
100 if (!w->has_data)
101 continue;
102
103 w->action(w->private, w->schedule_data);
104 thread_pool_worker_make_ready(w);
105 }
106
107 return 0;
108}
109
110/*
111 * Remove single worker without specifying which one.
112 */
113void thread_pool_del_worker(struct thread_pool *p)
114{
115 struct thread_pool_worker *w = NULL;
116
117 while (!w && p->thread_num) {
118 wait_event(p->wait, !list_empty(&p->ready_list) ||
119 !p->thread_num);
120
121 dprintk("%s: locking list_empty: %d, thread_num: %d.\n",
122 __func__, list_empty(&p->ready_list),
123 p->thread_num);
124
125 mutex_lock(&p->thread_lock);
126 if (!list_empty(&p->ready_list)) {
127 w = list_first_entry(&p->ready_list,
128 struct thread_pool_worker,
129 worker_entry);
130
131 dprintk("%s: deleting w: %p, thread_num: %d, "
132 "list: %p [%p.%p].\n", __func__,
133 w, p->thread_num, &p->ready_list,
134 p->ready_list.prev, p->ready_list.next);
135
136 p->thread_num--;
137 list_del(&w->worker_entry);
138 }
139 mutex_unlock(&p->thread_lock);
140 }
141
142 if (w)
143 thread_pool_exit_worker(w);
144 dprintk("%s: deleted w: %p, thread_num: %d.\n",
145 __func__, w, p->thread_num);
146}
147
148/*
149 * Remove a worker with given ID.
150 */
151void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id)
152{
153 struct thread_pool_worker *w;
154 int found = 0;
155
156 mutex_lock(&p->thread_lock);
157 list_for_each_entry(w, &p->ready_list, worker_entry) {
158 if (w->id == id) {
159 found = 1;
160 p->thread_num--;
161 list_del(&w->worker_entry);
162 break;
163 }
164 }
165
166 if (!found) {
167 list_for_each_entry(w, &p->active_list, worker_entry) {
168 if (w->id == id) {
169 w->need_exit = 1;
170 break;
171 }
172 }
173 }
174 mutex_unlock(&p->thread_lock);
175
176 if (found)
177 thread_pool_exit_worker(w);
178}
179
180/*
181 * Add new worker thread with given parameters.
182 * If initialization callback fails, return error.
183 */
184int thread_pool_add_worker(struct thread_pool *p,
185 char *name,
186 unsigned int id,
187 void *(*init)(void *private),
188 void (*cleanup)(void *private),
189 void *private)
190{
191 struct thread_pool_worker *w;
192 int err = -ENOMEM;
193
194 w = kzalloc(sizeof(struct thread_pool_worker), GFP_KERNEL);
195 if (!w)
196 goto err_out_exit;
197
198 w->pool = p;
199 init_waitqueue_head(&w->wait);
200 w->cleanup = cleanup;
201 w->id = id;
202
203 w->thread = kthread_run(thread_pool_worker_func, w, "%s", name);
204 if (IS_ERR(w->thread)) {
205 err = PTR_ERR(w->thread);
206 goto err_out_free;
207 }
208
209 w->private = init(private);
210 if (IS_ERR(w->private)) {
211 err = PTR_ERR(w->private);
212 goto err_out_stop_thread;
213 }
214
215 mutex_lock(&p->thread_lock);
216 list_add_tail(&w->worker_entry, &p->ready_list);
217 p->thread_num++;
218 mutex_unlock(&p->thread_lock);
219
220 return 0;
221
222err_out_stop_thread:
223 kthread_stop(w->thread);
224err_out_free:
225 kfree(w);
226err_out_exit:
227 return err;
228}
229
230/*
231 * Destroy the whole pool.
232 */
233void thread_pool_destroy(struct thread_pool *p)
234{
235 while (p->thread_num) {
236 dprintk("%s: num: %d.\n", __func__, p->thread_num);
237 thread_pool_del_worker(p);
238 }
239
240 kfree(p);
241}
242
243/*
244 * Create a pool with given number of threads.
245 * They will have sequential IDs started from zero.
246 */
247struct thread_pool *thread_pool_create(int num, char *name,
248 void *(*init)(void *private),
249 void (*cleanup)(void *private),
250 void *private)
251{
252 struct thread_pool_worker *w, *tmp;
253 struct thread_pool *p;
254 int err = -ENOMEM;
255 int i;
256
257 p = kzalloc(sizeof(struct thread_pool), GFP_KERNEL);
258 if (!p)
259 goto err_out_exit;
260
261 init_waitqueue_head(&p->wait);
262 mutex_init(&p->thread_lock);
263 INIT_LIST_HEAD(&p->ready_list);
264 INIT_LIST_HEAD(&p->active_list);
265 p->thread_num = 0;
266
267 for (i = 0; i < num; ++i) {
268 err = thread_pool_add_worker(p, name, i, init,
269 cleanup, private);
270 if (err)
271 goto err_out_free_all;
272 }
273
274 return p;
275
276err_out_free_all:
277 list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
278 list_del(&w->worker_entry);
279 thread_pool_exit_worker(w);
280 }
281 kfree(p);
282err_out_exit:
283 return ERR_PTR(err);
284}
285
286/*
287 * Schedule execution of the action on a given thread,
288 * provided ID pointer has to match previously stored
289 * private data.
290 */
291int thread_pool_schedule_private(struct thread_pool *p,
292 int (*setup)(void *private, void *data),
293 int (*action)(void *private, void *data),
294 void *data, long timeout, void *id)
295{
296 struct thread_pool_worker *w, *tmp, *worker = NULL;
297 int err = 0;
298
299 while (!worker && !err) {
300 timeout = wait_event_interruptible_timeout(p->wait,
301 !list_empty(&p->ready_list),
302 timeout);
303
304 if (!timeout) {
305 err = -ETIMEDOUT;
306 break;
307 }
308
309 worker = NULL;
310 mutex_lock(&p->thread_lock);
311 list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
312 if (id && id != w->private)
313 continue;
314
315 worker = w;
316
317 list_move_tail(&w->worker_entry, &p->active_list);
318
319 err = setup(w->private, data);
320 if (!err) {
321 w->schedule_data = data;
322 w->action = action;
323 w->has_data = 1;
324 wake_up(&w->wait);
325 } else {
326 list_move_tail(&w->worker_entry,
327 &p->ready_list);
328 }
329
330 break;
331 }
332 mutex_unlock(&p->thread_lock);
333 }
334
335 return err;
336}
337
338/*
339 * Schedule execution on arbitrary thread from the pool.
340 */
341int thread_pool_schedule(struct thread_pool *p,
342 int (*setup)(void *private, void *data),
343 int (*action)(void *private, void *data),
344 void *data, long timeout)
345{
346 return thread_pool_schedule_private(p, setup,
347 action, data, timeout, NULL);
348}
diff --git a/drivers/staging/dst/trans.c b/drivers/staging/dst/trans.c
deleted file mode 100644
index 1c36a6bc31d5..000000000000
--- a/drivers/staging/dst/trans.c
+++ /dev/null
@@ -1,337 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/bio.h>
17#include <linux/dst.h>
18#include <linux/slab.h>
19#include <linux/mempool.h>
20
21/*
22 * Transaction memory pool size.
23 */
24static int dst_mempool_num = 32;
25module_param(dst_mempool_num, int, 0644);
26
27/*
28 * Transaction tree management.
29 */
30static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new)
31{
32 if (gen < new)
33 return 1;
34 if (gen > new)
35 return -1;
36 return 0;
37}
38
39struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen)
40{
41 struct rb_root *root = &node->trans_root;
42 struct rb_node *n = root->rb_node;
43 struct dst_trans *t, *ret = NULL;
44 int cmp;
45
46 while (n) {
47 t = rb_entry(n, struct dst_trans, trans_entry);
48
49 cmp = dst_trans_cmp(t->gen, gen);
50 if (cmp < 0)
51 n = n->rb_left;
52 else if (cmp > 0)
53 n = n->rb_right;
54 else {
55 ret = t;
56 break;
57 }
58 }
59
60 dprintk("%s: %s transaction: id: %llu.\n", __func__,
61 (ret) ? "found" : "not found", gen);
62
63 return ret;
64}
65
66static int dst_trans_insert(struct dst_trans *new)
67{
68 struct rb_root *root = &new->n->trans_root;
69 struct rb_node **n = &root->rb_node, *parent = NULL;
70 struct dst_trans *ret = NULL, *t;
71 int cmp;
72
73 while (*n) {
74 parent = *n;
75
76 t = rb_entry(parent, struct dst_trans, trans_entry);
77
78 cmp = dst_trans_cmp(t->gen, new->gen);
79 if (cmp < 0)
80 n = &parent->rb_left;
81 else if (cmp > 0)
82 n = &parent->rb_right;
83 else {
84 ret = t;
85 break;
86 }
87 }
88
89 new->send_time = jiffies;
90 if (ret) {
91 printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, "
92 "send_time: %lu, new: gen: %llu, bio: %llu/%u, "
93 "send_time: %lu.\n", __func__,
94 ret->gen, (u64)ret->bio->bi_sector,
95 ret->bio->bi_size, ret->send_time,
96 new->gen, (u64)new->bio->bi_sector,
97 new->bio->bi_size, new->send_time);
98 return -EEXIST;
99 }
100
101 rb_link_node(&new->trans_entry, parent, n);
102 rb_insert_color(&new->trans_entry, root);
103
104 dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n",
105 __func__, new->gen, (u64)new->bio->bi_sector,
106 new->bio->bi_size, new->send_time);
107
108 return 0;
109}
110
111int dst_trans_remove_nolock(struct dst_trans *t)
112{
113 struct dst_node *n = t->n;
114
115 if (t->trans_entry.rb_parent_color) {
116 rb_erase(&t->trans_entry, &n->trans_root);
117 t->trans_entry.rb_parent_color = 0;
118 }
119 return 0;
120}
121
122int dst_trans_remove(struct dst_trans *t)
123{
124 int ret;
125 struct dst_node *n = t->n;
126
127 mutex_lock(&n->trans_lock);
128 ret = dst_trans_remove_nolock(t);
129 mutex_unlock(&n->trans_lock);
130
131 return ret;
132}
133
134/*
135 * When transaction is completed and there are no more users,
136 * we complete appriate block IO request with given error status.
137 */
138void dst_trans_put(struct dst_trans *t)
139{
140 if (atomic_dec_and_test(&t->refcnt)) {
141 struct bio *bio = t->bio;
142
143 dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n",
144 __func__, t, t->gen, bio);
145
146 bio_endio(bio, t->error);
147 bio_put(bio);
148
149 dst_node_put(t->n);
150 mempool_free(t, t->n->trans_pool);
151 }
152}
153
154/*
155 * Process given block IO request: allocate transaction, insert it into the tree
156 * and send/schedule crypto processing.
157 */
158int dst_process_bio(struct dst_node *n, struct bio *bio)
159{
160 struct dst_trans *t;
161 int err = -ENOMEM;
162
163 t = mempool_alloc(n->trans_pool, GFP_NOFS);
164 if (!t)
165 goto err_out_exit;
166
167 t->n = dst_node_get(n);
168 t->bio = bio;
169 t->error = 0;
170 t->retries = 0;
171 atomic_set(&t->refcnt, 1);
172 t->gen = atomic_long_inc_return(&n->gen);
173
174 t->enc = bio_data_dir(bio);
175 dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen);
176
177 mutex_lock(&n->trans_lock);
178 err = dst_trans_insert(t);
179 mutex_unlock(&n->trans_lock);
180 if (err)
181 goto err_out_free;
182
183 dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n",
184 __func__, t->gen, (u64)bio->bi_sector,
185 bio->bi_size, t->enc, dst_need_crypto(n));
186
187 if (dst_need_crypto(n) && t->enc)
188 dst_trans_crypto(t);
189 else
190 dst_trans_send(t);
191
192 return 0;
193
194err_out_free:
195 dst_node_put(n);
196 mempool_free(t, n->trans_pool);
197err_out_exit:
198 bio_endio(bio, err);
199 bio_put(bio);
200 return err;
201}
202
203/*
204 * Scan for timeout/stale transactions.
205 * Each transaction is being resent multiple times before error completion.
206 */
207static void dst_trans_scan(struct work_struct *work)
208{
209 struct dst_node *n = container_of(work, struct dst_node,
210 trans_work.work);
211 struct rb_node *rb_node;
212 struct dst_trans *t;
213 unsigned long timeout = n->trans_scan_timeout;
214 int num = 10 * n->trans_max_retries;
215
216 mutex_lock(&n->trans_lock);
217
218 for (rb_node = rb_first(&n->trans_root); rb_node; ) {
219 t = rb_entry(rb_node, struct dst_trans, trans_entry);
220
221 if (timeout && time_after(t->send_time + timeout, jiffies)
222 && t->retries == 0)
223 break;
224#if 0
225 dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n",
226 __func__, t, t->gen, n->name,
227 t->retries, n->trans_max_retries);
228#endif
229 if (--num == 0)
230 break;
231
232 dst_trans_get(t);
233
234 rb_node = rb_next(rb_node);
235
236 if (timeout && (++t->retries < n->trans_max_retries)) {
237 dst_trans_send(t);
238 } else {
239 t->error = -ETIMEDOUT;
240 dst_trans_remove_nolock(t);
241 dst_trans_put(t);
242 }
243
244 dst_trans_put(t);
245 }
246
247 mutex_unlock(&n->trans_lock);
248
249 /*
250 * If no timeout specified then system is in the middle of exiting
251 * process, so no need to reschedule scanning process again.
252 */
253 if (timeout) {
254 if (!num)
255 timeout = HZ;
256 schedule_delayed_work(&n->trans_work, timeout);
257 }
258}
259
260/*
261 * Flush all transactions and mark them as timed out.
262 * Destroy transaction pools.
263 */
264void dst_node_trans_exit(struct dst_node *n)
265{
266 struct dst_trans *t;
267 struct rb_node *rb_node;
268
269 if (!n->trans_cache)
270 return;
271
272 dprintk("%s: n: %p, cancelling the work.\n", __func__, n);
273 cancel_delayed_work_sync(&n->trans_work);
274 flush_scheduled_work();
275 dprintk("%s: n: %p, work has been cancelled.\n", __func__, n);
276
277 for (rb_node = rb_first(&n->trans_root); rb_node; ) {
278 t = rb_entry(rb_node, struct dst_trans, trans_entry);
279
280 dprintk("%s: t: %p, gen: %llu, n: %s.\n",
281 __func__, t, t->gen, n->name);
282
283 rb_node = rb_next(rb_node);
284
285 t->error = -ETIMEDOUT;
286 dst_trans_remove_nolock(t);
287 dst_trans_put(t);
288 }
289
290 mempool_destroy(n->trans_pool);
291 kmem_cache_destroy(n->trans_cache);
292}
293
294/*
295 * Initialize transaction storage for given node.
296 * Transaction stores not only control information,
297 * but also network command and crypto data (if needed)
298 * to reduce number of allocations. Thus transaction size
299 * differs from node to node.
300 */
301int dst_node_trans_init(struct dst_node *n, unsigned int size)
302{
303 /*
304 * We need this, since node with given name can be dropped from the
305 * hash table, but be still alive, so subsequent creation of the node
306 * with the same name may collide with existing cache name.
307 */
308
309 snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n);
310
311 n->trans_cache = kmem_cache_create(n->cache_name,
312 size + n->crypto.crypto_attached_size,
313 0, 0, NULL);
314 if (!n->trans_cache)
315 goto err_out_exit;
316
317 n->trans_pool = mempool_create_slab_pool(dst_mempool_num,
318 n->trans_cache);
319 if (!n->trans_pool)
320 goto err_out_cache_destroy;
321
322 mutex_init(&n->trans_lock);
323 n->trans_root = RB_ROOT;
324
325 INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan);
326 schedule_delayed_work(&n->trans_work, n->trans_scan_timeout);
327
328 dprintk("%s: n: %p, size: %u, crypto: %u.\n",
329 __func__, n, size, n->crypto.crypto_attached_size);
330
331 return 0;
332
333err_out_cache_destroy:
334 kmem_cache_destroy(n->trans_cache);
335err_out_exit:
336 return -ENOMEM;
337}
diff --git a/include/linux/dst.h b/include/linux/dst.h
deleted file mode 100644
index e26fed84b1aa..000000000000
--- a/include/linux/dst.h
+++ /dev/null
@@ -1,587 +0,0 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#ifndef __DST_H
17#define __DST_H
18
19#include <linux/types.h>
20#include <linux/connector.h>
21
22#define DST_NAMELEN 32
23#define DST_NAME "dst"
24
/* Control commands carried in struct dst_ctl. */
enum {
	DST_DEL_NODE = 0,	/* Remove node with given id from storage */
	DST_ADD_REMOTE,		/* Add remote node with given id to the storage */
	DST_ADD_EXPORT,		/* Add local node with given id to the storage,
				 * to be exported and used by remote peers */
	DST_CRYPTO,		/* Crypto initialization (hash/cipher used to
				 * protect the connection) */
	DST_SECURITY,		/* Security attributes for given connection
				 * (permissions for example) */
	DST_START,		/* Register given node in the block layer */
	DST_CMD_MAX		/* Number of control commands */
};
40
41struct dst_ctl
42{
43 /* Storage name */
44 char name[DST_NAMELEN];
45 /* Command flags */
46 __u32 flags;
47 /* Command itself (see above) */
48 __u32 cmd;
49 /* Maximum number of pages per single request in this device */
50 __u32 max_pages;
51 /* Stale/error transaction scanning timeout in milliseconds */
52 __u32 trans_scan_timeout;
53 /* Maximum number of retry sends before completing transaction as broken */
54 __u32 trans_max_retries;
55 /* Storage size */
56 __u64 size;
57};
58
59/* Reply command carries completion status */
60struct dst_ctl_ack
61{
62 struct cn_msg msg;
63 int error;
64 int unused[3];
65};
66
/*
 * Unfortunately socket address structure is not exported to userspace
 * and is redefined there.
 */
#define SADDR_MAX_DATA	128

struct saddr {
	/* Address family, AF_xxx */
	unsigned short		sa_family;
	/* Protocol address, up to SADDR_MAX_DATA bytes */
	char			sa_data[SADDR_MAX_DATA];
	/* Number of bytes used in sa_data */
	unsigned short		sa_data_len;
};
81
82/* Address structure */
83struct dst_network_ctl
84{
85 /* Socket type: datagram, stream...*/
86 unsigned int type;
87 /* Let me guess, is it a Jupiter diameter? */
88 unsigned int proto;
89 /* Peer's address */
90 struct saddr addr;
91};
92
93struct dst_crypto_ctl
94{
95 /* Cipher and hash names */
96 char cipher_algo[DST_NAMELEN];
97 char hash_algo[DST_NAMELEN];
98
99 /* Key sizes. Can be zero for digest for example */
100 unsigned int cipher_keysize, hash_keysize;
101 /* Alignment. Calculated by the DST itself. */
102 unsigned int crypto_attached_size;
103 /* Number of threads to perform crypto operations */
104 int thread_num;
105};
106
107/* Export security attributes have this bits checked in when client connects */
108#define DST_PERM_READ (1<<0)
109#define DST_PERM_WRITE (1<<1)
110
111/*
112 * Right now it is simple model, where each remote address
113 * is assigned to set of permissions it is allowed to perform.
114 * In real world block device does not know anything but
115 * reading and writing, so it should be more than enough.
116 */
117struct dst_secure_user
118{
119 unsigned int permissions;
120 struct saddr addr;
121};
122
123/*
124 * Export control command: device to export and network address to accept
125 * clients to work with given device
126 */
127struct dst_export_ctl
128{
129 char device[DST_NAMELEN];
130 struct dst_network_ctl ctl;
131};
132
/* Network commands carried in struct dst_cmd. */
enum {
	/* Request remote configuration */
	DST_CFG = 1,
	/* IO command */
	DST_IO,
	/* IO response */
	DST_IO_RESPONSE,
	/* Keepalive message */
	DST_PING,
	DST_NCMD_MAX,
};
140
141struct dst_cmd
142{
143 /* Network command itself, see above */
144 __u32 cmd;
145 /*
146 * Size of the attached data
147 * (in most cases, for READ command it means how many bytes were requested)
148 */
149 __u32 size;
150 /* Crypto size: number of attached bytes with digest/hmac */
151 __u32 csize;
152 /* Here we can carry secret data */
153 __u32 reserved;
154 /* Read/write bits, see how they are encoded in bio structure */
155 __u64 rw;
156 /* BIO flags */
157 __u64 flags;
158 /* Unique command id (like transaction ID) */
159 __u64 id;
160 /* Sector to start IO from */
161 __u64 sector;
162 /* Hash data is placed after this header */
163 __u8 hash[0];
164};
165
166/*
167 * Convert command to/from network byte order.
168 * We do not use hton*() functions, since there is
169 * no 64-bit implementation.
170 */
171static inline void dst_convert_cmd(struct dst_cmd *c)
172{
173 c->cmd = __cpu_to_be32(c->cmd);
174 c->csize = __cpu_to_be32(c->csize);
175 c->size = __cpu_to_be32(c->size);
176 c->sector = __cpu_to_be64(c->sector);
177 c->id = __cpu_to_be64(c->id);
178 c->flags = __cpu_to_be64(c->flags);
179 c->rw = __cpu_to_be64(c->rw);
180}
181
182/* Transaction id */
183typedef __u64 dst_gen_t;
184
185#ifdef __KERNEL__
186
187#include <linux/blkdev.h>
188#include <linux/bio.h>
189#include <linux/device.h>
190#include <linux/mempool.h>
191#include <linux/net.h>
192#include <linux/poll.h>
193#include <linux/rbtree.h>
194
/*
 * Debug printing helper: forwards to printk() at KERN_NOTICE level when
 * CONFIG_DST_DEBUG is set. Otherwise it is an empty inline function that
 * keeps the printf-style format checking of its arguments.
 */
#ifdef CONFIG_DST_DEBUG
#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
#else
static inline void __attribute__ ((format (printf, 1, 2)))
dprintk(const char *fmt, ...)
{
}
#endif
201
202struct dst_node;
203
204struct dst_trans
205{
206 /* DST node we are working with */
207 struct dst_node *n;
208
209 /* Entry inside transaction tree */
210 struct rb_node trans_entry;
211
212 /* Merlin kills this transaction when this memory cell equals zero */
213 atomic_t refcnt;
214
215 /* How this transaction should be processed by crypto engine */
216 short enc;
217 /* How many times this transaction was resent */
218 short retries;
219 /* Completion status */
220 int error;
221
222 /* When did we send it to the remote peer */
223 long send_time;
224
225 /* My name is...
226 * Well, computers does not speak, they have unique id instead */
227 dst_gen_t gen;
228
229 /* Block IO we are working with */
230 struct bio *bio;
231
232 /* Network command for above block IO request */
233 struct dst_cmd cmd;
234};
235
236struct dst_crypto_engine
237{
238 /* What should we do with all block requests */
239 struct crypto_hash *hash;
240 struct crypto_ablkcipher *cipher;
241
242 /* Pool of pages used to encrypt data into before sending */
243 int page_num;
244 struct page **pages;
245
246 /* What to do with current request */
247 int enc;
248 /* Who we are and where do we go */
249 struct scatterlist *src, *dst;
250
251 /* Maximum timeout waiting for encryption to be completed */
252 long timeout;
253 /* IV is a 64-bit sequential counter */
254 u64 iv;
255
256 /* Secret data */
257 void *private;
258
259 /* Cached temporary data lives here */
260 int size;
261 void *data;
262};
263
264struct dst_state
265{
266 /* The main state protection */
267 struct mutex state_lock;
268
269 /* Polling machinery for sockets */
270 wait_queue_t wait;
271 wait_queue_head_t *whead;
272 /* Most of events are being waited here */
273 wait_queue_head_t thread_wait;
274
275 /* Who owns this? */
276 struct dst_node *node;
277
278 /* Network address for this state */
279 struct dst_network_ctl ctl;
280
281 /* Permissions to work with: read-only or rw connection */
282 u32 permissions;
283
284 /* Called when we need to clean private data */
285 void (* cleanup)(struct dst_state *st);
286
287 /* Used by the server: BIO completion queues BIOs here */
288 struct list_head request_list;
289 spinlock_t request_lock;
290
291 /* Guess what? No, it is not number of planets */
292 atomic_t refcnt;
293
294 /* This flags is set when connection should be dropped */
295 int need_exit;
296
297 /*
298 * Socket to work with. Second pointer is used for
299 * lockless check if socket was changed before performing
300 * next action (like working with cached polling result)
301 */
302 struct socket *socket, *read_socket;
303
304 /* Cached preallocated data */
305 void *data;
306 unsigned int size;
307
308 /* Currently processed command */
309 struct dst_cmd cmd;
310};
311
312struct dst_info
313{
314 /* Device size */
315 u64 size;
316
317 /* Local device name for export devices */
318 char local[DST_NAMELEN];
319
320 /* Network setup */
321 struct dst_network_ctl net;
322
323 /* Sysfs bits use this */
324 struct device device;
325};
326
327struct dst_node
328{
329 struct list_head node_entry;
330
331 /* Hi, my name is stored here */
332 char name[DST_NAMELEN];
333 /* My cache name is stored here */
334 char cache_name[DST_NAMELEN];
335
336 /* Block device attached to given node.
337 * Only valid for exporting nodes */
338 struct block_device *bdev;
339 /* Network state machine for given peer */
340 struct dst_state *state;
341
342 /* Block IO machinery */
343 struct request_queue *queue;
344 struct gendisk *disk;
345
346 /* Number of threads in processing pool */
347 int thread_num;
348 /* Maximum number of pages in single IO */
349 int max_pages;
350
351 /* I'm that big in bytes */
352 loff_t size;
353
354 /* Exported to userspace node information */
355 struct dst_info *info;
356
357 /*
358 * Security attribute list.
359 * Used only by exporting node currently.
360 */
361 struct list_head security_list;
362 struct mutex security_lock;
363
364 /*
365 * When this unerflows below zero, university collapses.
366 * But this will not happen, since node will be freed,
367 * when reference counter reaches zero.
368 */
369 atomic_t refcnt;
370
371 /* How precisely should I be started? */
372 int (*start)(struct dst_node *);
373
374 /* Crypto capabilities */
375 struct dst_crypto_ctl crypto;
376 u8 *hash_key;
377 u8 *cipher_key;
378
379 /* Pool of processing thread */
380 struct thread_pool *pool;
381
382 /* Transaction IDs live here */
383 atomic_long_t gen;
384
385 /*
386 * How frequently and how many times transaction
387 * tree should be scanned to drop stale objects.
388 */
389 long trans_scan_timeout;
390 int trans_max_retries;
391
392 /* Small gnomes live here */
393 struct rb_root trans_root;
394 struct mutex trans_lock;
395
396 /*
397 * Transaction cache/memory pool.
398 * It is big enough to contain not only transaction
399 * itself, but additional crypto data (digest/hmac).
400 */
401 struct kmem_cache *trans_cache;
402 mempool_t *trans_pool;
403
404 /* This entity scans transaction tree */
405 struct delayed_work trans_work;
406
407 wait_queue_head_t wait;
408};
409
410/* Kernel representation of the security attribute */
411struct dst_secure
412{
413 struct list_head sec_entry;
414 struct dst_secure_user sec;
415};
416
417int dst_process_bio(struct dst_node *n, struct bio *bio);
418
419int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
420int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
421
422static inline struct dst_state *dst_state_get(struct dst_state *st)
423{
424 BUG_ON(atomic_read(&st->refcnt) == 0);
425 atomic_inc(&st->refcnt);
426 return st;
427}
428
429void dst_state_put(struct dst_state *st);
430
431struct dst_state *dst_state_alloc(struct dst_node *n);
432int dst_state_socket_create(struct dst_state *st);
433void dst_state_socket_release(struct dst_state *st);
434
435void dst_state_exit_connected(struct dst_state *st);
436
437int dst_state_schedule_receiver(struct dst_state *st);
438
439void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
440
441static inline void dst_state_lock(struct dst_state *st)
442{
443 mutex_lock(&st->state_lock);
444}
445
446static inline void dst_state_unlock(struct dst_state *st)
447{
448 mutex_unlock(&st->state_lock);
449}
450
451void dst_poll_exit(struct dst_state *st);
452int dst_poll_init(struct dst_state *st);
453
454static inline unsigned int dst_state_poll(struct dst_state *st)
455{
456 unsigned int revents = POLLHUP | POLLERR;
457
458 dst_state_lock(st);
459 if (st->socket)
460 revents = st->socket->ops->poll(NULL, st->socket, NULL);
461 dst_state_unlock(st);
462
463 return revents;
464}
465
/*
 * No-op setup callback with the signature expected by the thread pool
 * scheduling functions: ignores both arguments and always returns 0.
 */
static inline int dst_thread_setup(void *private, void *data)
{
	const int ok = 0;

	return ok;
}
470
471void dst_node_put(struct dst_node *n);
472
473static inline struct dst_node *dst_node_get(struct dst_node *n)
474{
475 atomic_inc(&n->refcnt);
476 return n;
477}
478
479int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
480int dst_recv_cdata(struct dst_state *st, void *cdata);
481int dst_data_send_header(struct socket *sock,
482 void *data, unsigned int size, int more);
483
484int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
485
486int dst_process_io(struct dst_state *st);
487int dst_export_crypto(struct dst_node *n, struct bio *bio);
488int dst_export_send_bio(struct bio *bio);
489int dst_start_export(struct dst_node *n);
490
491int __init dst_export_init(void);
492void dst_export_exit(void);
493
494/* Private structure for export block IO requests */
495struct dst_export_priv
496{
497 struct list_head request_entry;
498 struct dst_state *state;
499 struct bio *bio;
500 struct dst_cmd cmd;
501};
502
503static inline void dst_trans_get(struct dst_trans *t)
504{
505 atomic_inc(&t->refcnt);
506}
507
508struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
509int dst_trans_remove(struct dst_trans *t);
510int dst_trans_remove_nolock(struct dst_trans *t);
511void dst_trans_put(struct dst_trans *t);
512
513/*
514 * Convert bio into network command.
515 */
516static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
517 u32 command, u64 id)
518{
519 cmd->cmd = command;
520 cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
521 cmd->rw = bio->bi_rw;
522 cmd->size = bio->bi_size;
523 cmd->csize = 0;
524 cmd->id = id;
525 cmd->sector = bio->bi_sector;
526};
527
528int dst_trans_send(struct dst_trans *t);
529int dst_trans_crypto(struct dst_trans *t);
530
531int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
532void dst_node_crypto_exit(struct dst_node *n);
533
534static inline int dst_need_crypto(struct dst_node *n)
535{
536 struct dst_crypto_ctl *c = &n->crypto;
537 /*
538 * Logical OR is appropriate here, but boolean one produces
539 * more optimal code, so it is used instead.
540 */
541 return (c->hash_algo[0] | c->cipher_algo[0]);
542}
543
544int dst_node_trans_init(struct dst_node *n, unsigned int size);
545void dst_node_trans_exit(struct dst_node *n);
546
547/*
548 * Pool of threads.
549 * Ready list contains threads currently free to be used,
550 * active one contains threads with some work scheduled for them.
551 * Caller can wait in given queue when thread is ready.
552 */
553struct thread_pool
554{
555 int thread_num;
556 struct mutex thread_lock;
557 struct list_head ready_list, active_list;
558
559 wait_queue_head_t wait;
560};
561
562void thread_pool_del_worker(struct thread_pool *p);
563void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
564int thread_pool_add_worker(struct thread_pool *p,
565 char *name,
566 unsigned int id,
567 void *(* init)(void *data),
568 void (* cleanup)(void *data),
569 void *data);
570
571void thread_pool_destroy(struct thread_pool *p);
572struct thread_pool *thread_pool_create(int num, char *name,
573 void *(* init)(void *data),
574 void (* cleanup)(void *data),
575 void *data);
576
577int thread_pool_schedule(struct thread_pool *p,
578 int (* setup)(void *stored_private, void *setup_data),
579 int (* action)(void *stored_private, void *setup_data),
580 void *setup_data, long timeout);
581int thread_pool_schedule_private(struct thread_pool *p,
582 int (* setup)(void *private, void *data),
583 int (* action)(void *private, void *data),
584 void *data, long timeout, void *id);
585
586#endif /* __KERNEL__ */
587#endif /* __DST_H */