diff options
-rw-r--r-- | drivers/staging/dst/dcore.c | 972 | ||||
-rw-r--r-- | include/linux/connector.h | 4 | ||||
-rw-r--r-- | include/linux/dst.h | 587 |
3 files changed, 1562 insertions, 1 deletions
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c new file mode 100644 index 000000000000..c6e3cd1a5051 --- /dev/null +++ b/drivers/staging/dst/dcore.c | |||
@@ -0,0 +1,972 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/blkdev.h> | ||
19 | #include <linux/bio.h> | ||
20 | #include <linux/buffer_head.h> | ||
21 | #include <linux/connector.h> | ||
22 | #include <linux/dst.h> | ||
23 | #include <linux/device.h> | ||
24 | #include <linux/jhash.h> | ||
25 | #include <linux/idr.h> | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/namei.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/socket.h> | ||
30 | |||
31 | #include <linux/in.h> | ||
32 | #include <linux/in6.h> | ||
33 | |||
34 | #include <net/sock.h> | ||
35 | |||
36 | static int dst_major; | ||
37 | |||
38 | static DEFINE_MUTEX(dst_hash_lock); | ||
39 | static struct list_head *dst_hashtable; | ||
40 | static unsigned int dst_hashtable_size = 128; | ||
41 | module_param(dst_hashtable_size, uint, 0644); | ||
42 | |||
43 | static char dst_name[] = "Dementianting goldfish"; | ||
44 | |||
45 | static DEFINE_IDR(dst_index_idr); | ||
46 | static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL }; | ||
47 | |||
/*
 * DST sysfs tree for a node called 'storage' (the device is registered
 * under the name "dst-<node name>"):
 *
 * /sys/bus/dst/devices/dst-storage/
 * /sys/bus/dst/devices/dst-storage/type  : address of the node, e.g. 192.168.4.80:1025
 * /sys/bus/dst/devices/dst-storage/size  : size of the node in bytes
 * /sys/bus/dst/devices/dst-storage/local : path of the locally exported device
 */
56 | |||
/* Bus match callback: unconditionally reports a match for any device/driver pair. */
static int dst_dev_match(struct device *dev, struct device_driver *drv)
{
	return 1;
}
61 | |||
62 | static struct bus_type dst_dev_bus_type = { | ||
63 | .name = "dst", | ||
64 | .match = &dst_dev_match, | ||
65 | }; | ||
66 | |||
67 | static void dst_node_release(struct device *dev) | ||
68 | { | ||
69 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
70 | |||
71 | kfree(info); | ||
72 | } | ||
73 | |||
74 | static struct device dst_node_dev = { | ||
75 | .bus = &dst_dev_bus_type, | ||
76 | .release = &dst_node_release | ||
77 | }; | ||
78 | |||
79 | /* | ||
80 | * Setting size of the node after it was changed. | ||
81 | */ | ||
82 | static void dst_node_set_size(struct dst_node *n) | ||
83 | { | ||
84 | struct block_device *bdev; | ||
85 | |||
86 | set_capacity(n->disk, n->size >> 9); | ||
87 | |||
88 | bdev = bdget_disk(n->disk, 0); | ||
89 | if (bdev) { | ||
90 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
91 | i_size_write(bdev->bd_inode, n->size); | ||
92 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
93 | bdput(bdev); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Distributed storage request processing function. | ||
99 | */ | ||
100 | static int dst_request(struct request_queue *q, struct bio *bio) | ||
101 | { | ||
102 | struct dst_node *n = q->queuedata; | ||
103 | |||
104 | bio_get(bio); | ||
105 | |||
106 | return dst_process_bio(n, bio); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Open/close callbacks for appropriate block device. | ||
111 | */ | ||
112 | static int dst_bdev_open(struct block_device *bdev, fmode_t mode) | ||
113 | { | ||
114 | struct dst_node *n = bdev->bd_disk->private_data; | ||
115 | |||
116 | dst_node_get(n); | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int dst_bdev_release(struct gendisk *disk, fmode_t mode) | ||
121 | { | ||
122 | struct dst_node *n = disk->private_data; | ||
123 | |||
124 | dst_node_put(n); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static struct block_device_operations dst_blk_ops = { | ||
129 | .open = dst_bdev_open, | ||
130 | .release = dst_bdev_release, | ||
131 | .owner = THIS_MODULE, | ||
132 | }; | ||
133 | |||
134 | /* | ||
135 | * Block layer binding - disk is created when array is fully configured | ||
136 | * by userspace request. | ||
137 | */ | ||
138 | static int dst_node_create_disk(struct dst_node *n) | ||
139 | { | ||
140 | int err = -ENOMEM; | ||
141 | u32 index = 0; | ||
142 | |||
143 | n->queue = blk_init_queue(NULL, NULL); | ||
144 | if (!n->queue) | ||
145 | goto err_out_exit; | ||
146 | |||
147 | n->queue->queuedata = n; | ||
148 | blk_queue_make_request(n->queue, dst_request); | ||
149 | blk_queue_max_phys_segments(n->queue, n->max_pages); | ||
150 | blk_queue_max_hw_segments(n->queue, n->max_pages); | ||
151 | |||
152 | err = -ENOMEM; | ||
153 | n->disk = alloc_disk(1); | ||
154 | if (!n->disk) | ||
155 | goto err_out_free_queue; | ||
156 | |||
157 | if (!(n->state->permissions & DST_PERM_WRITE)) { | ||
158 | printk(KERN_INFO "DST node %s attached read-only.\n", n->name); | ||
159 | set_disk_ro(n->disk, 1); | ||
160 | } | ||
161 | |||
162 | if (!idr_pre_get(&dst_index_idr, GFP_KERNEL)) | ||
163 | goto err_out_put; | ||
164 | |||
165 | mutex_lock(&dst_hash_lock); | ||
166 | err = idr_get_new(&dst_index_idr, NULL, &index); | ||
167 | mutex_unlock(&dst_hash_lock); | ||
168 | if (err) | ||
169 | goto err_out_put; | ||
170 | |||
171 | n->disk->major = dst_major; | ||
172 | n->disk->first_minor = index; | ||
173 | n->disk->fops = &dst_blk_ops; | ||
174 | n->disk->queue = n->queue; | ||
175 | n->disk->private_data = n; | ||
176 | snprintf(n->disk->disk_name, sizeof(n->disk->disk_name), "dst-%s", n->name); | ||
177 | |||
178 | return 0; | ||
179 | |||
180 | err_out_put: | ||
181 | put_disk(n->disk); | ||
182 | err_out_free_queue: | ||
183 | blk_cleanup_queue(n->queue); | ||
184 | err_out_exit: | ||
185 | return err; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Sysfs machinery: show device's size. | ||
190 | */ | ||
191 | static ssize_t dst_show_size(struct device *dev, | ||
192 | struct device_attribute *attr, char *buf) | ||
193 | { | ||
194 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
195 | |||
196 | return sprintf(buf, "%llu\n", info->size); | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * Show local exported device. | ||
201 | */ | ||
202 | static ssize_t dst_show_local(struct device *dev, | ||
203 | struct device_attribute *attr, char *buf) | ||
204 | { | ||
205 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
206 | |||
207 | return sprintf(buf, "%s\n", info->local); | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Shows type of the remote node - device major/minor number | ||
212 | * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes. | ||
213 | */ | ||
214 | static ssize_t dst_show_type(struct device *dev, | ||
215 | struct device_attribute *attr, char *buf) | ||
216 | { | ||
217 | struct dst_info *info = container_of(dev, struct dst_info, device); | ||
218 | int family = info->net.addr.sa_family; | ||
219 | |||
220 | if (family == AF_INET) { | ||
221 | struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr; | ||
222 | return sprintf(buf, "%u.%u.%u.%u:%d\n", | ||
223 | NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); | ||
224 | } else if (family == AF_INET6) { | ||
225 | struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&info->net.addr; | ||
226 | return sprintf(buf, | ||
227 | "%pi6:%d\n", | ||
228 | &sin->sin6_addr, ntohs(sin->sin6_port)); | ||
229 | } else { | ||
230 | int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */ | ||
231 | int size, addrlen = info->net.addr.sa_data_len; | ||
232 | unsigned char *a = (unsigned char *)&info->net.addr.sa_data; | ||
233 | char *buf_orig = buf; | ||
234 | |||
235 | size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ", | ||
236 | family, addrlen); | ||
237 | sz -= size; | ||
238 | buf += size; | ||
239 | |||
240 | for (i=0; i<addrlen; ++i) { | ||
241 | if (sz < 3) | ||
242 | break; | ||
243 | |||
244 | size = snprintf(buf, sz, "%02x ", a[i]); | ||
245 | sz -= size; | ||
246 | buf += size; | ||
247 | } | ||
248 | buf += sprintf(buf, "\n"); | ||
249 | |||
250 | return buf - buf_orig; | ||
251 | } | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static struct device_attribute dst_node_attrs[] = { | ||
256 | __ATTR(size, 0444, dst_show_size, NULL), | ||
257 | __ATTR(type, 0444, dst_show_type, NULL), | ||
258 | __ATTR(local, 0444, dst_show_local, NULL), | ||
259 | }; | ||
260 | |||
261 | static int dst_create_node_attributes(struct dst_node *n) | ||
262 | { | ||
263 | int err, i; | ||
264 | |||
265 | for (i=0; i<ARRAY_SIZE(dst_node_attrs); ++i) { | ||
266 | err = device_create_file(&n->info->device, | ||
267 | &dst_node_attrs[i]); | ||
268 | if (err) | ||
269 | goto err_out_remove_all; | ||
270 | } | ||
271 | return 0; | ||
272 | |||
273 | err_out_remove_all: | ||
274 | while (--i >= 0) | ||
275 | device_remove_file(&n->info->device, | ||
276 | &dst_node_attrs[i]); | ||
277 | |||
278 | return err; | ||
279 | } | ||
280 | |||
281 | static void dst_remove_node_attributes(struct dst_node *n) | ||
282 | { | ||
283 | int i; | ||
284 | |||
285 | for (i=0; i<ARRAY_SIZE(dst_node_attrs); ++i) | ||
286 | device_remove_file(&n->info->device, | ||
287 | &dst_node_attrs[i]); | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Sysfs cleanup and initialization. | ||
292 | * Shows number of useful parameters. | ||
293 | */ | ||
294 | static void dst_node_sysfs_exit(struct dst_node *n) | ||
295 | { | ||
296 | if (n->info) { | ||
297 | dst_remove_node_attributes(n); | ||
298 | device_unregister(&n->info->device); | ||
299 | n->info = NULL; | ||
300 | } | ||
301 | } | ||
302 | |||
303 | static int dst_node_sysfs_init(struct dst_node *n) | ||
304 | { | ||
305 | int err; | ||
306 | |||
307 | n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL); | ||
308 | if (!n->info) | ||
309 | return -ENOMEM; | ||
310 | |||
311 | memcpy(&n->info->device, &dst_node_dev, sizeof(struct device)); | ||
312 | n->info->size = n->size; | ||
313 | |||
314 | snprintf(n->info->device.bus_id, sizeof(n->info->device.bus_id), "dst-%s", n->name); | ||
315 | err = device_register(&n->info->device); | ||
316 | if (err) { | ||
317 | dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n", | ||
318 | n->name, err); | ||
319 | goto err_out_exit; | ||
320 | } | ||
321 | |||
322 | dst_create_node_attributes(n); | ||
323 | |||
324 | return 0; | ||
325 | |||
326 | err_out_exit: | ||
327 | kfree(n->info); | ||
328 | n->info = NULL; | ||
329 | return err; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * DST node hash tables machinery. | ||
334 | */ | ||
335 | static inline unsigned int dst_hash(char *str, unsigned int size) | ||
336 | { | ||
337 | return (jhash(str, size, 0) % dst_hashtable_size); | ||
338 | } | ||
339 | |||
340 | static void dst_node_remove(struct dst_node *n) | ||
341 | { | ||
342 | mutex_lock(&dst_hash_lock); | ||
343 | list_del_init(&n->node_entry); | ||
344 | mutex_unlock(&dst_hash_lock); | ||
345 | } | ||
346 | |||
347 | static void dst_node_add(struct dst_node *n) | ||
348 | { | ||
349 | unsigned hash = dst_hash(n->name, sizeof(n->name)); | ||
350 | |||
351 | mutex_lock(&dst_hash_lock); | ||
352 | list_add_tail(&n->node_entry, &dst_hashtable[hash]); | ||
353 | mutex_unlock(&dst_hash_lock); | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Cleaning node when it is about to be freed. | ||
358 | * There are still users of the socket though, | ||
359 | * so connection cleanup should be protected. | ||
360 | */ | ||
361 | static void dst_node_cleanup(struct dst_node *n) | ||
362 | { | ||
363 | struct dst_state *st = n->state; | ||
364 | |||
365 | if (!st) | ||
366 | return; | ||
367 | |||
368 | if (n->queue) { | ||
369 | blk_cleanup_queue(n->queue); | ||
370 | |||
371 | mutex_lock(&dst_hash_lock); | ||
372 | idr_remove(&dst_index_idr, n->disk->first_minor); | ||
373 | mutex_unlock(&dst_hash_lock); | ||
374 | |||
375 | put_disk(n->disk); | ||
376 | } | ||
377 | |||
378 | if (n->bdev) { | ||
379 | sync_blockdev(n->bdev); | ||
380 | blkdev_put(n->bdev, FMODE_READ|FMODE_WRITE); | ||
381 | } | ||
382 | |||
383 | dst_state_lock(st); | ||
384 | st->need_exit = 1; | ||
385 | dst_state_exit_connected(st); | ||
386 | dst_state_unlock(st); | ||
387 | |||
388 | wake_up(&st->thread_wait); | ||
389 | |||
390 | dst_state_put(st); | ||
391 | n->state = NULL; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Free security attributes attached to given node. | ||
396 | */ | ||
397 | static void dst_security_exit(struct dst_node *n) | ||
398 | { | ||
399 | struct dst_secure *s, *tmp; | ||
400 | |||
401 | list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) { | ||
402 | list_del(&s->sec_entry); | ||
403 | kfree(s); | ||
404 | } | ||
405 | } | ||
406 | |||
407 | /* | ||
408 | * Free node when there are no more users. | ||
409 | * Actually node has to be freed on behalf od userspace process, | ||
410 | * since there are number of threads, which are embedded in the | ||
411 | * node, so they can not exit and free node from there, that is | ||
412 | * why there is a wakeup if reference counter is not equal to zero. | ||
413 | */ | ||
414 | void dst_node_put(struct dst_node *n) | ||
415 | { | ||
416 | if (unlikely(!n)) | ||
417 | return; | ||
418 | |||
419 | dprintk("%s: n: %p, refcnt: %d.\n", | ||
420 | __func__, n, atomic_read(&n->refcnt)); | ||
421 | |||
422 | if (atomic_dec_and_test(&n->refcnt)) { | ||
423 | dst_node_remove(n); | ||
424 | n->trans_scan_timeout = 0; | ||
425 | dst_node_cleanup(n); | ||
426 | thread_pool_destroy(n->pool); | ||
427 | dst_node_sysfs_exit(n); | ||
428 | dst_node_crypto_exit(n); | ||
429 | dst_security_exit(n); | ||
430 | dst_node_trans_exit(n); | ||
431 | |||
432 | kfree(n); | ||
433 | |||
434 | dprintk("%s: freed n: %p.\n", __func__, n); | ||
435 | } else { | ||
436 | wake_up(&n->wait); | ||
437 | } | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * This function finds devices major/minor numbers for given pathname. | ||
442 | */ | ||
443 | static int dst_lookup_device(const char *path, dev_t *dev) | ||
444 | { | ||
445 | int err; | ||
446 | struct nameidata nd; | ||
447 | struct inode *inode; | ||
448 | |||
449 | err = path_lookup(path, LOOKUP_FOLLOW, &nd); | ||
450 | if (err) | ||
451 | return err; | ||
452 | |||
453 | inode = nd.path.dentry->d_inode; | ||
454 | if (!inode) { | ||
455 | err = -ENOENT; | ||
456 | goto out; | ||
457 | } | ||
458 | |||
459 | if (!S_ISBLK(inode->i_mode)) { | ||
460 | err = -ENOTBLK; | ||
461 | goto out; | ||
462 | } | ||
463 | |||
464 | *dev = inode->i_rdev; | ||
465 | |||
466 | out: | ||
467 | path_put(&nd.path); | ||
468 | return err; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Setting up export device: lookup by the name, get its size | ||
473 | * and setup listening socket, which will accept clients, which | ||
474 | * will submit IO for given storage. | ||
475 | */ | ||
476 | static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl, | ||
477 | struct dst_export_ctl *le) | ||
478 | { | ||
479 | int err; | ||
480 | dev_t dev = 0; /* gcc likes to scream here */ | ||
481 | |||
482 | snprintf(n->info->local, sizeof(n->info->local), "%s", le->device); | ||
483 | |||
484 | err = dst_lookup_device(le->device, &dev); | ||
485 | if (err) | ||
486 | return err; | ||
487 | |||
488 | n->bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | ||
489 | if (!n->bdev) | ||
490 | return -ENODEV; | ||
491 | |||
492 | if (n->size != 0) | ||
493 | n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size); | ||
494 | else | ||
495 | n->size = n->bdev->bd_inode->i_size; | ||
496 | |||
497 | n->info->size = n->size; | ||
498 | err = dst_node_init_listened(n, le); | ||
499 | if (err) | ||
500 | goto err_out_cleanup; | ||
501 | |||
502 | return 0; | ||
503 | |||
504 | err_out_cleanup: | ||
505 | blkdev_put(n->bdev, FMODE_READ|FMODE_WRITE); | ||
506 | n->bdev = NULL; | ||
507 | |||
508 | return err; | ||
509 | } | ||
510 | |||
/*
 * Thread pool init callback for the network processing threads:
 * does no setup and hands the passed data back unchanged.
 */
static inline void *dst_thread_network_init(void *data)
{
	dprintk("%s: data: %p.\n", __func__, data);
	return data;
}
517 | |||
/*
 * Thread pool cleanup callback for the network processing threads:
 * there is nothing to release, only a debug message is emitted.
 */
static inline void dst_thread_network_cleanup(void *data)
{
	dprintk("%s: data: %p.\n", __func__, data);
}
522 | |||
523 | /* | ||
524 | * Allocate DST node and initialize some of its parameters. | ||
525 | */ | ||
526 | static struct dst_node *dst_alloc_node(struct dst_ctl *ctl, | ||
527 | int (*start)(struct dst_node *), | ||
528 | int num) | ||
529 | { | ||
530 | struct dst_node *n; | ||
531 | int err; | ||
532 | |||
533 | n = kzalloc(sizeof(struct dst_node), GFP_KERNEL); | ||
534 | if (!n) | ||
535 | return NULL; | ||
536 | |||
537 | INIT_LIST_HEAD(&n->node_entry); | ||
538 | |||
539 | INIT_LIST_HEAD(&n->security_list); | ||
540 | mutex_init(&n->security_lock); | ||
541 | |||
542 | init_waitqueue_head(&n->wait); | ||
543 | |||
544 | n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout); | ||
545 | if (!n->trans_scan_timeout) | ||
546 | n->trans_scan_timeout = HZ; | ||
547 | |||
548 | n->trans_max_retries = ctl->trans_max_retries; | ||
549 | if (!n->trans_max_retries) | ||
550 | n->trans_max_retries = 10; | ||
551 | |||
552 | /* | ||
553 | * Pretty much arbitrary default numbers. | ||
554 | * 32 matches maximum number of pages in bio originated from ext3 (31). | ||
555 | */ | ||
556 | n->max_pages = ctl->max_pages; | ||
557 | if (!n->max_pages) | ||
558 | n->max_pages = 32; | ||
559 | |||
560 | if (n->max_pages > 1024) | ||
561 | n->max_pages = 1024; | ||
562 | |||
563 | n->start = start; | ||
564 | n->size = ctl->size; | ||
565 | |||
566 | atomic_set(&n->refcnt, 1); | ||
567 | atomic_long_set(&n->gen, 0); | ||
568 | snprintf(n->name, sizeof(n->name), "%s", ctl->name); | ||
569 | |||
570 | err = dst_node_sysfs_init(n); | ||
571 | if (err) | ||
572 | goto err_out_free; | ||
573 | |||
574 | n->pool = thread_pool_create(num, n->name, dst_thread_network_init, | ||
575 | dst_thread_network_cleanup, n); | ||
576 | if (IS_ERR(n->pool)) { | ||
577 | err = PTR_ERR(n->pool); | ||
578 | goto err_out_sysfs_exit; | ||
579 | } | ||
580 | |||
581 | dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name); | ||
582 | |||
583 | return n; | ||
584 | |||
585 | err_out_sysfs_exit: | ||
586 | dst_node_sysfs_exit(n); | ||
587 | err_out_free: | ||
588 | kfree(n); | ||
589 | return NULL; | ||
590 | } | ||
591 | |||
592 | /* | ||
593 | * Starting a node, connected to the remote server: | ||
594 | * register block device and initialize transaction mechanism. | ||
595 | * In revers order though. | ||
596 | * | ||
597 | * It will autonegotiate some parameters with the remote node | ||
598 | * and update local if needed. | ||
599 | * | ||
600 | * Transaction initialization should be the last thing before | ||
601 | * starting the node, since transaction should include not only | ||
602 | * block IO, but also crypto related data (if any), which are | ||
603 | * initialized separately. | ||
604 | */ | ||
605 | static int dst_start_remote(struct dst_node *n) | ||
606 | { | ||
607 | int err; | ||
608 | |||
609 | err = dst_node_trans_init(n, sizeof(struct dst_trans)); | ||
610 | if (err) | ||
611 | return err; | ||
612 | |||
613 | err = dst_node_create_disk(n); | ||
614 | if (err) | ||
615 | return err; | ||
616 | |||
617 | dst_node_set_size(n); | ||
618 | add_disk(n->disk); | ||
619 | |||
620 | dprintk("DST: started remote node '%s', minor: %d.\n", n->name, n->disk->first_minor); | ||
621 | |||
622 | return 0; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * Adding remote node and initialize connection. | ||
627 | */ | ||
628 | static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl, | ||
629 | void *data, unsigned int size) | ||
630 | { | ||
631 | int err; | ||
632 | struct dst_network_ctl *rctl = data; | ||
633 | |||
634 | if (n) | ||
635 | return -EEXIST; | ||
636 | |||
637 | if (size != sizeof(struct dst_network_ctl)) | ||
638 | return -EINVAL; | ||
639 | |||
640 | n = dst_alloc_node(ctl, dst_start_remote, 1); | ||
641 | if (!n) | ||
642 | return -ENOMEM; | ||
643 | |||
644 | memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl)); | ||
645 | err = dst_node_init_connected(n, rctl); | ||
646 | if (err) | ||
647 | goto err_out_free; | ||
648 | |||
649 | dst_node_add(n); | ||
650 | |||
651 | return 0; | ||
652 | |||
653 | err_out_free: | ||
654 | dst_node_put(n); | ||
655 | return err; | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Adding export node: initializing block device and listening socket. | ||
660 | */ | ||
661 | static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl, | ||
662 | void *data, unsigned int size) | ||
663 | { | ||
664 | int err; | ||
665 | struct dst_export_ctl *le = data; | ||
666 | |||
667 | if (n) | ||
668 | return -EEXIST; | ||
669 | |||
670 | if (size != sizeof(struct dst_export_ctl)) | ||
671 | return -EINVAL; | ||
672 | |||
673 | n = dst_alloc_node(ctl, dst_start_export, 2); | ||
674 | if (!n) | ||
675 | return -EINVAL; | ||
676 | |||
677 | err = dst_setup_export(n, ctl, le); | ||
678 | if (err) | ||
679 | goto err_out_free; | ||
680 | |||
681 | dst_node_add(n); | ||
682 | |||
683 | return 0; | ||
684 | |||
685 | err_out_free: | ||
686 | dst_node_put(n); | ||
687 | return err; | ||
688 | } | ||
689 | |||
690 | static int dst_node_remove_unload(struct dst_node *n) | ||
691 | { | ||
692 | printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n", | ||
693 | n->name, n->size); | ||
694 | |||
695 | if (n->disk) | ||
696 | del_gendisk(n->disk); | ||
697 | |||
698 | dst_node_remove(n); | ||
699 | dst_node_sysfs_exit(n); | ||
700 | |||
701 | /* | ||
702 | * This is not a hack. Really. | ||
703 | * Node's reference counter allows to implement fine grained | ||
704 | * node freeing, but since all transactions (which hold node's | ||
705 | * reference counter) are processed in the dedicated thread, | ||
706 | * it is possible that reference will hit zero in that thread, | ||
707 | * so we will not be able to exit thread and cleanup the node. | ||
708 | * | ||
709 | * So, we remove disk, so no new activity is possible, and | ||
710 | * wait until all pending transaction are completed (either | ||
711 | * in receiving thread or by timeout in workqueue), in this | ||
712 | * case reference counter will be less or equal to 2 (once set in | ||
713 | * dst_alloc_node() and then in connector message parser; | ||
714 | * or when we force module unloading, and connector message | ||
715 | * parser does not hold a reference, in this case reference | ||
716 | * counter will be equal to 1), | ||
717 | * and subsequent dst_node_put() calls will free the node. | ||
718 | */ | ||
719 | dprintk("%s: going to sleep with %d refcnt.\n", __func__, atomic_read(&n->refcnt)); | ||
720 | wait_event(n->wait, atomic_read(&n->refcnt) <= 2); | ||
721 | |||
722 | dst_node_put(n); | ||
723 | return 0; | ||
724 | } | ||
725 | |||
726 | /* | ||
727 | * Remove node from the hash table. | ||
728 | */ | ||
729 | static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl, | ||
730 | void *data, unsigned int size) | ||
731 | { | ||
732 | if (!n) | ||
733 | return -ENODEV; | ||
734 | |||
735 | return dst_node_remove_unload(n); | ||
736 | } | ||
737 | |||
738 | /* | ||
739 | * Initialize crypto processing for given node. | ||
740 | */ | ||
741 | static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl, | ||
742 | void *data, unsigned int size) | ||
743 | { | ||
744 | struct dst_crypto_ctl *crypto = data; | ||
745 | |||
746 | if (!n) | ||
747 | return -ENODEV; | ||
748 | |||
749 | if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize + | ||
750 | crypto->cipher_keysize) | ||
751 | return -EINVAL; | ||
752 | |||
753 | if (n->trans_cache) | ||
754 | return -EEXIST; | ||
755 | |||
756 | return dst_node_crypto_init(n, crypto); | ||
757 | } | ||
758 | |||
759 | /* | ||
760 | * Security attributes for given node. | ||
761 | */ | ||
762 | static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl, | ||
763 | void *data, unsigned int size) | ||
764 | { | ||
765 | struct dst_secure *s; | ||
766 | |||
767 | if (!n) | ||
768 | return -ENODEV; | ||
769 | |||
770 | if (size != sizeof(struct dst_secure_user)) | ||
771 | return -EINVAL; | ||
772 | |||
773 | s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL); | ||
774 | if (!s) | ||
775 | return -ENOMEM; | ||
776 | |||
777 | memcpy(&s->sec, data, size); | ||
778 | |||
779 | mutex_lock(&n->security_lock); | ||
780 | list_add_tail(&s->sec_entry, &n->security_list); | ||
781 | mutex_unlock(&n->security_lock); | ||
782 | |||
783 | return 0; | ||
784 | } | ||
785 | |||
786 | /* | ||
787 | * Kill'em all! | ||
788 | */ | ||
789 | static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl, | ||
790 | void *data, unsigned int size) | ||
791 | { | ||
792 | int err; | ||
793 | |||
794 | if (!n) | ||
795 | return -ENODEV; | ||
796 | |||
797 | if (n->trans_cache) | ||
798 | return 0; | ||
799 | |||
800 | err = n->start(n); | ||
801 | if (err) | ||
802 | return err; | ||
803 | |||
804 | printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size); | ||
805 | return 0; | ||
806 | } | ||
807 | |||
808 | typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl, | ||
809 | void *data, unsigned int size); | ||
810 | |||
811 | /* | ||
812 | * List of userspace commands. | ||
813 | */ | ||
814 | static dst_command_func dst_commands[] = { | ||
815 | [DST_ADD_REMOTE] = &dst_add_remote, | ||
816 | [DST_ADD_EXPORT] = &dst_add_export, | ||
817 | [DST_DEL_NODE] = &dst_del_node, | ||
818 | [DST_CRYPTO] = &dst_crypto_init, | ||
819 | [DST_SECURITY] = &dst_security_init, | ||
820 | [DST_START] = &dst_start_node, | ||
821 | }; | ||
822 | |||
823 | /* | ||
824 | * Configuration parser. | ||
825 | */ | ||
826 | static void cn_dst_callback(void *data) | ||
827 | { | ||
828 | struct dst_ctl *ctl; | ||
829 | struct cn_msg *msg = data; | ||
830 | int err; | ||
831 | struct dst_ctl_ack ack; | ||
832 | struct dst_node *n = NULL, *tmp; | ||
833 | unsigned int hash; | ||
834 | |||
835 | if (msg->len < sizeof(struct dst_ctl)) { | ||
836 | err = -EBADMSG; | ||
837 | goto out; | ||
838 | } | ||
839 | |||
840 | ctl = (struct dst_ctl *)msg->data; | ||
841 | |||
842 | if (ctl->cmd >= DST_CMD_MAX) { | ||
843 | err = -EINVAL; | ||
844 | goto out; | ||
845 | } | ||
846 | hash = dst_hash(ctl->name, sizeof(ctl->name)); | ||
847 | |||
848 | mutex_lock(&dst_hash_lock); | ||
849 | list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) { | ||
850 | if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) { | ||
851 | n = tmp; | ||
852 | dst_node_get(n); | ||
853 | break; | ||
854 | } | ||
855 | } | ||
856 | mutex_unlock(&dst_hash_lock); | ||
857 | |||
858 | err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl), | ||
859 | msg->len - sizeof(struct dst_ctl)); | ||
860 | |||
861 | dst_node_put(n); | ||
862 | out: | ||
863 | memcpy(&ack.msg, msg, sizeof(struct cn_msg)); | ||
864 | |||
865 | ack.msg.ack = msg->ack + 1; | ||
866 | ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg); | ||
867 | |||
868 | ack.error = err; | ||
869 | |||
870 | cn_netlink_send(&ack.msg, 0, GFP_KERNEL); | ||
871 | } | ||
872 | |||
873 | /* | ||
874 | * Global initialization: sysfs, hash table, block device registration, | ||
875 | * connector and various caches. | ||
876 | */ | ||
877 | static int __init dst_sysfs_init(void) | ||
878 | { | ||
879 | return bus_register(&dst_dev_bus_type); | ||
880 | } | ||
881 | |||
882 | static void dst_sysfs_exit(void) | ||
883 | { | ||
884 | bus_unregister(&dst_dev_bus_type); | ||
885 | } | ||
886 | |||
887 | static int __init dst_hashtable_init(void) | ||
888 | { | ||
889 | unsigned int i; | ||
890 | |||
891 | dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head), | ||
892 | GFP_KERNEL); | ||
893 | if (!dst_hashtable) | ||
894 | return -ENOMEM; | ||
895 | |||
896 | for (i=0; i<dst_hashtable_size; ++i) | ||
897 | INIT_LIST_HEAD(&dst_hashtable[i]); | ||
898 | |||
899 | return 0; | ||
900 | } | ||
901 | |||
902 | static void dst_hashtable_exit(void) | ||
903 | { | ||
904 | unsigned int i; | ||
905 | struct dst_node *n, *tmp; | ||
906 | |||
907 | for (i=0; i<dst_hashtable_size; ++i) { | ||
908 | list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) { | ||
909 | dst_node_remove_unload(n); | ||
910 | } | ||
911 | } | ||
912 | |||
913 | kfree(dst_hashtable); | ||
914 | } | ||
915 | |||
916 | static int __init dst_sys_init(void) | ||
917 | { | ||
918 | int err = -ENOMEM; | ||
919 | |||
920 | err = dst_hashtable_init(); | ||
921 | if (err) | ||
922 | goto err_out_exit; | ||
923 | |||
924 | err = dst_export_init(); | ||
925 | if (err) | ||
926 | goto err_out_hashtable_exit; | ||
927 | |||
928 | err = register_blkdev(dst_major, DST_NAME); | ||
929 | if (err < 0) | ||
930 | goto err_out_export_exit; | ||
931 | if (err) | ||
932 | dst_major = err; | ||
933 | |||
934 | err = dst_sysfs_init(); | ||
935 | if (err) | ||
936 | goto err_out_unregister; | ||
937 | |||
938 | err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback); | ||
939 | if (err) | ||
940 | goto err_out_sysfs_exit; | ||
941 | |||
942 | printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name); | ||
943 | |||
944 | return 0; | ||
945 | |||
946 | err_out_sysfs_exit: | ||
947 | dst_sysfs_exit(); | ||
948 | err_out_unregister: | ||
949 | unregister_blkdev(dst_major, DST_NAME); | ||
950 | err_out_export_exit: | ||
951 | dst_export_exit(); | ||
952 | err_out_hashtable_exit: | ||
953 | dst_hashtable_exit(); | ||
954 | err_out_exit: | ||
955 | return err; | ||
956 | } | ||
957 | |||
958 | static void __exit dst_sys_exit(void) | ||
959 | { | ||
960 | cn_del_callback(&cn_dst_id); | ||
961 | unregister_blkdev(dst_major, DST_NAME); | ||
962 | dst_hashtable_exit(); | ||
963 | dst_sysfs_exit(); | ||
964 | dst_export_exit(); | ||
965 | } | ||
966 | |||
967 | module_init(dst_sys_init); | ||
968 | module_exit(dst_sys_exit); | ||
969 | |||
970 | MODULE_DESCRIPTION("Distributed storage"); | ||
971 | MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); | ||
972 | MODULE_LICENSE("GPL"); | ||
diff --git a/include/linux/connector.h b/include/linux/connector.h index fc65d219d88c..b9966e64604e 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h | |||
@@ -39,8 +39,10 @@ | |||
39 | #define CN_IDX_V86D 0x4 | 39 | #define CN_IDX_V86D 0x4 |
40 | #define CN_VAL_V86D_UVESAFB 0x1 | 40 | #define CN_VAL_V86D_UVESAFB 0x1 |
41 | #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ | 41 | #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ |
42 | #define CN_DST_IDX 0x6 | ||
43 | #define CN_DST_VAL 0x1 | ||
42 | 44 | ||
43 | #define CN_NETLINK_USERS 6 | 45 | #define CN_NETLINK_USERS 7 |
44 | 46 | ||
45 | /* | 47 | /* |
46 | * Maximum connector's message size. | 48 | * Maximum connector's message size. |
diff --git a/include/linux/dst.h b/include/linux/dst.h new file mode 100644 index 000000000000..e26fed84b1aa --- /dev/null +++ b/include/linux/dst.h | |||
@@ -0,0 +1,587 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef __DST_H | ||
17 | #define __DST_H | ||
18 | |||
19 | #include <linux/types.h> | ||
20 | #include <linux/connector.h> | ||
21 | |||
22 | #define DST_NAMELEN 32 | ||
23 | #define DST_NAME "dst" | ||
24 | |||
/*
 * Control commands.
 * Values are spelled out explicitly; DST_CMD_MAX is the number of
 * commands and must stay last.
 */
enum {
	DST_DEL_NODE	= 0,	/* remove node with given id from storage */
	DST_ADD_REMOTE	= 1,	/* add remote node with given id to the storage */
	DST_ADD_EXPORT	= 2,	/* add local node with given id to the storage,
				 * to be exported and used by remote peers */
	DST_CRYPTO	= 3,	/* crypto initialization (hash/cipher used
				 * to protect the connection) */
	DST_SECURITY	= 4,	/* security attributes for given connection
				 * (permissions for example) */
	DST_START	= 5,	/* register given node in the block layer */
	DST_CMD_MAX
};
40 | |||
41 | struct dst_ctl | ||
42 | { | ||
43 | /* Storage name */ | ||
44 | char name[DST_NAMELEN]; | ||
45 | /* Command flags */ | ||
46 | __u32 flags; | ||
47 | /* Command itself (see above) */ | ||
48 | __u32 cmd; | ||
49 | /* Maximum number of pages per single request in this device */ | ||
50 | __u32 max_pages; | ||
51 | /* Stale/error transaction scanning timeout in milliseconds */ | ||
52 | __u32 trans_scan_timeout; | ||
53 | /* Maximum number of retry sends before completing transaction as broken */ | ||
54 | __u32 trans_max_retries; | ||
55 | /* Storage size */ | ||
56 | __u64 size; | ||
57 | }; | ||
58 | |||
59 | /* Reply command carries completion status */ | ||
60 | struct dst_ctl_ack | ||
61 | { | ||
62 | struct cn_msg msg; | ||
63 | int error; | ||
64 | int unused[3]; | ||
65 | }; | ||
66 | |||
67 | /* | ||
68 | * Unfortunately socket address structure is not exported to userspace | ||
69 | * and is redefined there. | ||
70 | */ | ||
71 | #define SADDR_MAX_DATA 128 | ||
72 | |||
73 | struct saddr { | ||
74 | /* address family, AF_xxx */ | ||
75 | unsigned short sa_family; | ||
76 | /* 14 bytes of protocol address */ | ||
77 | char sa_data[SADDR_MAX_DATA]; | ||
78 | /* Number of bytes used in sa_data */ | ||
79 | unsigned short sa_data_len; | ||
80 | }; | ||
81 | |||
82 | /* Address structure */ | ||
83 | struct dst_network_ctl | ||
84 | { | ||
85 | /* Socket type: datagram, stream...*/ | ||
86 | unsigned int type; | ||
87 | /* Let me guess, is it a Jupiter diameter? */ | ||
88 | unsigned int proto; | ||
89 | /* Peer's address */ | ||
90 | struct saddr addr; | ||
91 | }; | ||
92 | |||
93 | struct dst_crypto_ctl | ||
94 | { | ||
95 | /* Cipher and hash names */ | ||
96 | char cipher_algo[DST_NAMELEN]; | ||
97 | char hash_algo[DST_NAMELEN]; | ||
98 | |||
99 | /* Key sizes. Can be zero for digest for example */ | ||
100 | unsigned int cipher_keysize, hash_keysize; | ||
101 | /* Alignment. Calculated by the DST itself. */ | ||
102 | unsigned int crypto_attached_size; | ||
103 | /* Number of threads to perform crypto operations */ | ||
104 | int thread_num; | ||
105 | }; | ||
106 | |||
107 | /* Export security attributes have these bits checked when a client connects */ | ||
108 | #define DST_PERM_READ (1<<0) | ||
109 | #define DST_PERM_WRITE (1<<1) | ||
110 | |||
111 | /* | ||
112 | * Right now it is simple model, where each remote address | ||
113 | * is assigned to set of permissions it is allowed to perform. | ||
114 | * In real world block device does not know anything but | ||
115 | * reading and writing, so it should be more than enough. | ||
116 | */ | ||
117 | struct dst_secure_user | ||
118 | { | ||
119 | unsigned int permissions; | ||
120 | struct saddr addr; | ||
121 | }; | ||
122 | |||
123 | /* | ||
124 | * Export control command: device to export and network address to accept | ||
125 | * clients to work with given device | ||
126 | */ | ||
127 | struct dst_export_ctl | ||
128 | { | ||
129 | char device[DST_NAMELEN]; | ||
130 | struct dst_network_ctl ctl; | ||
131 | }; | ||
132 | |||
/*
 * Network commands.
 * Numbering starts at one; DST_NCMD_MAX follows the last command.
 */
enum {
	/* Request remote configuration */
	DST_CFG		= 1,
	/* IO command */
	DST_IO		= 2,
	/* IO response */
	DST_IO_RESPONSE	= 3,
	/* Keepalive message */
	DST_PING	= 4,
	DST_NCMD_MAX,
};
140 | |||
141 | struct dst_cmd | ||
142 | { | ||
143 | /* Network command itself, see above */ | ||
144 | __u32 cmd; | ||
145 | /* | ||
146 | * Size of the attached data | ||
147 | * (in most cases, for READ command it means how many bytes were requested) | ||
148 | */ | ||
149 | __u32 size; | ||
150 | /* Crypto size: number of attached bytes with digest/hmac */ | ||
151 | __u32 csize; | ||
152 | /* Here we can carry secret data */ | ||
153 | __u32 reserved; | ||
154 | /* Read/write bits, see how they are encoded in bio structure */ | ||
155 | __u64 rw; | ||
156 | /* BIO flags */ | ||
157 | __u64 flags; | ||
158 | /* Unique command id (like transaction ID) */ | ||
159 | __u64 id; | ||
160 | /* Sector to start IO from */ | ||
161 | __u64 sector; | ||
162 | /* Hash data is placed after this header */ | ||
163 | __u8 hash[0]; | ||
164 | }; | ||
165 | |||
166 | /* | ||
167 | * Convert command to/from network byte order. | ||
168 | * We do not use hton*() functions, since there is | ||
169 | * no 64-bit implementation. | ||
170 | */ | ||
171 | static inline void dst_convert_cmd(struct dst_cmd *c) | ||
172 | { | ||
173 | c->cmd = __cpu_to_be32(c->cmd); | ||
174 | c->csize = __cpu_to_be32(c->csize); | ||
175 | c->size = __cpu_to_be32(c->size); | ||
176 | c->sector = __cpu_to_be64(c->sector); | ||
177 | c->id = __cpu_to_be64(c->id); | ||
178 | c->flags = __cpu_to_be64(c->flags); | ||
179 | c->rw = __cpu_to_be64(c->rw); | ||
180 | } | ||
181 | |||
182 | /* Transaction id */ | ||
183 | typedef __u64 dst_gen_t; | ||
184 | |||
185 | #ifdef __KERNEL__ | ||
186 | |||
187 | #include <linux/blkdev.h> | ||
188 | #include <linux/bio.h> | ||
189 | #include <linux/device.h> | ||
190 | #include <linux/mempool.h> | ||
191 | #include <linux/net.h> | ||
192 | #include <linux/poll.h> | ||
193 | #include <linux/rbtree.h> | ||
194 | |||
/*
 * Debug logging helper.
 * With CONFIG_DST_DEBUG set it expands to printk() at KERN_NOTICE level.
 * Otherwise it is an empty inline function that still carries the
 * printf format attribute.
 */
#ifdef CONFIG_DST_DEBUG
#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
#else
static inline void __attribute__ ((format (printf, 1, 2)))
	dprintk(const char *fmt, ...) {}
#endif
201 | |||
202 | struct dst_node; | ||
203 | |||
204 | struct dst_trans | ||
205 | { | ||
206 | /* DST node we are working with */ | ||
207 | struct dst_node *n; | ||
208 | |||
209 | /* Entry inside transaction tree */ | ||
210 | struct rb_node trans_entry; | ||
211 | |||
212 | /* Merlin kills this transaction when this memory cell equals zero */ | ||
213 | atomic_t refcnt; | ||
214 | |||
215 | /* How this transaction should be processed by crypto engine */ | ||
216 | short enc; | ||
217 | /* How many times this transaction was resent */ | ||
218 | short retries; | ||
219 | /* Completion status */ | ||
220 | int error; | ||
221 | |||
222 | /* When did we send it to the remote peer */ | ||
223 | long send_time; | ||
224 | |||
225 | /* My name is... | ||
226 | * Well, computers does not speak, they have unique id instead */ | ||
227 | dst_gen_t gen; | ||
228 | |||
229 | /* Block IO we are working with */ | ||
230 | struct bio *bio; | ||
231 | |||
232 | /* Network command for above block IO request */ | ||
233 | struct dst_cmd cmd; | ||
234 | }; | ||
235 | |||
236 | struct dst_crypto_engine | ||
237 | { | ||
238 | /* What should we do with all block requests */ | ||
239 | struct crypto_hash *hash; | ||
240 | struct crypto_ablkcipher *cipher; | ||
241 | |||
242 | /* Pool of pages used to encrypt data into before sending */ | ||
243 | int page_num; | ||
244 | struct page **pages; | ||
245 | |||
246 | /* What to do with current request */ | ||
247 | int enc; | ||
248 | /* Who we are and where do we go */ | ||
249 | struct scatterlist *src, *dst; | ||
250 | |||
251 | /* Maximum timeout waiting for encryption to be completed */ | ||
252 | long timeout; | ||
253 | /* IV is a 64-bit sequential counter */ | ||
254 | u64 iv; | ||
255 | |||
256 | /* Secret data */ | ||
257 | void *private; | ||
258 | |||
259 | /* Cached temporary data lives here */ | ||
260 | int size; | ||
261 | void *data; | ||
262 | }; | ||
263 | |||
264 | struct dst_state | ||
265 | { | ||
266 | /* The main state protection */ | ||
267 | struct mutex state_lock; | ||
268 | |||
269 | /* Polling machinery for sockets */ | ||
270 | wait_queue_t wait; | ||
271 | wait_queue_head_t *whead; | ||
272 | /* Most of events are being waited here */ | ||
273 | wait_queue_head_t thread_wait; | ||
274 | |||
275 | /* Who owns this? */ | ||
276 | struct dst_node *node; | ||
277 | |||
278 | /* Network address for this state */ | ||
279 | struct dst_network_ctl ctl; | ||
280 | |||
281 | /* Permissions to work with: read-only or rw connection */ | ||
282 | u32 permissions; | ||
283 | |||
284 | /* Called when we need to clean private data */ | ||
285 | void (* cleanup)(struct dst_state *st); | ||
286 | |||
287 | /* Used by the server: BIO completion queues BIOs here */ | ||
288 | struct list_head request_list; | ||
289 | spinlock_t request_lock; | ||
290 | |||
291 | /* Guess what? No, it is not number of planets */ | ||
292 | atomic_t refcnt; | ||
293 | |||
294 | /* This flags is set when connection should be dropped */ | ||
295 | int need_exit; | ||
296 | |||
297 | /* | ||
298 | * Socket to work with. Second pointer is used for | ||
299 | * lockless check if socket was changed before performing | ||
300 | * next action (like working with cached polling result) | ||
301 | */ | ||
302 | struct socket *socket, *read_socket; | ||
303 | |||
304 | /* Cached preallocated data */ | ||
305 | void *data; | ||
306 | unsigned int size; | ||
307 | |||
308 | /* Currently processed command */ | ||
309 | struct dst_cmd cmd; | ||
310 | }; | ||
311 | |||
312 | struct dst_info | ||
313 | { | ||
314 | /* Device size */ | ||
315 | u64 size; | ||
316 | |||
317 | /* Local device name for export devices */ | ||
318 | char local[DST_NAMELEN]; | ||
319 | |||
320 | /* Network setup */ | ||
321 | struct dst_network_ctl net; | ||
322 | |||
323 | /* Sysfs bits use this */ | ||
324 | struct device device; | ||
325 | }; | ||
326 | |||
327 | struct dst_node | ||
328 | { | ||
329 | struct list_head node_entry; | ||
330 | |||
331 | /* Hi, my name is stored here */ | ||
332 | char name[DST_NAMELEN]; | ||
333 | /* My cache name is stored here */ | ||
334 | char cache_name[DST_NAMELEN]; | ||
335 | |||
336 | /* Block device attached to given node. | ||
337 | * Only valid for exporting nodes */ | ||
338 | struct block_device *bdev; | ||
339 | /* Network state machine for given peer */ | ||
340 | struct dst_state *state; | ||
341 | |||
342 | /* Block IO machinery */ | ||
343 | struct request_queue *queue; | ||
344 | struct gendisk *disk; | ||
345 | |||
346 | /* Number of threads in processing pool */ | ||
347 | int thread_num; | ||
348 | /* Maximum number of pages in single IO */ | ||
349 | int max_pages; | ||
350 | |||
351 | /* I'm that big in bytes */ | ||
352 | loff_t size; | ||
353 | |||
354 | /* Exported to userspace node information */ | ||
355 | struct dst_info *info; | ||
356 | |||
357 | /* | ||
358 | * Security attribute list. | ||
359 | * Used only by exporting node currently. | ||
360 | */ | ||
361 | struct list_head security_list; | ||
362 | struct mutex security_lock; | ||
363 | |||
364 | /* | ||
365 | * When this unerflows below zero, university collapses. | ||
366 | * But this will not happen, since node will be freed, | ||
367 | * when reference counter reaches zero. | ||
368 | */ | ||
369 | atomic_t refcnt; | ||
370 | |||
371 | /* How precisely should I be started? */ | ||
372 | int (*start)(struct dst_node *); | ||
373 | |||
374 | /* Crypto capabilities */ | ||
375 | struct dst_crypto_ctl crypto; | ||
376 | u8 *hash_key; | ||
377 | u8 *cipher_key; | ||
378 | |||
379 | /* Pool of processing thread */ | ||
380 | struct thread_pool *pool; | ||
381 | |||
382 | /* Transaction IDs live here */ | ||
383 | atomic_long_t gen; | ||
384 | |||
385 | /* | ||
386 | * How frequently and how many times transaction | ||
387 | * tree should be scanned to drop stale objects. | ||
388 | */ | ||
389 | long trans_scan_timeout; | ||
390 | int trans_max_retries; | ||
391 | |||
392 | /* Small gnomes live here */ | ||
393 | struct rb_root trans_root; | ||
394 | struct mutex trans_lock; | ||
395 | |||
396 | /* | ||
397 | * Transaction cache/memory pool. | ||
398 | * It is big enough to contain not only transaction | ||
399 | * itself, but additional crypto data (digest/hmac). | ||
400 | */ | ||
401 | struct kmem_cache *trans_cache; | ||
402 | mempool_t *trans_pool; | ||
403 | |||
404 | /* This entity scans transaction tree */ | ||
405 | struct delayed_work trans_work; | ||
406 | |||
407 | wait_queue_head_t wait; | ||
408 | }; | ||
409 | |||
410 | /* Kernel representation of the security attribute */ | ||
411 | struct dst_secure | ||
412 | { | ||
413 | struct list_head sec_entry; | ||
414 | struct dst_secure_user sec; | ||
415 | }; | ||
416 | |||
417 | int dst_process_bio(struct dst_node *n, struct bio *bio); | ||
418 | |||
419 | int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); | ||
420 | int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); | ||
421 | |||
422 | static inline struct dst_state *dst_state_get(struct dst_state *st) | ||
423 | { | ||
424 | BUG_ON(atomic_read(&st->refcnt) == 0); | ||
425 | atomic_inc(&st->refcnt); | ||
426 | return st; | ||
427 | } | ||
428 | |||
429 | void dst_state_put(struct dst_state *st); | ||
430 | |||
431 | struct dst_state *dst_state_alloc(struct dst_node *n); | ||
432 | int dst_state_socket_create(struct dst_state *st); | ||
433 | void dst_state_socket_release(struct dst_state *st); | ||
434 | |||
435 | void dst_state_exit_connected(struct dst_state *st); | ||
436 | |||
437 | int dst_state_schedule_receiver(struct dst_state *st); | ||
438 | |||
439 | void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); | ||
440 | |||
441 | static inline void dst_state_lock(struct dst_state *st) | ||
442 | { | ||
443 | mutex_lock(&st->state_lock); | ||
444 | } | ||
445 | |||
446 | static inline void dst_state_unlock(struct dst_state *st) | ||
447 | { | ||
448 | mutex_unlock(&st->state_lock); | ||
449 | } | ||
450 | |||
451 | void dst_poll_exit(struct dst_state *st); | ||
452 | int dst_poll_init(struct dst_state *st); | ||
453 | |||
454 | static inline unsigned int dst_state_poll(struct dst_state *st) | ||
455 | { | ||
456 | unsigned int revents = POLLHUP | POLLERR; | ||
457 | |||
458 | dst_state_lock(st); | ||
459 | if (st->socket) | ||
460 | revents = st->socket->ops->poll(NULL, st->socket, NULL); | ||
461 | dst_state_unlock(st); | ||
462 | |||
463 | return revents; | ||
464 | } | ||
465 | |||
/*
 * Setup callback that does nothing.
 * Both arguments are ignored; always returns 0.
 */
static inline int dst_thread_setup(void *private, void *data)
{
	int err = 0;

	return err;
}
470 | |||
471 | void dst_node_put(struct dst_node *n); | ||
472 | |||
473 | static inline struct dst_node *dst_node_get(struct dst_node *n) | ||
474 | { | ||
475 | atomic_inc(&n->refcnt); | ||
476 | return n; | ||
477 | } | ||
478 | |||
479 | int dst_data_recv(struct dst_state *st, void *data, unsigned int size); | ||
480 | int dst_recv_cdata(struct dst_state *st, void *cdata); | ||
481 | int dst_data_send_header(struct socket *sock, | ||
482 | void *data, unsigned int size, int more); | ||
483 | |||
484 | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); | ||
485 | |||
486 | int dst_process_io(struct dst_state *st); | ||
487 | int dst_export_crypto(struct dst_node *n, struct bio *bio); | ||
488 | int dst_export_send_bio(struct bio *bio); | ||
489 | int dst_start_export(struct dst_node *n); | ||
490 | |||
491 | int __init dst_export_init(void); | ||
492 | void dst_export_exit(void); | ||
493 | |||
494 | /* Private structure for export block IO requests */ | ||
495 | struct dst_export_priv | ||
496 | { | ||
497 | struct list_head request_entry; | ||
498 | struct dst_state *state; | ||
499 | struct bio *bio; | ||
500 | struct dst_cmd cmd; | ||
501 | }; | ||
502 | |||
503 | static inline void dst_trans_get(struct dst_trans *t) | ||
504 | { | ||
505 | atomic_inc(&t->refcnt); | ||
506 | } | ||
507 | |||
508 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); | ||
509 | int dst_trans_remove(struct dst_trans *t); | ||
510 | int dst_trans_remove_nolock(struct dst_trans *t); | ||
511 | void dst_trans_put(struct dst_trans *t); | ||
512 | |||
513 | /* | ||
514 | * Convert bio into network command. | ||
515 | */ | ||
516 | static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, | ||
517 | u32 command, u64 id) | ||
518 | { | ||
519 | cmd->cmd = command; | ||
520 | cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; | ||
521 | cmd->rw = bio->bi_rw; | ||
522 | cmd->size = bio->bi_size; | ||
523 | cmd->csize = 0; | ||
524 | cmd->id = id; | ||
525 | cmd->sector = bio->bi_sector; | ||
526 | }; | ||
527 | |||
528 | int dst_trans_send(struct dst_trans *t); | ||
529 | int dst_trans_crypto(struct dst_trans *t); | ||
530 | |||
531 | int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); | ||
532 | void dst_node_crypto_exit(struct dst_node *n); | ||
533 | |||
534 | static inline int dst_need_crypto(struct dst_node *n) | ||
535 | { | ||
536 | struct dst_crypto_ctl *c = &n->crypto; | ||
537 | /* | ||
538 | * Logical OR is appropriate here, but boolean one produces | ||
539 | * more optimal code, so it is used instead. | ||
540 | */ | ||
541 | return (c->hash_algo[0] | c->cipher_algo[0]); | ||
542 | } | ||
543 | |||
544 | int dst_node_trans_init(struct dst_node *n, unsigned int size); | ||
545 | void dst_node_trans_exit(struct dst_node *n); | ||
546 | |||
547 | /* | ||
548 | * Pool of threads. | ||
549 | * Ready list contains threads currently free to be used, | ||
550 | * active one contains threads with some work scheduled for them. | ||
551 | * Caller can wait in given queue when thread is ready. | ||
552 | */ | ||
553 | struct thread_pool | ||
554 | { | ||
555 | int thread_num; | ||
556 | struct mutex thread_lock; | ||
557 | struct list_head ready_list, active_list; | ||
558 | |||
559 | wait_queue_head_t wait; | ||
560 | }; | ||
561 | |||
562 | void thread_pool_del_worker(struct thread_pool *p); | ||
563 | void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); | ||
564 | int thread_pool_add_worker(struct thread_pool *p, | ||
565 | char *name, | ||
566 | unsigned int id, | ||
567 | void *(* init)(void *data), | ||
568 | void (* cleanup)(void *data), | ||
569 | void *data); | ||
570 | |||
571 | void thread_pool_destroy(struct thread_pool *p); | ||
572 | struct thread_pool *thread_pool_create(int num, char *name, | ||
573 | void *(* init)(void *data), | ||
574 | void (* cleanup)(void *data), | ||
575 | void *data); | ||
576 | |||
577 | int thread_pool_schedule(struct thread_pool *p, | ||
578 | int (* setup)(void *stored_private, void *setup_data), | ||
579 | int (* action)(void *stored_private, void *setup_data), | ||
580 | void *setup_data, long timeout); | ||
581 | int thread_pool_schedule_private(struct thread_pool *p, | ||
582 | int (* setup)(void *private, void *data), | ||
583 | int (* action)(void *private, void *data), | ||
584 | void *data, long timeout, void *id); | ||
585 | |||
586 | #endif /* __KERNEL__ */ | ||
587 | #endif /* __DST_H */ | ||