diff options
Diffstat (limited to 'include/linux/dst.h')
-rw-r--r-- | include/linux/dst.h | 587 |
1 files changed, 587 insertions, 0 deletions
diff --git a/include/linux/dst.h b/include/linux/dst.h new file mode 100644 index 000000000000..e26fed84b1aa --- /dev/null +++ b/include/linux/dst.h | |||
@@ -0,0 +1,587 @@ | |||
1 | /* | ||
2 | * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef __DST_H | ||
17 | #define __DST_H | ||
18 | |||
19 | #include <linux/types.h> | ||
20 | #include <linux/connector.h> | ||
21 | |||
22 | #define DST_NAMELEN 32 | ||
23 | #define DST_NAME "dst" | ||
24 | |||
25 | enum { | ||
26 | /* Remove node with given id from storage */ | ||
27 | DST_DEL_NODE = 0, | ||
28 | /* Add remote node with given id to the storage */ | ||
29 | DST_ADD_REMOTE, | ||
30 | /* Add local node with given id to the storage to be exported and used by remote peers */ | ||
31 | DST_ADD_EXPORT, | ||
32 | /* Crypto initialization command (hash/cipher used to protect the connection) */ | ||
33 | DST_CRYPTO, | ||
34 | /* Security attributes for given connection (permissions for example) */ | ||
35 | DST_SECURITY, | ||
36 | /* Register given node in the block layer subsystem */ | ||
37 | DST_START, | ||
38 | DST_CMD_MAX | ||
39 | }; | ||
40 | |||
41 | struct dst_ctl | ||
42 | { | ||
43 | /* Storage name */ | ||
44 | char name[DST_NAMELEN]; | ||
45 | /* Command flags */ | ||
46 | __u32 flags; | ||
47 | /* Command itself (see above) */ | ||
48 | __u32 cmd; | ||
49 | /* Maximum number of pages per single request in this device */ | ||
50 | __u32 max_pages; | ||
51 | /* Stale/error transaction scanning timeout in milliseconds */ | ||
52 | __u32 trans_scan_timeout; | ||
53 | /* Maximum number of retry sends before completing transaction as broken */ | ||
54 | __u32 trans_max_retries; | ||
55 | /* Storage size */ | ||
56 | __u64 size; | ||
57 | }; | ||
58 | |||
59 | /* Reply command carries completion status */ | ||
60 | struct dst_ctl_ack | ||
61 | { | ||
62 | struct cn_msg msg; | ||
63 | int error; | ||
64 | int unused[3]; | ||
65 | }; | ||
66 | |||
67 | /* | ||
68 | * Unfortunaltely socket address structure is not exported to userspace | ||
69 | * and is redefined there. | ||
70 | */ | ||
71 | #define SADDR_MAX_DATA 128 | ||
72 | |||
73 | struct saddr { | ||
74 | /* address family, AF_xxx */ | ||
75 | unsigned short sa_family; | ||
76 | /* 14 bytes of protocol address */ | ||
77 | char sa_data[SADDR_MAX_DATA]; | ||
78 | /* Number of bytes used in sa_data */ | ||
79 | unsigned short sa_data_len; | ||
80 | }; | ||
81 | |||
82 | /* Address structure */ | ||
83 | struct dst_network_ctl | ||
84 | { | ||
85 | /* Socket type: datagram, stream...*/ | ||
86 | unsigned int type; | ||
87 | /* Let me guess, is it a Jupiter diameter? */ | ||
88 | unsigned int proto; | ||
89 | /* Peer's address */ | ||
90 | struct saddr addr; | ||
91 | }; | ||
92 | |||
93 | struct dst_crypto_ctl | ||
94 | { | ||
95 | /* Cipher and hash names */ | ||
96 | char cipher_algo[DST_NAMELEN]; | ||
97 | char hash_algo[DST_NAMELEN]; | ||
98 | |||
99 | /* Key sizes. Can be zero for digest for example */ | ||
100 | unsigned int cipher_keysize, hash_keysize; | ||
101 | /* Alignment. Calculated by the DST itself. */ | ||
102 | unsigned int crypto_attached_size; | ||
103 | /* Number of threads to perform crypto operations */ | ||
104 | int thread_num; | ||
105 | }; | ||
106 | |||
107 | /* Export security attributes have this bits checked in when client connects */ | ||
108 | #define DST_PERM_READ (1<<0) | ||
109 | #define DST_PERM_WRITE (1<<1) | ||
110 | |||
111 | /* | ||
112 | * Right now it is simple model, where each remote address | ||
113 | * is assigned to set of permissions it is allowed to perform. | ||
114 | * In real world block device does not know anything but | ||
115 | * reading and writing, so it should be more than enough. | ||
116 | */ | ||
117 | struct dst_secure_user | ||
118 | { | ||
119 | unsigned int permissions; | ||
120 | struct saddr addr; | ||
121 | }; | ||
122 | |||
123 | /* | ||
124 | * Export control command: device to export and network address to accept | ||
125 | * clients to work with given device | ||
126 | */ | ||
127 | struct dst_export_ctl | ||
128 | { | ||
129 | char device[DST_NAMELEN]; | ||
130 | struct dst_network_ctl ctl; | ||
131 | }; | ||
132 | |||
133 | enum { | ||
134 | DST_CFG = 1, /* Request remote configuration */ | ||
135 | DST_IO, /* IO command */ | ||
136 | DST_IO_RESPONSE, /* IO response */ | ||
137 | DST_PING, /* Keepalive message */ | ||
138 | DST_NCMD_MAX, | ||
139 | }; | ||
140 | |||
141 | struct dst_cmd | ||
142 | { | ||
143 | /* Network command itself, see above */ | ||
144 | __u32 cmd; | ||
145 | /* | ||
146 | * Size of the attached data | ||
147 | * (in most cases, for READ command it means how many bytes were requested) | ||
148 | */ | ||
149 | __u32 size; | ||
150 | /* Crypto size: number of attached bytes with digest/hmac */ | ||
151 | __u32 csize; | ||
152 | /* Here we can carry secret data */ | ||
153 | __u32 reserved; | ||
154 | /* Read/write bits, see how they are encoded in bio structure */ | ||
155 | __u64 rw; | ||
156 | /* BIO flags */ | ||
157 | __u64 flags; | ||
158 | /* Unique command id (like transaction ID) */ | ||
159 | __u64 id; | ||
160 | /* Sector to start IO from */ | ||
161 | __u64 sector; | ||
162 | /* Hash data is placed after this header */ | ||
163 | __u8 hash[0]; | ||
164 | }; | ||
165 | |||
166 | /* | ||
167 | * Convert command to/from network byte order. | ||
168 | * We do not use hton*() functions, since there is | ||
169 | * no 64-bit implementation. | ||
170 | */ | ||
171 | static inline void dst_convert_cmd(struct dst_cmd *c) | ||
172 | { | ||
173 | c->cmd = __cpu_to_be32(c->cmd); | ||
174 | c->csize = __cpu_to_be32(c->csize); | ||
175 | c->size = __cpu_to_be32(c->size); | ||
176 | c->sector = __cpu_to_be64(c->sector); | ||
177 | c->id = __cpu_to_be64(c->id); | ||
178 | c->flags = __cpu_to_be64(c->flags); | ||
179 | c->rw = __cpu_to_be64(c->rw); | ||
180 | } | ||
181 | |||
182 | /* Transaction id */ | ||
183 | typedef __u64 dst_gen_t; | ||
184 | |||
185 | #ifdef __KERNEL__ | ||
186 | |||
187 | #include <linux/blkdev.h> | ||
188 | #include <linux/bio.h> | ||
189 | #include <linux/device.h> | ||
190 | #include <linux/mempool.h> | ||
191 | #include <linux/net.h> | ||
192 | #include <linux/poll.h> | ||
193 | #include <linux/rbtree.h> | ||
194 | |||
195 | #ifdef CONFIG_DST_DEBUG | ||
196 | #define dprintk(f, a...) printk(KERN_NOTICE f, ##a) | ||
197 | #else | ||
198 | static inline void __attribute__ ((format (printf, 1, 2))) | ||
199 | dprintk(const char *fmt, ...) {} | ||
200 | #endif | ||
201 | |||
202 | struct dst_node; | ||
203 | |||
204 | struct dst_trans | ||
205 | { | ||
206 | /* DST node we are working with */ | ||
207 | struct dst_node *n; | ||
208 | |||
209 | /* Entry inside transaction tree */ | ||
210 | struct rb_node trans_entry; | ||
211 | |||
212 | /* Merlin kills this transaction when this memory cell equals zero */ | ||
213 | atomic_t refcnt; | ||
214 | |||
215 | /* How this transaction should be processed by crypto engine */ | ||
216 | short enc; | ||
217 | /* How many times this transaction was resent */ | ||
218 | short retries; | ||
219 | /* Completion status */ | ||
220 | int error; | ||
221 | |||
222 | /* When did we send it to the remote peer */ | ||
223 | long send_time; | ||
224 | |||
225 | /* My name is... | ||
226 | * Well, computers does not speak, they have unique id instead */ | ||
227 | dst_gen_t gen; | ||
228 | |||
229 | /* Block IO we are working with */ | ||
230 | struct bio *bio; | ||
231 | |||
232 | /* Network command for above block IO request */ | ||
233 | struct dst_cmd cmd; | ||
234 | }; | ||
235 | |||
236 | struct dst_crypto_engine | ||
237 | { | ||
238 | /* What should we do with all block requests */ | ||
239 | struct crypto_hash *hash; | ||
240 | struct crypto_ablkcipher *cipher; | ||
241 | |||
242 | /* Pool of pages used to encrypt data into before sending */ | ||
243 | int page_num; | ||
244 | struct page **pages; | ||
245 | |||
246 | /* What to do with current request */ | ||
247 | int enc; | ||
248 | /* Who we are and where do we go */ | ||
249 | struct scatterlist *src, *dst; | ||
250 | |||
251 | /* Maximum timeout waiting for encryption to be completed */ | ||
252 | long timeout; | ||
253 | /* IV is a 64-bit sequential counter */ | ||
254 | u64 iv; | ||
255 | |||
256 | /* Secret data */ | ||
257 | void *private; | ||
258 | |||
259 | /* Cached temporary data lives here */ | ||
260 | int size; | ||
261 | void *data; | ||
262 | }; | ||
263 | |||
264 | struct dst_state | ||
265 | { | ||
266 | /* The main state protection */ | ||
267 | struct mutex state_lock; | ||
268 | |||
269 | /* Polling machinery for sockets */ | ||
270 | wait_queue_t wait; | ||
271 | wait_queue_head_t *whead; | ||
272 | /* Most of events are being waited here */ | ||
273 | wait_queue_head_t thread_wait; | ||
274 | |||
275 | /* Who owns this? */ | ||
276 | struct dst_node *node; | ||
277 | |||
278 | /* Network address for this state */ | ||
279 | struct dst_network_ctl ctl; | ||
280 | |||
281 | /* Permissions to work with: read-only or rw connection */ | ||
282 | u32 permissions; | ||
283 | |||
284 | /* Called when we need to clean private data */ | ||
285 | void (* cleanup)(struct dst_state *st); | ||
286 | |||
287 | /* Used by the server: BIO completion queues BIOs here */ | ||
288 | struct list_head request_list; | ||
289 | spinlock_t request_lock; | ||
290 | |||
291 | /* Guess what? No, it is not number of planets */ | ||
292 | atomic_t refcnt; | ||
293 | |||
294 | /* This flags is set when connection should be dropped */ | ||
295 | int need_exit; | ||
296 | |||
297 | /* | ||
298 | * Socket to work with. Second pointer is used for | ||
299 | * lockless check if socket was changed before performing | ||
300 | * next action (like working with cached polling result) | ||
301 | */ | ||
302 | struct socket *socket, *read_socket; | ||
303 | |||
304 | /* Cached preallocated data */ | ||
305 | void *data; | ||
306 | unsigned int size; | ||
307 | |||
308 | /* Currently processed command */ | ||
309 | struct dst_cmd cmd; | ||
310 | }; | ||
311 | |||
312 | struct dst_info | ||
313 | { | ||
314 | /* Device size */ | ||
315 | u64 size; | ||
316 | |||
317 | /* Local device name for export devices */ | ||
318 | char local[DST_NAMELEN]; | ||
319 | |||
320 | /* Network setup */ | ||
321 | struct dst_network_ctl net; | ||
322 | |||
323 | /* Sysfs bits use this */ | ||
324 | struct device device; | ||
325 | }; | ||
326 | |||
327 | struct dst_node | ||
328 | { | ||
329 | struct list_head node_entry; | ||
330 | |||
331 | /* Hi, my name is stored here */ | ||
332 | char name[DST_NAMELEN]; | ||
333 | /* My cache name is stored here */ | ||
334 | char cache_name[DST_NAMELEN]; | ||
335 | |||
336 | /* Block device attached to given node. | ||
337 | * Only valid for exporting nodes */ | ||
338 | struct block_device *bdev; | ||
339 | /* Network state machine for given peer */ | ||
340 | struct dst_state *state; | ||
341 | |||
342 | /* Block IO machinery */ | ||
343 | struct request_queue *queue; | ||
344 | struct gendisk *disk; | ||
345 | |||
346 | /* Number of threads in processing pool */ | ||
347 | int thread_num; | ||
348 | /* Maximum number of pages in single IO */ | ||
349 | int max_pages; | ||
350 | |||
351 | /* I'm that big in bytes */ | ||
352 | loff_t size; | ||
353 | |||
354 | /* Exported to userspace node information */ | ||
355 | struct dst_info *info; | ||
356 | |||
357 | /* | ||
358 | * Security attribute list. | ||
359 | * Used only by exporting node currently. | ||
360 | */ | ||
361 | struct list_head security_list; | ||
362 | struct mutex security_lock; | ||
363 | |||
364 | /* | ||
365 | * When this unerflows below zero, university collapses. | ||
366 | * But this will not happen, since node will be freed, | ||
367 | * when reference counter reaches zero. | ||
368 | */ | ||
369 | atomic_t refcnt; | ||
370 | |||
371 | /* How precisely should I be started? */ | ||
372 | int (*start)(struct dst_node *); | ||
373 | |||
374 | /* Crypto capabilities */ | ||
375 | struct dst_crypto_ctl crypto; | ||
376 | u8 *hash_key; | ||
377 | u8 *cipher_key; | ||
378 | |||
379 | /* Pool of processing thread */ | ||
380 | struct thread_pool *pool; | ||
381 | |||
382 | /* Transaction IDs live here */ | ||
383 | atomic_long_t gen; | ||
384 | |||
385 | /* | ||
386 | * How frequently and how many times transaction | ||
387 | * tree should be scanned to drop stale objects. | ||
388 | */ | ||
389 | long trans_scan_timeout; | ||
390 | int trans_max_retries; | ||
391 | |||
392 | /* Small gnomes live here */ | ||
393 | struct rb_root trans_root; | ||
394 | struct mutex trans_lock; | ||
395 | |||
396 | /* | ||
397 | * Transaction cache/memory pool. | ||
398 | * It is big enough to contain not only transaction | ||
399 | * itself, but additional crypto data (digest/hmac). | ||
400 | */ | ||
401 | struct kmem_cache *trans_cache; | ||
402 | mempool_t *trans_pool; | ||
403 | |||
404 | /* This entity scans transaction tree */ | ||
405 | struct delayed_work trans_work; | ||
406 | |||
407 | wait_queue_head_t wait; | ||
408 | }; | ||
409 | |||
410 | /* Kernel representation of the security attribute */ | ||
411 | struct dst_secure | ||
412 | { | ||
413 | struct list_head sec_entry; | ||
414 | struct dst_secure_user sec; | ||
415 | }; | ||
416 | |||
417 | int dst_process_bio(struct dst_node *n, struct bio *bio); | ||
418 | |||
419 | int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); | ||
420 | int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); | ||
421 | |||
422 | static inline struct dst_state *dst_state_get(struct dst_state *st) | ||
423 | { | ||
424 | BUG_ON(atomic_read(&st->refcnt) == 0); | ||
425 | atomic_inc(&st->refcnt); | ||
426 | return st; | ||
427 | } | ||
428 | |||
429 | void dst_state_put(struct dst_state *st); | ||
430 | |||
431 | struct dst_state *dst_state_alloc(struct dst_node *n); | ||
432 | int dst_state_socket_create(struct dst_state *st); | ||
433 | void dst_state_socket_release(struct dst_state *st); | ||
434 | |||
435 | void dst_state_exit_connected(struct dst_state *st); | ||
436 | |||
437 | int dst_state_schedule_receiver(struct dst_state *st); | ||
438 | |||
439 | void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); | ||
440 | |||
441 | static inline void dst_state_lock(struct dst_state *st) | ||
442 | { | ||
443 | mutex_lock(&st->state_lock); | ||
444 | } | ||
445 | |||
446 | static inline void dst_state_unlock(struct dst_state *st) | ||
447 | { | ||
448 | mutex_unlock(&st->state_lock); | ||
449 | } | ||
450 | |||
451 | void dst_poll_exit(struct dst_state *st); | ||
452 | int dst_poll_init(struct dst_state *st); | ||
453 | |||
454 | static inline unsigned int dst_state_poll(struct dst_state *st) | ||
455 | { | ||
456 | unsigned int revents = POLLHUP | POLLERR; | ||
457 | |||
458 | dst_state_lock(st); | ||
459 | if (st->socket) | ||
460 | revents = st->socket->ops->poll(NULL, st->socket, NULL); | ||
461 | dst_state_unlock(st); | ||
462 | |||
463 | return revents; | ||
464 | } | ||
465 | |||
466 | static inline int dst_thread_setup(void *private, void *data) | ||
467 | { | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | void dst_node_put(struct dst_node *n); | ||
472 | |||
473 | static inline struct dst_node *dst_node_get(struct dst_node *n) | ||
474 | { | ||
475 | atomic_inc(&n->refcnt); | ||
476 | return n; | ||
477 | } | ||
478 | |||
479 | int dst_data_recv(struct dst_state *st, void *data, unsigned int size); | ||
480 | int dst_recv_cdata(struct dst_state *st, void *cdata); | ||
481 | int dst_data_send_header(struct socket *sock, | ||
482 | void *data, unsigned int size, int more); | ||
483 | |||
484 | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); | ||
485 | |||
486 | int dst_process_io(struct dst_state *st); | ||
487 | int dst_export_crypto(struct dst_node *n, struct bio *bio); | ||
488 | int dst_export_send_bio(struct bio *bio); | ||
489 | int dst_start_export(struct dst_node *n); | ||
490 | |||
491 | int __init dst_export_init(void); | ||
492 | void dst_export_exit(void); | ||
493 | |||
494 | /* Private structure for export block IO requests */ | ||
495 | struct dst_export_priv | ||
496 | { | ||
497 | struct list_head request_entry; | ||
498 | struct dst_state *state; | ||
499 | struct bio *bio; | ||
500 | struct dst_cmd cmd; | ||
501 | }; | ||
502 | |||
503 | static inline void dst_trans_get(struct dst_trans *t) | ||
504 | { | ||
505 | atomic_inc(&t->refcnt); | ||
506 | } | ||
507 | |||
508 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); | ||
509 | int dst_trans_remove(struct dst_trans *t); | ||
510 | int dst_trans_remove_nolock(struct dst_trans *t); | ||
511 | void dst_trans_put(struct dst_trans *t); | ||
512 | |||
513 | /* | ||
514 | * Convert bio into network command. | ||
515 | */ | ||
516 | static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, | ||
517 | u32 command, u64 id) | ||
518 | { | ||
519 | cmd->cmd = command; | ||
520 | cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; | ||
521 | cmd->rw = bio->bi_rw; | ||
522 | cmd->size = bio->bi_size; | ||
523 | cmd->csize = 0; | ||
524 | cmd->id = id; | ||
525 | cmd->sector = bio->bi_sector; | ||
526 | }; | ||
527 | |||
528 | int dst_trans_send(struct dst_trans *t); | ||
529 | int dst_trans_crypto(struct dst_trans *t); | ||
530 | |||
531 | int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); | ||
532 | void dst_node_crypto_exit(struct dst_node *n); | ||
533 | |||
534 | static inline int dst_need_crypto(struct dst_node *n) | ||
535 | { | ||
536 | struct dst_crypto_ctl *c = &n->crypto; | ||
537 | /* | ||
538 | * Logical OR is appropriate here, but boolean one produces | ||
539 | * more optimal code, so it is used instead. | ||
540 | */ | ||
541 | return (c->hash_algo[0] | c->cipher_algo[0]); | ||
542 | } | ||
543 | |||
544 | int dst_node_trans_init(struct dst_node *n, unsigned int size); | ||
545 | void dst_node_trans_exit(struct dst_node *n); | ||
546 | |||
547 | /* | ||
548 | * Pool of threads. | ||
549 | * Ready list contains threads currently free to be used, | ||
550 | * active one contains threads with some work scheduled for them. | ||
551 | * Caller can wait in given queue when thread is ready. | ||
552 | */ | ||
553 | struct thread_pool | ||
554 | { | ||
555 | int thread_num; | ||
556 | struct mutex thread_lock; | ||
557 | struct list_head ready_list, active_list; | ||
558 | |||
559 | wait_queue_head_t wait; | ||
560 | }; | ||
561 | |||
562 | void thread_pool_del_worker(struct thread_pool *p); | ||
563 | void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); | ||
564 | int thread_pool_add_worker(struct thread_pool *p, | ||
565 | char *name, | ||
566 | unsigned int id, | ||
567 | void *(* init)(void *data), | ||
568 | void (* cleanup)(void *data), | ||
569 | void *data); | ||
570 | |||
571 | void thread_pool_destroy(struct thread_pool *p); | ||
572 | struct thread_pool *thread_pool_create(int num, char *name, | ||
573 | void *(* init)(void *data), | ||
574 | void (* cleanup)(void *data), | ||
575 | void *data); | ||
576 | |||
577 | int thread_pool_schedule(struct thread_pool *p, | ||
578 | int (* setup)(void *stored_private, void *setup_data), | ||
579 | int (* action)(void *stored_private, void *setup_data), | ||
580 | void *setup_data, long timeout); | ||
581 | int thread_pool_schedule_private(struct thread_pool *p, | ||
582 | int (* setup)(void *private, void *data), | ||
583 | int (* action)(void *private, void *data), | ||
584 | void *data, long timeout, void *id); | ||
585 | |||
586 | #endif /* __KERNEL__ */ | ||
587 | #endif /* __DST_H */ | ||