aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorEvgeniy Polyakov <zbr@ioremap.net>2009-01-13 18:05:27 -0500
committerGreg Kroah-Hartman <gregkh@suse.de>2009-04-03 17:53:32 -0400
commitce0d9d7255a55628fd3732bf583c83e90150b699 (patch)
treed8aa3910a4ba9d87f98639dafe2fdf69b591fa15 /include/linux
parentdab8c35990692026fca989c3449fd67a59275c6a (diff)
Staging: dst: core files.
This patch contains DST core files, which introduce block layer, connector and sysfs registration glue and main headers. Connector is used for the configuration of the node (its type, address, device name and so on). Sysfs provides bits of information about running devices in the following format: +/* + * DST sysfs tree for device called 'storage': + * + * /sys/bus/dst/devices/storage/ + * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025 + * /sys/bus/dst/devices/storage/size : 800 + * /sys/bus/dst/devices/storage/name : storage + */ DST header contains structure definitions and protocol command description. Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/connector.h4
-rw-r--r--include/linux/dst.h587
2 files changed, 590 insertions, 1 deletions
diff --git a/include/linux/connector.h b/include/linux/connector.h
index fc65d219d88c..b9966e64604e 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -39,8 +39,10 @@
39#define CN_IDX_V86D 0x4 39#define CN_IDX_V86D 0x4
40#define CN_VAL_V86D_UVESAFB 0x1 40#define CN_VAL_V86D_UVESAFB 0x1
41#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ 41#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */
42#define CN_DST_IDX 0x6
43#define CN_DST_VAL 0x1
42 44
43#define CN_NETLINK_USERS 6 45#define CN_NETLINK_USERS 7
44 46
45/* 47/*
46 * Maximum connector's message size. 48 * Maximum connector's message size.
diff --git a/include/linux/dst.h b/include/linux/dst.h
new file mode 100644
index 000000000000..e26fed84b1aa
--- /dev/null
+++ b/include/linux/dst.h
@@ -0,0 +1,587 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#ifndef __DST_H
17#define __DST_H
18
19#include <linux/types.h>
20#include <linux/connector.h>
21
22#define DST_NAMELEN 32
23#define DST_NAME "dst"
24
25enum {
26 /* Remove node with given id from storage */
27 DST_DEL_NODE = 0,
28 /* Add remote node with given id to the storage */
29 DST_ADD_REMOTE,
30 /* Add local node with given id to the storage to be exported and used by remote peers */
31 DST_ADD_EXPORT,
32 /* Crypto initialization command (hash/cipher used to protect the connection) */
33 DST_CRYPTO,
34 /* Security attributes for given connection (permissions for example) */
35 DST_SECURITY,
36 /* Register given node in the block layer subsystem */
37 DST_START,
38 DST_CMD_MAX
39};
40
41struct dst_ctl
42{
43 /* Storage name */
44 char name[DST_NAMELEN];
45 /* Command flags */
46 __u32 flags;
47 /* Command itself (see above) */
48 __u32 cmd;
49 /* Maximum number of pages per single request in this device */
50 __u32 max_pages;
51 /* Stale/error transaction scanning timeout in milliseconds */
52 __u32 trans_scan_timeout;
53 /* Maximum number of retry sends before completing transaction as broken */
54 __u32 trans_max_retries;
55 /* Storage size */
56 __u64 size;
57};
58
59/* Reply command carries completion status */
60struct dst_ctl_ack
61{
62 struct cn_msg msg;
63 int error;
64 int unused[3];
65};
66
67/*
68 * Unfortunaltely socket address structure is not exported to userspace
69 * and is redefined there.
70 */
71#define SADDR_MAX_DATA 128
72
73struct saddr {
74 /* address family, AF_xxx */
75 unsigned short sa_family;
76 /* 14 bytes of protocol address */
77 char sa_data[SADDR_MAX_DATA];
78 /* Number of bytes used in sa_data */
79 unsigned short sa_data_len;
80};
81
82/* Address structure */
83struct dst_network_ctl
84{
85 /* Socket type: datagram, stream...*/
86 unsigned int type;
87 /* Let me guess, is it a Jupiter diameter? */
88 unsigned int proto;
89 /* Peer's address */
90 struct saddr addr;
91};
92
93struct dst_crypto_ctl
94{
95 /* Cipher and hash names */
96 char cipher_algo[DST_NAMELEN];
97 char hash_algo[DST_NAMELEN];
98
99 /* Key sizes. Can be zero for digest for example */
100 unsigned int cipher_keysize, hash_keysize;
101 /* Alignment. Calculated by the DST itself. */
102 unsigned int crypto_attached_size;
103 /* Number of threads to perform crypto operations */
104 int thread_num;
105};
106
107/* Export security attributes have this bits checked in when client connects */
108#define DST_PERM_READ (1<<0)
109#define DST_PERM_WRITE (1<<1)
110
111/*
112 * Right now it is simple model, where each remote address
113 * is assigned to set of permissions it is allowed to perform.
114 * In real world block device does not know anything but
115 * reading and writing, so it should be more than enough.
116 */
117struct dst_secure_user
118{
119 unsigned int permissions;
120 struct saddr addr;
121};
122
123/*
124 * Export control command: device to export and network address to accept
125 * clients to work with given device
126 */
127struct dst_export_ctl
128{
129 char device[DST_NAMELEN];
130 struct dst_network_ctl ctl;
131};
132
133enum {
134 DST_CFG = 1, /* Request remote configuration */
135 DST_IO, /* IO command */
136 DST_IO_RESPONSE, /* IO response */
137 DST_PING, /* Keepalive message */
138 DST_NCMD_MAX,
139};
140
141struct dst_cmd
142{
143 /* Network command itself, see above */
144 __u32 cmd;
145 /*
146 * Size of the attached data
147 * (in most cases, for READ command it means how many bytes were requested)
148 */
149 __u32 size;
150 /* Crypto size: number of attached bytes with digest/hmac */
151 __u32 csize;
152 /* Here we can carry secret data */
153 __u32 reserved;
154 /* Read/write bits, see how they are encoded in bio structure */
155 __u64 rw;
156 /* BIO flags */
157 __u64 flags;
158 /* Unique command id (like transaction ID) */
159 __u64 id;
160 /* Sector to start IO from */
161 __u64 sector;
162 /* Hash data is placed after this header */
163 __u8 hash[0];
164};
165
166/*
167 * Convert command to/from network byte order.
168 * We do not use hton*() functions, since there is
169 * no 64-bit implementation.
170 */
171static inline void dst_convert_cmd(struct dst_cmd *c)
172{
173 c->cmd = __cpu_to_be32(c->cmd);
174 c->csize = __cpu_to_be32(c->csize);
175 c->size = __cpu_to_be32(c->size);
176 c->sector = __cpu_to_be64(c->sector);
177 c->id = __cpu_to_be64(c->id);
178 c->flags = __cpu_to_be64(c->flags);
179 c->rw = __cpu_to_be64(c->rw);
180}
181
182/* Transaction id */
183typedef __u64 dst_gen_t;
184
185#ifdef __KERNEL__
186
187#include <linux/blkdev.h>
188#include <linux/bio.h>
189#include <linux/device.h>
190#include <linux/mempool.h>
191#include <linux/net.h>
192#include <linux/poll.h>
193#include <linux/rbtree.h>
194
195#ifdef CONFIG_DST_DEBUG
196#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
197#else
198static inline void __attribute__ ((format (printf, 1, 2)))
199 dprintk(const char *fmt, ...) {}
200#endif
201
202struct dst_node;
203
204struct dst_trans
205{
206 /* DST node we are working with */
207 struct dst_node *n;
208
209 /* Entry inside transaction tree */
210 struct rb_node trans_entry;
211
212 /* Merlin kills this transaction when this memory cell equals zero */
213 atomic_t refcnt;
214
215 /* How this transaction should be processed by crypto engine */
216 short enc;
217 /* How many times this transaction was resent */
218 short retries;
219 /* Completion status */
220 int error;
221
222 /* When did we send it to the remote peer */
223 long send_time;
224
225 /* My name is...
226 * Well, computers does not speak, they have unique id instead */
227 dst_gen_t gen;
228
229 /* Block IO we are working with */
230 struct bio *bio;
231
232 /* Network command for above block IO request */
233 struct dst_cmd cmd;
234};
235
236struct dst_crypto_engine
237{
238 /* What should we do with all block requests */
239 struct crypto_hash *hash;
240 struct crypto_ablkcipher *cipher;
241
242 /* Pool of pages used to encrypt data into before sending */
243 int page_num;
244 struct page **pages;
245
246 /* What to do with current request */
247 int enc;
248 /* Who we are and where do we go */
249 struct scatterlist *src, *dst;
250
251 /* Maximum timeout waiting for encryption to be completed */
252 long timeout;
253 /* IV is a 64-bit sequential counter */
254 u64 iv;
255
256 /* Secret data */
257 void *private;
258
259 /* Cached temporary data lives here */
260 int size;
261 void *data;
262};
263
264struct dst_state
265{
266 /* The main state protection */
267 struct mutex state_lock;
268
269 /* Polling machinery for sockets */
270 wait_queue_t wait;
271 wait_queue_head_t *whead;
272 /* Most of events are being waited here */
273 wait_queue_head_t thread_wait;
274
275 /* Who owns this? */
276 struct dst_node *node;
277
278 /* Network address for this state */
279 struct dst_network_ctl ctl;
280
281 /* Permissions to work with: read-only or rw connection */
282 u32 permissions;
283
284 /* Called when we need to clean private data */
285 void (* cleanup)(struct dst_state *st);
286
287 /* Used by the server: BIO completion queues BIOs here */
288 struct list_head request_list;
289 spinlock_t request_lock;
290
291 /* Guess what? No, it is not number of planets */
292 atomic_t refcnt;
293
294 /* This flags is set when connection should be dropped */
295 int need_exit;
296
297 /*
298 * Socket to work with. Second pointer is used for
299 * lockless check if socket was changed before performing
300 * next action (like working with cached polling result)
301 */
302 struct socket *socket, *read_socket;
303
304 /* Cached preallocated data */
305 void *data;
306 unsigned int size;
307
308 /* Currently processed command */
309 struct dst_cmd cmd;
310};
311
312struct dst_info
313{
314 /* Device size */
315 u64 size;
316
317 /* Local device name for export devices */
318 char local[DST_NAMELEN];
319
320 /* Network setup */
321 struct dst_network_ctl net;
322
323 /* Sysfs bits use this */
324 struct device device;
325};
326
327struct dst_node
328{
329 struct list_head node_entry;
330
331 /* Hi, my name is stored here */
332 char name[DST_NAMELEN];
333 /* My cache name is stored here */
334 char cache_name[DST_NAMELEN];
335
336 /* Block device attached to given node.
337 * Only valid for exporting nodes */
338 struct block_device *bdev;
339 /* Network state machine for given peer */
340 struct dst_state *state;
341
342 /* Block IO machinery */
343 struct request_queue *queue;
344 struct gendisk *disk;
345
346 /* Number of threads in processing pool */
347 int thread_num;
348 /* Maximum number of pages in single IO */
349 int max_pages;
350
351 /* I'm that big in bytes */
352 loff_t size;
353
354 /* Exported to userspace node information */
355 struct dst_info *info;
356
357 /*
358 * Security attribute list.
359 * Used only by exporting node currently.
360 */
361 struct list_head security_list;
362 struct mutex security_lock;
363
364 /*
365 * When this unerflows below zero, university collapses.
366 * But this will not happen, since node will be freed,
367 * when reference counter reaches zero.
368 */
369 atomic_t refcnt;
370
371 /* How precisely should I be started? */
372 int (*start)(struct dst_node *);
373
374 /* Crypto capabilities */
375 struct dst_crypto_ctl crypto;
376 u8 *hash_key;
377 u8 *cipher_key;
378
379 /* Pool of processing thread */
380 struct thread_pool *pool;
381
382 /* Transaction IDs live here */
383 atomic_long_t gen;
384
385 /*
386 * How frequently and how many times transaction
387 * tree should be scanned to drop stale objects.
388 */
389 long trans_scan_timeout;
390 int trans_max_retries;
391
392 /* Small gnomes live here */
393 struct rb_root trans_root;
394 struct mutex trans_lock;
395
396 /*
397 * Transaction cache/memory pool.
398 * It is big enough to contain not only transaction
399 * itself, but additional crypto data (digest/hmac).
400 */
401 struct kmem_cache *trans_cache;
402 mempool_t *trans_pool;
403
404 /* This entity scans transaction tree */
405 struct delayed_work trans_work;
406
407 wait_queue_head_t wait;
408};
409
410/* Kernel representation of the security attribute */
411struct dst_secure
412{
413 struct list_head sec_entry;
414 struct dst_secure_user sec;
415};
416
417int dst_process_bio(struct dst_node *n, struct bio *bio);
418
419int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
420int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
421
422static inline struct dst_state *dst_state_get(struct dst_state *st)
423{
424 BUG_ON(atomic_read(&st->refcnt) == 0);
425 atomic_inc(&st->refcnt);
426 return st;
427}
428
429void dst_state_put(struct dst_state *st);
430
431struct dst_state *dst_state_alloc(struct dst_node *n);
432int dst_state_socket_create(struct dst_state *st);
433void dst_state_socket_release(struct dst_state *st);
434
435void dst_state_exit_connected(struct dst_state *st);
436
437int dst_state_schedule_receiver(struct dst_state *st);
438
439void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
440
441static inline void dst_state_lock(struct dst_state *st)
442{
443 mutex_lock(&st->state_lock);
444}
445
446static inline void dst_state_unlock(struct dst_state *st)
447{
448 mutex_unlock(&st->state_lock);
449}
450
451void dst_poll_exit(struct dst_state *st);
452int dst_poll_init(struct dst_state *st);
453
454static inline unsigned int dst_state_poll(struct dst_state *st)
455{
456 unsigned int revents = POLLHUP | POLLERR;
457
458 dst_state_lock(st);
459 if (st->socket)
460 revents = st->socket->ops->poll(NULL, st->socket, NULL);
461 dst_state_unlock(st);
462
463 return revents;
464}
465
466static inline int dst_thread_setup(void *private, void *data)
467{
468 return 0;
469}
470
471void dst_node_put(struct dst_node *n);
472
473static inline struct dst_node *dst_node_get(struct dst_node *n)
474{
475 atomic_inc(&n->refcnt);
476 return n;
477}
478
479int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
480int dst_recv_cdata(struct dst_state *st, void *cdata);
481int dst_data_send_header(struct socket *sock,
482 void *data, unsigned int size, int more);
483
484int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
485
486int dst_process_io(struct dst_state *st);
487int dst_export_crypto(struct dst_node *n, struct bio *bio);
488int dst_export_send_bio(struct bio *bio);
489int dst_start_export(struct dst_node *n);
490
491int __init dst_export_init(void);
492void dst_export_exit(void);
493
494/* Private structure for export block IO requests */
495struct dst_export_priv
496{
497 struct list_head request_entry;
498 struct dst_state *state;
499 struct bio *bio;
500 struct dst_cmd cmd;
501};
502
503static inline void dst_trans_get(struct dst_trans *t)
504{
505 atomic_inc(&t->refcnt);
506}
507
508struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
509int dst_trans_remove(struct dst_trans *t);
510int dst_trans_remove_nolock(struct dst_trans *t);
511void dst_trans_put(struct dst_trans *t);
512
513/*
514 * Convert bio into network command.
515 */
516static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
517 u32 command, u64 id)
518{
519 cmd->cmd = command;
520 cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
521 cmd->rw = bio->bi_rw;
522 cmd->size = bio->bi_size;
523 cmd->csize = 0;
524 cmd->id = id;
525 cmd->sector = bio->bi_sector;
526};
527
528int dst_trans_send(struct dst_trans *t);
529int dst_trans_crypto(struct dst_trans *t);
530
531int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
532void dst_node_crypto_exit(struct dst_node *n);
533
534static inline int dst_need_crypto(struct dst_node *n)
535{
536 struct dst_crypto_ctl *c = &n->crypto;
537 /*
538 * Logical OR is appropriate here, but boolean one produces
539 * more optimal code, so it is used instead.
540 */
541 return (c->hash_algo[0] | c->cipher_algo[0]);
542}
543
544int dst_node_trans_init(struct dst_node *n, unsigned int size);
545void dst_node_trans_exit(struct dst_node *n);
546
547/*
548 * Pool of threads.
549 * Ready list contains threads currently free to be used,
550 * active one contains threads with some work scheduled for them.
551 * Caller can wait in given queue when thread is ready.
552 */
553struct thread_pool
554{
555 int thread_num;
556 struct mutex thread_lock;
557 struct list_head ready_list, active_list;
558
559 wait_queue_head_t wait;
560};
561
562void thread_pool_del_worker(struct thread_pool *p);
563void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
564int thread_pool_add_worker(struct thread_pool *p,
565 char *name,
566 unsigned int id,
567 void *(* init)(void *data),
568 void (* cleanup)(void *data),
569 void *data);
570
571void thread_pool_destroy(struct thread_pool *p);
572struct thread_pool *thread_pool_create(int num, char *name,
573 void *(* init)(void *data),
574 void (* cleanup)(void *data),
575 void *data);
576
577int thread_pool_schedule(struct thread_pool *p,
578 int (* setup)(void *stored_private, void *setup_data),
579 int (* action)(void *stored_private, void *setup_data),
580 void *setup_data, long timeout);
581int thread_pool_schedule_private(struct thread_pool *p,
582 int (* setup)(void *private, void *data),
583 int (* action)(void *private, void *data),
584 void *data, long timeout, void *id);
585
586#endif /* __KERNEL__ */
587#endif /* __DST_H */