author	Evgeniy Polyakov <zbr@ioremap.net>	2009-02-09 09:02:41 -0500
committer	Greg Kroah-Hartman <gregkh@suse.de>	2009-04-03 17:53:35 -0400
commit	c09ee9d206994655d55de60222a3024702ad2055 (patch)
tree	e3900874413089014609c8a0086198697df24448
parent	d35eef1b42befc74b33c47de46b37a0370622ec2 (diff)
Staging: pohmelfs: transaction layer.
This patch implements the transaction-processing helpers used to allocate, free, insert, remove and otherwise operate on transactions. Each transaction is an object that may embed multiple commands, which complete atomically. When a server fails, the whole transaction will be replayed against it (or against a different server) later. This approach maintains high data integrity and keeps the filesystem state from desynchronizing in case of network or server failures.

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--  drivers/staging/pohmelfs/trans.c  715
1 files changed, 715 insertions, 0 deletions
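
To show how the helpers in this file fit together, here is a minimal caller sketch. It is an editorial illustration, not part of the patch: it assumes the netfs_trans_alloc()/netfs_trans_add()/netfs_trans_put()/netfs_trans_finish() interfaces declared in netfs.h, pohmelfs_send_example() is a made-up function name, and the opcode and payload are placeholders.

static int pohmelfs_send_example(struct pohmelfs_sb *psb, void *payload, unsigned int len)
{
	struct netfs_trans *t;
	struct netfs_cmd *cmd;
	int err;

	/* Room for one command header plus its payload, no attached pages. */
	t = netfs_trans_alloc(psb, sizeof(struct netfs_cmd) + len, NETFS_TRANS_SINGLE_DST, 0);
	if (!t)
		return -ENOMEM;

	/* Reserve space for the command inside the transaction's iovec. */
	cmd = netfs_trans_add(t, sizeof(struct netfs_cmd) + len);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		netfs_trans_put(t);
		return err;
	}

	cmd->cmd = 0;		/* a NETFS_* opcode from netfs.h would go here */
	cmd->size = len;
	cmd->id = 0;
	cmd->start = 0;
	cmd->ext = 0;
	cmd->csize = 0;
	cmd->cpad = 0;
	cmd->iv = 0;
	memcpy(cmd + 1, payload, len);	/* payload immediately follows the header */

	netfs_convert_cmd(cmd);

	/*
	 * Assign a generation number, fill in the NETFS_TRANS header and hand
	 * the transaction to the crypto/send path; this drops our reference.
	 */
	return netfs_trans_finish(t, psb);
}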
diff --git a/drivers/staging/pohmelfs/trans.c b/drivers/staging/pohmelfs/trans.c
new file mode 100644
index 000000000000..92054bdc154c
--- /dev/null
+++ b/drivers/staging/pohmelfs/trans.c
@@ -0,0 +1,715 @@
1/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/module.h>
17#include <linux/crypto.h>
18#include <linux/fs.h>
19#include <linux/jhash.h>
20#include <linux/hash.h>
21#include <linux/ktime.h>
22#include <linux/mempool.h>
23#include <linux/mm.h>
24#include <linux/mount.h>
25#include <linux/pagemap.h>
26#include <linux/parser.h>
27#include <linux/poll.h>
28#include <linux/swap.h>
29#include <linux/slab.h>
30#include <linux/statfs.h>
31#include <linux/writeback.h>
32
33#include "netfs.h"
34
35static struct kmem_cache *netfs_trans_dst;
36static mempool_t *netfs_trans_dst_pool;
37
38static void netfs_trans_init_static(struct netfs_trans *t, int num, int size)
39{
40 t->page_num = num;
41 t->total_size = size;
42 atomic_set(&t->refcnt, 1);
43
44 spin_lock_init(&t->dst_lock);
45 INIT_LIST_HEAD(&t->dst_list);
46}
47
48static int netfs_trans_send_pages(struct netfs_trans *t, struct netfs_state *st)
49{
50 int err = 0;
51 unsigned int i, attached_pages = t->attached_pages, ci;
52 struct msghdr msg;
53 struct page **pages = (t->eng)?t->eng->pages:t->pages;
54 struct page *p;
55 unsigned int size;
56
57 msg.msg_name = NULL;
58 msg.msg_namelen = 0;
59 msg.msg_control = NULL;
60 msg.msg_controllen = 0;
61 msg.msg_flags = MSG_WAITALL | MSG_MORE;
62
63 ci = 0;
64 for (i=0; i<t->page_num; ++i) {
65 struct page *page = pages[ci];
66 struct netfs_cmd cmd;
67 struct iovec io;
68
69 p = t->pages[i];
70
71 if (!p)
72 continue;
73
74 size = page_private(p);
75
76 io.iov_base = &cmd;
77 io.iov_len = sizeof(struct netfs_cmd);
78
79 cmd.cmd = NETFS_WRITE_PAGE;
80 cmd.ext = 0;
81 cmd.id = 0;
82 cmd.size = size;
83 cmd.start = p->index;
84 cmd.start <<= PAGE_CACHE_SHIFT;
85 cmd.csize = 0;
86 cmd.cpad = 0;
87 cmd.iv = pohmelfs_gen_iv(t);
88
89 netfs_convert_cmd(&cmd);
90
91 msg.msg_iov = &io;
92 msg.msg_iovlen = 1;
93 msg.msg_flags = MSG_WAITALL | MSG_MORE;
94
95 err = kernel_sendmsg(st->socket, &msg, (struct kvec *)msg.msg_iov, 1, sizeof(struct netfs_cmd));
96 if (err <= 0) {
97 printk("%s: %d/%d failed to send transaction header: t: %p, gen: %u, err: %d.\n",
98 __func__, i, t->page_num, t, t->gen, err);
99 if (err == 0)
100 err = -ECONNRESET;
101 goto err_out;
102 }
103
104 msg.msg_flags = MSG_WAITALL | ((attached_pages == 1) ? 0 : MSG_MORE);
105
106 err = kernel_sendpage(st->socket, page, 0, size, msg.msg_flags);
107 if (err <= 0) {
108 printk("%s: %d/%d failed to send transaction page: t: %p, gen: %u, size: %u, err: %d.\n",
109 __func__, i, t->page_num, t, t->gen, size, err);
110 if (err == 0)
111 err = -ECONNRESET;
112 goto err_out;
113 }
114
115 dprintk("%s: %d/%d sent t: %p, gen: %u, page: %p/%p, size: %u.\n",
116 __func__, i, t->page_num, t, t->gen, page, p, size);
117
118 err = 0;
119 attached_pages--;
120 if (!attached_pages)
121 break;
122 ci++;
123
124 continue;
125
126err_out:
127 printk("%s: t: %p, gen: %u, err: %d.\n", __func__, t, t->gen, err);
128 netfs_state_exit(st);
129 break;
130 }
131
132 return err;
133}
134
135int netfs_trans_send(struct netfs_trans *t, struct netfs_state *st)
136{
137 int err;
138 struct msghdr msg;
139
140 BUG_ON(!t->iovec.iov_len);
141 BUG_ON(t->iovec.iov_len > 1024*1024*1024);
142
143 netfs_state_lock_send(st);
144 if (!st->socket) {
145 err = netfs_state_init(st);
146 if (err)
147 goto err_out_unlock_return;
148 }
149
150 msg.msg_iov = &t->iovec;
151 msg.msg_iovlen = 1;
152 msg.msg_name = NULL;
153 msg.msg_namelen = 0;
154 msg.msg_control = NULL;
155 msg.msg_controllen = 0;
156 msg.msg_flags = MSG_WAITALL;
157
158 if (t->attached_pages)
159 msg.msg_flags |= MSG_MORE;
160
161 err = kernel_sendmsg(st->socket, &msg, (struct kvec *)msg.msg_iov, 1, t->iovec.iov_len);
162 if (err <= 0) {
163 printk("%s: failed to send contig transaction: t: %p, gen: %u, size: %u, err: %d.\n",
164 __func__, t, t->gen, t->iovec.iov_len, err);
165 if (err == 0)
166 err = -ECONNRESET;
167 goto err_out_unlock_return;
168 }
169
170 dprintk("%s: sent %s transaction: t: %p, gen: %u, size: %u, page_num: %u.\n",
171 __func__, (t->page_num)?"partial":"full",
172 t, t->gen, t->iovec.iov_len, t->page_num);
173
174 err = 0;
175 if (t->attached_pages)
176 err = netfs_trans_send_pages(t, st);
177
178err_out_unlock_return:
179
180 if (st->need_reset) {
181 netfs_state_exit(st);
182 }
183 netfs_state_unlock_send(st);
184
185 dprintk("%s: t: %p, gen: %u, err: %d.\n",
186 __func__, t, t->gen, err);
187
188 t->result = err;
189 return err;
190}
191
192static inline int netfs_trans_cmp(unsigned int gen, unsigned int new)
193{
194 if (gen < new)
195 return 1;
196 if (gen > new)
197 return -1;
198 return 0;
199}
200
201struct netfs_trans_dst *netfs_trans_search(struct netfs_state *st, unsigned int gen)
202{
203 struct rb_root *root = &st->trans_root;
204 struct rb_node *n = root->rb_node;
205 struct netfs_trans_dst *tmp, *ret = NULL;
206 struct netfs_trans *t;
207 int cmp;
208
209 while (n) {
210 tmp = rb_entry(n, struct netfs_trans_dst, state_entry);
211 t = tmp->trans;
212
213 cmp = netfs_trans_cmp(t->gen, gen);
214 if (cmp < 0)
215 n = n->rb_left;
216 else if (cmp > 0)
217 n = n->rb_right;
218 else {
219 ret = tmp;
220 break;
221 }
222 }
223
224 return ret;
225}
226
227static int netfs_trans_insert(struct netfs_trans_dst *ndst, struct netfs_state *st)
228{
229 struct rb_root *root = &st->trans_root;
230 struct rb_node **n = &root->rb_node, *parent = NULL;
231 struct netfs_trans_dst *ret = NULL, *tmp;
232 struct netfs_trans *t = NULL, *new = ndst->trans;
233 int cmp;
234
235 while (*n) {
236 parent = *n;
237
238 tmp = rb_entry(parent, struct netfs_trans_dst, state_entry);
239 t = tmp->trans;
240
241 cmp = netfs_trans_cmp(t->gen, new->gen);
242 if (cmp < 0)
243 n = &parent->rb_left;
244 else if (cmp > 0)
245 n = &parent->rb_right;
246 else {
247 ret = tmp;
248 break;
249 }
250 }
251
252 if (ret) {
253 printk("%s: exist: old: gen: %u, flags: %x, send_time: %lu, "
254 "new: gen: %u, flags: %x, send_time: %lu.\n",
255 __func__, t->gen, t->flags, ret->send_time,
256 new->gen, new->flags, ndst->send_time);
257 return -EEXIST;
258 }
259
260 rb_link_node(&ndst->state_entry, parent, n);
261 rb_insert_color(&ndst->state_entry, root);
262 ndst->send_time = jiffies;
263
264 return 0;
265}
266
267int netfs_trans_remove_nolock(struct netfs_trans_dst *dst, struct netfs_state *st)
268{
269 if (dst && dst->state_entry.rb_parent_color) {
270 rb_erase(&dst->state_entry, &st->trans_root);
271 dst->state_entry.rb_parent_color = 0;
272 return 1;
273 }
274 return 0;
275}
276
277static int netfs_trans_remove_state(struct netfs_trans_dst *dst)
278{
279 int ret;
280 struct netfs_state *st = dst->state;
281
282 mutex_lock(&st->trans_lock);
283 ret = netfs_trans_remove_nolock(dst, st);
284 mutex_unlock(&st->trans_lock);
285
286 return ret;
287}
288
289/*
290 * Create a new destination for the given transaction, associated with the given
291 * network state. The transaction's reference counter is bumped; it is dropped
292 * either when a reply is received or when the async timeout-detection task
293 * gives up resending and drops the transaction.
294 */
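/*
 * Illustrative reference-count flow (an editorial sketch, not part of the
 * original patch; the helper names are the ones used elsewhere in this file):
 *
 *   netfs_trans_alloc()                 refcnt = 1 (held by the submitter)
 *   netfs_trans_push_dst()              refcnt = 2, dst linked into
 *                                       st->trans_root and t->dst_list
 *   reply received:
 *     netfs_trans_drop_dst()            dst unlinked and freed, refcnt = 1
 *   submitter's netfs_trans_put()       refcnt = 0, transaction freed
 *   (called at the end of netfs_trans_finish())
 */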
295static int netfs_trans_push_dst(struct netfs_trans *t, struct netfs_state *st)
296{
297 struct netfs_trans_dst *dst;
298 int err;
299
300 dst = mempool_alloc(netfs_trans_dst_pool, GFP_KERNEL);
301 if (!dst)
302 return -ENOMEM;
303
304 dst->retries = 0;
305 dst->send_time = 0;
306 dst->state = st;
307 dst->trans = t;
308 netfs_trans_get(t);
309
310 mutex_lock(&st->trans_lock);
311 err = netfs_trans_insert(dst, st);
312 mutex_unlock(&st->trans_lock);
313
314 if (err)
315 goto err_out_free;
316
317 spin_lock(&t->dst_lock);
318 list_add_tail(&dst->trans_entry, &t->dst_list);
319 spin_unlock(&t->dst_lock);
320
321 return 0;
322
323err_out_free:
324 t->result = err;
325 netfs_trans_put(t);
326 mempool_free(dst, netfs_trans_dst_pool);
327 return err;
328}
329
330static void netfs_trans_free_dst(struct netfs_trans_dst *dst)
331{
332 netfs_trans_put(dst->trans);
333 mempool_free(dst, netfs_trans_dst_pool);
334}
335
336static void netfs_trans_remove_dst(struct netfs_trans_dst *dst)
337{
338 if (netfs_trans_remove_state(dst))
339 netfs_trans_free_dst(dst);
340}
341
342/*
343 * Drop a destination transaction entry when the caller already holds a pointer to it.
344 */
345void netfs_trans_drop_dst(struct netfs_trans_dst *dst)
346{
347 struct netfs_trans *t = dst->trans;
348
349 spin_lock(&t->dst_lock);
350 list_del_init(&dst->trans_entry);
351 spin_unlock(&t->dst_lock);
352
353 netfs_trans_remove_dst(dst);
354}
355
356/*
357 * Drop a destination transaction entry when the caller already holds a pointer
358 * to it and the entry has already been removed from the state tree.
359 */
360void netfs_trans_drop_dst_nostate(struct netfs_trans_dst *dst)
361{
362 struct netfs_trans *t = dst->trans;
363
364 spin_lock(&t->dst_lock);
365 list_del_init(&dst->trans_entry);
366 spin_unlock(&t->dst_lock);
367
368 netfs_trans_free_dst(dst);
369}
370
371/*
372 * This drops the destination transaction entry for the given state from the
373 * appropriate network state tree and drops the related reference counter. It is
374 * possible that the transaction will be freed here if its reference counter
375 * hits zero. The destination transaction entry itself will be freed.
376 */
377void netfs_trans_drop_trans(struct netfs_trans *t, struct netfs_state *st)
378{
379 struct netfs_trans_dst *dst, *tmp, *ret = NULL;
380
381 spin_lock(&t->dst_lock);
382 list_for_each_entry_safe(dst, tmp, &t->dst_list, trans_entry) {
383 if (dst->state == st) {
384 ret = dst;
385 list_del(&dst->trans_entry);
386 break;
387 }
388 }
389 spin_unlock(&t->dst_lock);
390
391 if (ret)
392 netfs_trans_remove_dst(ret);
393}
394
395/*
396 * This drops the destination entry for the given state (checking the tail of
397 * the transaction's destination list first) from the appropriate network state
398 * tree and drops the related reference counter. It is possible that the
399 * transaction will be freed here if its counter hits zero; the entry itself is freed.
400 */
401void netfs_trans_drop_last(struct netfs_trans *t, struct netfs_state *st)
402{
403 struct netfs_trans_dst *dst, *tmp, *ret;
404
405 spin_lock(&t->dst_lock);
406 ret = list_entry(t->dst_list.prev, struct netfs_trans_dst, trans_entry);
407 if (ret->state != st) {
408 ret = NULL;
409 list_for_each_entry_safe(dst, tmp, &t->dst_list, trans_entry) {
410 if (dst->state == st) {
411 ret = dst;
412 list_del_init(&dst->trans_entry);
413 break;
414 }
415 }
416 } else {
417 list_del(&ret->trans_entry);
418 }
419 spin_unlock(&t->dst_lock);
420
421 if (ret)
422 netfs_trans_remove_dst(ret);
423}
424
425static int netfs_trans_push(struct netfs_trans *t, struct netfs_state *st)
426{
427 int err;
428
429 err = netfs_trans_push_dst(t, st);
430 if (err)
431 return err;
432
433 err = netfs_trans_send(t, st);
434 if (err)
435 goto err_out_free;
436
437 if (t->flags & NETFS_TRANS_SINGLE_DST)
438 pohmelfs_switch_active(st->psb);
439
440 return 0;
441
442err_out_free:
443 t->result = err;
444 netfs_trans_drop_last(t, st);
445
446 return err;
447}
448
449int netfs_trans_finish_send(struct netfs_trans *t, struct pohmelfs_sb *psb)
450{
451 struct pohmelfs_config *c;
452 int err = -ENODEV;
453 struct netfs_state *st;
454#if 0
455 dprintk("%s: t: %p, gen: %u, size: %u, page_num: %u, active: %p.\n",
456 __func__, t, t->gen, t->iovec.iov_len, t->page_num, psb->active_state);
457#endif
458 mutex_lock(&psb->state_lock);
459
460 if ((t->flags & NETFS_TRANS_SINGLE_DST) && psb->active_state) {
461 st = &psb->active_state->state;
462
463 err = -EPIPE;
464 if (netfs_state_poll(st) & POLLOUT) {
465 err = netfs_trans_push_dst(t, st);
466 if (!err) {
467 err = netfs_trans_send(t, st);
468 if (err) {
469 netfs_trans_drop_last(t, st);
470 } else {
471 pohmelfs_switch_active(psb);
472 goto out;
473 }
474 }
475 }
476 pohmelfs_switch_active(psb);
477 }
478
479 list_for_each_entry(c, &psb->state_list, config_entry) {
480 st = &c->state;
481
482 err = netfs_trans_push(t, st);
483 if (!err && (t->flags & NETFS_TRANS_SINGLE_DST))
484 break;
485 }
486out:
487 mutex_unlock(&psb->state_lock);
488#if 0
489 dprintk("%s: fully sent t: %p, gen: %u, size: %u, page_num: %u, err: %d.\n",
490 __func__, t, t->gen, t->iovec.iov_len, t->page_num, err);
491#endif
492 if (err)
493 t->result = err;
494 return err;
495}
496
497int netfs_trans_finish(struct netfs_trans *t, struct pohmelfs_sb *psb)
498{
499 int err;
500 struct netfs_cmd *cmd = t->iovec.iov_base;
501
502 t->gen = atomic_inc_return(&psb->trans_gen);
503
504 pohmelfs_ftrans_clean(t->gen);
505
506 cmd->size = t->iovec.iov_len - sizeof(struct netfs_cmd) +
507 t->attached_size + t->attached_pages * sizeof(struct netfs_cmd);
508 cmd->cmd = NETFS_TRANS;
509 cmd->start = t->gen;
510 cmd->id = 0;
511
512 if (psb->perform_crypto) {
513 cmd->ext = psb->crypto_attached_size;
514 cmd->csize = psb->crypto_attached_size;
515 }
516
517 dprintk("%s: t: %u, size: %u, iov_len: %u, attached_size: %u, attached_pages: %u.\n",
518 __func__, t->gen, cmd->size, t->iovec.iov_len, t->attached_size, t->attached_pages);
519 err = pohmelfs_trans_crypt(t, psb);
520 if (err) {
521 t->result = err;
522 netfs_convert_cmd(cmd);
523 dprintk("%s: trans: %llu, crypto_attached_size: %u, attached_size: %u, attached_pages: %d, trans_size: %u, err: %d.\n",
524 __func__, cmd->start, psb->crypto_attached_size, t->attached_size, t->attached_pages, cmd->size, err);
525 }
526 netfs_trans_put(t);
527 return err;
528}
529
530/*
531 * Resend transaction to remote server(s).
532 * If new servers were added to the superblock, we can try to send data
533 * to them too.
534 *
535 * It is called under the superblock's state_lock, so we can safely
536 * dereference psb->state_list. Also, the transaction's reference counter is
537 * bumped, so it cannot go away under us and we can safely access all of
538 * its members. The state is locked.
539 *
540 * This function returns 0 if the transaction was successfully sent to at
541 * least one destination target.
542 */
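/*
 * Per-state decision made below, summarized as an editorial sketch (the
 * NETFS_TRANS_SINGLE_DST handling follows the code in this function):
 *
 *   dst for this state exists, flag clear              -> resend via netfs_trans_send()
 *   dst exists, flag set, state is last in state_list  -> resend via netfs_trans_send()
 *   dst exists, flag set, state is not last            -> skip this state
 *   no dst for this state yet                          -> netfs_trans_push(); if the
 *                                                         flag is set, stop after the
 *                                                         first successful push
 */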
543int netfs_trans_resend(struct netfs_trans *t, struct pohmelfs_sb *psb)
544{
545 struct netfs_trans_dst *dst;
546 struct netfs_state *st;
547 struct pohmelfs_config *c;
548 int err, exist, error = -ENODEV;
549
550 list_for_each_entry(c, &psb->state_list, config_entry) {
551 st = &c->state;
552
553 exist = 0;
554 spin_lock(&t->dst_lock);
555 list_for_each_entry(dst, &t->dst_list, trans_entry) {
556 if (st == dst->state) {
557 exist = 1;
558 break;
559 }
560 }
561 spin_unlock(&t->dst_lock);
562
563 if (exist) {
564 if (!(t->flags & NETFS_TRANS_SINGLE_DST) ||
565 (c->config_entry.next == &psb->state_list)) {
566 dprintk("%s: resending st: %p, t: %p, gen: %u.\n",
567 __func__, st, t, t->gen);
568 err = netfs_trans_send(t, st);
569 if (!err)
570 error = 0;
571 }
572 continue;
573 }
574
575 dprintk("%s: pushing/resending st: %p, t: %p, gen: %u.\n",
576 __func__, st, t, t->gen);
577 err = netfs_trans_push(t, st);
578 if (err)
579 continue;
580 error = 0;
581 if (t->flags & NETFS_TRANS_SINGLE_DST)
582 break;
583 }
584
585 t->result = error;
586 return error;
587}
588
589void *netfs_trans_add(struct netfs_trans *t, unsigned int size)
590{
591 struct iovec *io = &t->iovec;
592 void *ptr;
593
594 if (size > t->total_size) {
595 ptr = ERR_PTR(-EINVAL);
596 goto out;
597 }
598
599 if (io->iov_len + size > t->total_size) {
600 dprintk("%s: too big size t: %p, gen: %u, iov_len: %u, size: %u, total: %u.\n",
601 __func__, t, t->gen, io->iov_len, size, t->total_size);
602 ptr = ERR_PTR(-E2BIG);
603 goto out;
604 }
605
606 ptr = io->iov_base + io->iov_len;
607 io->iov_len += size;
608
609out:
610 dprintk("%s: t: %p, gen: %u, size: %u, total: %u.\n",
611 __func__, t, t->gen, size, io->iov_len);
612 return ptr;
613}
614
615void netfs_trans_free(struct netfs_trans *t)
616{
617 if (t->eng)
618 pohmelfs_crypto_thread_make_ready(t->eng->thread);
619 kfree(t);
620}
621
622struct netfs_trans *netfs_trans_alloc(struct pohmelfs_sb *psb, unsigned int size,
623 unsigned int flags, unsigned int nr)
624{
625 struct netfs_trans *t;
626 unsigned int num, cont, pad, size_no_trans;
627 unsigned int crypto_added = 0;
628 struct netfs_cmd *cmd;
629
630 if (psb->perform_crypto)
631 crypto_added = psb->crypto_attached_size;
632
633 /*
634 * |sizeof(struct netfs_trans)|
635 * |sizeof(struct netfs_cmd)| - transaction header
636 * |size| - buffer with requested size
637 * |padding| - crypto padding, zero bytes
638 * |nr * sizeof(struct page *)| - array of page pointers
639 *
640 * Overall size should be less than PAGE_SIZE for guaranteed allocation.
641 */
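/*
 * Worked example (illustrative only; the struct sizes are assumed round
 * numbers, not the real sizeof() values, on a 64-bit box with 4 KiB pages):
 * requested size = 100, crypto_align_size = 16, crypto_added = 0, nr = 32,
 * sizeof(struct netfs_cmd) = 32, sizeof(struct netfs_trans) = 128:
 *
 *   size          = ALIGN(100, 16)   = 112, pad = 12
 *   size_no_trans = 112 + 2 * 32 + 0 = 176
 *   cont          = 128 + 176        = 304
 *   num           = (4096 - 304) / 8 = 474, so nr stays 32
 *   allocation    = 304 + 32 * 8     = 560 bytes, well under PAGE_SIZE
 */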
642
643 cont = size;
644 size = ALIGN(size, psb->crypto_align_size);
645 pad = size - cont;
646
647 size_no_trans = size + sizeof(struct netfs_cmd) * 2 + crypto_added;
648
649 cont = sizeof(struct netfs_trans) + size_no_trans;
650
651 num = (PAGE_SIZE - cont)/sizeof(struct page *);
652
653 if (nr > num)
654 nr = num;
655
656 t = kzalloc(cont + nr*sizeof(struct page *), GFP_NOIO);
657 if (!t)
658 goto err_out_exit;
659
660 t->iovec.iov_base = (void *)(t + 1);
661 t->pages = (struct page **)(t->iovec.iov_base + size_no_trans);
662
663 /*
664 * Reserving space for transaction header.
665 */
666 t->iovec.iov_len = sizeof(struct netfs_cmd) + crypto_added;
667
668 netfs_trans_init_static(t, nr, size_no_trans);
669
670 t->flags = flags;
671 t->psb = psb;
672
673 cmd = (struct netfs_cmd *)t->iovec.iov_base;
674
675 cmd->size = size;
676 cmd->cpad = pad;
677 cmd->csize = crypto_added;
678
679 dprintk("%s: t: %p, gen: %u, size: %u, padding: %u, align_size: %u, flags: %x, "
680 "page_num: %u, base: %p, pages: %p.\n",
681 __func__, t, t->gen, size, pad, psb->crypto_align_size, flags, nr,
682 t->iovec.iov_base, t->pages);
683
684 return t;
685
686err_out_exit:
687 return NULL;
688}
689
690int netfs_trans_init(void)
691{
692 int err = -ENOMEM;
693
694 netfs_trans_dst = kmem_cache_create("netfs_trans_dst", sizeof(struct netfs_trans_dst),
695 0, 0, NULL);
696 if (!netfs_trans_dst)
697 goto err_out_exit;
698
699 netfs_trans_dst_pool = mempool_create_slab_pool(256, netfs_trans_dst);
700 if (!netfs_trans_dst_pool)
701 goto err_out_free;
702
703 return 0;
704
705err_out_free:
706 kmem_cache_destroy(netfs_trans_dst);
707err_out_exit:
708 return err;
709}
710
711void netfs_trans_exit(void)
712{
713 mempool_destroy(netfs_trans_dst_pool);
714 kmem_cache_destroy(netfs_trans_dst);
715}