summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorShaohua Li <shli@fb.com>2017-07-12 14:49:48 -0400
committerJens Axboe <axboe@kernel.dk>2017-07-29 11:00:03 -0400
commitba16b2846a8c6965d0d35be3968bc10f6277812d (patch)
tree332d1796149f4e5eb713fcf95b44cc6177965158 /fs
parent4a3ef68acacf31570066e69593de5cc49cc91638 (diff)
kernfs: add an API to get kernfs node from inode number
Add an API to get kernfs node from inode number. We will need this to implement exportfs operations. This API will be used in blktrace too later, so it should be as fast as possible. To make the API lock free, kernfs node is freed in RCU context. And we depend on kernfs_node count/ino number to filter out stale kernfs nodes. Acked-by: Tejun Heo <tj@kernel.org> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs')
-rw-r--r--fs/kernfs/dir.c57
-rw-r--r--fs/kernfs/kernfs-internal.h2
-rw-r--r--fs/kernfs/mount.c11
3 files changed, 69 insertions, 1 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 33f711f6b86e..7be37c838007 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -508,6 +508,10 @@ void kernfs_put(struct kernfs_node *kn)
508 struct kernfs_node *parent; 508 struct kernfs_node *parent;
509 struct kernfs_root *root; 509 struct kernfs_root *root;
510 510
511 /*
512 * A kernfs_node is always freed with ->count at 0;
513 * kernfs_find_and_get_node_by_ino() depends on this to filter out reused stale nodes.
514 */
511 if (!kn || !atomic_dec_and_test(&kn->count)) 515 if (!kn || !atomic_dec_and_test(&kn->count))
512 return; 516 return;
513 root = kernfs_root(kn); 517 root = kernfs_root(kn);
@@ -649,6 +653,11 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
649 kn->ino = ret; 653 kn->ino = ret;
650 kn->generation = gen; 654 kn->generation = gen;
651 655
656 /*
657 * Set ino before count. This barrier is paired with atomic_inc_not_zero() in
658 * kernfs_find_and_get_node_by_ino().
659 */
660 smp_mb__before_atomic();
652 atomic_set(&kn->count, 1); 661 atomic_set(&kn->count, 1);
653 atomic_set(&kn->active, KN_DEACTIVATED_BIAS); 662 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
654 RB_CLEAR_NODE(&kn->rb); 663 RB_CLEAR_NODE(&kn->rb);
@@ -680,6 +689,54 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
680 return kn; 689 return kn;
681} 690}
682 691
692/*
693 * kernfs_find_and_get_node_by_ino - look up a kernfs_node by inode number
694 * @root: the kernfs root whose ino_idr is searched
695 * @ino: inode number to look up
696 *
697 * RETURNS:
698 * The kernfs node with its reference count incremented, or NULL on failure.
699 */
700struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
701 unsigned int ino)
702{
703 struct kernfs_node *kn;
704
705 rcu_read_lock();
706 kn = idr_find(&root->ino_idr, ino);
707 if (!kn)
708 goto out;
709
710 /*
711 * Since kernfs_node is freed in RCU, it's possible that an old node for
712 * ino is freed, but reused before the RCU grace period ends. But a freed
713 * node (see kernfs_put) or an incompletely initialized node (see
714 * __kernfs_new_node) should have 'count' 0. We can use this fact to
715 * filter out such nodes.
716 */
717 if (!atomic_inc_not_zero(&kn->count)) {
718 kn = NULL;
719 goto out;
720 }
721
722 /*
723 * The node could be a new node or a reused node. If it's a new node,
724 * we are ok. If it's reused because of RCU (because of
725 * SLAB_TYPESAFE_BY_RCU), __kernfs_new_node always sets its 'ino'
726 * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
727 * hence we can use 'ino' to filter out stale nodes.
728 */
729 if (kn->ino != ino)
730 goto out;
731 rcu_read_unlock();
732
733 return kn;
734out:
735 rcu_read_unlock();
736 kernfs_put(kn); /* kn is NULL here, or holds the ref taken above on ino mismatch */
737 return NULL;
738}
739
683/** 740/**
684 * kernfs_add_one - add kernfs_node to parent without warning 741 * kernfs_add_one - add kernfs_node to parent without warning
685 * @kn: kernfs_node to be added 742 * @kn: kernfs_node to be added
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 2d5144ab4251..e9c226f29828 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -98,6 +98,8 @@ int kernfs_add_one(struct kernfs_node *kn);
98struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, 98struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
99 const char *name, umode_t mode, 99 const char *name, umode_t mode,
100 unsigned flags); 100 unsigned flags);
101struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
102 unsigned int ino);
101 103
102/* 104/*
103 * file.c 105 * file.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d5b149a45be1..69c48bec8a63 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -330,7 +330,16 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
330 330
331void __init kernfs_init(void) 331void __init kernfs_init(void)
332{ 332{
333
334 /*
335 * The slab is freed in RCU context, so kernfs_find_and_get_node_by_ino()
336 * can access the slab lock-free. This can expose stale nodes;
337 * see how kernfs_find_and_get_node_by_ino() filters out stale
338 * nodes.
339 */
333 kernfs_node_cache = kmem_cache_create("kernfs_node_cache", 340 kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
334 sizeof(struct kernfs_node), 341 sizeof(struct kernfs_node),
335 0, SLAB_PANIC, NULL); 342 0,
343 SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
344 NULL);
336} 345}