 arch/x86/Kconfig                |   1
 arch/x86/include/asm/spinlock.h |   5
 fs/dcache.c                     |  17
 fs/namei.c                      |  90
 include/linux/dcache.h          |  22
 include/linux/lockref.h         |  61
 lib/Kconfig                     |  10
 lib/Makefile                    |   1
 lib/lockref.c                   | 127
 9 files changed, 237 insertions(+), 97 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0ce..67e00740531c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7db723f..e0e668422c75 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.tickets.head == lock.tickets.tail;
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
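
The helper added above is what lets the generic lockref code ask "is this spinlock free?" from a plain copy of the lock, without touching the live cacheline: an x86 ticket lock is unlocked exactly when its head and tail tickets are equal. A minimal user-space sketch of that value-based test follows (illustrative only, not part of this patch; the struct mirrors the x86 ticket layout, but the harness itself is made up):

/* Illustrative stand-in for the x86 ticket lock and its "unlocked by value" test. */
#include <stdio.h>

struct ticket_lock {
	unsigned short head;	/* ticket currently being served */
	unsigned short tail;	/* next ticket to hand out */
};

static int ticket_value_unlocked(struct ticket_lock lock)
{
	/* Nobody holds or waits for the lock: head has caught up with tail. */
	return lock.head == lock.tail;
}

int main(void)
{
	struct ticket_lock free_lock = { .head = 3, .tail = 3 };
	struct ticket_lock held_lock = { .head = 3, .tail = 4 };

	printf("free: %d held: %d\n",
	       ticket_value_unlocked(free_lock),
	       ticket_value_unlocked(held_lock));	/* prints "free: 1 held: 0" */
	return 0;
}

Note that the argument is passed by value: the caller works on a snapshot, which is exactly how lib/lockref.c below applies arch_spin_value_unlocked() to its copied lock_count word.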
diff --git a/fs/dcache.c b/fs/dcache.c
index b949af850cd6..96655f4f4574 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -611,8 +611,23 @@ static inline void __dget(struct dentry *dentry)
 
 struct dentry *dget_parent(struct dentry *dentry)
 {
+	int gotref;
 	struct dentry *ret;
 
+	/*
+	 * Do optimistic parent lookup without any
+	 * locking.
+	 */
+	rcu_read_lock();
+	ret = ACCESS_ONCE(dentry->d_parent);
+	gotref = lockref_get_not_zero(&ret->d_lockref);
+	rcu_read_unlock();
+	if (likely(gotref)) {
+		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
+			return ret;
+		dput(ret);
+	}
+
 repeat:
 	/*
 	 * Don't need rcu_dereference because we re-check it was correct under
@@ -1771,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
  * without taking d_lock and checking d_seq sequence count against @seq
  * returned here.
  *
- * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * A refcount may be taken on the found dentry with the d_rcu_to_refcount
  * function.
  *
  * Alternatively, __d_lookup_rcu may be called again to look up the child of
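
The dget_parent() change above is the lock-free fast path: read d_parent under rcu_read_lock(), bump its count only if it is already non-zero, then re-check that d_parent still points at the same dentry before trusting the reference. A rough user-space analogue of that pattern, with C11 atomics standing in for RCU and the lockref (all names here are illustrative, not kernel API):

/* Sketch of the optimistic "grab the parent" pattern from dget_parent(). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	_Atomic(struct node *) parent;
	atomic_int refcount;		/* 0 means the node is being torn down */
};

/* Take a reference only if the count is already non-zero (cf. lockref_get_not_zero). */
static bool get_not_zero(struct node *n)
{
	int c = atomic_load(&n->refcount);

	while (c > 0) {
		if (atomic_compare_exchange_weak(&n->refcount, &c, c + 1))
			return true;	/* on failure, c is reloaded and we retry */
	}
	return false;
}

static void put(struct node *n)
{
	atomic_fetch_sub(&n->refcount, 1);	/* real code would free at zero */
}

/* Optimistic get_parent(): no lock taken when the parent is stable and alive. */
static struct node *get_parent(struct node *child)
{
	struct node *p = atomic_load(&child->parent);

	if (get_not_zero(p)) {
		/* Did the parent change while we were taking the reference? */
		if (p == atomic_load(&child->parent))
			return p;
		put(p);
	}
	return NULL;	/* caller would fall back to the locked slow path */
}

int main(void)
{
	struct node parent_node, child_node;

	atomic_init(&parent_node.parent, NULL);
	atomic_init(&parent_node.refcount, 1);
	atomic_init(&child_node.parent, &parent_node);
	atomic_init(&child_node.refcount, 1);

	return get_parent(&child_node) == &parent_node ? 0 : 1;
}

The re-check of d_parent is what makes the unlocked increment safe: even if the child is moved concurrently, the worst case is a reference taken on the old parent, which is then simply dropped again.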
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2c30c84d4ea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
 	br_read_unlock(&vfsmount_lock);
 }
 
+/*
+ * When we move over from the RCU domain to properly refcounted
+ * long-lived dentries, we need to check the sequence numbers
+ * we got before lookup very carefully.
+ *
+ * We cannot blindly increment a dentry refcount - even if it
+ * is not locked - if it is zero, because it may have gone
+ * through the final d_kill() logic already.
+ *
+ * So for a zero refcount, we need to get the spinlock (which is
+ * safe even for a dead dentry because the de-allocation is
+ * RCU-delayed), and check the sequence count under the lock.
+ *
+ * Once we have checked the sequence count, we know it is live,
+ * and since we hold the spinlock it cannot die from under us.
+ *
+ * In contrast, if the reference count wasn't zero, we can just
+ * increment the lockref without having to take the spinlock.
+ * Even if the sequence number ends up being stale, we haven't
+ * gone through the final dput() and killed the dentry yet.
+ */
+static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
+{
+	int gotref;
+
+	gotref = lockref_get_or_lock(&dentry->d_lockref);
+
+	/* Does the sequence number still match? */
+	if (read_seqcount_retry(validate, seq)) {
+		if (gotref)
+			dput(dentry);
+		else
+			spin_unlock(&dentry->d_lock);
+		return -ECHILD;
+	}
+
+	/* Get the ref now, if we couldn't get it originally */
+	if (!gotref) {
+		dentry->d_lockref.count++;
+		spin_unlock(&dentry->d_lock);
+	}
+	return 0;
+}
+
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 		    nd->root.dentry != fs->root.dentry)
 			goto err_root;
 	}
-	spin_lock(&parent->d_lock);
+
+	/*
+	 * For a negative lookup, the lookup sequence point is the parents
+	 * sequence point, and it only needs to revalidate the parent dentry.
+	 *
+	 * For a positive lookup, we need to move both the parent and the
+	 * dentry from the RCU domain to be properly refcounted. And the
+	 * sequence number in the dentry validates *both* dentry counters,
+	 * since we checked the sequence number of the parent after we got
+	 * the child sequence number. So we know the parent must still
+	 * be valid if the child sequence number is still valid.
+	 */
 	if (!dentry) {
-		if (!__d_rcu_to_refcount(parent, nd->seq))
-			goto err_parent;
+		if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
+			goto err_root;
 		BUG_ON(nd->inode != parent->d_inode);
 	} else {
-		if (dentry->d_parent != parent)
+		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
+			goto err_root;
+		if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
 			goto err_parent;
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!__d_rcu_to_refcount(dentry, nd->seq))
-			goto err_child;
-		/*
-		 * If the sequence check on the child dentry passed, then
-		 * the child has not been removed from its parent. This
-		 * means the parent dentry must be valid and able to take
-		 * a reference at this point.
-		 */
-		BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-		BUG_ON(!parent->d_lockref.count);
-		parent->d_lockref.count++;
-		spin_unlock(&dentry->d_lock);
 	}
-	spin_unlock(&parent->d_lock);
 	if (want_root) {
 		path_get(&nd->root);
 		spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 	nd->flags &= ~LOOKUP_RCU;
 	return 0;
 
-err_child:
-	spin_unlock(&dentry->d_lock);
 err_parent:
-	spin_unlock(&parent->d_lock);
+	dput(dentry);
 err_root:
 	if (want_root)
 		spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		spin_lock(&dentry->d_lock);
-		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-			spin_unlock(&dentry->d_lock);
+
+		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
 			unlock_rcu_walk();
 			return -ECHILD;
 		}
-		BUG_ON(nd->inode != dentry->d_inode);
-		spin_unlock(&dentry->d_lock);
 		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
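
Both unlazy_walk() and complete_walk() now funnel through d_rcu_to_refcount(), whose whole job is "take the reference first, then prove the lookup is still valid via the sequence count, and undo it if not". The validation side is the usual seqcount read protocol; a minimal stand-in for that protocol is sketched below (user-space, C11 atomics, made-up function names; the kernel's seqlock.h is more involved, so treat this as an assumption-laden illustration):

/* Minimal seqcount reader protocol, as used conceptually by d_rcu_to_refcount(). */
#include <stdatomic.h>
#include <stdbool.h>

struct seqcount {
	atomic_uint sequence;	/* writers make this odd while updating */
};

static unsigned seq_read_begin(struct seqcount *s)
{
	unsigned seq;

	/* Wait out any in-flight writer, then remember the even sequence. */
	while ((seq = atomic_load(&s->sequence)) & 1)
		;
	return seq;
}

/* True if anything may have changed since seq_read_begin() returned. */
static bool seq_read_retry(struct seqcount *s, unsigned seq)
{
	return atomic_load(&s->sequence) != seq;
}

The RCU walk records nd->seq with the equivalent of seq_read_begin() at lookup time; d_rcu_to_refcount() grabs the reference (or the dentry lock) first and only then runs the retry check, so a rename or final dput that raced with the walk makes the check fail and the reference (or the lock) is dropped again.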
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc94434c30..9169b91ea2d2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
 				const struct qstr *name, unsigned *seq);
 
-/**
- * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
- * @dentry: dentry to take a ref on
- * @seq: seqcount to verify against
- * Returns: 0 on failure, else 1.
- *
- * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
- * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
- */
-static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
-{
-	int ret = 0;
-
-	assert_spin_locked(&dentry->d_lock);
-	if (!read_seqcount_retry(&dentry->d_seq, seq)) {
-		ret = 1;
-		dentry->d_lockref.count++;
-	}
-
-	return ret;
-}
-
 static inline unsigned d_count(const struct dentry *dentry)
 {
 	return dentry->d_lockref.count;
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 01233e01627a..ca07b5028b01 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,55 +17,20 @@
 #include <linux/spinlock.h>
 
 struct lockref {
-	spinlock_t lock;
-	unsigned int count;
+	union {
+#ifdef CONFIG_CMPXCHG_LOCKREF
+		aligned_u64 lock_count;
+#endif
+		struct {
+			spinlock_t lock;
+			unsigned int count;
+		};
+	};
 };
 
-/**
- * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
- *
- * This operation is only valid if you already hold a reference
- * to the object, so you know the count cannot be zero.
- */
-static inline void lockref_get(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	lockref->count++;
-	spin_unlock(&lockref->lock);
-}
-
-/**
- * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count is 0
- */
-static inline int lockref_get_not_zero(struct lockref *lockref)
-{
-	int retval = 0;
-
-	spin_lock(&lockref->lock);
-	if (lockref->count) {
-		lockref->count++;
-		retval = 1;
-	}
-	spin_unlock(&lockref->lock);
-	return retval;
-}
-
-/**
- * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
- */
-static inline int lockref_put_or_lock(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	if (lockref->count <= 1)
-		return 0;
-	lockref->count--;
-	spin_unlock(&lockref->lock);
-	return 1;
-}
+extern void lockref_get(struct lockref *);
+extern int lockref_get_not_zero(struct lockref *);
+extern int lockref_get_or_lock(struct lockref *);
+extern int lockref_put_or_lock(struct lockref *);
 
 #endif /* __LINUX_LOCKREF_H */
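
The union is the core trick: on ARCH_USE_CMPXCHG_LOCKREF architectures the spinlock and the count share one 64-bit word (lock_count), so a single cmpxchg can increment the count while verifying, in the same atomic operation, that the lock half is still unlocked. That only works while spinlock_t really is 4 bytes, which is why the lib/Kconfig hunk below turns the optimization off under the lock-debugging options. A hedged sketch of the aliasing with plain fixed-width types (illustrative layout only, not the kernel's definitions):

/* Illustrative only: a 4-byte "lock" and a 4-byte count overlaid on one 64-bit word. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct fake_lockref {
	union {
		uint64_t lock_count;		/* what the cmpxchg operates on */
		struct {
			uint32_t lock;		/* stand-in for spinlock_t */
			uint32_t count;		/* the reference count */
		};
	};
};

/* Compile-time analogue of the BUILD_BUG_ON(sizeof(old) != 8) in lib/lockref.c. */
static_assert(sizeof(struct fake_lockref) == 8,
	      "lock and count must share a single 64-bit word");

int main(void)
{
	struct fake_lockref ref = { .lock_count = 0 };

	ref.count = 5;	/* updating one view is visible through the other */
	printf("count=%u combined=0x%llx\n",
	       (unsigned)ref.count, (unsigned long long)ref.lock_count);
	return 0;
}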
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81f6eed..65561716c16c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	boolean
 
+config ARCH_USE_CMPXCHG_LOCKREF
+	bool
+
+config CMPXCHG_LOCKREF
+	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+	depends on SMP
+	depends on !GENERIC_LOCKBREAK
+	depends on !DEBUG_SPINLOCK
+	depends on !DEBUG_LOCK_ALLOC
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 7baccfd8a4e9..f2cb3082697c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y += kobject.o klist.o
+obj-y += lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 000000000000..7819c2d1d315
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,127 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
+	struct lockref old;							\
+	BUILD_BUG_ON(sizeof(old) != 8);						\
+	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
+		struct lockref new = old, prev = old;				\
+		CODE								\
+		old.lock_count = cmpxchg(&lockref->lock_count,			\
+					 old.lock_count, new.lock_count);	\
+		if (likely(old.lock_count == prev.lock_count)) {		\
+			SUCCESS;						\
+		}								\
+	}									\
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockcnt: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+	,
+		return;
+	);
+
+	spin_lock(&lockref->lock);
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if (lockref->count) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (!lockref->count)
+		return 0;
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (lockref->count <= 1)
+		return 0;
+	lockref->count--;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);
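
CMPXCHG_LOOP above is compact but dense: snapshot the whole 8-byte word, give up at once (and fall through to the spinlock slow path) if the snapshot shows the lock held, otherwise compute the new count and try to install it with one cmpxchg; on failure, cmpxchg hands back the value it found, so the loop retries against fresh data. Written out long-hand for the get-not-zero case, with C11 atomics standing in for cmpxchg()/ACCESS_ONCE() (a sketch under those assumptions, not the kernel code):

/* Long-hand, user-space rendering of CMPXCHG_LOOP for lockref_get_not_zero(). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct sketch_lockref {
	_Atomic uint64_t lock_count;	/* lock and count packed into one word */
};

union view {
	uint64_t lock_count;
	struct {
		uint32_t lock;		/* 0 means "unlocked" in this sketch */
		uint32_t count;
	};
};

static bool sketch_get_not_zero(struct sketch_lockref *ref)
{
	union view old, new;

	old.lock_count = atomic_load(&ref->lock_count);	/* ACCESS_ONCE() */
	while (old.lock == 0) {			/* arch_spin_value_unlocked() */
		if (old.count == 0)
			return false;		/* never resurrect a dead object */
		new = old;
		new.count++;			/* the CODE block */
		/* One cmpxchg covers lock and count together; on failure the
		 * current value is written back into "old" and we retry. */
		if (atomic_compare_exchange_weak(&ref->lock_count,
						 &old.lock_count,
						 new.lock_count))
			return true;		/* the SUCCESS block */
	}
	return false;	/* lock was held: the caller falls back to spin_lock() */
}

The same skeleton yields the other three operations by swapping the CODE and SUCCESS pieces, which is exactly what the macro parameters do; the BUILD_BUG_ON(sizeof(old) != 8) is what guarantees the packed view and the cmpxchg width agree.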