 arch/x86/Kconfig                |   1 +
 arch/x86/include/asm/spinlock.h |   5 ++
 fs/dcache.c                     |  17 +++-
 fs/namei.c                      |  90 ++++++++++-------
 include/linux/dcache.h          |  22 -----
 include/linux/lockref.h         |  61 +++---------
 lib/Kconfig                     |  10 +++
 lib/Makefile                    |   1 +
 lib/lockref.c                   | 127 +++++++++++++++++++++
 9 files changed, 237 insertions(+), 97 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0ce..67e00740531c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7db723f..e0e668422c75 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.tickets.head == lock.tickets.tail;
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
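
Context for the new helper: arch_spin_value_unlocked() takes the arch_spinlock_t by value, so it can test a copied snapshot of the lock word without dereferencing the live lock - which is exactly what the cmpxchg loop in lib/lockref.c below needs. A minimal userspace sketch of the same head/tail ticket test (illustrative only; demo_spinlock_t and its layout are invented for the example):

    /* Illustrative stand-in for x86 ticket locks; not kernel code. */
    #include <stdint.h>

    typedef struct {
            union {
                    uint32_t head_tail;
                    struct {
                            uint16_t head;  /* ticket now being served */
                            uint16_t tail;  /* next ticket to hand out */
                    } tickets;
            };
    } demo_spinlock_t;

    /* Unlocked when no tickets are outstanding: head == tail. */
    static inline int demo_spin_value_unlocked(demo_spinlock_t lock)
    {
            return lock.tickets.head == lock.tickets.tail;
    }

Because the argument is passed by value, the test works on a lock image read out of memory in one shot (here, half of a lockref word), with no risk of re-reading a lock that changes underneath.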
diff --git a/fs/dcache.c b/fs/dcache.c
index b949af850cd6..96655f4f4574 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -611,8 +611,23 @@ static inline void __dget(struct dentry *dentry)
 
 struct dentry *dget_parent(struct dentry *dentry)
 {
+	int gotref;
 	struct dentry *ret;
 
+	/*
+	 * Do optimistic parent lookup without any
+	 * locking.
+	 */
+	rcu_read_lock();
+	ret = ACCESS_ONCE(dentry->d_parent);
+	gotref = lockref_get_not_zero(&ret->d_lockref);
+	rcu_read_unlock();
+	if (likely(gotref)) {
+		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
+			return ret;
+		dput(ret);
+	}
+
 repeat:
 	/*
 	 * Don't need rcu_dereference because we re-check it was correct under
@@ -1771,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
  * without taking d_lock and checking d_seq sequence count against @seq
  * returned here.
  *
- * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * A refcount may be taken on the found dentry with the d_rcu_to_refcount
  * function.
  *
  * Alternatively, __d_lookup_rcu may be called again to look up the child of
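
The dget_parent() fast path above follows a general optimistic pattern: resolve the pointer under RCU, take a reference only if the object is demonstrably live (count nonzero), then re-check that the pointer is still current, falling back to the locked slow path otherwise. A generic sketch of the pattern (struct container, struct obj and put_obj() are hypothetical; only lockref_get_not_zero() is the real API):

    /* Sketch: optimistic ref-grab of an RCU-protected pointer. */
    struct obj *get_cur_obj(struct container *c)
    {
            struct obj *o;

            rcu_read_lock();
            o = ACCESS_ONCE(c->cur);              /* racy snapshot */
            if (!lockref_get_not_zero(&o->ref)) { /* object dying? */
                    rcu_read_unlock();
                    return NULL;                  /* caller takes slow path */
            }
            rcu_read_unlock();
            if (o != ACCESS_ONCE(c->cur)) {       /* pointer moved: undo */
                    put_obj(o);
                    return NULL;
            }
            return o;
    }

This is only safe because the object's memory is RCU-delayed: the lockref we poke at cannot be reused while we are inside the RCU read-side section, even if the object is already dead.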
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2c30c84d4ea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
 	br_read_unlock(&vfsmount_lock);
 }
 
+/*
+ * When we move over from the RCU domain to properly refcounted
+ * long-lived dentries, we need to check the sequence numbers
+ * we got before lookup very carefully.
+ *
+ * We cannot blindly increment a dentry refcount - even if it
+ * is not locked - if it is zero, because it may have gone
+ * through the final d_kill() logic already.
+ *
+ * So for a zero refcount, we need to get the spinlock (which is
+ * safe even for a dead dentry because the de-allocation is
+ * RCU-delayed), and check the sequence count under the lock.
+ *
+ * Once we have checked the sequence count, we know it is live,
+ * and since we hold the spinlock it cannot die from under us.
+ *
+ * In contrast, if the reference count wasn't zero, we can just
+ * increment the lockref without having to take the spinlock.
+ * Even if the sequence number ends up being stale, we haven't
+ * gone through the final dput() and killed the dentry yet.
+ */
+static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
+{
+	int gotref;
+
+	gotref = lockref_get_or_lock(&dentry->d_lockref);
+
+	/* Does the sequence number still match? */
+	if (read_seqcount_retry(validate, seq)) {
+		if (gotref)
+			dput(dentry);
+		else
+			spin_unlock(&dentry->d_lock);
+		return -ECHILD;
+	}
+
+	/* Get the ref now, if we couldn't get it originally */
+	if (!gotref) {
+		dentry->d_lockref.count++;
+		spin_unlock(&dentry->d_lock);
+	}
+	return 0;
+}
+
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 		    nd->root.dentry != fs->root.dentry)
 			goto err_root;
 	}
-	spin_lock(&parent->d_lock);
+
+	/*
+	 * For a negative lookup, the lookup sequence point is the parents
+	 * sequence point, and it only needs to revalidate the parent dentry.
+	 *
+	 * For a positive lookup, we need to move both the parent and the
+	 * dentry from the RCU domain to be properly refcounted. And the
+	 * sequence number in the dentry validates *both* dentry counters,
+	 * since we checked the sequence number of the parent after we got
+	 * the child sequence number. So we know the parent must still
+	 * be valid if the child sequence number is still valid.
+	 */
 	if (!dentry) {
-		if (!__d_rcu_to_refcount(parent, nd->seq))
-			goto err_parent;
+		if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
+			goto err_root;
 		BUG_ON(nd->inode != parent->d_inode);
 	} else {
-		if (dentry->d_parent != parent)
+		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
+			goto err_root;
+		if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
 			goto err_parent;
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!__d_rcu_to_refcount(dentry, nd->seq))
-			goto err_child;
-		/*
-		 * If the sequence check on the child dentry passed, then
-		 * the child has not been removed from its parent. This
-		 * means the parent dentry must be valid and able to take
-		 * a reference at this point.
-		 */
-		BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-		BUG_ON(!parent->d_lockref.count);
-		parent->d_lockref.count++;
-		spin_unlock(&dentry->d_lock);
 	}
-	spin_unlock(&parent->d_lock);
 	if (want_root) {
 		path_get(&nd->root);
 		spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 	nd->flags &= ~LOOKUP_RCU;
 	return 0;
 
-err_child:
-	spin_unlock(&dentry->d_lock);
 err_parent:
-	spin_unlock(&parent->d_lock);
+	dput(dentry);
 err_root:
 	if (want_root)
 		spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		spin_lock(&dentry->d_lock);
-		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-			spin_unlock(&dentry->d_lock);
+
+		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
 			unlock_rcu_walk();
 			return -ECHILD;
 		}
-		BUG_ON(nd->inode != dentry->d_inode);
-		spin_unlock(&dentry->d_lock);
 		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
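
The d_rcu_to_refcount() helper leans entirely on the lockref_get_or_lock() contract: it either takes a reference (nonzero return, no lock held) or, when the count was zero, returns 0 with the spinlock held so the caller can revalidate and possibly resurrect the object. A sketch of a caller handling both outcomes (try_pin() is hypothetical; the resurrection is only safe because lockref users free their objects RCU-delayed):

    /* Sketch: the two outcomes of lockref_get_or_lock(). */
    static int try_pin(struct lockref *ref)
    {
            if (lockref_get_or_lock(ref))
                    return 1;       /* got a reference locklessly */
            /*
             * Count was zero: we now hold ref->lock, the object cannot
             * be freed under us, and we may resurrect it under the lock
             * (after whatever validation the caller requires).
             */
            ref->count++;
            spin_unlock(&ref->lock);
            return 1;
    }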
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc94434c30..9169b91ea2d2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
 				const struct qstr *name, unsigned *seq);
 
-/**
- * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
- * @dentry: dentry to take a ref on
- * @seq: seqcount to verify against
- * Returns: 0 on failure, else 1.
- *
- * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
- * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
- */
-static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
-{
-	int ret = 0;
-
-	assert_spin_locked(&dentry->d_lock);
-	if (!read_seqcount_retry(&dentry->d_seq, seq)) {
-		ret = 1;
-		dentry->d_lockref.count++;
-	}
-
-	return ret;
-}
-
 static inline unsigned d_count(const struct dentry *dentry)
 {
 	return dentry->d_lockref.count;
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 01233e01627a..ca07b5028b01 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,55 +17,20 @@
 #include <linux/spinlock.h>
 
 struct lockref {
-	spinlock_t lock;
-	unsigned int count;
+	union {
+#ifdef CONFIG_CMPXCHG_LOCKREF
+		aligned_u64 lock_count;
+#endif
+		struct {
+			spinlock_t lock;
+			unsigned int count;
+		};
+	};
 };
 
-/**
- * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
- *
- * This operation is only valid if you already hold a reference
- * to the object, so you know the count cannot be zero.
- */
-static inline void lockref_get(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	lockref->count++;
-	spin_unlock(&lockref->lock);
-}
-
-/**
- * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count is 0
- */
-static inline int lockref_get_not_zero(struct lockref *lockref)
-{
-	int retval = 0;
-
-	spin_lock(&lockref->lock);
-	if (lockref->count) {
-		lockref->count++;
-		retval = 1;
-	}
-	spin_unlock(&lockref->lock);
-	return retval;
-}
-
-/**
- * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
- */
-static inline int lockref_put_or_lock(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	if (lockref->count <= 1)
-		return 0;
-	lockref->count--;
-	spin_unlock(&lockref->lock);
-	return 1;
-}
+extern void lockref_get(struct lockref *);
+extern int lockref_get_not_zero(struct lockref *);
+extern int lockref_get_or_lock(struct lockref *);
+extern int lockref_put_or_lock(struct lockref *);
 
 #endif /* __LINUX_LOCKREF_H */
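
The union is the heart of the optimization: with CONFIG_CMPXCHG_LOCKREF, the spinlock and the count overlay a single aligned_u64, so one 64-bit cmpxchg can bump the count while simultaneously verifying that the lock half is still unlocked. A standalone userspace demonstration of the overlay (assumes a 4-byte lock word where 0 means unlocked, and GCC's __atomic builtins; not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    struct demo_lockref {
            union {
                    uint64_t lock_count;    /* whole pair as one word */
                    struct {
                            uint32_t lock;  /* 0 == unlocked */
                            uint32_t count;
                    };
            };
    };

    int main(void)
    {
            struct demo_lockref ref = { .lock = 0, .count = 1 };
            struct demo_lockref old = ref, new = ref;

            new.count++;    /* the lock half is left untouched */
            /* One 8-byte CAS: succeeds only if lock AND count matched. */
            if (__atomic_compare_exchange_n(&ref.lock_count, &old.lock_count,
                                            new.lock_count, 0,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                    printf("count is now %u\n", ref.count);     /* 2 */
            return 0;
    }

The kernel uses aligned_u64 rather than a plain u64 to guarantee the 8-byte alignment that cmpxchg on the combined word requires; the union in the demo gets the same alignment for free from uint64_t.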
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81f6eed..65561716c16c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	boolean
 
+config ARCH_USE_CMPXCHG_LOCKREF
+	bool
+
+config CMPXCHG_LOCKREF
+	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+	depends on SMP
+	depends on !GENERIC_LOCKBREAK
+	depends on !DEBUG_SPINLOCK
+	depends on !DEBUG_LOCK_ALLOC
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 7baccfd8a4e9..f2cb3082697c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y += kobject.o klist.o
+obj-y += lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 000000000000..7819c2d1d315
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,127 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
+	struct lockref old;							\
+	BUILD_BUG_ON(sizeof(old) != 8);						\
+	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
+		struct lockref new = old, prev = old;				\
+		CODE								\
+		old.lock_count = cmpxchg(&lockref->lock_count,			\
+					 old.lock_count, new.lock_count);	\
+		if (likely(old.lock_count == prev.lock_count)) {		\
+			SUCCESS;						\
+		}								\
+	}									\
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockcnt: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+	,
+		return;
+	);
+
+	spin_lock(&lockref->lock);
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if (lockref->count) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (!lockref->count)
+		return 0;
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (lockref->count <= 1)
+		return 0;
+	lockref->count--;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);
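
Taken together, a typical consumer of the four lockref operations might look like the following sketch (struct thing, thing_get() and thing_put() are hypothetical; dcache is the real in-tree user, and it frees dentries RCU-delayed rather than with a plain kfree()):

    /* Hypothetical consumer of the lockref API; not kernel code. */
    struct thing {
            struct lockref ref;     /* ref.lock also protects the payload */
            /* ... payload ... */
    };

    static void thing_get(struct thing *t)
    {
            lockref_get(&t->ref);   /* caller already holds a reference */
    }

    static void thing_put(struct thing *t)
    {
            /* Fast path: lockless decrement while count > 1. */
            if (lockref_put_or_lock(&t->ref))
                    return;
            /* Count was <= 1 and we now hold t->ref.lock: last reference. */
            t->ref.count--;
            spin_unlock(&t->ref.lock);
            kfree(t);               /* dcache would RCU-delay this instead */
    }

On the common paths neither function touches the spinlock at all: the CMPXCHG_LOOP above updates the count with a single 64-bit cmpxchg, and only contention, a locked lockref, or a count at the 0/1 boundary falls back to the spinlocked slow path.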