diff options
author | Peter Zijlstra <peterz@infradead.org> | 2015-06-22 08:16:33 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-09-22 09:25:53 -0400 |
commit | aba37660738325d48c913f3a952a7116d6e6a74b (patch) | |
tree | 42a6a0d45ab43fb2e8db1162fe08b7954790bc92 | |
parent | 11d9684ca638aad99f740ef3abcba2aa4c9290bf (diff) |
fs/locks: Replace lg_global with a percpu-rwsem
Replace the global part of the lglock with a percpu-rwsem.
Since flc_lock is a spinlock and itself nests under i_lock, which too
is a spinlock, we cannot acquire sleeping locks at
locks_{insert,delete}_global_locks().
We can, however, wrap all flc_lock acquisitions with percpu_down_read
such that all invocations of locks_{insert,delete}_global_locks() have
that read lock held.
This allows us to replace the lg_global part of the lglock with the
write side of the rwsem.
In the absence of writers, percpu_{down,up}_read() are free of atomic
instructions. This further avoids the very long preempt-disable
regions caused by lglock on larger machines.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: der.herr@hofr.at
Cc: paulmck@linux.vnet.ibm.com
Cc: riel@redhat.com
Cc: tj@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | fs/locks.c | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/fs/locks.c b/fs/locks.c index ee1b15f6fc13..8f609ec03364 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -164,6 +164,7 @@ int lease_break_time = 45; | |||
164 | */ | 164 | */ |
165 | DEFINE_STATIC_LGLOCK(file_lock_lglock); | 165 | DEFINE_STATIC_LGLOCK(file_lock_lglock); |
166 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); | 166 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); |
167 | DEFINE_STATIC_PERCPU_RWSEM(file_rwsem); | ||
167 | 168 | ||
168 | /* | 169 | /* |
169 | * The blocked_hash is used to find POSIX lock loops for deadlock detection. | 170 | * The blocked_hash is used to find POSIX lock loops for deadlock detection. |
@@ -587,6 +588,8 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
587 | /* Must be called with the flc_lock held! */ | 588 | /* Must be called with the flc_lock held! */ |
588 | static void locks_insert_global_locks(struct file_lock *fl) | 589 | static void locks_insert_global_locks(struct file_lock *fl) |
589 | { | 590 | { |
591 | percpu_rwsem_assert_held(&file_rwsem); | ||
592 | |||
590 | lg_local_lock(&file_lock_lglock); | 593 | lg_local_lock(&file_lock_lglock); |
591 | fl->fl_link_cpu = smp_processor_id(); | 594 | fl->fl_link_cpu = smp_processor_id(); |
592 | hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); | 595 | hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); |
@@ -596,6 +599,8 @@ static void locks_insert_global_locks(struct file_lock *fl) | |||
596 | /* Must be called with the flc_lock held! */ | 599 | /* Must be called with the flc_lock held! */ |
597 | static void locks_delete_global_locks(struct file_lock *fl) | 600 | static void locks_delete_global_locks(struct file_lock *fl) |
598 | { | 601 | { |
602 | percpu_rwsem_assert_held(&file_rwsem); | ||
603 | |||
599 | /* | 604 | /* |
600 | * Avoid taking lock if already unhashed. This is safe since this check | 605 | * Avoid taking lock if already unhashed. This is safe since this check |
601 | * is done while holding the flc_lock, and new insertions into the list | 606 | * is done while holding the flc_lock, and new insertions into the list |
@@ -915,6 +920,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |||
915 | return -ENOMEM; | 920 | return -ENOMEM; |
916 | } | 921 | } |
917 | 922 | ||
923 | percpu_down_read(&file_rwsem); | ||
918 | spin_lock(&ctx->flc_lock); | 924 | spin_lock(&ctx->flc_lock); |
919 | if (request->fl_flags & FL_ACCESS) | 925 | if (request->fl_flags & FL_ACCESS) |
920 | goto find_conflict; | 926 | goto find_conflict; |
@@ -955,6 +961,7 @@ find_conflict: | |||
955 | 961 | ||
956 | out: | 962 | out: |
957 | spin_unlock(&ctx->flc_lock); | 963 | spin_unlock(&ctx->flc_lock); |
964 | percpu_up_read(&file_rwsem); | ||
958 | if (new_fl) | 965 | if (new_fl) |
959 | locks_free_lock(new_fl); | 966 | locks_free_lock(new_fl); |
960 | locks_dispose_list(&dispose); | 967 | locks_dispose_list(&dispose); |
@@ -991,6 +998,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |||
991 | new_fl2 = locks_alloc_lock(); | 998 | new_fl2 = locks_alloc_lock(); |
992 | } | 999 | } |
993 | 1000 | ||
1001 | percpu_down_read(&file_rwsem); | ||
994 | spin_lock(&ctx->flc_lock); | 1002 | spin_lock(&ctx->flc_lock); |
995 | /* | 1003 | /* |
996 | * New lock request. Walk all POSIX locks and look for conflicts. If | 1004 | * New lock request. Walk all POSIX locks and look for conflicts. If |
@@ -1162,6 +1170,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |||
1162 | } | 1170 | } |
1163 | out: | 1171 | out: |
1164 | spin_unlock(&ctx->flc_lock); | 1172 | spin_unlock(&ctx->flc_lock); |
1173 | percpu_up_read(&file_rwsem); | ||
1165 | /* | 1174 | /* |
1166 | * Free any unused locks. | 1175 | * Free any unused locks. |
1167 | */ | 1176 | */ |
@@ -1436,6 +1445,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
1436 | return error; | 1445 | return error; |
1437 | } | 1446 | } |
1438 | 1447 | ||
1448 | percpu_down_read(&file_rwsem); | ||
1439 | spin_lock(&ctx->flc_lock); | 1449 | spin_lock(&ctx->flc_lock); |
1440 | 1450 | ||
1441 | time_out_leases(inode, &dispose); | 1451 | time_out_leases(inode, &dispose); |
@@ -1487,9 +1497,13 @@ restart: | |||
1487 | locks_insert_block(fl, new_fl); | 1497 | locks_insert_block(fl, new_fl); |
1488 | trace_break_lease_block(inode, new_fl); | 1498 | trace_break_lease_block(inode, new_fl); |
1489 | spin_unlock(&ctx->flc_lock); | 1499 | spin_unlock(&ctx->flc_lock); |
1500 | percpu_up_read(&file_rwsem); | ||
1501 | |||
1490 | locks_dispose_list(&dispose); | 1502 | locks_dispose_list(&dispose); |
1491 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1503 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
1492 | !new_fl->fl_next, break_time); | 1504 | !new_fl->fl_next, break_time); |
1505 | |||
1506 | percpu_down_read(&file_rwsem); | ||
1493 | spin_lock(&ctx->flc_lock); | 1507 | spin_lock(&ctx->flc_lock); |
1494 | trace_break_lease_unblock(inode, new_fl); | 1508 | trace_break_lease_unblock(inode, new_fl); |
1495 | locks_delete_block(new_fl); | 1509 | locks_delete_block(new_fl); |
@@ -1506,6 +1520,7 @@ restart: | |||
1506 | } | 1520 | } |
1507 | out: | 1521 | out: |
1508 | spin_unlock(&ctx->flc_lock); | 1522 | spin_unlock(&ctx->flc_lock); |
1523 | percpu_up_read(&file_rwsem); | ||
1509 | locks_dispose_list(&dispose); | 1524 | locks_dispose_list(&dispose); |
1510 | locks_free_lock(new_fl); | 1525 | locks_free_lock(new_fl); |
1511 | return error; | 1526 | return error; |
@@ -1660,6 +1675,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1660 | return -EINVAL; | 1675 | return -EINVAL; |
1661 | } | 1676 | } |
1662 | 1677 | ||
1678 | percpu_down_read(&file_rwsem); | ||
1663 | spin_lock(&ctx->flc_lock); | 1679 | spin_lock(&ctx->flc_lock); |
1664 | time_out_leases(inode, &dispose); | 1680 | time_out_leases(inode, &dispose); |
1665 | error = check_conflicting_open(dentry, arg, lease->fl_flags); | 1681 | error = check_conflicting_open(dentry, arg, lease->fl_flags); |
@@ -1730,6 +1746,7 @@ out_setup: | |||
1730 | lease->fl_lmops->lm_setup(lease, priv); | 1746 | lease->fl_lmops->lm_setup(lease, priv); |
1731 | out: | 1747 | out: |
1732 | spin_unlock(&ctx->flc_lock); | 1748 | spin_unlock(&ctx->flc_lock); |
1749 | percpu_up_read(&file_rwsem); | ||
1733 | locks_dispose_list(&dispose); | 1750 | locks_dispose_list(&dispose); |
1734 | if (is_deleg) | 1751 | if (is_deleg) |
1735 | inode_unlock(inode); | 1752 | inode_unlock(inode); |
@@ -1752,6 +1769,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |||
1752 | return error; | 1769 | return error; |
1753 | } | 1770 | } |
1754 | 1771 | ||
1772 | percpu_down_read(&file_rwsem); | ||
1755 | spin_lock(&ctx->flc_lock); | 1773 | spin_lock(&ctx->flc_lock); |
1756 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | 1774 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
1757 | if (fl->fl_file == filp && | 1775 | if (fl->fl_file == filp && |
@@ -1764,6 +1782,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |||
1764 | if (victim) | 1782 | if (victim) |
1765 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); | 1783 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); |
1766 | spin_unlock(&ctx->flc_lock); | 1784 | spin_unlock(&ctx->flc_lock); |
1785 | percpu_up_read(&file_rwsem); | ||
1767 | locks_dispose_list(&dispose); | 1786 | locks_dispose_list(&dispose); |
1768 | return error; | 1787 | return error; |
1769 | } | 1788 | } |
@@ -2703,6 +2722,7 @@ static void *locks_start(struct seq_file *f, loff_t *pos) | |||
2703 | struct locks_iterator *iter = f->private; | 2722 | struct locks_iterator *iter = f->private; |
2704 | 2723 | ||
2705 | iter->li_pos = *pos + 1; | 2724 | iter->li_pos = *pos + 1; |
2725 | percpu_down_write(&file_rwsem); | ||
2706 | lg_global_lock(&file_lock_lglock); | 2726 | lg_global_lock(&file_lock_lglock); |
2707 | spin_lock(&blocked_lock_lock); | 2727 | spin_lock(&blocked_lock_lock); |
2708 | return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); | 2728 | return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); |
@@ -2721,6 +2741,7 @@ static void locks_stop(struct seq_file *f, void *v) | |||
2721 | { | 2741 | { |
2722 | spin_unlock(&blocked_lock_lock); | 2742 | spin_unlock(&blocked_lock_lock); |
2723 | lg_global_unlock(&file_lock_lglock); | 2743 | lg_global_unlock(&file_lock_lglock); |
2744 | percpu_up_write(&file_rwsem); | ||
2724 | } | 2745 | } |
2725 | 2746 | ||
2726 | static const struct seq_operations locks_seq_operations = { | 2747 | static const struct seq_operations locks_seq_operations = { |