aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2015-06-22 08:16:33 -0400
committerIngo Molnar <mingo@kernel.org>2016-09-22 09:25:53 -0400
commitaba37660738325d48c913f3a952a7116d6e6a74b (patch)
tree42a6a0d45ab43fb2e8db1162fe08b7954790bc92
parent11d9684ca638aad99f740ef3abcba2aa4c9290bf (diff)
fs/locks: Replace lg_global with a percpu-rwsem
Replace the global part of the lglock with a percpu-rwsem. Since fcl_lock is a spinlock and itself nests under i_lock, which too is a spinlock we cannot acquire sleeping locks at locks_{insert,remove}_global_locks(). We can however wrap all fcl_lock acquisitions with percpu_down_read such that all invocations of locks_{insert,remove}_global_locks() have that read lock held. This allows us to replace the lg_global part of the lglock with the write side of the rwsem. In the absense of writers, percpu_{down,up}_read() are free of atomic instructions. This further avoids the very long preempt-disable regions caused by lglock on larger machines. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: dave@stgolabs.net Cc: der.herr@hofr.at Cc: paulmck@linux.vnet.ibm.com Cc: riel@redhat.com Cc: tj@kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--fs/locks.c21
1 file changed, 21 insertions, 0 deletions
diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f6fc13..8f609ec03364 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -164,6 +164,7 @@ int lease_break_time = 45;
164 */ 164 */
165DEFINE_STATIC_LGLOCK(file_lock_lglock); 165DEFINE_STATIC_LGLOCK(file_lock_lglock);
166static DEFINE_PER_CPU(struct hlist_head, file_lock_list); 166static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
167DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
167 168
168/* 169/*
169 * The blocked_hash is used to find POSIX lock loops for deadlock detection. 170 * The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -587,6 +588,8 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
587/* Must be called with the flc_lock held! */ 588/* Must be called with the flc_lock held! */
588static void locks_insert_global_locks(struct file_lock *fl) 589static void locks_insert_global_locks(struct file_lock *fl)
589{ 590{
591 percpu_rwsem_assert_held(&file_rwsem);
592
590 lg_local_lock(&file_lock_lglock); 593 lg_local_lock(&file_lock_lglock);
591 fl->fl_link_cpu = smp_processor_id(); 594 fl->fl_link_cpu = smp_processor_id();
592 hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); 595 hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
@@ -596,6 +599,8 @@ static void locks_insert_global_locks(struct file_lock *fl)
596/* Must be called with the flc_lock held! */ 599/* Must be called with the flc_lock held! */
597static void locks_delete_global_locks(struct file_lock *fl) 600static void locks_delete_global_locks(struct file_lock *fl)
598{ 601{
602 percpu_rwsem_assert_held(&file_rwsem);
603
599 /* 604 /*
600 * Avoid taking lock if already unhashed. This is safe since this check 605 * Avoid taking lock if already unhashed. This is safe since this check
601 * is done while holding the flc_lock, and new insertions into the list 606 * is done while holding the flc_lock, and new insertions into the list
@@ -915,6 +920,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
915 return -ENOMEM; 920 return -ENOMEM;
916 } 921 }
917 922
923 percpu_down_read(&file_rwsem);
918 spin_lock(&ctx->flc_lock); 924 spin_lock(&ctx->flc_lock);
919 if (request->fl_flags & FL_ACCESS) 925 if (request->fl_flags & FL_ACCESS)
920 goto find_conflict; 926 goto find_conflict;
@@ -955,6 +961,7 @@ find_conflict:
955 961
956out: 962out:
957 spin_unlock(&ctx->flc_lock); 963 spin_unlock(&ctx->flc_lock);
964 percpu_up_read(&file_rwsem);
958 if (new_fl) 965 if (new_fl)
959 locks_free_lock(new_fl); 966 locks_free_lock(new_fl);
960 locks_dispose_list(&dispose); 967 locks_dispose_list(&dispose);
@@ -991,6 +998,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
991 new_fl2 = locks_alloc_lock(); 998 new_fl2 = locks_alloc_lock();
992 } 999 }
993 1000
1001 percpu_down_read(&file_rwsem);
994 spin_lock(&ctx->flc_lock); 1002 spin_lock(&ctx->flc_lock);
995 /* 1003 /*
996 * New lock request. Walk all POSIX locks and look for conflicts. If 1004 * New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1162,6 +1170,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
1162 } 1170 }
1163 out: 1171 out:
1164 spin_unlock(&ctx->flc_lock); 1172 spin_unlock(&ctx->flc_lock);
1173 percpu_up_read(&file_rwsem);
1165 /* 1174 /*
1166 * Free any unused locks. 1175 * Free any unused locks.
1167 */ 1176 */
@@ -1436,6 +1445,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1436 return error; 1445 return error;
1437 } 1446 }
1438 1447
1448 percpu_down_read(&file_rwsem);
1439 spin_lock(&ctx->flc_lock); 1449 spin_lock(&ctx->flc_lock);
1440 1450
1441 time_out_leases(inode, &dispose); 1451 time_out_leases(inode, &dispose);
@@ -1487,9 +1497,13 @@ restart:
1487 locks_insert_block(fl, new_fl); 1497 locks_insert_block(fl, new_fl);
1488 trace_break_lease_block(inode, new_fl); 1498 trace_break_lease_block(inode, new_fl);
1489 spin_unlock(&ctx->flc_lock); 1499 spin_unlock(&ctx->flc_lock);
1500 percpu_up_read(&file_rwsem);
1501
1490 locks_dispose_list(&dispose); 1502 locks_dispose_list(&dispose);
1491 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1503 error = wait_event_interruptible_timeout(new_fl->fl_wait,
1492 !new_fl->fl_next, break_time); 1504 !new_fl->fl_next, break_time);
1505
1506 percpu_down_read(&file_rwsem);
1493 spin_lock(&ctx->flc_lock); 1507 spin_lock(&ctx->flc_lock);
1494 trace_break_lease_unblock(inode, new_fl); 1508 trace_break_lease_unblock(inode, new_fl);
1495 locks_delete_block(new_fl); 1509 locks_delete_block(new_fl);
@@ -1506,6 +1520,7 @@ restart:
1506 } 1520 }
1507out: 1521out:
1508 spin_unlock(&ctx->flc_lock); 1522 spin_unlock(&ctx->flc_lock);
1523 percpu_up_read(&file_rwsem);
1509 locks_dispose_list(&dispose); 1524 locks_dispose_list(&dispose);
1510 locks_free_lock(new_fl); 1525 locks_free_lock(new_fl);
1511 return error; 1526 return error;
@@ -1660,6 +1675,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1660 return -EINVAL; 1675 return -EINVAL;
1661 } 1676 }
1662 1677
1678 percpu_down_read(&file_rwsem);
1663 spin_lock(&ctx->flc_lock); 1679 spin_lock(&ctx->flc_lock);
1664 time_out_leases(inode, &dispose); 1680 time_out_leases(inode, &dispose);
1665 error = check_conflicting_open(dentry, arg, lease->fl_flags); 1681 error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1730,6 +1746,7 @@ out_setup:
1730 lease->fl_lmops->lm_setup(lease, priv); 1746 lease->fl_lmops->lm_setup(lease, priv);
1731out: 1747out:
1732 spin_unlock(&ctx->flc_lock); 1748 spin_unlock(&ctx->flc_lock);
1749 percpu_up_read(&file_rwsem);
1733 locks_dispose_list(&dispose); 1750 locks_dispose_list(&dispose);
1734 if (is_deleg) 1751 if (is_deleg)
1735 inode_unlock(inode); 1752 inode_unlock(inode);
@@ -1752,6 +1769,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
1752 return error; 1769 return error;
1753 } 1770 }
1754 1771
1772 percpu_down_read(&file_rwsem);
1755 spin_lock(&ctx->flc_lock); 1773 spin_lock(&ctx->flc_lock);
1756 list_for_each_entry(fl, &ctx->flc_lease, fl_list) { 1774 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1757 if (fl->fl_file == filp && 1775 if (fl->fl_file == filp &&
@@ -1764,6 +1782,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
1764 if (victim) 1782 if (victim)
1765 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); 1783 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1766 spin_unlock(&ctx->flc_lock); 1784 spin_unlock(&ctx->flc_lock);
1785 percpu_up_read(&file_rwsem);
1767 locks_dispose_list(&dispose); 1786 locks_dispose_list(&dispose);
1768 return error; 1787 return error;
1769} 1788}
@@ -2703,6 +2722,7 @@ static void *locks_start(struct seq_file *f, loff_t *pos)
2703 struct locks_iterator *iter = f->private; 2722 struct locks_iterator *iter = f->private;
2704 2723
2705 iter->li_pos = *pos + 1; 2724 iter->li_pos = *pos + 1;
2725 percpu_down_write(&file_rwsem);
2706 lg_global_lock(&file_lock_lglock); 2726 lg_global_lock(&file_lock_lglock);
2707 spin_lock(&blocked_lock_lock); 2727 spin_lock(&blocked_lock_lock);
2708 return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); 2728 return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
@@ -2721,6 +2741,7 @@ static void locks_stop(struct seq_file *f, void *v)
2721{ 2741{
2722 spin_unlock(&blocked_lock_lock); 2742 spin_unlock(&blocked_lock_lock);
2723 lg_global_unlock(&file_lock_lglock); 2743 lg_global_unlock(&file_lock_lglock);
2744 percpu_up_write(&file_rwsem);
2724} 2745}
2725 2746
2726static const struct seq_operations locks_seq_operations = { 2747static const struct seq_operations locks_seq_operations = {