aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-12-17 01:29:43 -0500
committerDave Chinner <david@fromorbit.com>2010-12-17 01:29:43 -0500
commit1a3e8f3da09c7082d25b512a0ffe569391e4c09a (patch)
treec717ebe79e1f969f929d1fe6fb044fb59114449f /fs/xfs/xfs_inode.c
parentd95b7aaf9ab6738bef1ebcc52ab66563085e44ac (diff)
xfs: convert inode cache lookups to use RCU locking
With delayed logging greatly increasing the sustained parallelism of inode operations, the inode cache locking is showing significant read vs write contention when inode reclaim runs at the same time as lookups. There is also a lot more write lock acquistions than there are read locks (4:1 ratio) so the read locking is not really buying us much in the way of parallelism. To avoid the read vs write contention, change the cache to use RCU locking on the read side. To avoid needing to RCU free every single inode, use the built in slab RCU freeing mechanism. This requires us to be able to detect lookups of freed inodes, so enѕure that ever freed inode has an inode number of zero and the XFS_IRECLAIM flag set. We already check the XFS_IRECLAIM flag in cache hit lookup path, but also add a check for a zero inode number as well. We canthen convert all the read locking lockups to use RCU read side locking and hence remove all read side locking. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c52
1 files changed, 40 insertions, 12 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 108c7a085f94..43ffd9079106 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster(
2000 */ 2000 */
2001 for (i = 0; i < ninodes; i++) { 2001 for (i = 0; i < ninodes; i++) {
2002retry: 2002retry:
2003 read_lock(&pag->pag_ici_lock); 2003 rcu_read_lock();
2004 ip = radix_tree_lookup(&pag->pag_ici_root, 2004 ip = radix_tree_lookup(&pag->pag_ici_root,
2005 XFS_INO_TO_AGINO(mp, (inum + i))); 2005 XFS_INO_TO_AGINO(mp, (inum + i)));
2006 2006
2007 /* Inode not in memory or stale, nothing to do */ 2007 /* Inode not in memory, nothing to do */
2008 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2008 if (!ip) {
2009 read_unlock(&pag->pag_ici_lock); 2009 rcu_read_unlock();
2010 continue; 2010 continue;
2011 } 2011 }
2012 2012
2013 /* 2013 /*
2014 * because this is an RCU protected lookup, we could
2015 * find a recently freed or even reallocated inode
2016 * during the lookup. We need to check under the
2017 * i_flags_lock for a valid inode here. Skip it if it
2018 * is not valid, the wrong inode or stale.
2019 */
2020 spin_lock(&ip->i_flags_lock);
2021 if (ip->i_ino != inum + i ||
2022 __xfs_iflags_test(ip, XFS_ISTALE)) {
2023 spin_unlock(&ip->i_flags_lock);
2024 rcu_read_unlock();
2025 continue;
2026 }
2027 spin_unlock(&ip->i_flags_lock);
2028
2029 /*
2014 * Don't try to lock/unlock the current inode, but we 2030 * Don't try to lock/unlock the current inode, but we
2015 * _cannot_ skip the other inodes that we did not find 2031 * _cannot_ skip the other inodes that we did not find
2016 * in the list attached to the buffer and are not 2032 * in the list attached to the buffer and are not
@@ -2019,11 +2035,11 @@ retry:
2019 */ 2035 */
2020 if (ip != free_ip && 2036 if (ip != free_ip &&
2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2037 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2022 read_unlock(&pag->pag_ici_lock); 2038 rcu_read_unlock();
2023 delay(1); 2039 delay(1);
2024 goto retry; 2040 goto retry;
2025 } 2041 }
2026 read_unlock(&pag->pag_ici_lock); 2042 rcu_read_unlock();
2027 2043
2028 xfs_iflock(ip); 2044 xfs_iflock(ip);
2029 xfs_iflags_set(ip, XFS_ISTALE); 2045 xfs_iflags_set(ip, XFS_ISTALE);
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster(
2629 2645
2630 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2646 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2631 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2647 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
2632 read_lock(&pag->pag_ici_lock); 2648 rcu_read_lock();
2633 /* really need a gang lookup range call here */ 2649 /* really need a gang lookup range call here */
2634 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2650 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2635 first_index, inodes_per_cluster); 2651 first_index, inodes_per_cluster);
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster(
2640 iq = ilist[i]; 2656 iq = ilist[i];
2641 if (iq == ip) 2657 if (iq == ip)
2642 continue; 2658 continue;
2643 /* if the inode lies outside this cluster, we're done. */ 2659
2644 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) 2660 /*
2645 break; 2661 * because this is an RCU protected lookup, we could find a
2662 * recently freed or even reallocated inode during the lookup.
2663 * We need to check under the i_flags_lock for a valid inode
2664 * here. Skip it if it is not valid or the wrong inode.
2665 */
2666 spin_lock(&ip->i_flags_lock);
2667 if (!ip->i_ino ||
2668 (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
2669 spin_unlock(&ip->i_flags_lock);
2670 continue;
2671 }
2672 spin_unlock(&ip->i_flags_lock);
2673
2646 /* 2674 /*
2647 * Do an un-protected check to see if the inode is dirty and 2675 * Do an un-protected check to see if the inode is dirty and
2648 * is a candidate for flushing. These checks will be repeated 2676 * is a candidate for flushing. These checks will be repeated
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster(
2692 } 2720 }
2693 2721
2694out_free: 2722out_free:
2695 read_unlock(&pag->pag_ici_lock); 2723 rcu_read_unlock();
2696 kmem_free(ilist); 2724 kmem_free(ilist);
2697out_put: 2725out_put:
2698 xfs_perag_put(pag); 2726 xfs_perag_put(pag);
@@ -2704,7 +2732,7 @@ cluster_corrupt_out:
2704 * Corruption detected in the clustering loop. Invalidate the 2732 * Corruption detected in the clustering loop. Invalidate the
2705 * inode buffer and shut down the filesystem. 2733 * inode buffer and shut down the filesystem.
2706 */ 2734 */
2707 read_unlock(&pag->pag_ici_lock); 2735 rcu_read_unlock();
2708 /* 2736 /*
2709 * Clean up the buffer. If it was B_DELWRI, just release it -- 2737 * Clean up the buffer. If it was B_DELWRI, just release it --
2710 * brelse can handle it with no problems. If not, shut down the 2738 * brelse can handle it with no problems. If not, shut down the