aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-09-21 20:47:20 -0400
committerAlex Elder <aelder@sgi.com>2010-10-18 16:07:44 -0400
commite176579e70118ed7cfdb60f963628fe0ca771f3d (patch)
treee2d252fcd692aa4e3907300521e2af81c974f875
parentbd32d25a7cf7242512e77e70bab63df4402ab91c (diff)
xfs: lockless per-ag lookups
When we start taking a reference to the per-ag for every cached buffer in the system, kernel lockstat profiling on an 8-way create workload shows the mp->m_perag_lock has higher acquisition rates than the inode lock and has significantly more contention. That is, it becomes the highest contended lock in the system. The perag lookup is trivial to convert to lock-less RCU lookups because perag structures never go away. Hence the only thing we need to protect against is tree structure changes during a grow. This can be done simply by replacing the locking in xfs_perag_get() with RCU read locking. This removes the mp->m_perag_lock completely from this path. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c6
-rw-r--r--fs/xfs/xfs_ag.h3
-rw-r--r--fs/xfs/xfs_mount.c25
3 files changed, 23 insertions, 11 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 81976ffed7d6..3a1d229b4784 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -150,17 +150,17 @@ xfs_inode_ag_iter_next_pag(
 		int		found;
 		int		ref;
 
-		spin_lock(&mp->m_perag_lock);
+		rcu_read_lock();
 		found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
 				(void **)&pag, *first, 1, tag);
 		if (found <= 0) {
-			spin_unlock(&mp->m_perag_lock);
+			rcu_read_unlock();
 			return NULL;
 		}
 		*first = pag->pag_agno + 1;
 		/* open coded pag reference increment */
 		ref = atomic_inc_return(&pag->pag_ref);
-		spin_unlock(&mp->m_perag_lock);
+		rcu_read_unlock();
 		trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
 	} else {
 		pag = xfs_perag_get(mp, *first);
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 4917d4eed4ed..51c42c202bf1 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -230,6 +230,9 @@ typedef struct xfs_perag {
 	rwlock_t	pag_ici_lock;	/* incore inode lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
 	int		pag_ici_reclaimable;	/* reclaimable inodes */
+
+	/* for rcu-safe freeing */
+	struct rcu_head	rcu_head;
 #endif
 	int		pagb_count;	/* pagb slots in use */
 } xfs_perag_t;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 00c7a876807d..14fc6e9e1816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -199,6 +199,8 @@ xfs_uuid_unmount(
 
 /*
  * Reference counting access wrappers to the perag structures.
+ * Because we never free per-ag structures, the only thing we
+ * have to protect against changes is the tree structure itself.
  */
 struct xfs_perag *
 xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -206,13 +208,13 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
 	struct xfs_perag	*pag;
 	int			ref = 0;
 
-	spin_lock(&mp->m_perag_lock);
+	rcu_read_lock();
 	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
 	if (pag) {
 		ASSERT(atomic_read(&pag->pag_ref) >= 0);
 		ref = atomic_inc_return(&pag->pag_ref);
 	}
-	spin_unlock(&mp->m_perag_lock);
+	rcu_read_unlock();
 	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
 	return pag;
 }
@@ -227,10 +229,18 @@ xfs_perag_put(struct xfs_perag *pag)
 	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
 }
 
+STATIC void
+__xfs_free_perag(
+	struct rcu_head	*head)
+{
+	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+
+	ASSERT(atomic_read(&pag->pag_ref) == 0);
+	kmem_free(pag);
+}
+
 /*
- * Free up the resources associated with a mount structure.  Assume that
- * the structure was initially zeroed, so we can tell which fields got
- * initialized.
+ * Free up the per-ag resources associated with the mount structure.
  */
 STATIC void
 xfs_free_perag(
@@ -242,10 +252,9 @@ xfs_free_perag(
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 		spin_lock(&mp->m_perag_lock);
 		pag = radix_tree_delete(&mp->m_perag_tree, agno);
-		ASSERT(pag);
-		ASSERT(atomic_read(&pag->pag_ref) == 0);
 		spin_unlock(&mp->m_perag_lock);
-		kmem_free(pag);
+		ASSERT(pag);
+		call_rcu(&pag->rcu_head, __xfs_free_perag);
 	}
 }
251 260