 fs/xfs/linux-2.6/xfs_buf.c | 136 +++++++++++++++++++++----------------------
 fs/xfs/linux-2.6/xfs_buf.h |   8 ++------
 fs/xfs/xfs_ag.h            |   4 ++++
 fs/xfs/xfs_mount.c         |   2 ++
 4 files changed, 74 insertions(+), 76 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 251bcdc6352e..749d7d39d657 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -188,7 +188,7 @@ _xfs_buf_initialize(
 	atomic_set(&bp->b_hold, 1);
 	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_list);
-	INIT_LIST_HEAD(&bp->b_hash_list);
+	RB_CLEAR_NODE(&bp->b_rbnode);
 	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
@@ -262,8 +262,6 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
-	ASSERT(list_empty(&bp->b_hash_list));
-
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
@@ -422,8 +420,10 @@ _xfs_buf_find(
 {
 	xfs_off_t		range_base;
 	size_t			range_length;
-	xfs_bufhash_t		*hash;
-	xfs_buf_t		*bp, *n;
+	struct xfs_perag	*pag;
+	struct rb_node		**rbp;
+	struct rb_node		*parent;
+	xfs_buf_t		*bp;
 
 	range_base = (ioff << BBSHIFT);
 	range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
 	ASSERT(!(range_length < (1 << btp->bt_sshift)));
 	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
 
-	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
-	spin_lock(&hash->bh_lock);
-
-	list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-		ASSERT(btp == bp->b_target);
-		if (bp->b_file_offset == range_base &&
-		    bp->b_buffer_length == range_length) {
+	/* get tree root */
+	pag = xfs_perag_get(btp->bt_mount,
+				xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+	/* walk tree */
+	spin_lock(&pag->pag_buf_lock);
+	rbp = &pag->pag_buf_tree.rb_node;
+	parent = NULL;
+	bp = NULL;
+	while (*rbp) {
+		parent = *rbp;
+		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+		if (range_base < bp->b_file_offset)
+			rbp = &(*rbp)->rb_left;
+		else if (range_base > bp->b_file_offset)
+			rbp = &(*rbp)->rb_right;
+		else {
+			/*
+			 * found a block offset match. If the range doesn't
+			 * match, the only way this is allowed is if the buffer
+			 * in the cache is stale and the transaction that made
+			 * it stale has not yet committed. i.e. we are
+			 * reallocating a busy extent. Skip this buffer and
+			 * continue searching to the right for an exact match.
+			 */
+			if (bp->b_buffer_length != range_length) {
+				ASSERT(bp->b_flags & XBF_STALE);
+				rbp = &(*rbp)->rb_right;
+				continue;
+			}
 			atomic_inc(&bp->b_hold);
 			goto found;
 		}
@@ -449,17 +472,21 @@ _xfs_buf_find(
 	if (new_bp) {
 		_xfs_buf_initialize(new_bp, btp, range_base,
 					range_length, flags);
-		new_bp->b_hash = hash;
-		list_add(&new_bp->b_hash_list, &hash->bh_list);
+		rb_link_node(&new_bp->b_rbnode, parent, rbp);
+		rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+		/* the buffer keeps the perag reference until it is freed */
+		new_bp->b_pag = pag;
+		spin_unlock(&pag->pag_buf_lock);
 	} else {
 		XFS_STATS_INC(xb_miss_locked);
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
 	}
-
-	spin_unlock(&hash->bh_lock);
 	return new_bp;
 
 found:
-	spin_unlock(&hash->bh_lock);
+	spin_unlock(&pag->pag_buf_lock);
+	xfs_perag_put(pag);
 
 	/* Attempt to get the semaphore without sleeping,
 	 * if this does not work then we need to drop the
@@ -809,27 +836,30 @@ void
 xfs_buf_rele(
 	xfs_buf_t		*bp)
 {
-	xfs_bufhash_t		*hash = bp->b_hash;
+	struct xfs_perag	*pag = bp->b_pag;
 
 	trace_xfs_buf_rele(bp, _RET_IP_);
 
-	if (unlikely(!hash)) {
+	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
 		return;
 	}
 
+	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
 	ASSERT(atomic_read(&bp->b_hold) > 0);
-	if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
-			spin_unlock(&hash->bh_lock);
-			(*(bp->b_relse)) (bp);
+			spin_unlock(&pag->pag_buf_lock);
+			bp->b_relse(bp);
 		} else {
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-			list_del_init(&bp->b_hash_list);
-			spin_unlock(&hash->bh_lock);
+			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+			spin_unlock(&pag->pag_buf_lock);
+			xfs_perag_put(pag);
 			xfs_buf_free(bp);
 		}
 	}
@@ -1429,56 +1459,24 @@ xfs_buf_iomove(
  */
 void
 xfs_wait_buftarg(
-	xfs_buftarg_t	*btp)
+	struct xfs_buftarg	*btp)
 {
-	xfs_bufhash_t	*hash;
-	uint		i;
+	struct xfs_perag	*pag;
+	uint			i;
 
-	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-		hash = &btp->bt_hash[i];
-		spin_lock(&hash->bh_lock);
-		while (!list_empty(&hash->bh_list)) {
-			spin_unlock(&hash->bh_lock);
+	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+		pag = xfs_perag_get(btp->bt_mount, i);
+		spin_lock(&pag->pag_buf_lock);
+		while (rb_first(&pag->pag_buf_tree)) {
+			spin_unlock(&pag->pag_buf_lock);
 			delay(100);
-			spin_lock(&hash->bh_lock);
+			spin_lock(&pag->pag_buf_lock);
 		}
-		spin_unlock(&hash->bh_lock);
-	}
-}
-
-/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
-	xfs_buftarg_t		*btp,
-	int			external)
-{
-	unsigned int		i;
-
-	if (external) {
-		btp->bt_hash = NULL;
-		return;
-	}
-	btp->bt_hashshift = 12;	/* 4096 buckets */
-	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
-					 sizeof(xfs_bufhash_t));
-	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-		spin_lock_init(&btp->bt_hash[i].bh_lock);
-		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
 	}
 }
 
-STATIC void
-xfs_free_bufhash(
-	xfs_buftarg_t		*btp)
-{
-	kmem_free_large(btp->bt_hash);
-	btp->bt_hash = NULL;
-}
-
 /*
  * buftarg list for delwrite queue processing
  */
@@ -1511,7 +1509,6 @@ xfs_free_buftarg(
 	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
-	xfs_free_bufhash(btp);
 	iput(btp->bt_mapping->host);
 
 	/* Unregister the buftarg first so that we don't get a
@@ -1651,7 +1648,6 @@ xfs_alloc_buftarg(
 		goto error;
 	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
-	xfs_alloc_bufhash(btp, external);
 	return btp;
 
 error:
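
The _xfs_buf_find() change above is a textbook rbtree descent with one twist: a block-offset match with a length mismatch is only legal for a stale buffer, so the walk continues to the right until it finds an exact match or falls off the tree, and the insertion point (parent, rbp) is exactly where the failed search ended. Below is a minimal userspace sketch of that lookup/insert walk, using a plain unbalanced binary search tree in place of the kernel rbtree; struct buf and find_or_insert() are illustrative stand-ins, not XFS code.

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdbool.h>

struct buf {
	long long	offset;		/* stands in for b_file_offset */
	size_t		length;		/* stands in for b_buffer_length */
	bool		stale;		/* stands in for XBF_STALE */
	struct buf	*left, *right;
};

/* Return an exact (offset, length) match, or link new_bp in on a miss. */
static struct buf *
find_or_insert(struct buf **root, long long offset, size_t length,
		struct buf *new_bp)
{
	struct buf **link = root;

	while (*link) {
		struct buf *bp = *link;

		if (offset < bp->offset)
			link = &bp->left;
		else if (offset > bp->offset)
			link = &bp->right;
		else if (bp->length != length) {
			/*
			 * Offset matches but length doesn't: only legal for
			 * a stale buffer (mirrors the XBF_STALE assert);
			 * keep searching to the right for an exact match.
			 */
			if (!bp->stale)
				abort();
			link = &bp->right;
		} else {
			return bp;	/* cache hit */
		}
	}
	/* cache miss: link the new buffer where the search ended */
	new_bp->offset = offset;
	new_bp->length = length;
	*link = new_bp;
	return new_bp;
}

int main(void)
{
	struct buf *root = NULL, a = { 0 }, b = { 0 };

	find_or_insert(&root, 128, 4096, &a);	/* miss: inserts a */
	printf("hit: %d\n", find_or_insert(&root, 128, 4096, &b) == &a);
	return 0;
}

The kernel version differs only in that rb_link_node()/rb_insert_color() replace the bare link assignment, so the tree is rebalanced after every insert.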
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 1f109cee136c..c79882ecb513 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -135,10 +135,6 @@ typedef struct xfs_buftarg {
 	unsigned int		bt_sshift;
 	size_t			bt_smask;
 
-	/* per device buffer hash table */
-	uint			bt_hashshift;
-	xfs_bufhash_t		*bt_hash;
-
 	/* per device delwri queue */
 	struct task_struct	*bt_task;
 	struct list_head	bt_list;
@@ -172,8 +168,8 @@ typedef struct xfs_buf {
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	xfs_buf_flags_t		b_flags;	/* status flags */
-	struct list_head	b_hash_list;	/* hash table list */
-	xfs_bufhash_t		*b_hash;	/* hash table list start */
+	struct rb_node		b_rbnode;	/* rbtree node */
+	struct xfs_perag	*b_pag;		/* contains rbtree root */
 	xfs_buftarg_t		*b_target;	/* buffer target (device) */
 	atomic_t		b_hold;		/* reference count */
 	xfs_daddr_t		b_bn;		/* block number for I/O */
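
The header change swaps the embedded list head for an embedded rb_node, which is why the lookup code uses rb_entry(): it is the kernel's container_of(), recovering the enclosing xfs_buf from a pointer to its b_rbnode member by subtracting the member's offset. A standalone sketch of that pointer arithmetic, with simplified stand-in types (rb_node_stub and buf_stub are illustrative, not the kernel definitions):

#include <stdio.h>
#include <stddef.h>

struct rb_node_stub { struct rb_node_stub *left, *right; };

struct buf_stub {
	int			id;
	struct rb_node_stub	rbnode;		/* embedded, like b_rbnode */
};

/* same arithmetic as the kernel's container_of()/rb_entry() */
#define rb_entry_stub(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct buf_stub buf = { .id = 42 };
	struct rb_node_stub *node = &buf.rbnode;

	/* recover the enclosing buf_stub from the embedded node */
	struct buf_stub *bp = rb_entry_stub(node, struct buf_stub, rbnode);
	printf("id=%d, same object: %d\n", bp->id, bp == &buf);
	return 0;
}

Embedding the node this way means the tree needs no per-entry allocations: linking a buffer into the per-AG tree is just pointer wiring inside memory the buffer already owns.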
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index baeec83d01f9..63c7a1a6c022 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -233,6 +233,10 @@ typedef struct xfs_perag {
 	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
 
+	/* buffer cache index */
+	spinlock_t	pag_buf_lock;	/* lock for pag_buf_tree */
+	struct rb_root	pag_buf_tree;	/* ordered tree of active buffers */
+
 	/* for rcu-safe freeing */
 	struct rcu_head	rcu_head;
 #endif
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 59859c343e04..cfa2fb4e7f97 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -479,6 +479,8 @@ xfs_initialize_perag(
 		rwlock_init(&pag->pag_ici_lock);
 		mutex_init(&pag->pag_ici_reclaim_lock);
 		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+		spin_lock_init(&pag->pag_buf_lock);
+		pag->pag_buf_tree = RB_ROOT;
 
 		if (radix_tree_preload(GFP_NOFS))
 			goto out_unwind;
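
Moving the index root and its spinlock into struct xfs_perag, initialised per-AG above, shards the buffer cache by allocation group: each AG gets its own tree and lock, so lookups in different AGs never contend the way they could on one device-wide hash. A sketch of the shard selection, with made-up geometry standing in for xfs_daddr_to_agno() and the mount's AG count (the % clamp is only for the demo; the real helper derives the AG from the superblock geometry):

#include <stdio.h>
#include <pthread.h>

#define AG_COUNT	16
#define BLOCKS_PER_AG	8192	/* stand-in for sb_agblocks */

struct perag_stub {
	pthread_spinlock_t	buf_lock;	/* like pag_buf_lock */
	void			*buf_tree;	/* like pag_buf_tree root */
};

static struct perag_stub perag[AG_COUNT];

/* like xfs_daddr_to_agno(): map a disk block to its AG index */
static unsigned int daddr_to_agno(long long daddr)
{
	return (unsigned int)(daddr / BLOCKS_PER_AG) % AG_COUNT;
}

int main(void)
{
	for (int i = 0; i < AG_COUNT; i++)
		pthread_spin_init(&perag[i].buf_lock, PTHREAD_PROCESS_PRIVATE);

	/* a lookup in AG n takes only that AG's lock */
	unsigned int agno = daddr_to_agno(20000);
	pthread_spin_lock(&perag[agno].buf_lock);
	/* ... the tree walk would happen here ... */
	pthread_spin_unlock(&perag[agno].buf_lock);

	printf("block 100 -> AG %u, block 20000 -> AG %u\n",
	       daddr_to_agno(100), agno);
	return 0;
}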