diff options
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 136 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_ag.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 2 |
4 files changed, 74 insertions, 76 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 251bcdc6352e..749d7d39d657 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -188,7 +188,7 @@ _xfs_buf_initialize( | |||
188 | atomic_set(&bp->b_hold, 1); | 188 | atomic_set(&bp->b_hold, 1); |
189 | init_completion(&bp->b_iowait); | 189 | init_completion(&bp->b_iowait); |
190 | INIT_LIST_HEAD(&bp->b_list); | 190 | INIT_LIST_HEAD(&bp->b_list); |
191 | INIT_LIST_HEAD(&bp->b_hash_list); | 191 | RB_CLEAR_NODE(&bp->b_rbnode); |
192 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ | 192 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ |
193 | XB_SET_OWNER(bp); | 193 | XB_SET_OWNER(bp); |
194 | bp->b_target = target; | 194 | bp->b_target = target; |
@@ -262,8 +262,6 @@ xfs_buf_free( | |||
262 | { | 262 | { |
263 | trace_xfs_buf_free(bp, _RET_IP_); | 263 | trace_xfs_buf_free(bp, _RET_IP_); |
264 | 264 | ||
265 | ASSERT(list_empty(&bp->b_hash_list)); | ||
266 | |||
267 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 265 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { |
268 | uint i; | 266 | uint i; |
269 | 267 | ||
@@ -422,8 +420,10 @@ _xfs_buf_find( | |||
422 | { | 420 | { |
423 | xfs_off_t range_base; | 421 | xfs_off_t range_base; |
424 | size_t range_length; | 422 | size_t range_length; |
425 | xfs_bufhash_t *hash; | 423 | struct xfs_perag *pag; |
426 | xfs_buf_t *bp, *n; | 424 | struct rb_node **rbp; |
425 | struct rb_node *parent; | ||
426 | xfs_buf_t *bp; | ||
427 | 427 | ||
428 | range_base = (ioff << BBSHIFT); | 428 | range_base = (ioff << BBSHIFT); |
429 | range_length = (isize << BBSHIFT); | 429 | range_length = (isize << BBSHIFT); |
@@ -432,14 +432,37 @@ _xfs_buf_find( | |||
432 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 432 | ASSERT(!(range_length < (1 << btp->bt_sshift))); |
433 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 433 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); |
434 | 434 | ||
435 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; | 435 | /* get tree root */ |
436 | 436 | pag = xfs_perag_get(btp->bt_mount, | |
437 | spin_lock(&hash->bh_lock); | 437 | xfs_daddr_to_agno(btp->bt_mount, ioff)); |
438 | 438 | ||
439 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 439 | /* walk tree */ |
440 | ASSERT(btp == bp->b_target); | 440 | spin_lock(&pag->pag_buf_lock); |
441 | if (bp->b_file_offset == range_base && | 441 | rbp = &pag->pag_buf_tree.rb_node; |
442 | bp->b_buffer_length == range_length) { | 442 | parent = NULL; |
443 | bp = NULL; | ||
444 | while (*rbp) { | ||
445 | parent = *rbp; | ||
446 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); | ||
447 | |||
448 | if (range_base < bp->b_file_offset) | ||
449 | rbp = &(*rbp)->rb_left; | ||
450 | else if (range_base > bp->b_file_offset) | ||
451 | rbp = &(*rbp)->rb_right; | ||
452 | else { | ||
453 | /* | ||
454 | * found a block offset match. If the range doesn't | ||
455 | * match, the only way this is allowed is if the buffer | ||
456 | * in the cache is stale and the transaction that made | ||
457 | * it stale has not yet committed. i.e. we are | ||
458 | * reallocating a busy extent. Skip this buffer and | ||
459 | * continue searching to the right for an exact match. | ||
460 | */ | ||
461 | if (bp->b_buffer_length != range_length) { | ||
462 | ASSERT(bp->b_flags & XBF_STALE); | ||
463 | rbp = &(*rbp)->rb_right; | ||
464 | continue; | ||
465 | } | ||
443 | atomic_inc(&bp->b_hold); | 466 | atomic_inc(&bp->b_hold); |
444 | goto found; | 467 | goto found; |
445 | } | 468 | } |
@@ -449,17 +472,21 @@ _xfs_buf_find( | |||
449 | if (new_bp) { | 472 | if (new_bp) { |
450 | _xfs_buf_initialize(new_bp, btp, range_base, | 473 | _xfs_buf_initialize(new_bp, btp, range_base, |
451 | range_length, flags); | 474 | range_length, flags); |
452 | new_bp->b_hash = hash; | 475 | rb_link_node(&new_bp->b_rbnode, parent, rbp); |
453 | list_add(&new_bp->b_hash_list, &hash->bh_list); | 476 | rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); |
477 | /* the buffer keeps the perag reference until it is freed */ | ||
478 | new_bp->b_pag = pag; | ||
479 | spin_unlock(&pag->pag_buf_lock); | ||
454 | } else { | 480 | } else { |
455 | XFS_STATS_INC(xb_miss_locked); | 481 | XFS_STATS_INC(xb_miss_locked); |
482 | spin_unlock(&pag->pag_buf_lock); | ||
483 | xfs_perag_put(pag); | ||
456 | } | 484 | } |
457 | |||
458 | spin_unlock(&hash->bh_lock); | ||
459 | return new_bp; | 485 | return new_bp; |
460 | 486 | ||
461 | found: | 487 | found: |
462 | spin_unlock(&hash->bh_lock); | 488 | spin_unlock(&pag->pag_buf_lock); |
489 | xfs_perag_put(pag); | ||
463 | 490 | ||
464 | /* Attempt to get the semaphore without sleeping, | 491 | /* Attempt to get the semaphore without sleeping, |
465 | * if this does not work then we need to drop the | 492 | * if this does not work then we need to drop the |
@@ -809,27 +836,30 @@ void | |||
809 | xfs_buf_rele( | 836 | xfs_buf_rele( |
810 | xfs_buf_t *bp) | 837 | xfs_buf_t *bp) |
811 | { | 838 | { |
812 | xfs_bufhash_t *hash = bp->b_hash; | 839 | struct xfs_perag *pag = bp->b_pag; |
813 | 840 | ||
814 | trace_xfs_buf_rele(bp, _RET_IP_); | 841 | trace_xfs_buf_rele(bp, _RET_IP_); |
815 | 842 | ||
816 | if (unlikely(!hash)) { | 843 | if (!pag) { |
817 | ASSERT(!bp->b_relse); | 844 | ASSERT(!bp->b_relse); |
845 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | ||
818 | if (atomic_dec_and_test(&bp->b_hold)) | 846 | if (atomic_dec_and_test(&bp->b_hold)) |
819 | xfs_buf_free(bp); | 847 | xfs_buf_free(bp); |
820 | return; | 848 | return; |
821 | } | 849 | } |
822 | 850 | ||
851 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | ||
823 | ASSERT(atomic_read(&bp->b_hold) > 0); | 852 | ASSERT(atomic_read(&bp->b_hold) > 0); |
824 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { | 853 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
825 | if (bp->b_relse) { | 854 | if (bp->b_relse) { |
826 | atomic_inc(&bp->b_hold); | 855 | atomic_inc(&bp->b_hold); |
827 | spin_unlock(&hash->bh_lock); | 856 | spin_unlock(&pag->pag_buf_lock); |
828 | (*(bp->b_relse)) (bp); | 857 | bp->b_relse(bp); |
829 | } else { | 858 | } else { |
830 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 859 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
831 | list_del_init(&bp->b_hash_list); | 860 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
832 | spin_unlock(&hash->bh_lock); | 861 | spin_unlock(&pag->pag_buf_lock); |
862 | xfs_perag_put(pag); | ||
833 | xfs_buf_free(bp); | 863 | xfs_buf_free(bp); |
834 | } | 864 | } |
835 | } | 865 | } |
@@ -1429,56 +1459,24 @@ xfs_buf_iomove( | |||
1429 | */ | 1459 | */ |
1430 | void | 1460 | void |
1431 | xfs_wait_buftarg( | 1461 | xfs_wait_buftarg( |
1432 | xfs_buftarg_t *btp) | 1462 | struct xfs_buftarg *btp) |
1433 | { | 1463 | { |
1434 | xfs_bufhash_t *hash; | 1464 | struct xfs_perag *pag; |
1435 | uint i; | 1465 | uint i; |
1436 | 1466 | ||
1437 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1467 | for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { |
1438 | hash = &btp->bt_hash[i]; | 1468 | pag = xfs_perag_get(btp->bt_mount, i); |
1439 | spin_lock(&hash->bh_lock); | 1469 | spin_lock(&pag->pag_buf_lock); |
1440 | while (!list_empty(&hash->bh_list)) { | 1470 | while (rb_first(&pag->pag_buf_tree)) { |
1441 | spin_unlock(&hash->bh_lock); | 1471 | spin_unlock(&pag->pag_buf_lock); |
1442 | delay(100); | 1472 | delay(100); |
1443 | spin_lock(&hash->bh_lock); | 1473 | spin_lock(&pag->pag_buf_lock); |
1444 | } | 1474 | } |
1445 | spin_unlock(&hash->bh_lock); | 1475 | spin_unlock(&pag->pag_buf_lock); |
1446 | } | 1476 | xfs_perag_put(pag); |
1447 | } | ||
1448 | |||
1449 | /* | ||
1450 | * Allocate buffer hash table for a given target. | ||
1451 | * For devices containing metadata (i.e. not the log/realtime devices) | ||
1452 | * we need to allocate a much larger hash table. | ||
1453 | */ | ||
1454 | STATIC void | ||
1455 | xfs_alloc_bufhash( | ||
1456 | xfs_buftarg_t *btp, | ||
1457 | int external) | ||
1458 | { | ||
1459 | unsigned int i; | ||
1460 | |||
1461 | if (external) { | ||
1462 | btp->bt_hash = NULL; | ||
1463 | return; | ||
1464 | } | ||
1465 | btp->bt_hashshift = 12; /* 4096 buckets */ | ||
1466 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | ||
1467 | sizeof(xfs_bufhash_t)); | ||
1468 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | ||
1469 | spin_lock_init(&btp->bt_hash[i].bh_lock); | ||
1470 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); | ||
1471 | } | 1477 | } |
1472 | } | 1478 | } |
1473 | 1479 | ||
1474 | STATIC void | ||
1475 | xfs_free_bufhash( | ||
1476 | xfs_buftarg_t *btp) | ||
1477 | { | ||
1478 | kmem_free_large(btp->bt_hash); | ||
1479 | btp->bt_hash = NULL; | ||
1480 | } | ||
1481 | |||
1482 | /* | 1480 | /* |
1483 | * buftarg list for delwrite queue processing | 1481 | * buftarg list for delwrite queue processing |
1484 | */ | 1482 | */ |
@@ -1511,7 +1509,6 @@ xfs_free_buftarg( | |||
1511 | xfs_flush_buftarg(btp, 1); | 1509 | xfs_flush_buftarg(btp, 1); |
1512 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1510 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1513 | xfs_blkdev_issue_flush(btp); | 1511 | xfs_blkdev_issue_flush(btp); |
1514 | xfs_free_bufhash(btp); | ||
1515 | iput(btp->bt_mapping->host); | 1512 | iput(btp->bt_mapping->host); |
1516 | 1513 | ||
1517 | /* Unregister the buftarg first so that we don't get a | 1514 | /* Unregister the buftarg first so that we don't get a |
@@ -1651,7 +1648,6 @@ xfs_alloc_buftarg( | |||
1651 | goto error; | 1648 | goto error; |
1652 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1649 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1653 | goto error; | 1650 | goto error; |
1654 | xfs_alloc_bufhash(btp, external); | ||
1655 | return btp; | 1651 | return btp; |
1656 | 1652 | ||
1657 | error: | 1653 | error: |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 1f109cee136c..c79882ecb513 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -135,10 +135,6 @@ typedef struct xfs_buftarg { | |||
135 | unsigned int bt_sshift; | 135 | unsigned int bt_sshift; |
136 | size_t bt_smask; | 136 | size_t bt_smask; |
137 | 137 | ||
138 | /* per device buffer hash table */ | ||
139 | uint bt_hashshift; | ||
140 | xfs_bufhash_t *bt_hash; | ||
141 | |||
142 | /* per device delwri queue */ | 138 | /* per device delwri queue */ |
143 | struct task_struct *bt_task; | 139 | struct task_struct *bt_task; |
144 | struct list_head bt_list; | 140 | struct list_head bt_list; |
@@ -172,8 +168,8 @@ typedef struct xfs_buf { | |||
172 | wait_queue_head_t b_waiters; /* unpin waiters */ | 168 | wait_queue_head_t b_waiters; /* unpin waiters */ |
173 | struct list_head b_list; | 169 | struct list_head b_list; |
174 | xfs_buf_flags_t b_flags; /* status flags */ | 170 | xfs_buf_flags_t b_flags; /* status flags */ |
175 | struct list_head b_hash_list; /* hash table list */ | 171 | struct rb_node b_rbnode; /* rbtree node */ |
176 | xfs_bufhash_t *b_hash; /* hash table list start */ | 172 | struct xfs_perag *b_pag; /* contains rbtree root */ |
177 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 173 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
178 | atomic_t b_hold; /* reference count */ | 174 | atomic_t b_hold; /* reference count */ |
179 | xfs_daddr_t b_bn; /* block number for I/O */ | 175 | xfs_daddr_t b_bn; /* block number for I/O */ |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index baeec83d01f9..63c7a1a6c022 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -233,6 +233,10 @@ typedef struct xfs_perag { | |||
233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ | 233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ |
234 | unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ | 234 | unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ |
235 | 235 | ||
236 | /* buffer cache index */ | ||
237 | spinlock_t pag_buf_lock; /* lock for pag_buf_tree */ | ||
238 | struct rb_root pag_buf_tree; /* ordered tree of active buffers */ | ||
239 | |||
236 | /* for rcu-safe freeing */ | 240 | /* for rcu-safe freeing */ |
237 | struct rcu_head rcu_head; | 241 | struct rcu_head rcu_head; |
238 | #endif | 242 | #endif |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 59859c343e04..cfa2fb4e7f97 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -479,6 +479,8 @@ xfs_initialize_perag( | |||
479 | rwlock_init(&pag->pag_ici_lock); | 479 | rwlock_init(&pag->pag_ici_lock); |
480 | mutex_init(&pag->pag_ici_reclaim_lock); | 480 | mutex_init(&pag->pag_ici_reclaim_lock); |
481 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 481 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
482 | spin_lock_init(&pag->pag_buf_lock); | ||
483 | pag->pag_buf_tree = RB_ROOT; | ||
482 | 484 | ||
483 | if (radix_tree_preload(GFP_NOFS)) | 485 | if (radix_tree_preload(GFP_NOFS)) |
484 | goto out_unwind; | 486 | goto out_unwind; |