diff options
38 files changed, 1997 insertions, 1920 deletions
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h deleted file mode 100644 index 4dfc7c370819..000000000000 --- a/fs/xfs/linux-2.6/sv.h +++ /dev/null | |||
| @@ -1,59 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
| 3 | * All Rights Reserved. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it would be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write the Free Software Foundation, | ||
| 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | #ifndef __XFS_SUPPORT_SV_H__ | ||
| 19 | #define __XFS_SUPPORT_SV_H__ | ||
| 20 | |||
| 21 | #include <linux/wait.h> | ||
| 22 | #include <linux/sched.h> | ||
| 23 | #include <linux/spinlock.h> | ||
| 24 | |||
| 25 | /* | ||
| 26 | * Synchronisation variables. | ||
| 27 | * | ||
| 28 | * (Parameters "pri", "svf" and "rts" are not implemented) | ||
| 29 | */ | ||
| 30 | |||
| 31 | typedef struct sv_s { | ||
| 32 | wait_queue_head_t waiters; | ||
| 33 | } sv_t; | ||
| 34 | |||
| 35 | static inline void _sv_wait(sv_t *sv, spinlock_t *lock) | ||
| 36 | { | ||
| 37 | DECLARE_WAITQUEUE(wait, current); | ||
| 38 | |||
| 39 | add_wait_queue_exclusive(&sv->waiters, &wait); | ||
| 40 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 41 | spin_unlock(lock); | ||
| 42 | |||
| 43 | schedule(); | ||
| 44 | |||
| 45 | remove_wait_queue(&sv->waiters, &wait); | ||
| 46 | } | ||
| 47 | |||
| 48 | #define sv_init(sv,flag,name) \ | ||
| 49 | init_waitqueue_head(&(sv)->waiters) | ||
| 50 | #define sv_destroy(sv) \ | ||
| 51 | /*NOTHING*/ | ||
| 52 | #define sv_wait(sv, pri, lock, s) \ | ||
| 53 | _sv_wait(sv, lock) | ||
| 54 | #define sv_signal(sv) \ | ||
| 55 | wake_up(&(sv)->waiters) | ||
| 56 | #define sv_broadcast(sv) \ | ||
| 57 | wake_up_all(&(sv)->waiters) | ||
| 58 | |||
| 59 | #endif /* __XFS_SUPPORT_SV_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 691f61223ed6..ec7bbb5645b6 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
| @@ -38,15 +38,6 @@ | |||
| 38 | #include <linux/pagevec.h> | 38 | #include <linux/pagevec.h> |
| 39 | #include <linux/writeback.h> | 39 | #include <linux/writeback.h> |
| 40 | 40 | ||
| 41 | /* | ||
| 42 | * Types of I/O for bmap clustering and I/O completion tracking. | ||
| 43 | */ | ||
| 44 | enum { | ||
| 45 | IO_READ, /* mapping for a read */ | ||
| 46 | IO_DELAY, /* mapping covers delalloc region */ | ||
| 47 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | ||
| 48 | IO_NEW /* just allocated */ | ||
| 49 | }; | ||
| 50 | 41 | ||
| 51 | /* | 42 | /* |
| 52 | * Prime number of hash buckets since address is used as the key. | 43 | * Prime number of hash buckets since address is used as the key. |
| @@ -182,9 +173,6 @@ xfs_setfilesize( | |||
| 182 | xfs_inode_t *ip = XFS_I(ioend->io_inode); | 173 | xfs_inode_t *ip = XFS_I(ioend->io_inode); |
| 183 | xfs_fsize_t isize; | 174 | xfs_fsize_t isize; |
| 184 | 175 | ||
| 185 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
| 186 | ASSERT(ioend->io_type != IO_READ); | ||
| 187 | |||
| 188 | if (unlikely(ioend->io_error)) | 176 | if (unlikely(ioend->io_error)) |
| 189 | return 0; | 177 | return 0; |
| 190 | 178 | ||
| @@ -244,10 +232,8 @@ xfs_end_io( | |||
| 244 | * We might have to update the on-disk file size after extending | 232 | * We might have to update the on-disk file size after extending |
| 245 | * writes. | 233 | * writes. |
| 246 | */ | 234 | */ |
| 247 | if (ioend->io_type != IO_READ) { | 235 | error = xfs_setfilesize(ioend); |
| 248 | error = xfs_setfilesize(ioend); | 236 | ASSERT(!error || error == EAGAIN); |
| 249 | ASSERT(!error || error == EAGAIN); | ||
| 250 | } | ||
| 251 | 237 | ||
| 252 | /* | 238 | /* |
| 253 | * If we didn't complete processing of the ioend, requeue it to the | 239 | * If we didn't complete processing of the ioend, requeue it to the |
| @@ -318,14 +304,63 @@ STATIC int | |||
| 318 | xfs_map_blocks( | 304 | xfs_map_blocks( |
| 319 | struct inode *inode, | 305 | struct inode *inode, |
| 320 | loff_t offset, | 306 | loff_t offset, |
| 321 | ssize_t count, | ||
| 322 | struct xfs_bmbt_irec *imap, | 307 | struct xfs_bmbt_irec *imap, |
| 323 | int flags) | 308 | int type, |
| 309 | int nonblocking) | ||
| 324 | { | 310 | { |
| 325 | int nmaps = 1; | 311 | struct xfs_inode *ip = XFS_I(inode); |
| 326 | int new = 0; | 312 | struct xfs_mount *mp = ip->i_mount; |
| 313 | ssize_t count = 1 << inode->i_blkbits; | ||
| 314 | xfs_fileoff_t offset_fsb, end_fsb; | ||
| 315 | int error = 0; | ||
| 316 | int bmapi_flags = XFS_BMAPI_ENTIRE; | ||
| 317 | int nimaps = 1; | ||
| 318 | |||
| 319 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 320 | return -XFS_ERROR(EIO); | ||
| 321 | |||
| 322 | if (type == IO_UNWRITTEN) | ||
| 323 | bmapi_flags |= XFS_BMAPI_IGSTATE; | ||
| 324 | |||
| 325 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | ||
| 326 | if (nonblocking) | ||
| 327 | return -XFS_ERROR(EAGAIN); | ||
| 328 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
| 329 | } | ||
| 327 | 330 | ||
| 328 | return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); | 331 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
| 332 | (ip->i_df.if_flags & XFS_IFEXTENTS)); | ||
| 333 | ASSERT(offset <= mp->m_maxioffset); | ||
| 334 | |||
| 335 | if (offset + count > mp->m_maxioffset) | ||
| 336 | count = mp->m_maxioffset - offset; | ||
| 337 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
| 338 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
| 339 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, | ||
| 340 | bmapi_flags, NULL, 0, imap, &nimaps, NULL); | ||
| 341 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 342 | |||
| 343 | if (error) | ||
| 344 | return -XFS_ERROR(error); | ||
| 345 | |||
| 346 | if (type == IO_DELALLOC && | ||
| 347 | (!nimaps || isnullstartblock(imap->br_startblock))) { | ||
| 348 | error = xfs_iomap_write_allocate(ip, offset, count, imap); | ||
| 349 | if (!error) | ||
| 350 | trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); | ||
| 351 | return -XFS_ERROR(error); | ||
| 352 | } | ||
| 353 | |||
| 354 | #ifdef DEBUG | ||
| 355 | if (type == IO_UNWRITTEN) { | ||
| 356 | ASSERT(nimaps); | ||
| 357 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | ||
| 358 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | ||
| 359 | } | ||
| 360 | #endif | ||
| 361 | if (nimaps) | ||
| 362 | trace_xfs_map_blocks_found(ip, offset, count, type, imap); | ||
| 363 | return 0; | ||
| 329 | } | 364 | } |
| 330 | 365 | ||
| 331 | STATIC int | 366 | STATIC int |
| @@ -380,26 +415,18 @@ xfs_submit_ioend_bio( | |||
| 380 | 415 | ||
| 381 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? | 416 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? |
| 382 | WRITE_SYNC_PLUG : WRITE, bio); | 417 | WRITE_SYNC_PLUG : WRITE, bio); |
| 383 | ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); | ||
| 384 | bio_put(bio); | ||
| 385 | } | 418 | } |
| 386 | 419 | ||
| 387 | STATIC struct bio * | 420 | STATIC struct bio * |
| 388 | xfs_alloc_ioend_bio( | 421 | xfs_alloc_ioend_bio( |
| 389 | struct buffer_head *bh) | 422 | struct buffer_head *bh) |
| 390 | { | 423 | { |
| 391 | struct bio *bio; | ||
| 392 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 424 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
| 393 | 425 | struct bio *bio = bio_alloc(GFP_NOIO, nvecs); | |
| 394 | do { | ||
| 395 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
| 396 | nvecs >>= 1; | ||
| 397 | } while (!bio); | ||
| 398 | 426 | ||
| 399 | ASSERT(bio->bi_private == NULL); | 427 | ASSERT(bio->bi_private == NULL); |
| 400 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 428 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
| 401 | bio->bi_bdev = bh->b_bdev; | 429 | bio->bi_bdev = bh->b_bdev; |
| 402 | bio_get(bio); | ||
| 403 | return bio; | 430 | return bio; |
| 404 | } | 431 | } |
| 405 | 432 | ||
| @@ -470,9 +497,8 @@ xfs_submit_ioend( | |||
| 470 | /* Pass 1 - start writeback */ | 497 | /* Pass 1 - start writeback */ |
| 471 | do { | 498 | do { |
| 472 | next = ioend->io_list; | 499 | next = ioend->io_list; |
| 473 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | 500 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) |
| 474 | xfs_start_buffer_writeback(bh); | 501 | xfs_start_buffer_writeback(bh); |
| 475 | } | ||
| 476 | } while ((ioend = next) != NULL); | 502 | } while ((ioend = next) != NULL); |
| 477 | 503 | ||
| 478 | /* Pass 2 - submit I/O */ | 504 | /* Pass 2 - submit I/O */ |
| @@ -600,117 +626,13 @@ xfs_map_at_offset( | |||
| 600 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 626 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); |
| 601 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 627 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
| 602 | 628 | ||
| 603 | lock_buffer(bh); | ||
| 604 | xfs_map_buffer(inode, bh, imap, offset); | 629 | xfs_map_buffer(inode, bh, imap, offset); |
| 605 | bh->b_bdev = xfs_find_bdev_for_inode(inode); | ||
| 606 | set_buffer_mapped(bh); | 630 | set_buffer_mapped(bh); |
| 607 | clear_buffer_delay(bh); | 631 | clear_buffer_delay(bh); |
| 608 | clear_buffer_unwritten(bh); | 632 | clear_buffer_unwritten(bh); |
| 609 | } | 633 | } |
| 610 | 634 | ||
| 611 | /* | 635 | /* |
| 612 | * Look for a page at index that is suitable for clustering. | ||
| 613 | */ | ||
| 614 | STATIC unsigned int | ||
| 615 | xfs_probe_page( | ||
| 616 | struct page *page, | ||
| 617 | unsigned int pg_offset) | ||
| 618 | { | ||
| 619 | struct buffer_head *bh, *head; | ||
| 620 | int ret = 0; | ||
| 621 | |||
| 622 | if (PageWriteback(page)) | ||
| 623 | return 0; | ||
| 624 | if (!PageDirty(page)) | ||
| 625 | return 0; | ||
| 626 | if (!page->mapping) | ||
| 627 | return 0; | ||
| 628 | if (!page_has_buffers(page)) | ||
| 629 | return 0; | ||
| 630 | |||
| 631 | bh = head = page_buffers(page); | ||
| 632 | do { | ||
| 633 | if (!buffer_uptodate(bh)) | ||
| 634 | break; | ||
| 635 | if (!buffer_mapped(bh)) | ||
| 636 | break; | ||
| 637 | ret += bh->b_size; | ||
| 638 | if (ret >= pg_offset) | ||
| 639 | break; | ||
| 640 | } while ((bh = bh->b_this_page) != head); | ||
| 641 | |||
| 642 | return ret; | ||
| 643 | } | ||
| 644 | |||
| 645 | STATIC size_t | ||
| 646 | xfs_probe_cluster( | ||
| 647 | struct inode *inode, | ||
| 648 | struct page *startpage, | ||
| 649 | struct buffer_head *bh, | ||
| 650 | struct buffer_head *head) | ||
| 651 | { | ||
| 652 | struct pagevec pvec; | ||
| 653 | pgoff_t tindex, tlast, tloff; | ||
| 654 | size_t total = 0; | ||
| 655 | int done = 0, i; | ||
| 656 | |||
| 657 | /* First sum forwards in this page */ | ||
| 658 | do { | ||
| 659 | if (!buffer_uptodate(bh) || !buffer_mapped(bh)) | ||
| 660 | return total; | ||
| 661 | total += bh->b_size; | ||
| 662 | } while ((bh = bh->b_this_page) != head); | ||
| 663 | |||
| 664 | /* if we reached the end of the page, sum forwards in following pages */ | ||
| 665 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
| 666 | tindex = startpage->index + 1; | ||
| 667 | |||
| 668 | /* Prune this back to avoid pathological behavior */ | ||
| 669 | tloff = min(tlast, startpage->index + 64); | ||
| 670 | |||
| 671 | pagevec_init(&pvec, 0); | ||
| 672 | while (!done && tindex <= tloff) { | ||
| 673 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); | ||
| 674 | |||
| 675 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) | ||
| 676 | break; | ||
| 677 | |||
| 678 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
| 679 | struct page *page = pvec.pages[i]; | ||
| 680 | size_t pg_offset, pg_len = 0; | ||
| 681 | |||
| 682 | if (tindex == tlast) { | ||
| 683 | pg_offset = | ||
| 684 | i_size_read(inode) & (PAGE_CACHE_SIZE - 1); | ||
| 685 | if (!pg_offset) { | ||
| 686 | done = 1; | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | } else | ||
| 690 | pg_offset = PAGE_CACHE_SIZE; | ||
| 691 | |||
| 692 | if (page->index == tindex && trylock_page(page)) { | ||
| 693 | pg_len = xfs_probe_page(page, pg_offset); | ||
| 694 | unlock_page(page); | ||
| 695 | } | ||
| 696 | |||
| 697 | if (!pg_len) { | ||
| 698 | done = 1; | ||
| 699 | break; | ||
| 700 | } | ||
| 701 | |||
| 702 | total += pg_len; | ||
| 703 | tindex++; | ||
| 704 | } | ||
| 705 | |||
| 706 | pagevec_release(&pvec); | ||
| 707 | cond_resched(); | ||
| 708 | } | ||
| 709 | |||
| 710 | return total; | ||
| 711 | } | ||
| 712 | |||
| 713 | /* | ||
| 714 | * Test if a given page is suitable for writing as part of an unwritten | 636 | * Test if a given page is suitable for writing as part of an unwritten |
| 715 | * or delayed allocate extent. | 637 | * or delayed allocate extent. |
| 716 | */ | 638 | */ |
| @@ -731,9 +653,9 @@ xfs_is_delayed_page( | |||
| 731 | if (buffer_unwritten(bh)) | 653 | if (buffer_unwritten(bh)) |
| 732 | acceptable = (type == IO_UNWRITTEN); | 654 | acceptable = (type == IO_UNWRITTEN); |
| 733 | else if (buffer_delay(bh)) | 655 | else if (buffer_delay(bh)) |
| 734 | acceptable = (type == IO_DELAY); | 656 | acceptable = (type == IO_DELALLOC); |
| 735 | else if (buffer_dirty(bh) && buffer_mapped(bh)) | 657 | else if (buffer_dirty(bh) && buffer_mapped(bh)) |
| 736 | acceptable = (type == IO_NEW); | 658 | acceptable = (type == IO_OVERWRITE); |
| 737 | else | 659 | else |
| 738 | break; | 660 | break; |
| 739 | } while ((bh = bh->b_this_page) != head); | 661 | } while ((bh = bh->b_this_page) != head); |
| @@ -758,8 +680,7 @@ xfs_convert_page( | |||
| 758 | loff_t tindex, | 680 | loff_t tindex, |
| 759 | struct xfs_bmbt_irec *imap, | 681 | struct xfs_bmbt_irec *imap, |
| 760 | xfs_ioend_t **ioendp, | 682 | xfs_ioend_t **ioendp, |
| 761 | struct writeback_control *wbc, | 683 | struct writeback_control *wbc) |
| 762 | int all_bh) | ||
| 763 | { | 684 | { |
| 764 | struct buffer_head *bh, *head; | 685 | struct buffer_head *bh, *head; |
| 765 | xfs_off_t end_offset; | 686 | xfs_off_t end_offset; |
| @@ -814,37 +735,30 @@ xfs_convert_page( | |||
| 814 | continue; | 735 | continue; |
| 815 | } | 736 | } |
| 816 | 737 | ||
| 817 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | 738 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
| 739 | buffer_mapped(bh)) { | ||
| 818 | if (buffer_unwritten(bh)) | 740 | if (buffer_unwritten(bh)) |
| 819 | type = IO_UNWRITTEN; | 741 | type = IO_UNWRITTEN; |
| 742 | else if (buffer_delay(bh)) | ||
| 743 | type = IO_DELALLOC; | ||
| 820 | else | 744 | else |
| 821 | type = IO_DELAY; | 745 | type = IO_OVERWRITE; |
| 822 | 746 | ||
| 823 | if (!xfs_imap_valid(inode, imap, offset)) { | 747 | if (!xfs_imap_valid(inode, imap, offset)) { |
| 824 | done = 1; | 748 | done = 1; |
| 825 | continue; | 749 | continue; |
| 826 | } | 750 | } |
| 827 | 751 | ||
| 828 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 752 | lock_buffer(bh); |
| 829 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 753 | if (type != IO_OVERWRITE) |
| 830 | 754 | xfs_map_at_offset(inode, bh, imap, offset); | |
| 831 | xfs_map_at_offset(inode, bh, imap, offset); | ||
| 832 | xfs_add_to_ioend(inode, bh, offset, type, | 755 | xfs_add_to_ioend(inode, bh, offset, type, |
| 833 | ioendp, done); | 756 | ioendp, done); |
| 834 | 757 | ||
| 835 | page_dirty--; | 758 | page_dirty--; |
| 836 | count++; | 759 | count++; |
| 837 | } else { | 760 | } else { |
| 838 | type = IO_NEW; | 761 | done = 1; |
| 839 | if (buffer_mapped(bh) && all_bh) { | ||
| 840 | lock_buffer(bh); | ||
| 841 | xfs_add_to_ioend(inode, bh, offset, | ||
| 842 | type, ioendp, done); | ||
| 843 | count++; | ||
| 844 | page_dirty--; | ||
| 845 | } else { | ||
| 846 | done = 1; | ||
| 847 | } | ||
| 848 | } | 762 | } |
| 849 | } while (offset += len, (bh = bh->b_this_page) != head); | 763 | } while (offset += len, (bh = bh->b_this_page) != head); |
| 850 | 764 | ||
| @@ -876,7 +790,6 @@ xfs_cluster_write( | |||
| 876 | struct xfs_bmbt_irec *imap, | 790 | struct xfs_bmbt_irec *imap, |
| 877 | xfs_ioend_t **ioendp, | 791 | xfs_ioend_t **ioendp, |
| 878 | struct writeback_control *wbc, | 792 | struct writeback_control *wbc, |
| 879 | int all_bh, | ||
| 880 | pgoff_t tlast) | 793 | pgoff_t tlast) |
| 881 | { | 794 | { |
| 882 | struct pagevec pvec; | 795 | struct pagevec pvec; |
| @@ -891,7 +804,7 @@ xfs_cluster_write( | |||
| 891 | 804 | ||
| 892 | for (i = 0; i < pagevec_count(&pvec); i++) { | 805 | for (i = 0; i < pagevec_count(&pvec); i++) { |
| 893 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | 806 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, |
| 894 | imap, ioendp, wbc, all_bh); | 807 | imap, ioendp, wbc); |
| 895 | if (done) | 808 | if (done) |
| 896 | break; | 809 | break; |
| 897 | } | 810 | } |
| @@ -935,7 +848,7 @@ xfs_aops_discard_page( | |||
| 935 | struct buffer_head *bh, *head; | 848 | struct buffer_head *bh, *head; |
| 936 | loff_t offset = page_offset(page); | 849 | loff_t offset = page_offset(page); |
| 937 | 850 | ||
| 938 | if (!xfs_is_delayed_page(page, IO_DELAY)) | 851 | if (!xfs_is_delayed_page(page, IO_DELALLOC)) |
| 939 | goto out_invalidate; | 852 | goto out_invalidate; |
| 940 | 853 | ||
| 941 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 854 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| @@ -1002,10 +915,10 @@ xfs_vm_writepage( | |||
| 1002 | unsigned int type; | 915 | unsigned int type; |
| 1003 | __uint64_t end_offset; | 916 | __uint64_t end_offset; |
| 1004 | pgoff_t end_index, last_index; | 917 | pgoff_t end_index, last_index; |
| 1005 | ssize_t size, len; | 918 | ssize_t len; |
| 1006 | int flags, err, imap_valid = 0, uptodate = 1; | 919 | int err, imap_valid = 0, uptodate = 1; |
| 1007 | int count = 0; | 920 | int count = 0; |
| 1008 | int all_bh = 0; | 921 | int nonblocking = 0; |
| 1009 | 922 | ||
| 1010 | trace_xfs_writepage(inode, page, 0); | 923 | trace_xfs_writepage(inode, page, 0); |
| 1011 | 924 | ||
| @@ -1056,10 +969,14 @@ xfs_vm_writepage( | |||
| 1056 | 969 | ||
| 1057 | bh = head = page_buffers(page); | 970 | bh = head = page_buffers(page); |
| 1058 | offset = page_offset(page); | 971 | offset = page_offset(page); |
| 1059 | flags = BMAPI_READ; | 972 | type = IO_OVERWRITE; |
| 1060 | type = IO_NEW; | 973 | |
| 974 | if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) | ||
| 975 | nonblocking = 1; | ||
| 1061 | 976 | ||
| 1062 | do { | 977 | do { |
| 978 | int new_ioend = 0; | ||
| 979 | |||
| 1063 | if (offset >= end_offset) | 980 | if (offset >= end_offset) |
| 1064 | break; | 981 | break; |
| 1065 | if (!buffer_uptodate(bh)) | 982 | if (!buffer_uptodate(bh)) |
| @@ -1076,90 +993,54 @@ xfs_vm_writepage( | |||
| 1076 | continue; | 993 | continue; |
| 1077 | } | 994 | } |
| 1078 | 995 | ||
| 1079 | if (imap_valid) | 996 | if (buffer_unwritten(bh)) { |
| 1080 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 997 | if (type != IO_UNWRITTEN) { |
| 1081 | |||
| 1082 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | ||
| 1083 | int new_ioend = 0; | ||
| 1084 | |||
| 1085 | /* | ||
| 1086 | * Make sure we don't use a read-only iomap | ||
| 1087 | */ | ||
| 1088 | if (flags == BMAPI_READ) | ||
| 1089 | imap_valid = 0; | ||
| 1090 | |||
| 1091 | if (buffer_unwritten(bh)) { | ||
| 1092 | type = IO_UNWRITTEN; | 998 | type = IO_UNWRITTEN; |
| 1093 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; | 999 | imap_valid = 0; |
| 1094 | } else if (buffer_delay(bh)) { | ||
| 1095 | type = IO_DELAY; | ||
| 1096 | flags = BMAPI_ALLOCATE; | ||
| 1097 | |||
| 1098 | if (wbc->sync_mode == WB_SYNC_NONE) | ||
| 1099 | flags |= BMAPI_TRYLOCK; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | if (!imap_valid) { | ||
| 1103 | /* | ||
| 1104 | * If we didn't have a valid mapping then we | ||
| 1105 | * need to ensure that we put the new mapping | ||
| 1106 | * in a new ioend structure. This needs to be | ||
| 1107 | * done to ensure that the ioends correctly | ||
| 1108 | * reflect the block mappings at io completion | ||
| 1109 | * for unwritten extent conversion. | ||
| 1110 | */ | ||
| 1111 | new_ioend = 1; | ||
| 1112 | err = xfs_map_blocks(inode, offset, len, | ||
| 1113 | &imap, flags); | ||
| 1114 | if (err) | ||
| 1115 | goto error; | ||
| 1116 | imap_valid = xfs_imap_valid(inode, &imap, | ||
| 1117 | offset); | ||
| 1118 | } | 1000 | } |
| 1119 | if (imap_valid) { | 1001 | } else if (buffer_delay(bh)) { |
| 1120 | xfs_map_at_offset(inode, bh, &imap, offset); | 1002 | if (type != IO_DELALLOC) { |
| 1121 | xfs_add_to_ioend(inode, bh, offset, type, | 1003 | type = IO_DELALLOC; |
| 1122 | &ioend, new_ioend); | 1004 | imap_valid = 0; |
| 1123 | count++; | ||
| 1124 | } | 1005 | } |
| 1125 | } else if (buffer_uptodate(bh)) { | 1006 | } else if (buffer_uptodate(bh)) { |
| 1126 | /* | 1007 | if (type != IO_OVERWRITE) { |
| 1127 | * we got here because the buffer is already mapped. | 1008 | type = IO_OVERWRITE; |
| 1128 | * That means it must already have extents allocated | 1009 | imap_valid = 0; |
| 1129 | * underneath it. Map the extent by reading it. | ||
| 1130 | */ | ||
| 1131 | if (!imap_valid || flags != BMAPI_READ) { | ||
| 1132 | flags = BMAPI_READ; | ||
| 1133 | size = xfs_probe_cluster(inode, page, bh, head); | ||
| 1134 | err = xfs_map_blocks(inode, offset, size, | ||
| 1135 | &imap, flags); | ||
| 1136 | if (err) | ||
| 1137 | goto error; | ||
| 1138 | imap_valid = xfs_imap_valid(inode, &imap, | ||
| 1139 | offset); | ||
| 1140 | } | 1010 | } |
| 1011 | } else { | ||
| 1012 | if (PageUptodate(page)) { | ||
| 1013 | ASSERT(buffer_mapped(bh)); | ||
| 1014 | imap_valid = 0; | ||
| 1015 | } | ||
| 1016 | continue; | ||
| 1017 | } | ||
| 1141 | 1018 | ||
| 1019 | if (imap_valid) | ||
| 1020 | imap_valid = xfs_imap_valid(inode, &imap, offset); | ||
| 1021 | if (!imap_valid) { | ||
| 1142 | /* | 1022 | /* |
| 1143 | * We set the type to IO_NEW in case we are doing a | 1023 | * If we didn't have a valid mapping then we need to |
| 1144 | * small write at EOF that is extending the file but | 1024 | * put the new mapping into a separate ioend structure. |
| 1145 | * without needing an allocation. We need to update the | 1025 | * This ensures non-contiguous extents always have |
| 1146 | * file size on I/O completion in this case so it is | 1026 | * separate ioends, which is particularly important |
| 1147 | * the same case as having just allocated a new extent | 1027 | * for unwritten extent conversion at I/O completion |
| 1148 | * that we are writing into for the first time. | 1028 | * time. |
| 1149 | */ | 1029 | */ |
| 1150 | type = IO_NEW; | 1030 | new_ioend = 1; |
| 1151 | if (trylock_buffer(bh)) { | 1031 | err = xfs_map_blocks(inode, offset, &imap, type, |
| 1152 | if (imap_valid) | 1032 | nonblocking); |
| 1153 | all_bh = 1; | 1033 | if (err) |
| 1154 | xfs_add_to_ioend(inode, bh, offset, type, | 1034 | goto error; |
| 1155 | &ioend, !imap_valid); | 1035 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
| 1156 | count++; | 1036 | } |
| 1157 | } else { | 1037 | if (imap_valid) { |
| 1158 | imap_valid = 0; | 1038 | lock_buffer(bh); |
| 1159 | } | 1039 | if (type != IO_OVERWRITE) |
| 1160 | } else if (PageUptodate(page)) { | 1040 | xfs_map_at_offset(inode, bh, &imap, offset); |
| 1161 | ASSERT(buffer_mapped(bh)); | 1041 | xfs_add_to_ioend(inode, bh, offset, type, &ioend, |
| 1162 | imap_valid = 0; | 1042 | new_ioend); |
| 1043 | count++; | ||
| 1163 | } | 1044 | } |
| 1164 | 1045 | ||
| 1165 | if (!iohead) | 1046 | if (!iohead) |
| @@ -1188,7 +1069,7 @@ xfs_vm_writepage( | |||
| 1188 | end_index = last_index; | 1069 | end_index = last_index; |
| 1189 | 1070 | ||
| 1190 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, | 1071 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, |
| 1191 | wbc, all_bh, end_index); | 1072 | wbc, end_index); |
| 1192 | } | 1073 | } |
| 1193 | 1074 | ||
| 1194 | if (iohead) | 1075 | if (iohead) |
| @@ -1257,13 +1138,19 @@ __xfs_get_blocks( | |||
| 1257 | int create, | 1138 | int create, |
| 1258 | int direct) | 1139 | int direct) |
| 1259 | { | 1140 | { |
| 1260 | int flags = create ? BMAPI_WRITE : BMAPI_READ; | 1141 | struct xfs_inode *ip = XFS_I(inode); |
| 1142 | struct xfs_mount *mp = ip->i_mount; | ||
| 1143 | xfs_fileoff_t offset_fsb, end_fsb; | ||
| 1144 | int error = 0; | ||
| 1145 | int lockmode = 0; | ||
| 1261 | struct xfs_bmbt_irec imap; | 1146 | struct xfs_bmbt_irec imap; |
| 1147 | int nimaps = 1; | ||
| 1262 | xfs_off_t offset; | 1148 | xfs_off_t offset; |
| 1263 | ssize_t size; | 1149 | ssize_t size; |
| 1264 | int nimap = 1; | ||
| 1265 | int new = 0; | 1150 | int new = 0; |
| 1266 | int error; | 1151 | |
| 1152 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 1153 | return -XFS_ERROR(EIO); | ||
| 1267 | 1154 | ||
| 1268 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1155 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
| 1269 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1156 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
| @@ -1272,15 +1159,45 @@ __xfs_get_blocks( | |||
| 1272 | if (!create && direct && offset >= i_size_read(inode)) | 1159 | if (!create && direct && offset >= i_size_read(inode)) |
| 1273 | return 0; | 1160 | return 0; |
| 1274 | 1161 | ||
| 1275 | if (direct && create) | 1162 | if (create) { |
| 1276 | flags |= BMAPI_DIRECT; | 1163 | lockmode = XFS_ILOCK_EXCL; |
| 1164 | xfs_ilock(ip, lockmode); | ||
| 1165 | } else { | ||
| 1166 | lockmode = xfs_ilock_map_shared(ip); | ||
| 1167 | } | ||
| 1168 | |||
| 1169 | ASSERT(offset <= mp->m_maxioffset); | ||
| 1170 | if (offset + size > mp->m_maxioffset) | ||
| 1171 | size = mp->m_maxioffset - offset; | ||
| 1172 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); | ||
| 1173 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
| 1277 | 1174 | ||
| 1278 | error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, | 1175 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, |
| 1279 | &new); | 1176 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); |
| 1280 | if (error) | 1177 | if (error) |
| 1281 | return -error; | 1178 | goto out_unlock; |
| 1282 | if (nimap == 0) | 1179 | |
| 1283 | return 0; | 1180 | if (create && |
| 1181 | (!nimaps || | ||
| 1182 | (imap.br_startblock == HOLESTARTBLOCK || | ||
| 1183 | imap.br_startblock == DELAYSTARTBLOCK))) { | ||
| 1184 | if (direct) { | ||
| 1185 | error = xfs_iomap_write_direct(ip, offset, size, | ||
| 1186 | &imap, nimaps); | ||
| 1187 | } else { | ||
| 1188 | error = xfs_iomap_write_delay(ip, offset, size, &imap); | ||
| 1189 | } | ||
| 1190 | if (error) | ||
| 1191 | goto out_unlock; | ||
| 1192 | |||
| 1193 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); | ||
| 1194 | } else if (nimaps) { | ||
| 1195 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); | ||
| 1196 | } else { | ||
| 1197 | trace_xfs_get_blocks_notfound(ip, offset, size); | ||
| 1198 | goto out_unlock; | ||
| 1199 | } | ||
| 1200 | xfs_iunlock(ip, lockmode); | ||
| 1284 | 1201 | ||
| 1285 | if (imap.br_startblock != HOLESTARTBLOCK && | 1202 | if (imap.br_startblock != HOLESTARTBLOCK && |
| 1286 | imap.br_startblock != DELAYSTARTBLOCK) { | 1203 | imap.br_startblock != DELAYSTARTBLOCK) { |
| @@ -1347,6 +1264,10 @@ __xfs_get_blocks( | |||
| 1347 | } | 1264 | } |
| 1348 | 1265 | ||
| 1349 | return 0; | 1266 | return 0; |
| 1267 | |||
| 1268 | out_unlock: | ||
| 1269 | xfs_iunlock(ip, lockmode); | ||
| 1270 | return -error; | ||
| 1350 | } | 1271 | } |
| 1351 | 1272 | ||
| 1352 | int | 1273 | int |
| @@ -1434,7 +1355,7 @@ xfs_vm_direct_IO( | |||
| 1434 | ssize_t ret; | 1355 | ssize_t ret; |
| 1435 | 1356 | ||
| 1436 | if (rw & WRITE) { | 1357 | if (rw & WRITE) { |
| 1437 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); | 1358 | iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); |
| 1438 | 1359 | ||
| 1439 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1360 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
| 1440 | offset, nr_segs, | 1361 | offset, nr_segs, |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index c5057fb6237a..71f721e1a71f 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h | |||
| @@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue; | |||
| 23 | extern mempool_t *xfs_ioend_pool; | 23 | extern mempool_t *xfs_ioend_pool; |
| 24 | 24 | ||
| 25 | /* | 25 | /* |
| 26 | * Types of I/O for bmap clustering and I/O completion tracking. | ||
| 27 | */ | ||
| 28 | enum { | ||
| 29 | IO_DIRECT = 0, /* special case for direct I/O ioends */ | ||
| 30 | IO_DELALLOC, /* mapping covers delalloc region */ | ||
| 31 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | ||
| 32 | IO_OVERWRITE, /* mapping covers already allocated extent */ | ||
| 33 | }; | ||
| 34 | |||
| 35 | #define XFS_IO_TYPES \ | ||
| 36 | { 0, "" }, \ | ||
| 37 | { IO_DELALLOC, "delalloc" }, \ | ||
| 38 | { IO_UNWRITTEN, "unwritten" }, \ | ||
| 39 | { IO_OVERWRITE, "overwrite" } | ||
| 40 | |||
| 41 | /* | ||
| 26 | * xfs_ioend struct manages large extent writes for XFS. | 42 | * xfs_ioend struct manages large extent writes for XFS. |
| 27 | * It can manage several multi-page bio's at once. | 43 | * It can manage several multi-page bio's at once. |
| 28 | */ | 44 | */ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 4c5deb6e9e31..92f1f2acc6ab 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -44,12 +44,7 @@ | |||
| 44 | 44 | ||
| 45 | static kmem_zone_t *xfs_buf_zone; | 45 | static kmem_zone_t *xfs_buf_zone; |
| 46 | STATIC int xfsbufd(void *); | 46 | STATIC int xfsbufd(void *); |
| 47 | STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); | ||
| 48 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 47 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
| 49 | static struct shrinker xfs_buf_shake = { | ||
| 50 | .shrink = xfsbufd_wakeup, | ||
| 51 | .seeks = DEFAULT_SEEKS, | ||
| 52 | }; | ||
| 53 | 48 | ||
| 54 | static struct workqueue_struct *xfslogd_workqueue; | 49 | static struct workqueue_struct *xfslogd_workqueue; |
| 55 | struct workqueue_struct *xfsdatad_workqueue; | 50 | struct workqueue_struct *xfsdatad_workqueue; |
| @@ -168,8 +163,79 @@ test_page_region( | |||
| 168 | } | 163 | } |
| 169 | 164 | ||
| 170 | /* | 165 | /* |
| 171 | * Internal xfs_buf_t object manipulation | 166 | * xfs_buf_lru_add - add a buffer to the LRU. |
| 167 | * | ||
| 168 | * The LRU takes a new reference to the buffer so that it will only be freed | ||
| 169 | * once the shrinker takes the buffer off the LRU. | ||
| 172 | */ | 170 | */ |
| 171 | STATIC void | ||
| 172 | xfs_buf_lru_add( | ||
| 173 | struct xfs_buf *bp) | ||
| 174 | { | ||
| 175 | struct xfs_buftarg *btp = bp->b_target; | ||
| 176 | |||
| 177 | spin_lock(&btp->bt_lru_lock); | ||
| 178 | if (list_empty(&bp->b_lru)) { | ||
| 179 | atomic_inc(&bp->b_hold); | ||
| 180 | list_add_tail(&bp->b_lru, &btp->bt_lru); | ||
| 181 | btp->bt_lru_nr++; | ||
| 182 | } | ||
| 183 | spin_unlock(&btp->bt_lru_lock); | ||
| 184 | } | ||
| 185 | |||
| 186 | /* | ||
| 187 | * xfs_buf_lru_del - remove a buffer from the LRU | ||
| 188 | * | ||
| 189 | * The unlocked check is safe here because it only occurs when there are not | ||
| 190 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | ||
| 191 | * to optimise the shrinker removing the buffer from the LRU and calling | ||
| 192 | * xfs_buf_free(). i.e. it removes an unneccessary round trip on the | ||
| 193 | * bt_lru_lock. | ||
| 194 | */ | ||
| 195 | STATIC void | ||
| 196 | xfs_buf_lru_del( | ||
| 197 | struct xfs_buf *bp) | ||
| 198 | { | ||
| 199 | struct xfs_buftarg *btp = bp->b_target; | ||
| 200 | |||
| 201 | if (list_empty(&bp->b_lru)) | ||
| 202 | return; | ||
| 203 | |||
| 204 | spin_lock(&btp->bt_lru_lock); | ||
| 205 | if (!list_empty(&bp->b_lru)) { | ||
| 206 | list_del_init(&bp->b_lru); | ||
| 207 | btp->bt_lru_nr--; | ||
| 208 | } | ||
| 209 | spin_unlock(&btp->bt_lru_lock); | ||
| 210 | } | ||
| 211 | |||
| 212 | /* | ||
| 213 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the | ||
| 214 | * b_lru_ref count so that the buffer is freed immediately when the buffer | ||
| 215 | * reference count falls to zero. If the buffer is already on the LRU, we need | ||
| 216 | * to remove the reference that LRU holds on the buffer. | ||
| 217 | * | ||
| 218 | * This prevents build-up of stale buffers on the LRU. | ||
| 219 | */ | ||
| 220 | void | ||
| 221 | xfs_buf_stale( | ||
| 222 | struct xfs_buf *bp) | ||
| 223 | { | ||
| 224 | bp->b_flags |= XBF_STALE; | ||
| 225 | atomic_set(&(bp)->b_lru_ref, 0); | ||
| 226 | if (!list_empty(&bp->b_lru)) { | ||
| 227 | struct xfs_buftarg *btp = bp->b_target; | ||
| 228 | |||
| 229 | spin_lock(&btp->bt_lru_lock); | ||
| 230 | if (!list_empty(&bp->b_lru)) { | ||
| 231 | list_del_init(&bp->b_lru); | ||
| 232 | btp->bt_lru_nr--; | ||
| 233 | atomic_dec(&bp->b_hold); | ||
| 234 | } | ||
| 235 | spin_unlock(&btp->bt_lru_lock); | ||
| 236 | } | ||
| 237 | ASSERT(atomic_read(&bp->b_hold) >= 1); | ||
| 238 | } | ||
| 173 | 239 | ||
| 174 | STATIC void | 240 | STATIC void |
| 175 | _xfs_buf_initialize( | 241 | _xfs_buf_initialize( |
| @@ -186,7 +252,9 @@ _xfs_buf_initialize( | |||
| 186 | 252 | ||
| 187 | memset(bp, 0, sizeof(xfs_buf_t)); | 253 | memset(bp, 0, sizeof(xfs_buf_t)); |
| 188 | atomic_set(&bp->b_hold, 1); | 254 | atomic_set(&bp->b_hold, 1); |
| 255 | atomic_set(&bp->b_lru_ref, 1); | ||
| 189 | init_completion(&bp->b_iowait); | 256 | init_completion(&bp->b_iowait); |
| 257 | INIT_LIST_HEAD(&bp->b_lru); | ||
| 190 | INIT_LIST_HEAD(&bp->b_list); | 258 | INIT_LIST_HEAD(&bp->b_list); |
| 191 | RB_CLEAR_NODE(&bp->b_rbnode); | 259 | RB_CLEAR_NODE(&bp->b_rbnode); |
| 192 | sema_init(&bp->b_sema, 0); /* held, no waiters */ | 260 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
| @@ -262,6 +330,8 @@ xfs_buf_free( | |||
| 262 | { | 330 | { |
| 263 | trace_xfs_buf_free(bp, _RET_IP_); | 331 | trace_xfs_buf_free(bp, _RET_IP_); |
| 264 | 332 | ||
| 333 | ASSERT(list_empty(&bp->b_lru)); | ||
| 334 | |||
| 265 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 335 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { |
| 266 | uint i; | 336 | uint i; |
| 267 | 337 | ||
| @@ -337,7 +407,6 @@ _xfs_buf_lookup_pages( | |||
| 337 | __func__, gfp_mask); | 407 | __func__, gfp_mask); |
| 338 | 408 | ||
| 339 | XFS_STATS_INC(xb_page_retries); | 409 | XFS_STATS_INC(xb_page_retries); |
| 340 | xfsbufd_wakeup(NULL, 0, gfp_mask); | ||
| 341 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 410 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
| 342 | goto retry; | 411 | goto retry; |
| 343 | } | 412 | } |
| @@ -828,6 +897,7 @@ xfs_buf_rele( | |||
| 828 | 897 | ||
| 829 | if (!pag) { | 898 | if (!pag) { |
| 830 | ASSERT(!bp->b_relse); | 899 | ASSERT(!bp->b_relse); |
| 900 | ASSERT(list_empty(&bp->b_lru)); | ||
| 831 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | 901 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); |
| 832 | if (atomic_dec_and_test(&bp->b_hold)) | 902 | if (atomic_dec_and_test(&bp->b_hold)) |
| 833 | xfs_buf_free(bp); | 903 | xfs_buf_free(bp); |
| @@ -835,13 +905,19 @@ xfs_buf_rele( | |||
| 835 | } | 905 | } |
| 836 | 906 | ||
| 837 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | 907 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); |
| 908 | |||
| 838 | ASSERT(atomic_read(&bp->b_hold) > 0); | 909 | ASSERT(atomic_read(&bp->b_hold) > 0); |
| 839 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 910 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
| 840 | if (bp->b_relse) { | 911 | if (bp->b_relse) { |
| 841 | atomic_inc(&bp->b_hold); | 912 | atomic_inc(&bp->b_hold); |
| 842 | spin_unlock(&pag->pag_buf_lock); | 913 | spin_unlock(&pag->pag_buf_lock); |
| 843 | bp->b_relse(bp); | 914 | bp->b_relse(bp); |
| 915 | } else if (!(bp->b_flags & XBF_STALE) && | ||
| 916 | atomic_read(&bp->b_lru_ref)) { | ||
| 917 | xfs_buf_lru_add(bp); | ||
| 918 | spin_unlock(&pag->pag_buf_lock); | ||
| 844 | } else { | 919 | } else { |
| 920 | xfs_buf_lru_del(bp); | ||
| 845 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 921 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
| 846 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 922 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
| 847 | spin_unlock(&pag->pag_buf_lock); | 923 | spin_unlock(&pag->pag_buf_lock); |
| @@ -1438,51 +1514,84 @@ xfs_buf_iomove( | |||
| 1438 | */ | 1514 | */ |
| 1439 | 1515 | ||
| 1440 | /* | 1516 | /* |
| 1441 | * Wait for any bufs with callbacks that have been submitted but | 1517 | * Wait for any bufs with callbacks that have been submitted but have not yet |
| 1442 | * have not yet returned... walk the hash list for the target. | 1518 | * returned. These buffers will have an elevated hold count, so wait on those |
| 1519 | * while freeing all the buffers only held by the LRU. | ||
| 1443 | */ | 1520 | */ |
| 1444 | void | 1521 | void |
| 1445 | xfs_wait_buftarg( | 1522 | xfs_wait_buftarg( |
| 1446 | struct xfs_buftarg *btp) | 1523 | struct xfs_buftarg *btp) |
| 1447 | { | 1524 | { |
| 1448 | struct xfs_perag *pag; | 1525 | struct xfs_buf *bp; |
| 1449 | uint i; | ||
| 1450 | 1526 | ||
| 1451 | for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { | 1527 | restart: |
| 1452 | pag = xfs_perag_get(btp->bt_mount, i); | 1528 | spin_lock(&btp->bt_lru_lock); |
| 1453 | spin_lock(&pag->pag_buf_lock); | 1529 | while (!list_empty(&btp->bt_lru)) { |
| 1454 | while (rb_first(&pag->pag_buf_tree)) { | 1530 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
| 1455 | spin_unlock(&pag->pag_buf_lock); | 1531 | if (atomic_read(&bp->b_hold) > 1) { |
| 1532 | spin_unlock(&btp->bt_lru_lock); | ||
| 1456 | delay(100); | 1533 | delay(100); |
| 1457 | spin_lock(&pag->pag_buf_lock); | 1534 | goto restart; |
| 1458 | } | 1535 | } |
| 1459 | spin_unlock(&pag->pag_buf_lock); | 1536 | /* |
| 1460 | xfs_perag_put(pag); | 1537 | * clear the LRU reference count so the bufer doesn't get |
| 1538 | * ignored in xfs_buf_rele(). | ||
| 1539 | */ | ||
| 1540 | atomic_set(&bp->b_lru_ref, 0); | ||
| 1541 | spin_unlock(&btp->bt_lru_lock); | ||
| 1542 | xfs_buf_rele(bp); | ||
| 1543 | spin_lock(&btp->bt_lru_lock); | ||
| 1461 | } | 1544 | } |
| 1545 | spin_unlock(&btp->bt_lru_lock); | ||
| 1462 | } | 1546 | } |
| 1463 | 1547 | ||
| 1464 | /* | 1548 | int |
| 1465 | * buftarg list for delwrite queue processing | 1549 | xfs_buftarg_shrink( |
| 1466 | */ | 1550 | struct shrinker *shrink, |
| 1467 | static LIST_HEAD(xfs_buftarg_list); | 1551 | int nr_to_scan, |
| 1468 | static DEFINE_SPINLOCK(xfs_buftarg_lock); | 1552 | gfp_t mask) |
| 1469 | |||
| 1470 | STATIC void | ||
| 1471 | xfs_register_buftarg( | ||
| 1472 | xfs_buftarg_t *btp) | ||
| 1473 | { | 1553 | { |
| 1474 | spin_lock(&xfs_buftarg_lock); | 1554 | struct xfs_buftarg *btp = container_of(shrink, |
| 1475 | list_add(&btp->bt_list, &xfs_buftarg_list); | 1555 | struct xfs_buftarg, bt_shrinker); |
| 1476 | spin_unlock(&xfs_buftarg_lock); | 1556 | struct xfs_buf *bp; |
| 1477 | } | 1557 | LIST_HEAD(dispose); |
| 1478 | 1558 | ||
| 1479 | STATIC void | 1559 | if (!nr_to_scan) |
| 1480 | xfs_unregister_buftarg( | 1560 | return btp->bt_lru_nr; |
| 1481 | xfs_buftarg_t *btp) | 1561 | |
| 1482 | { | 1562 | spin_lock(&btp->bt_lru_lock); |
| 1483 | spin_lock(&xfs_buftarg_lock); | 1563 | while (!list_empty(&btp->bt_lru)) { |
| 1484 | list_del(&btp->bt_list); | 1564 | if (nr_to_scan-- <= 0) |
| 1485 | spin_unlock(&xfs_buftarg_lock); | 1565 | break; |
| 1566 | |||
| 1567 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | ||
| 1568 | |||
| 1569 | /* | ||
| 1570 | * Decrement the b_lru_ref count unless the value is already | ||
| 1571 | * zero. If the value is already zero, we need to reclaim the | ||
| 1572 | * buffer, otherwise it gets another trip through the LRU. | ||
| 1573 | */ | ||
| 1574 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
| 1575 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
| 1576 | continue; | ||
| 1577 | } | ||
| 1578 | |||
| 1579 | /* | ||
| 1580 | * remove the buffer from the LRU now to avoid needing another | ||
| 1581 | * lock round trip inside xfs_buf_rele(). | ||
| 1582 | */ | ||
| 1583 | list_move(&bp->b_lru, &dispose); | ||
| 1584 | btp->bt_lru_nr--; | ||
| 1585 | } | ||
| 1586 | spin_unlock(&btp->bt_lru_lock); | ||
| 1587 | |||
| 1588 | while (!list_empty(&dispose)) { | ||
| 1589 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | ||
| 1590 | list_del_init(&bp->b_lru); | ||
| 1591 | xfs_buf_rele(bp); | ||
| 1592 | } | ||
| 1593 | |||
| 1594 | return btp->bt_lru_nr; | ||
| 1486 | } | 1595 | } |
| 1487 | 1596 | ||
| 1488 | void | 1597 | void |
| @@ -1490,17 +1599,14 @@ xfs_free_buftarg( | |||
| 1490 | struct xfs_mount *mp, | 1599 | struct xfs_mount *mp, |
| 1491 | struct xfs_buftarg *btp) | 1600 | struct xfs_buftarg *btp) |
| 1492 | { | 1601 | { |
| 1602 | unregister_shrinker(&btp->bt_shrinker); | ||
| 1603 | |||
| 1493 | xfs_flush_buftarg(btp, 1); | 1604 | xfs_flush_buftarg(btp, 1); |
| 1494 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1605 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
| 1495 | xfs_blkdev_issue_flush(btp); | 1606 | xfs_blkdev_issue_flush(btp); |
| 1496 | iput(btp->bt_mapping->host); | 1607 | iput(btp->bt_mapping->host); |
| 1497 | 1608 | ||
| 1498 | /* Unregister the buftarg first so that we don't get a | ||
| 1499 | * wakeup finding a non-existent task | ||
| 1500 | */ | ||
| 1501 | xfs_unregister_buftarg(btp); | ||
| 1502 | kthread_stop(btp->bt_task); | 1609 | kthread_stop(btp->bt_task); |
| 1503 | |||
| 1504 | kmem_free(btp); | 1610 | kmem_free(btp); |
| 1505 | } | 1611 | } |
| 1506 | 1612 | ||
| @@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue( | |||
| 1597 | xfs_buftarg_t *btp, | 1703 | xfs_buftarg_t *btp, |
| 1598 | const char *fsname) | 1704 | const char *fsname) |
| 1599 | { | 1705 | { |
| 1600 | int error = 0; | ||
| 1601 | |||
| 1602 | INIT_LIST_HEAD(&btp->bt_list); | ||
| 1603 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1706 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); |
| 1604 | spin_lock_init(&btp->bt_delwrite_lock); | 1707 | spin_lock_init(&btp->bt_delwrite_lock); |
| 1605 | btp->bt_flags = 0; | 1708 | btp->bt_flags = 0; |
| 1606 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | 1709 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); |
| 1607 | if (IS_ERR(btp->bt_task)) { | 1710 | if (IS_ERR(btp->bt_task)) |
| 1608 | error = PTR_ERR(btp->bt_task); | 1711 | return PTR_ERR(btp->bt_task); |
| 1609 | goto out_error; | 1712 | return 0; |
| 1610 | } | ||
| 1611 | xfs_register_buftarg(btp); | ||
| 1612 | out_error: | ||
| 1613 | return error; | ||
| 1614 | } | 1713 | } |
| 1615 | 1714 | ||
| 1616 | xfs_buftarg_t * | 1715 | xfs_buftarg_t * |
| @@ -1627,12 +1726,17 @@ xfs_alloc_buftarg( | |||
| 1627 | btp->bt_mount = mp; | 1726 | btp->bt_mount = mp; |
| 1628 | btp->bt_dev = bdev->bd_dev; | 1727 | btp->bt_dev = bdev->bd_dev; |
| 1629 | btp->bt_bdev = bdev; | 1728 | btp->bt_bdev = bdev; |
| 1729 | INIT_LIST_HEAD(&btp->bt_lru); | ||
| 1730 | spin_lock_init(&btp->bt_lru_lock); | ||
| 1630 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1731 | if (xfs_setsize_buftarg_early(btp, bdev)) |
| 1631 | goto error; | 1732 | goto error; |
| 1632 | if (xfs_mapping_buftarg(btp, bdev)) | 1733 | if (xfs_mapping_buftarg(btp, bdev)) |
| 1633 | goto error; | 1734 | goto error; |
| 1634 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1735 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
| 1635 | goto error; | 1736 | goto error; |
| 1737 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | ||
| 1738 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | ||
| 1739 | register_shrinker(&btp->bt_shrinker); | ||
| 1636 | return btp; | 1740 | return btp; |
| 1637 | 1741 | ||
| 1638 | error: | 1742 | error: |
| @@ -1737,27 +1841,6 @@ xfs_buf_runall_queues( | |||
| 1737 | flush_workqueue(queue); | 1841 | flush_workqueue(queue); |
| 1738 | } | 1842 | } |
| 1739 | 1843 | ||
| 1740 | STATIC int | ||
| 1741 | xfsbufd_wakeup( | ||
| 1742 | struct shrinker *shrink, | ||
| 1743 | int priority, | ||
| 1744 | gfp_t mask) | ||
| 1745 | { | ||
| 1746 | xfs_buftarg_t *btp; | ||
| 1747 | |||
| 1748 | spin_lock(&xfs_buftarg_lock); | ||
| 1749 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { | ||
| 1750 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) | ||
| 1751 | continue; | ||
| 1752 | if (list_empty(&btp->bt_delwrite_queue)) | ||
| 1753 | continue; | ||
| 1754 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); | ||
| 1755 | wake_up_process(btp->bt_task); | ||
| 1756 | } | ||
| 1757 | spin_unlock(&xfs_buftarg_lock); | ||
| 1758 | return 0; | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | /* | 1844 | /* |
| 1762 | * Move as many buffers as specified to the supplied list | 1845 | * Move as many buffers as specified to the supplied list |
| 1763 | * idicating if we skipped any buffers to prevent deadlocks. | 1846 | * idicating if we skipped any buffers to prevent deadlocks. |
| @@ -1952,7 +2035,6 @@ xfs_buf_init(void) | |||
| 1952 | if (!xfsconvertd_workqueue) | 2035 | if (!xfsconvertd_workqueue) |
| 1953 | goto out_destroy_xfsdatad_workqueue; | 2036 | goto out_destroy_xfsdatad_workqueue; |
| 1954 | 2037 | ||
| 1955 | register_shrinker(&xfs_buf_shake); | ||
| 1956 | return 0; | 2038 | return 0; |
| 1957 | 2039 | ||
| 1958 | out_destroy_xfsdatad_workqueue: | 2040 | out_destroy_xfsdatad_workqueue: |
| @@ -1968,7 +2050,6 @@ xfs_buf_init(void) | |||
| 1968 | void | 2050 | void |
| 1969 | xfs_buf_terminate(void) | 2051 | xfs_buf_terminate(void) |
| 1970 | { | 2052 | { |
| 1971 | unregister_shrinker(&xfs_buf_shake); | ||
| 1972 | destroy_workqueue(xfsconvertd_workqueue); | 2053 | destroy_workqueue(xfsconvertd_workqueue); |
| 1973 | destroy_workqueue(xfsdatad_workqueue); | 2054 | destroy_workqueue(xfsdatad_workqueue); |
| 1974 | destroy_workqueue(xfslogd_workqueue); | 2055 | destroy_workqueue(xfslogd_workqueue); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 383a3f37cf98..a76c2428faff 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
| @@ -128,10 +128,15 @@ typedef struct xfs_buftarg { | |||
| 128 | 128 | ||
| 129 | /* per device delwri queue */ | 129 | /* per device delwri queue */ |
| 130 | struct task_struct *bt_task; | 130 | struct task_struct *bt_task; |
| 131 | struct list_head bt_list; | ||
| 132 | struct list_head bt_delwrite_queue; | 131 | struct list_head bt_delwrite_queue; |
| 133 | spinlock_t bt_delwrite_lock; | 132 | spinlock_t bt_delwrite_lock; |
| 134 | unsigned long bt_flags; | 133 | unsigned long bt_flags; |
| 134 | |||
| 135 | /* LRU control structures */ | ||
| 136 | struct shrinker bt_shrinker; | ||
| 137 | struct list_head bt_lru; | ||
| 138 | spinlock_t bt_lru_lock; | ||
| 139 | unsigned int bt_lru_nr; | ||
| 135 | } xfs_buftarg_t; | 140 | } xfs_buftarg_t; |
| 136 | 141 | ||
| 137 | /* | 142 | /* |
| @@ -164,9 +169,11 @@ typedef struct xfs_buf { | |||
| 164 | xfs_off_t b_file_offset; /* offset in file */ | 169 | xfs_off_t b_file_offset; /* offset in file */ |
| 165 | size_t b_buffer_length;/* size of buffer in bytes */ | 170 | size_t b_buffer_length;/* size of buffer in bytes */ |
| 166 | atomic_t b_hold; /* reference count */ | 171 | atomic_t b_hold; /* reference count */ |
| 172 | atomic_t b_lru_ref; /* lru reclaim ref count */ | ||
| 167 | xfs_buf_flags_t b_flags; /* status flags */ | 173 | xfs_buf_flags_t b_flags; /* status flags */ |
| 168 | struct semaphore b_sema; /* semaphore for lockables */ | 174 | struct semaphore b_sema; /* semaphore for lockables */ |
| 169 | 175 | ||
| 176 | struct list_head b_lru; /* lru list */ | ||
| 170 | wait_queue_head_t b_waiters; /* unpin waiters */ | 177 | wait_queue_head_t b_waiters; /* unpin waiters */ |
| 171 | struct list_head b_list; | 178 | struct list_head b_list; |
| 172 | struct xfs_perag *b_pag; /* contains rbtree root */ | 179 | struct xfs_perag *b_pag; /* contains rbtree root */ |
| @@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void); | |||
| 264 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ | 271 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
| 265 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) | 272 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
| 266 | 273 | ||
| 267 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) | 274 | void xfs_buf_stale(struct xfs_buf *bp); |
| 275 | #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); | ||
| 268 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 276 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
| 269 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 277 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
| 270 | #define XFS_BUF_SUPER_STALE(bp) do { \ | 278 | #define XFS_BUF_SUPER_STALE(bp) do { \ |
| @@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void); | |||
| 328 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 336 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
| 329 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 337 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
| 330 | 338 | ||
| 331 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) | 339 | static inline void |
| 340 | xfs_buf_set_ref( | ||
| 341 | struct xfs_buf *bp, | ||
| 342 | int lru_ref) | ||
| 343 | { | ||
| 344 | atomic_set(&bp->b_lru_ref, lru_ref); | ||
| 345 | } | ||
| 346 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | ||
| 332 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 347 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
| 333 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) | ||
| 334 | 348 | ||
| 335 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | 349 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) |
| 336 | 350 | ||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3764d74790ec..fc0114da7fdd 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
| @@ -70,8 +70,16 @@ xfs_fs_encode_fh( | |||
| 70 | else | 70 | else |
| 71 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
| 72 | 72 | ||
| 73 | /* filesystem may contain 64bit inode numbers */ | 73 | /* |
| 74 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | 74 | * If the the filesystem may contain 64bit inode numbers, we need |
| 75 | * to use larger file handles that can represent them. | ||
| 76 | * | ||
| 77 | * While we only allocate inodes that do not fit into 32 bits any | ||
| 78 | * large enough filesystem may contain them, thus the slightly | ||
| 79 | * confusing looking conditional below. | ||
| 80 | */ | ||
| 81 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || | ||
| 82 | (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) | ||
| 75 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | 83 | fileid_type |= XFS_FILEID_TYPE_64FLAG; |
| 76 | 84 | ||
| 77 | /* | 85 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 214ddd71ff79..096494997747 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | 37 | ||
| 38 | #include <kmem.h> | 38 | #include <kmem.h> |
| 39 | #include <mrlock.h> | 39 | #include <mrlock.h> |
| 40 | #include <sv.h> | ||
| 41 | #include <time.h> | 40 | #include <time.h> |
| 42 | 41 | ||
| 43 | #include <support/debug.h> | 42 | #include <support/debug.h> |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 064f964d4f3c..c51faaa5e291 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -834,8 +834,11 @@ xfsaild_wakeup( | |||
| 834 | struct xfs_ail *ailp, | 834 | struct xfs_ail *ailp, |
| 835 | xfs_lsn_t threshold_lsn) | 835 | xfs_lsn_t threshold_lsn) |
| 836 | { | 836 | { |
| 837 | ailp->xa_target = threshold_lsn; | 837 | /* only ever move the target forwards */ |
| 838 | wake_up_process(ailp->xa_task); | 838 | if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { |
| 839 | ailp->xa_target = threshold_lsn; | ||
| 840 | wake_up_process(ailp->xa_task); | ||
| 841 | } | ||
| 839 | } | 842 | } |
| 840 | 843 | ||
| 841 | STATIC int | 844 | STATIC int |
| @@ -847,8 +850,17 @@ xfsaild( | |||
| 847 | long tout = 0; /* milliseconds */ | 850 | long tout = 0; /* milliseconds */ |
| 848 | 851 | ||
| 849 | while (!kthread_should_stop()) { | 852 | while (!kthread_should_stop()) { |
| 850 | schedule_timeout_interruptible(tout ? | 853 | /* |
| 851 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | 854 | * for short sleeps indicating congestion, don't allow us to |
| 855 | * get woken early. Otherwise all we do is bang on the AIL lock | ||
| 856 | * without making progress. | ||
| 857 | */ | ||
| 858 | if (tout && tout <= 20) | ||
| 859 | __set_current_state(TASK_KILLABLE); | ||
| 860 | else | ||
| 861 | __set_current_state(TASK_INTERRUPTIBLE); | ||
| 862 | schedule_timeout(tout ? | ||
| 863 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
| 852 | 864 | ||
| 853 | /* swsusp */ | 865 | /* swsusp */ |
| 854 | try_to_freeze(); | 866 | try_to_freeze(); |
| @@ -1118,6 +1130,8 @@ xfs_fs_evict_inode( | |||
| 1118 | */ | 1130 | */ |
| 1119 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | 1131 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); |
| 1120 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 1132 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
| 1133 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
| 1134 | &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); | ||
| 1121 | 1135 | ||
| 1122 | xfs_inactive(ip); | 1136 | xfs_inactive(ip); |
| 1123 | } | 1137 | } |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1c..a02480de9759 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
| @@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab( | |||
| 53 | { | 53 | { |
| 54 | struct inode *inode = VFS_I(ip); | 54 | struct inode *inode = VFS_I(ip); |
| 55 | 55 | ||
| 56 | ASSERT(rcu_read_lock_held()); | ||
| 57 | |||
| 58 | /* | ||
| 59 | * check for stale RCU freed inode | ||
| 60 | * | ||
| 61 | * If the inode has been reallocated, it doesn't matter if it's not in | ||
| 62 | * the AG we are walking - we are walking for writeback, so if it | ||
| 63 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
| 64 | * it back anyway. If it has been reallocated and still being | ||
| 65 | * initialised, the XFS_INEW check below will catch it. | ||
| 66 | */ | ||
| 67 | spin_lock(&ip->i_flags_lock); | ||
| 68 | if (!ip->i_ino) | ||
| 69 | goto out_unlock_noent; | ||
| 70 | |||
| 71 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
| 72 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
| 73 | goto out_unlock_noent; | ||
| 74 | spin_unlock(&ip->i_flags_lock); | ||
| 75 | |||
| 56 | /* nothing to sync during shutdown */ | 76 | /* nothing to sync during shutdown */ |
| 57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 77 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| 58 | return EFSCORRUPTED; | 78 | return EFSCORRUPTED; |
| 59 | 79 | ||
| 60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
| 61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
| 62 | return ENOENT; | ||
| 63 | |||
| 64 | /* If we can't grab the inode, it must on it's way to reclaim. */ | 80 | /* If we can't grab the inode, it must on it's way to reclaim. */ |
| 65 | if (!igrab(inode)) | 81 | if (!igrab(inode)) |
| 66 | return ENOENT; | 82 | return ENOENT; |
| @@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab( | |||
| 72 | 88 | ||
| 73 | /* inode is valid */ | 89 | /* inode is valid */ |
| 74 | return 0; | 90 | return 0; |
| 91 | |||
| 92 | out_unlock_noent: | ||
| 93 | spin_unlock(&ip->i_flags_lock); | ||
| 94 | return ENOENT; | ||
| 75 | } | 95 | } |
| 76 | 96 | ||
| 77 | STATIC int | 97 | STATIC int |
| @@ -98,12 +118,12 @@ restart: | |||
| 98 | int error = 0; | 118 | int error = 0; |
| 99 | int i; | 119 | int i; |
| 100 | 120 | ||
| 101 | read_lock(&pag->pag_ici_lock); | 121 | rcu_read_lock(); |
| 102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 122 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
| 103 | (void **)batch, first_index, | 123 | (void **)batch, first_index, |
| 104 | XFS_LOOKUP_BATCH); | 124 | XFS_LOOKUP_BATCH); |
| 105 | if (!nr_found) { | 125 | if (!nr_found) { |
| 106 | read_unlock(&pag->pag_ici_lock); | 126 | rcu_read_unlock(); |
| 107 | break; | 127 | break; |
| 108 | } | 128 | } |
| 109 | 129 | ||
| @@ -118,18 +138,26 @@ restart: | |||
| 118 | batch[i] = NULL; | 138 | batch[i] = NULL; |
| 119 | 139 | ||
| 120 | /* | 140 | /* |
| 121 | * Update the index for the next lookup. Catch overflows | 141 | * Update the index for the next lookup. Catch |
| 122 | * into the next AG range which can occur if we have inodes | 142 | * overflows into the next AG range which can occur if |
| 123 | * in the last block of the AG and we are currently | 143 | * we have inodes in the last block of the AG and we |
| 124 | * pointing to the last inode. | 144 | * are currently pointing to the last inode. |
| 145 | * | ||
| 146 | * Because we may see inodes that are from the wrong AG | ||
| 147 | * due to RCU freeing and reallocation, only update the | ||
| 148 | * index if it lies in this AG. It was a race that lead | ||
| 149 | * us to see this inode, so another lookup from the | ||
| 150 | * same index will not find it again. | ||
| 125 | */ | 151 | */ |
| 152 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
| 153 | continue; | ||
| 126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 154 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
| 127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 155 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
| 128 | done = 1; | 156 | done = 1; |
| 129 | } | 157 | } |
| 130 | 158 | ||
| 131 | /* unlock now we've grabbed the inodes. */ | 159 | /* unlock now we've grabbed the inodes. */ |
| 132 | read_unlock(&pag->pag_ici_lock); | 160 | rcu_read_unlock(); |
| 133 | 161 | ||
| 134 | for (i = 0; i < nr_found; i++) { | 162 | for (i = 0; i < nr_found; i++) { |
| 135 | if (!batch[i]) | 163 | if (!batch[i]) |
| @@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag( | |||
| 592 | struct xfs_perag *pag; | 620 | struct xfs_perag *pag; |
| 593 | 621 | ||
| 594 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 622 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
| 595 | write_lock(&pag->pag_ici_lock); | 623 | spin_lock(&pag->pag_ici_lock); |
| 596 | spin_lock(&ip->i_flags_lock); | 624 | spin_lock(&ip->i_flags_lock); |
| 597 | __xfs_inode_set_reclaim_tag(pag, ip); | 625 | __xfs_inode_set_reclaim_tag(pag, ip); |
| 598 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 626 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
| 599 | spin_unlock(&ip->i_flags_lock); | 627 | spin_unlock(&ip->i_flags_lock); |
| 600 | write_unlock(&pag->pag_ici_lock); | 628 | spin_unlock(&pag->pag_ici_lock); |
| 601 | xfs_perag_put(pag); | 629 | xfs_perag_put(pag); |
| 602 | } | 630 | } |
| 603 | 631 | ||
| @@ -639,9 +667,14 @@ xfs_reclaim_inode_grab( | |||
| 639 | struct xfs_inode *ip, | 667 | struct xfs_inode *ip, |
| 640 | int flags) | 668 | int flags) |
| 641 | { | 669 | { |
| 670 | ASSERT(rcu_read_lock_held()); | ||
| 671 | |||
| 672 | /* quick check for stale RCU freed inode */ | ||
| 673 | if (!ip->i_ino) | ||
| 674 | return 1; | ||
| 642 | 675 | ||
| 643 | /* | 676 | /* |
| 644 | * do some unlocked checks first to avoid unnecceary lock traffic. | 677 | * do some unlocked checks first to avoid unnecessary lock traffic. |
| 645 | * The first is a flush lock check, the second is a already in reclaim | 678 | * The first is a flush lock check, the second is a already in reclaim |
| 646 | * check. Only do these checks if we are not going to block on locks. | 679 | * check. Only do these checks if we are not going to block on locks. |
| 647 | */ | 680 | */ |
| @@ -654,11 +687,16 @@ xfs_reclaim_inode_grab( | |||
| 654 | * The radix tree lock here protects a thread in xfs_iget from racing | 687 | * The radix tree lock here protects a thread in xfs_iget from racing |
| 655 | * with us starting reclaim on the inode. Once we have the | 688 | * with us starting reclaim on the inode. Once we have the |
| 656 | * XFS_IRECLAIM flag set it will not touch us. | 689 | * XFS_IRECLAIM flag set it will not touch us. |
| 690 | * | ||
| 691 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
| 692 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
| 693 | * aren't candidates for reclaim at all, so we must check the | ||
| 694 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
| 657 | */ | 695 | */ |
| 658 | spin_lock(&ip->i_flags_lock); | 696 | spin_lock(&ip->i_flags_lock); |
| 659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 697 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || |
| 660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | 698 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { |
| 661 | /* ignore as it is already under reclaim */ | 699 | /* not a reclaim candidate. */ |
| 662 | spin_unlock(&ip->i_flags_lock); | 700 | spin_unlock(&ip->i_flags_lock); |
| 663 | return 1; | 701 | return 1; |
| 664 | } | 702 | } |
| @@ -795,12 +833,12 @@ reclaim: | |||
| 795 | * added to the tree assert that it's been there before to catch | 833 | * added to the tree assert that it's been there before to catch |
| 796 | * problems with the inode life time early on. | 834 | * problems with the inode life time early on. |
| 797 | */ | 835 | */ |
| 798 | write_lock(&pag->pag_ici_lock); | 836 | spin_lock(&pag->pag_ici_lock); |
| 799 | if (!radix_tree_delete(&pag->pag_ici_root, | 837 | if (!radix_tree_delete(&pag->pag_ici_root, |
| 800 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 838 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
| 801 | ASSERT(0); | 839 | ASSERT(0); |
| 802 | __xfs_inode_clear_reclaim(pag, ip); | 840 | __xfs_inode_clear_reclaim(pag, ip); |
| 803 | write_unlock(&pag->pag_ici_lock); | 841 | spin_unlock(&pag->pag_ici_lock); |
| 804 | 842 | ||
| 805 | /* | 843 | /* |
| 806 | * Here we do an (almost) spurious inode lock in order to coordinate | 844 | * Here we do an (almost) spurious inode lock in order to coordinate |
| @@ -864,14 +902,14 @@ restart: | |||
| 864 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | 902 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; |
| 865 | int i; | 903 | int i; |
| 866 | 904 | ||
| 867 | write_lock(&pag->pag_ici_lock); | 905 | rcu_read_lock(); |
| 868 | nr_found = radix_tree_gang_lookup_tag( | 906 | nr_found = radix_tree_gang_lookup_tag( |
| 869 | &pag->pag_ici_root, | 907 | &pag->pag_ici_root, |
| 870 | (void **)batch, first_index, | 908 | (void **)batch, first_index, |
| 871 | XFS_LOOKUP_BATCH, | 909 | XFS_LOOKUP_BATCH, |
| 872 | XFS_ICI_RECLAIM_TAG); | 910 | XFS_ICI_RECLAIM_TAG); |
| 873 | if (!nr_found) { | 911 | if (!nr_found) { |
| 874 | write_unlock(&pag->pag_ici_lock); | 912 | rcu_read_unlock(); |
| 875 | break; | 913 | break; |
| 876 | } | 914 | } |
| 877 | 915 | ||
| @@ -891,14 +929,24 @@ restart: | |||
| 891 | * occur if we have inodes in the last block of | 929 | * occur if we have inodes in the last block of |
| 892 | * the AG and we are currently pointing to the | 930 | * the AG and we are currently pointing to the |
| 893 | * last inode. | 931 | * last inode. |
| 932 | * | ||
| 933 | * Because we may see inodes that are from the | ||
| 934 | * wrong AG due to RCU freeing and | ||
| 935 | * reallocation, only update the index if it | ||
| 936 | * lies in this AG. It was a race that lead us | ||
| 937 | * to see this inode, so another lookup from | ||
| 938 | * the same index will not find it again. | ||
| 894 | */ | 939 | */ |
| 940 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
| 941 | pag->pag_agno) | ||
| 942 | continue; | ||
| 895 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 943 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
| 896 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 944 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
| 897 | done = 1; | 945 | done = 1; |
| 898 | } | 946 | } |
| 899 | 947 | ||
| 900 | /* unlock now we've grabbed the inodes. */ | 948 | /* unlock now we've grabbed the inodes. */ |
| 901 | write_unlock(&pag->pag_ici_lock); | 949 | rcu_read_unlock(); |
| 902 | 950 | ||
| 903 | for (i = 0; i < nr_found; i++) { | 951 | for (i = 0; i < nr_found; i++) { |
| 904 | if (!batch[i]) | 952 | if (!batch[i]) |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index acef2e98c594..647af2a2e7aa 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
| @@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
| 766 | __field(int, curr_res) | 766 | __field(int, curr_res) |
| 767 | __field(int, unit_res) | 767 | __field(int, unit_res) |
| 768 | __field(unsigned int, flags) | 768 | __field(unsigned int, flags) |
| 769 | __field(void *, reserve_headq) | 769 | __field(int, reserveq) |
| 770 | __field(void *, write_headq) | 770 | __field(int, writeq) |
| 771 | __field(int, grant_reserve_cycle) | 771 | __field(int, grant_reserve_cycle) |
| 772 | __field(int, grant_reserve_bytes) | 772 | __field(int, grant_reserve_bytes) |
| 773 | __field(int, grant_write_cycle) | 773 | __field(int, grant_write_cycle) |
| @@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
| 784 | __entry->curr_res = tic->t_curr_res; | 784 | __entry->curr_res = tic->t_curr_res; |
| 785 | __entry->unit_res = tic->t_unit_res; | 785 | __entry->unit_res = tic->t_unit_res; |
| 786 | __entry->flags = tic->t_flags; | 786 | __entry->flags = tic->t_flags; |
| 787 | __entry->reserve_headq = log->l_reserve_headq; | 787 | __entry->reserveq = list_empty(&log->l_reserveq); |
| 788 | __entry->write_headq = log->l_write_headq; | 788 | __entry->writeq = list_empty(&log->l_writeq); |
| 789 | __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; | 789 | xlog_crack_grant_head(&log->l_grant_reserve_head, |
| 790 | __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; | 790 | &__entry->grant_reserve_cycle, |
| 791 | __entry->grant_write_cycle = log->l_grant_write_cycle; | 791 | &__entry->grant_reserve_bytes); |
| 792 | __entry->grant_write_bytes = log->l_grant_write_bytes; | 792 | xlog_crack_grant_head(&log->l_grant_write_head, |
| 793 | &__entry->grant_write_cycle, | ||
| 794 | &__entry->grant_write_bytes); | ||
| 793 | __entry->curr_cycle = log->l_curr_cycle; | 795 | __entry->curr_cycle = log->l_curr_cycle; |
| 794 | __entry->curr_block = log->l_curr_block; | 796 | __entry->curr_block = log->l_curr_block; |
| 795 | __entry->tail_lsn = log->l_tail_lsn; | 797 | __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); |
| 796 | ), | 798 | ), |
| 797 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " | 799 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " |
| 798 | "t_unit_res %u t_flags %s reserve_headq 0x%p " | 800 | "t_unit_res %u t_flags %s reserveq %s " |
| 799 | "write_headq 0x%p grant_reserve_cycle %d " | 801 | "writeq %s grant_reserve_cycle %d " |
| 800 | "grant_reserve_bytes %d grant_write_cycle %d " | 802 | "grant_reserve_bytes %d grant_write_cycle %d " |
| 801 | "grant_write_bytes %d curr_cycle %d curr_block %d " | 803 | "grant_write_bytes %d curr_cycle %d curr_block %d " |
| 802 | "tail_cycle %d tail_block %d", | 804 | "tail_cycle %d tail_block %d", |
| @@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
| 807 | __entry->curr_res, | 809 | __entry->curr_res, |
| 808 | __entry->unit_res, | 810 | __entry->unit_res, |
| 809 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), | 811 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), |
| 810 | __entry->reserve_headq, | 812 | __entry->reserveq ? "empty" : "active", |
| 811 | __entry->write_headq, | 813 | __entry->writeq ? "empty" : "active", |
| 812 | __entry->grant_reserve_cycle, | 814 | __entry->grant_reserve_cycle, |
| 813 | __entry->grant_reserve_bytes, | 815 | __entry->grant_reserve_bytes, |
| 814 | __entry->grant_write_cycle, | 816 | __entry->grant_write_cycle, |
| @@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); | |||
| 835 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); | 837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); |
| 836 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); | 838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); |
| 837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); | 839 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); |
| 840 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); | ||
| 838 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); | 841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); |
| 839 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); | 842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); |
| 840 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); | 843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); |
| @@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); | |||
| 842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); | 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); |
| 843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); | 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); |
| 844 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); | 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); |
| 848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); | ||
| 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); | 849 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); |
| 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); | 850 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); |
| 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); | 851 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); |
| @@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage); | |||
| 935 | DEFINE_PAGE_EVENT(xfs_releasepage); | 939 | DEFINE_PAGE_EVENT(xfs_releasepage); |
| 936 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 940 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
| 937 | 941 | ||
| 938 | DECLARE_EVENT_CLASS(xfs_iomap_class, | 942 | DECLARE_EVENT_CLASS(xfs_imap_class, |
| 939 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, | 943 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, |
| 940 | int flags, struct xfs_bmbt_irec *irec), | 944 | int type, struct xfs_bmbt_irec *irec), |
| 941 | TP_ARGS(ip, offset, count, flags, irec), | 945 | TP_ARGS(ip, offset, count, type, irec), |
| 942 | TP_STRUCT__entry( | 946 | TP_STRUCT__entry( |
| 943 | __field(dev_t, dev) | 947 | __field(dev_t, dev) |
| 944 | __field(xfs_ino_t, ino) | 948 | __field(xfs_ino_t, ino) |
| @@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
| 946 | __field(loff_t, new_size) | 950 | __field(loff_t, new_size) |
| 947 | __field(loff_t, offset) | 951 | __field(loff_t, offset) |
| 948 | __field(size_t, count) | 952 | __field(size_t, count) |
| 949 | __field(int, flags) | 953 | __field(int, type) |
| 950 | __field(xfs_fileoff_t, startoff) | 954 | __field(xfs_fileoff_t, startoff) |
| 951 | __field(xfs_fsblock_t, startblock) | 955 | __field(xfs_fsblock_t, startblock) |
| 952 | __field(xfs_filblks_t, blockcount) | 956 | __field(xfs_filblks_t, blockcount) |
| @@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
| 958 | __entry->new_size = ip->i_new_size; | 962 | __entry->new_size = ip->i_new_size; |
| 959 | __entry->offset = offset; | 963 | __entry->offset = offset; |
| 960 | __entry->count = count; | 964 | __entry->count = count; |
| 961 | __entry->flags = flags; | 965 | __entry->type = type; |
| 962 | __entry->startoff = irec ? irec->br_startoff : 0; | 966 | __entry->startoff = irec ? irec->br_startoff : 0; |
| 963 | __entry->startblock = irec ? irec->br_startblock : 0; | 967 | __entry->startblock = irec ? irec->br_startblock : 0; |
| 964 | __entry->blockcount = irec ? irec->br_blockcount : 0; | 968 | __entry->blockcount = irec ? irec->br_blockcount : 0; |
| 965 | ), | 969 | ), |
| 966 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 970 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " |
| 967 | "offset 0x%llx count %zd flags %s " | 971 | "offset 0x%llx count %zd type %s " |
| 968 | "startoff 0x%llx startblock %lld blockcount 0x%llx", | 972 | "startoff 0x%llx startblock %lld blockcount 0x%llx", |
| 969 | MAJOR(__entry->dev), MINOR(__entry->dev), | 973 | MAJOR(__entry->dev), MINOR(__entry->dev), |
| 970 | __entry->ino, | 974 | __entry->ino, |
| @@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
| 972 | __entry->new_size, | 976 | __entry->new_size, |
| 973 | __entry->offset, | 977 | __entry->offset, |
| 974 | __entry->count, | 978 | __entry->count, |
| 975 | __print_flags(__entry->flags, "|", BMAPI_FLAGS), | 979 | __print_symbolic(__entry->type, XFS_IO_TYPES), |
| 976 | __entry->startoff, | 980 | __entry->startoff, |
| 977 | (__int64_t)__entry->startblock, | 981 | (__int64_t)__entry->startblock, |
| 978 | __entry->blockcount) | 982 | __entry->blockcount) |
| 979 | ) | 983 | ) |
| 980 | 984 | ||
| 981 | #define DEFINE_IOMAP_EVENT(name) \ | 985 | #define DEFINE_IOMAP_EVENT(name) \ |
| 982 | DEFINE_EVENT(xfs_iomap_class, name, \ | 986 | DEFINE_EVENT(xfs_imap_class, name, \ |
| 983 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ | 987 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ |
| 984 | int flags, struct xfs_bmbt_irec *irec), \ | 988 | int type, struct xfs_bmbt_irec *irec), \ |
| 985 | TP_ARGS(ip, offset, count, flags, irec)) | 989 | TP_ARGS(ip, offset, count, type, irec)) |
| 986 | DEFINE_IOMAP_EVENT(xfs_iomap_enter); | 990 | DEFINE_IOMAP_EVENT(xfs_map_blocks_found); |
| 987 | DEFINE_IOMAP_EVENT(xfs_iomap_found); | 991 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
| 988 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); | 992 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
| 993 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | ||
| 989 | 994 | ||
| 990 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 995 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
| 991 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 996 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
| @@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ | |||
| 1022 | TP_ARGS(ip, offset, count)) | 1027 | TP_ARGS(ip, offset, count)) |
| 1023 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); | 1028 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); |
| 1024 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); | 1029 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); |
| 1030 | DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); | ||
| 1025 | 1031 | ||
| 1026 | 1032 | ||
| 1027 | TRACE_EVENT(xfs_itruncate_start, | 1033 | TRACE_EVENT(xfs_itruncate_start, |
| @@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \ | |||
| 1420 | TP_PROTO(struct xfs_alloc_arg *args), \ | 1426 | TP_PROTO(struct xfs_alloc_arg *args), \ |
| 1421 | TP_ARGS(args)) | 1427 | TP_ARGS(args)) |
| 1422 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); | 1428 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); |
| 1429 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); | ||
| 1423 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); | 1430 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); |
| 1424 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); | 1431 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); |
| 1425 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | 1432 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index faf8e1a83a12..d22aa3103106 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
| @@ -149,7 +149,6 @@ xfs_qm_dqdestroy( | |||
| 149 | ASSERT(list_empty(&dqp->q_freelist)); | 149 | ASSERT(list_empty(&dqp->q_freelist)); |
| 150 | 150 | ||
| 151 | mutex_destroy(&dqp->q_qlock); | 151 | mutex_destroy(&dqp->q_qlock); |
| 152 | sv_destroy(&dqp->q_pinwait); | ||
| 153 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); | 152 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); |
| 154 | 153 | ||
| 155 | atomic_dec(&xfs_Gqm->qm_totaldquots); | 154 | atomic_dec(&xfs_Gqm->qm_totaldquots); |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 63c7a1a6c022..58632cc17f2d 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
| @@ -227,7 +227,7 @@ typedef struct xfs_perag { | |||
| 227 | 227 | ||
| 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
| 229 | 229 | ||
| 230 | rwlock_t pag_ici_lock; /* incore inode lock */ | 230 | spinlock_t pag_ici_lock; /* incore inode cache lock */ |
| 231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | 231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ |
| 232 | int pag_ici_reclaimable; /* reclaimable inodes */ | 232 | int pag_ici_reclaimable; /* reclaimable inodes */ |
| 233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ | 233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 112abc439ca5..fa8723f5870a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
| @@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact( | |||
| 577 | xfs_extlen_t rlen; /* length of returned extent */ | 577 | xfs_extlen_t rlen; /* length of returned extent */ |
| 578 | 578 | ||
| 579 | ASSERT(args->alignment == 1); | 579 | ASSERT(args->alignment == 1); |
| 580 | |||
| 580 | /* | 581 | /* |
| 581 | * Allocate/initialize a cursor for the by-number freespace btree. | 582 | * Allocate/initialize a cursor for the by-number freespace btree. |
| 582 | */ | 583 | */ |
| 583 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 584 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
| 584 | args->agno, XFS_BTNUM_BNO); | 585 | args->agno, XFS_BTNUM_BNO); |
| 586 | |||
| 585 | /* | 587 | /* |
| 586 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). | 588 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). |
| 587 | * Look for the closest free block <= bno, it must contain bno | 589 | * Look for the closest free block <= bno, it must contain bno |
| 588 | * if any free block does. | 590 | * if any free block does. |
| 589 | */ | 591 | */ |
| 590 | if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) | 592 | error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); |
| 593 | if (error) | ||
| 591 | goto error0; | 594 | goto error0; |
| 592 | if (!i) { | 595 | if (!i) |
| 593 | /* | 596 | goto not_found; |
| 594 | * Didn't find it, return null. | 597 | |
| 595 | */ | ||
| 596 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
| 597 | args->agbno = NULLAGBLOCK; | ||
| 598 | return 0; | ||
| 599 | } | ||
| 600 | /* | 598 | /* |
| 601 | * Grab the freespace record. | 599 | * Grab the freespace record. |
| 602 | */ | 600 | */ |
| 603 | if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) | 601 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); |
| 602 | if (error) | ||
| 604 | goto error0; | 603 | goto error0; |
| 605 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 604 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 606 | ASSERT(fbno <= args->agbno); | 605 | ASSERT(fbno <= args->agbno); |
| 607 | minend = args->agbno + args->minlen; | 606 | minend = args->agbno + args->minlen; |
| 608 | maxend = args->agbno + args->maxlen; | 607 | maxend = args->agbno + args->maxlen; |
| 609 | fend = fbno + flen; | 608 | fend = fbno + flen; |
| 609 | |||
| 610 | /* | 610 | /* |
| 611 | * Give up if the freespace isn't long enough for the minimum request. | 611 | * Give up if the freespace isn't long enough for the minimum request. |
| 612 | */ | 612 | */ |
| 613 | if (fend < minend) { | 613 | if (fend < minend) |
| 614 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 614 | goto not_found; |
| 615 | args->agbno = NULLAGBLOCK; | 615 | |
| 616 | return 0; | ||
| 617 | } | ||
| 618 | /* | 616 | /* |
| 619 | * End of extent will be smaller of the freespace end and the | 617 | * End of extent will be smaller of the freespace end and the |
| 620 | * maximal requested end. | 618 | * maximal requested end. |
| 621 | */ | 619 | * |
| 622 | end = XFS_AGBLOCK_MIN(fend, maxend); | ||
| 623 | /* | ||
| 624 | * Fix the length according to mod and prod if given. | 620 | * Fix the length according to mod and prod if given. |
| 625 | */ | 621 | */ |
| 622 | end = XFS_AGBLOCK_MIN(fend, maxend); | ||
| 626 | args->len = end - args->agbno; | 623 | args->len = end - args->agbno; |
| 627 | xfs_alloc_fix_len(args); | 624 | xfs_alloc_fix_len(args); |
| 628 | if (!xfs_alloc_fix_minleft(args)) { | 625 | if (!xfs_alloc_fix_minleft(args)) |
| 629 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 626 | goto not_found; |
| 630 | return 0; | 627 | |
| 631 | } | ||
| 632 | rlen = args->len; | 628 | rlen = args->len; |
| 633 | ASSERT(args->agbno + rlen <= fend); | 629 | ASSERT(args->agbno + rlen <= fend); |
| 634 | end = args->agbno + rlen; | 630 | end = args->agbno + rlen; |
| 631 | |||
| 635 | /* | 632 | /* |
| 636 | * We are allocating agbno for rlen [agbno .. end] | 633 | * We are allocating agbno for rlen [agbno .. end] |
| 637 | * Allocate/initialize a cursor for the by-size btree. | 634 | * Allocate/initialize a cursor for the by-size btree. |
| @@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact( | |||
| 640 | args->agno, XFS_BTNUM_CNT); | 637 | args->agno, XFS_BTNUM_CNT); |
| 641 | ASSERT(args->agbno + args->len <= | 638 | ASSERT(args->agbno + args->len <= |
| 642 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 639 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
| 643 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, | 640 | error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, |
| 644 | args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { | 641 | args->len, XFSA_FIXUP_BNO_OK); |
| 642 | if (error) { | ||
| 645 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | 643 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); |
| 646 | goto error0; | 644 | goto error0; |
| 647 | } | 645 | } |
| 646 | |||
| 648 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 647 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); |
| 649 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 648 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
| 650 | 649 | ||
| 651 | trace_xfs_alloc_exact_done(args); | ||
| 652 | args->wasfromfl = 0; | 650 | args->wasfromfl = 0; |
| 651 | trace_xfs_alloc_exact_done(args); | ||
| 652 | return 0; | ||
| 653 | |||
| 654 | not_found: | ||
| 655 | /* Didn't find it, return null. */ | ||
| 656 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
| 657 | args->agbno = NULLAGBLOCK; | ||
| 658 | trace_xfs_alloc_exact_notfound(args); | ||
| 653 | return 0; | 659 | return 0; |
| 654 | 660 | ||
| 655 | error0: | 661 | error0: |
| @@ -659,6 +665,95 @@ error0: | |||
| 659 | } | 665 | } |
| 660 | 666 | ||
| 661 | /* | 667 | /* |
| 668 | * Search the btree in a given direction via the search cursor and compare | ||
| 669 | * the records found against the good extent we've already found. | ||
| 670 | */ | ||
| 671 | STATIC int | ||
| 672 | xfs_alloc_find_best_extent( | ||
| 673 | struct xfs_alloc_arg *args, /* allocation argument structure */ | ||
| 674 | struct xfs_btree_cur **gcur, /* good cursor */ | ||
| 675 | struct xfs_btree_cur **scur, /* searching cursor */ | ||
| 676 | xfs_agblock_t gdiff, /* difference for search comparison */ | ||
| 677 | xfs_agblock_t *sbno, /* extent found by search */ | ||
| 678 | xfs_extlen_t *slen, | ||
| 679 | xfs_extlen_t *slena, /* aligned length */ | ||
| 680 | int dir) /* 0 = search right, 1 = search left */ | ||
| 681 | { | ||
| 682 | xfs_agblock_t bno; | ||
| 683 | xfs_agblock_t new; | ||
| 684 | xfs_agblock_t sdiff; | ||
| 685 | int error; | ||
| 686 | int i; | ||
| 687 | |||
| 688 | /* The good extent is perfect, no need to search. */ | ||
| 689 | if (!gdiff) | ||
| 690 | goto out_use_good; | ||
| 691 | |||
| 692 | /* | ||
| 693 | * Look until we find a better one, run out of space or run off the end. | ||
| 694 | */ | ||
| 695 | do { | ||
| 696 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); | ||
| 697 | if (error) | ||
| 698 | goto error0; | ||
| 699 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
| 700 | xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, | ||
| 701 | args->minlen, &bno, slena); | ||
| 702 | |||
| 703 | /* | ||
| 704 | * The good extent is closer than this one. | ||
| 705 | */ | ||
| 706 | if (!dir) { | ||
| 707 | if (bno >= args->agbno + gdiff) | ||
| 708 | goto out_use_good; | ||
| 709 | } else { | ||
| 710 | if (bno <= args->agbno - gdiff) | ||
| 711 | goto out_use_good; | ||
| 712 | } | ||
| 713 | |||
| 714 | /* | ||
| 715 | * Same distance, compare length and pick the best. | ||
| 716 | */ | ||
| 717 | if (*slena >= args->minlen) { | ||
| 718 | args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); | ||
| 719 | xfs_alloc_fix_len(args); | ||
| 720 | |||
| 721 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
| 722 | args->alignment, *sbno, | ||
| 723 | *slen, &new); | ||
| 724 | |||
| 725 | /* | ||
| 726 | * Choose closer size and invalidate other cursor. | ||
| 727 | */ | ||
| 728 | if (sdiff < gdiff) | ||
| 729 | goto out_use_search; | ||
| 730 | goto out_use_good; | ||
| 731 | } | ||
| 732 | |||
| 733 | if (!dir) | ||
| 734 | error = xfs_btree_increment(*scur, 0, &i); | ||
| 735 | else | ||
| 736 | error = xfs_btree_decrement(*scur, 0, &i); | ||
| 737 | if (error) | ||
| 738 | goto error0; | ||
| 739 | } while (i); | ||
| 740 | |||
| 741 | out_use_good: | ||
| 742 | xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); | ||
| 743 | *scur = NULL; | ||
| 744 | return 0; | ||
| 745 | |||
| 746 | out_use_search: | ||
| 747 | xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); | ||
| 748 | *gcur = NULL; | ||
| 749 | return 0; | ||
| 750 | |||
| 751 | error0: | ||
| 752 | /* caller invalidates cursors */ | ||
| 753 | return error; | ||
| 754 | } | ||
| 755 | |||
| 756 | /* | ||
| 662 | * Allocate a variable extent near bno in the allocation group agno. | 757 | * Allocate a variable extent near bno in the allocation group agno. |
| 663 | * Extent's length (returned in len) will be between minlen and maxlen, | 758 | * Extent's length (returned in len) will be between minlen and maxlen, |
| 664 | * and of the form k * prod + mod unless there's nothing that large. | 759 | * and of the form k * prod + mod unless there's nothing that large. |
| @@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near( | |||
| 925 | } | 1020 | } |
| 926 | } | 1021 | } |
| 927 | } while (bno_cur_lt || bno_cur_gt); | 1022 | } while (bno_cur_lt || bno_cur_gt); |
| 1023 | |||
| 928 | /* | 1024 | /* |
| 929 | * Got both cursors still active, need to find better entry. | 1025 | * Got both cursors still active, need to find better entry. |
| 930 | */ | 1026 | */ |
| 931 | if (bno_cur_lt && bno_cur_gt) { | 1027 | if (bno_cur_lt && bno_cur_gt) { |
| 932 | /* | ||
| 933 | * Left side is long enough, look for a right side entry. | ||
| 934 | */ | ||
| 935 | if (ltlena >= args->minlen) { | 1028 | if (ltlena >= args->minlen) { |
| 936 | /* | 1029 | /* |
| 937 | * Fix up the length. | 1030 | * Left side is good, look for a right side entry. |
| 938 | */ | 1031 | */ |
| 939 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1032 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
| 940 | xfs_alloc_fix_len(args); | 1033 | xfs_alloc_fix_len(args); |
| 941 | rlen = args->len; | 1034 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 942 | ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, | ||
| 943 | args->alignment, ltbno, ltlen, <new); | 1035 | args->alignment, ltbno, ltlen, <new); |
| 1036 | |||
| 1037 | error = xfs_alloc_find_best_extent(args, | ||
| 1038 | &bno_cur_lt, &bno_cur_gt, | ||
| 1039 | ltdiff, >bno, >len, >lena, | ||
| 1040 | 0 /* search right */); | ||
| 1041 | } else { | ||
| 1042 | ASSERT(gtlena >= args->minlen); | ||
| 1043 | |||
| 944 | /* | 1044 | /* |
| 945 | * Not perfect. | 1045 | * Right side is good, look for a left side entry. |
| 946 | */ | ||
| 947 | if (ltdiff) { | ||
| 948 | /* | ||
| 949 | * Look until we find a better one, run out of | ||
| 950 | * space, or run off the end. | ||
| 951 | */ | ||
| 952 | while (bno_cur_lt && bno_cur_gt) { | ||
| 953 | if ((error = xfs_alloc_get_rec( | ||
| 954 | bno_cur_gt, >bno, | ||
| 955 | >len, &i))) | ||
| 956 | goto error0; | ||
| 957 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
| 958 | xfs_alloc_compute_aligned(gtbno, gtlen, | ||
| 959 | args->alignment, args->minlen, | ||
| 960 | >bnoa, >lena); | ||
| 961 | /* | ||
| 962 | * The left one is clearly better. | ||
| 963 | */ | ||
| 964 | if (gtbnoa >= args->agbno + ltdiff) { | ||
| 965 | xfs_btree_del_cursor( | ||
| 966 | bno_cur_gt, | ||
| 967 | XFS_BTREE_NOERROR); | ||
| 968 | bno_cur_gt = NULL; | ||
| 969 | break; | ||
| 970 | } | ||
| 971 | /* | ||
| 972 | * If we reach a big enough entry, | ||
| 973 | * compare the two and pick the best. | ||
| 974 | */ | ||
| 975 | if (gtlena >= args->minlen) { | ||
| 976 | args->len = | ||
| 977 | XFS_EXTLEN_MIN(gtlena, | ||
| 978 | args->maxlen); | ||
| 979 | xfs_alloc_fix_len(args); | ||
| 980 | rlen = args->len; | ||
| 981 | gtdiff = xfs_alloc_compute_diff( | ||
| 982 | args->agbno, rlen, | ||
| 983 | args->alignment, | ||
| 984 | gtbno, gtlen, >new); | ||
| 985 | /* | ||
| 986 | * Right side is better. | ||
| 987 | */ | ||
| 988 | if (gtdiff < ltdiff) { | ||
| 989 | xfs_btree_del_cursor( | ||
| 990 | bno_cur_lt, | ||
| 991 | XFS_BTREE_NOERROR); | ||
| 992 | bno_cur_lt = NULL; | ||
| 993 | } | ||
| 994 | /* | ||
| 995 | * Left side is better. | ||
| 996 | */ | ||
| 997 | else { | ||
| 998 | xfs_btree_del_cursor( | ||
| 999 | bno_cur_gt, | ||
| 1000 | XFS_BTREE_NOERROR); | ||
| 1001 | bno_cur_gt = NULL; | ||
| 1002 | } | ||
| 1003 | break; | ||
| 1004 | } | ||
| 1005 | /* | ||
| 1006 | * Fell off the right end. | ||
| 1007 | */ | ||
| 1008 | if ((error = xfs_btree_increment( | ||
| 1009 | bno_cur_gt, 0, &i))) | ||
| 1010 | goto error0; | ||
| 1011 | if (!i) { | ||
| 1012 | xfs_btree_del_cursor( | ||
| 1013 | bno_cur_gt, | ||
| 1014 | XFS_BTREE_NOERROR); | ||
| 1015 | bno_cur_gt = NULL; | ||
| 1016 | break; | ||
| 1017 | } | ||
| 1018 | } | ||
| 1019 | } | ||
| 1020 | /* | ||
| 1021 | * The left side is perfect, trash the right side. | ||
| 1022 | */ | ||
| 1023 | else { | ||
| 1024 | xfs_btree_del_cursor(bno_cur_gt, | ||
| 1025 | XFS_BTREE_NOERROR); | ||
| 1026 | bno_cur_gt = NULL; | ||
| 1027 | } | ||
| 1028 | } | ||
| 1029 | /* | ||
| 1030 | * It's the right side that was found first, look left. | ||
| 1031 | */ | ||
| 1032 | else { | ||
| 1033 | /* | ||
| 1034 | * Fix up the length. | ||
| 1035 | */ | 1046 | */ |
| 1036 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1047 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
| 1037 | xfs_alloc_fix_len(args); | 1048 | xfs_alloc_fix_len(args); |
| 1038 | rlen = args->len; | 1049 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 1039 | gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, | ||
| 1040 | args->alignment, gtbno, gtlen, >new); | 1050 | args->alignment, gtbno, gtlen, >new); |
| 1041 | /* | 1051 | |
| 1042 | * Right side entry isn't perfect. | 1052 | error = xfs_alloc_find_best_extent(args, |
| 1043 | */ | 1053 | &bno_cur_gt, &bno_cur_lt, |
| 1044 | if (gtdiff) { | 1054 | gtdiff, <bno, <len, <lena, |
| 1045 | /* | 1055 | 1 /* search left */); |
| 1046 | * Look until we find a better one, run out of | ||
| 1047 | * space, or run off the end. | ||
| 1048 | */ | ||
| 1049 | while (bno_cur_lt && bno_cur_gt) { | ||
| 1050 | if ((error = xfs_alloc_get_rec( | ||
| 1051 | bno_cur_lt, <bno, | ||
| 1052 | <len, &i))) | ||
| 1053 | goto error0; | ||
| 1054 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
| 1055 | xfs_alloc_compute_aligned(ltbno, ltlen, | ||
| 1056 | args->alignment, args->minlen, | ||
| 1057 | <bnoa, <lena); | ||
| 1058 | /* | ||
| 1059 | * The right one is clearly better. | ||
| 1060 | */ | ||
| 1061 | if (ltbnoa <= args->agbno - gtdiff) { | ||
| 1062 | xfs_btree_del_cursor( | ||
| 1063 | bno_cur_lt, | ||
| 1064 | XFS_BTREE_NOERROR); | ||
| 1065 | bno_cur_lt = NULL; | ||
| 1066 | break; | ||
| 1067 | } | ||
| 1068 | /* | ||
| 1069 | * If we reach a big enough entry, | ||
| 1070 | * compare the two and pick the best. | ||
| 1071 | */ | ||
| 1072 | if (ltlena >= args->minlen) { | ||
| 1073 | args->len = XFS_EXTLEN_MIN( | ||
| 1074 | ltlena, args->maxlen); | ||
| 1075 | xfs_alloc_fix_len(args); | ||
| 1076 | rlen = args->len; | ||
| 1077 | ltdiff = xfs_alloc_compute_diff( | ||
| 1078 | args->agbno, rlen, | ||
| 1079 | args->alignment, | ||
| 1080 | ltbno, ltlen, <new); | ||
| 1081 | /* | ||
| 1082 | * Left side is better. | ||
| 1083 | */ | ||
| 1084 | if (ltdiff < gtdiff) { | ||
| 1085 | xfs_btree_del_cursor( | ||
| 1086 | bno_cur_gt, | ||
| 1087 | XFS_BTREE_NOERROR); | ||
| 1088 | bno_cur_gt = NULL; | ||
| 1089 | } | ||
| 1090 | /* | ||
| 1091 | * Right side is better. | ||
| 1092 | */ | ||
| 1093 | else { | ||
| 1094 | xfs_btree_del_cursor( | ||
| 1095 | bno_cur_lt, | ||
| 1096 | XFS_BTREE_NOERROR); | ||
| 1097 | bno_cur_lt = NULL; | ||
| 1098 | } | ||
| 1099 | break; | ||
| 1100 | } | ||
| 1101 | /* | ||
| 1102 | * Fell off the left end. | ||
| 1103 | */ | ||
| 1104 | if ((error = xfs_btree_decrement( | ||
| 1105 | bno_cur_lt, 0, &i))) | ||
| 1106 | goto error0; | ||
| 1107 | if (!i) { | ||
| 1108 | xfs_btree_del_cursor(bno_cur_lt, | ||
| 1109 | XFS_BTREE_NOERROR); | ||
| 1110 | bno_cur_lt = NULL; | ||
| 1111 | break; | ||
| 1112 | } | ||
| 1113 | } | ||
| 1114 | } | ||
| 1115 | /* | ||
| 1116 | * The right side is perfect, trash the left side. | ||
| 1117 | */ | ||
| 1118 | else { | ||
| 1119 | xfs_btree_del_cursor(bno_cur_lt, | ||
| 1120 | XFS_BTREE_NOERROR); | ||
| 1121 | bno_cur_lt = NULL; | ||
| 1122 | } | ||
| 1123 | } | 1056 | } |
| 1057 | |||
| 1058 | if (error) | ||
| 1059 | goto error0; | ||
| 1124 | } | 1060 | } |
| 1061 | |||
| 1125 | /* | 1062 | /* |
| 1126 | * If we couldn't get anything, give up. | 1063 | * If we couldn't get anything, give up. |
| 1127 | */ | 1064 | */ |
| @@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near( | |||
| 1130 | args->agbno = NULLAGBLOCK; | 1067 | args->agbno = NULLAGBLOCK; |
| 1131 | return 0; | 1068 | return 0; |
| 1132 | } | 1069 | } |
| 1070 | |||
| 1133 | /* | 1071 | /* |
| 1134 | * At this point we have selected a freespace entry, either to the | 1072 | * At this point we have selected a freespace entry, either to the |
| 1135 | * left or to the right. If it's on the right, copy all the | 1073 | * left or to the right. If it's on the right, copy all the |
| @@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near( | |||
| 1146 | j = 1; | 1084 | j = 1; |
| 1147 | } else | 1085 | } else |
| 1148 | j = 0; | 1086 | j = 0; |
| 1087 | |||
| 1149 | /* | 1088 | /* |
| 1150 | * Fix up the length and compute the useful address. | 1089 | * Fix up the length and compute the useful address. |
| 1151 | */ | 1090 | */ |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index a6cff8edcdb6..71e90dc2aeb1 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
| @@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) | |||
| 637 | * It didn't all fit, so we have to sort everything on hashval. | 637 | * It didn't all fit, so we have to sort everything on hashval. |
| 638 | */ | 638 | */ |
| 639 | sbsize = sf->hdr.count * sizeof(*sbuf); | 639 | sbsize = sf->hdr.count * sizeof(*sbuf); |
| 640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); | 640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); |
| 641 | 641 | ||
| 642 | /* | 642 | /* |
| 643 | * Scan the attribute list for the rest of the entries, storing | 643 | * Scan the attribute list for the rest of the entries, storing |
| @@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) | |||
| 2386 | args.dp = context->dp; | 2386 | args.dp = context->dp; |
| 2387 | args.whichfork = XFS_ATTR_FORK; | 2387 | args.whichfork = XFS_ATTR_FORK; |
| 2388 | args.valuelen = valuelen; | 2388 | args.valuelen = valuelen; |
| 2389 | args.value = kmem_alloc(valuelen, KM_SLEEP); | 2389 | args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); |
| 2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); | 2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); |
| 2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); | 2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); |
| 2392 | retval = xfs_attr_rmtval_get(&args); | 2392 | retval = xfs_attr_rmtval_get(&args); |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 04f9cca8da7e..2f9e97c128a0 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
| @@ -634,9 +634,8 @@ xfs_btree_read_bufl( | |||
| 634 | return error; | 634 | return error; |
| 635 | } | 635 | } |
| 636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | 636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); |
| 637 | if (bp != NULL) { | 637 | if (bp) |
| 638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | 638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); |
| 639 | } | ||
| 640 | *bpp = bp; | 639 | *bpp = bp; |
| 641 | return 0; | 640 | return 0; |
| 642 | } | 641 | } |
| @@ -944,13 +943,13 @@ xfs_btree_set_refs( | |||
| 944 | switch (cur->bc_btnum) { | 943 | switch (cur->bc_btnum) { |
| 945 | case XFS_BTNUM_BNO: | 944 | case XFS_BTNUM_BNO: |
| 946 | case XFS_BTNUM_CNT: | 945 | case XFS_BTNUM_CNT: |
| 947 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); | 946 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); |
| 948 | break; | 947 | break; |
| 949 | case XFS_BTNUM_INO: | 948 | case XFS_BTNUM_INO: |
| 950 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); | 949 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); |
| 951 | break; | 950 | break; |
| 952 | case XFS_BTNUM_BMAP: | 951 | case XFS_BTNUM_BMAP: |
| 953 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); | 952 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); |
| 954 | break; | 953 | break; |
| 955 | default: | 954 | default: |
| 956 | ASSERT(0); | 955 | ASSERT(0); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 2686d0d54c5b..ed2b65f3f8b9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -142,7 +142,7 @@ xfs_buf_item_log_check( | |||
| 142 | #endif | 142 | #endif |
| 143 | 143 | ||
| 144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | 144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); |
| 145 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); | 145 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
| 146 | 146 | ||
| 147 | /* | 147 | /* |
| 148 | * This returns the number of log iovecs needed to log the | 148 | * This returns the number of log iovecs needed to log the |
| @@ -450,7 +450,7 @@ xfs_buf_item_unpin( | |||
| 450 | * xfs_trans_ail_delete() drops the AIL lock. | 450 | * xfs_trans_ail_delete() drops the AIL lock. |
| 451 | */ | 451 | */ |
| 452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { | 452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
| 453 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); | 453 | xfs_buf_do_callbacks(bp); |
| 454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
| 455 | XFS_BUF_CLR_IODONE_FUNC(bp); | 455 | XFS_BUF_CLR_IODONE_FUNC(bp); |
| 456 | } else { | 456 | } else { |
| @@ -918,15 +918,26 @@ xfs_buf_attach_iodone( | |||
| 918 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); | 918 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); |
| 919 | } | 919 | } |
| 920 | 920 | ||
| 921 | /* | ||
| 922 | * We can have many callbacks on a buffer. Running the callbacks individually | ||
| 923 | * can cause a lot of contention on the AIL lock, so we allow for a single | ||
| 924 | * callback to be able to scan the remaining lip->li_bio_list for other items | ||
| 925 | * of the same type and callback to be processed in the first call. | ||
| 926 | * | ||
| 927 | * As a result, the loop walking the callback list below will also modify the | ||
| 928 | * list. it removes the first item from the list and then runs the callback. | ||
| 929 | * The loop then restarts from the new head of the list. This allows the | ||
| 930 | * callback to scan and modify the list attached to the buffer and we don't | ||
| 931 | * have to care about maintaining a next item pointer. | ||
| 932 | */ | ||
| 921 | STATIC void | 933 | STATIC void |
| 922 | xfs_buf_do_callbacks( | 934 | xfs_buf_do_callbacks( |
| 923 | xfs_buf_t *bp, | 935 | struct xfs_buf *bp) |
| 924 | xfs_log_item_t *lip) | ||
| 925 | { | 936 | { |
| 926 | xfs_log_item_t *nlip; | 937 | struct xfs_log_item *lip; |
| 927 | 938 | ||
| 928 | while (lip != NULL) { | 939 | while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { |
| 929 | nlip = lip->li_bio_list; | 940 | XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); |
| 930 | ASSERT(lip->li_cb != NULL); | 941 | ASSERT(lip->li_cb != NULL); |
| 931 | /* | 942 | /* |
| 932 | * Clear the next pointer so we don't have any | 943 | * Clear the next pointer so we don't have any |
| @@ -936,7 +947,6 @@ xfs_buf_do_callbacks( | |||
| 936 | */ | 947 | */ |
| 937 | lip->li_bio_list = NULL; | 948 | lip->li_bio_list = NULL; |
| 938 | lip->li_cb(bp, lip); | 949 | lip->li_cb(bp, lip); |
| 939 | lip = nlip; | ||
| 940 | } | 950 | } |
| 941 | } | 951 | } |
| 942 | 952 | ||
| @@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks( | |||
| 970 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 980 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); |
| 971 | XFS_BUF_SUPER_STALE(bp); | 981 | XFS_BUF_SUPER_STALE(bp); |
| 972 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | 982 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
| 973 | xfs_buf_do_callbacks(bp, lip); | 983 | xfs_buf_do_callbacks(bp); |
| 974 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 984 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
| 975 | XFS_BUF_CLR_IODONE_FUNC(bp); | 985 | XFS_BUF_CLR_IODONE_FUNC(bp); |
| 976 | xfs_buf_ioend(bp, 0); | 986 | xfs_buf_ioend(bp, 0); |
| @@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks( | |||
| 1029 | return; | 1039 | return; |
| 1030 | } | 1040 | } |
| 1031 | 1041 | ||
| 1032 | xfs_buf_do_callbacks(bp, lip); | 1042 | xfs_buf_do_callbacks(bp); |
| 1033 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1043 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
| 1034 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1044 | XFS_BUF_CLR_IODONE_FUNC(bp); |
| 1035 | xfs_buf_ioend(bp, 0); | 1045 | xfs_buf_ioend(bp, 0); |
| @@ -1063,7 +1073,7 @@ xfs_buf_error_relse( | |||
| 1063 | * We have to unpin the pinned buffers so do the | 1073 | * We have to unpin the pinned buffers so do the |
| 1064 | * callbacks. | 1074 | * callbacks. |
| 1065 | */ | 1075 | */ |
| 1066 | xfs_buf_do_callbacks(bp, lip); | 1076 | xfs_buf_do_callbacks(bp); |
| 1067 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1077 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
| 1068 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1078 | XFS_BUF_CLR_IODONE_FUNC(bp); |
| 1069 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1079 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); |
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 0e2ed43f16c7..b6ecd2061e7c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
| @@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item { | |||
| 105 | xfs_buf_log_format_t bli_format; /* in-log header */ | 105 | xfs_buf_log_format_t bli_format; /* in-log header */ |
| 106 | } xfs_buf_log_item_t; | 106 | } xfs_buf_log_item_t; |
| 107 | 107 | ||
| 108 | /* | ||
| 109 | * This structure is used during recovery to record the buf log | ||
| 110 | * items which have been canceled and should not be replayed. | ||
| 111 | */ | ||
| 112 | typedef struct xfs_buf_cancel { | ||
| 113 | xfs_daddr_t bc_blkno; | ||
| 114 | uint bc_len; | ||
| 115 | int bc_refcount; | ||
| 116 | struct xfs_buf_cancel *bc_next; | ||
| 117 | } xfs_buf_cancel_t; | ||
| 118 | |||
| 119 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); | 108 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); |
| 120 | void xfs_buf_item_relse(struct xfs_buf *); | 109 | void xfs_buf_item_relse(struct xfs_buf *); |
| 121 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); | 110 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index a55e687bf562..75f2ef60e579 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
| @@ -48,6 +48,28 @@ xfs_efi_item_free( | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | /* | 50 | /* |
| 51 | * Freeing the efi requires that we remove it from the AIL if it has already | ||
| 52 | * been placed there. However, the EFI may not yet have been placed in the AIL | ||
| 53 | * when called by xfs_efi_release() from EFD processing due to the ordering of | ||
| 54 | * committed vs unpin operations in bulk insert operations. Hence the | ||
| 55 | * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees | ||
| 56 | * the EFI. | ||
| 57 | */ | ||
| 58 | STATIC void | ||
| 59 | __xfs_efi_release( | ||
| 60 | struct xfs_efi_log_item *efip) | ||
| 61 | { | ||
| 62 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | ||
| 63 | |||
| 64 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { | ||
| 65 | spin_lock(&ailp->xa_lock); | ||
| 66 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
| 67 | xfs_trans_ail_delete(ailp, &efip->efi_item); | ||
| 68 | xfs_efi_item_free(efip); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 51 | * This returns the number of iovecs needed to log the given efi item. | 73 | * This returns the number of iovecs needed to log the given efi item. |
| 52 | * We only need 1 iovec for an efi item. It just logs the efi_log_format | 74 | * We only need 1 iovec for an efi item. It just logs the efi_log_format |
| 53 | * structure. | 75 | * structure. |
| @@ -74,7 +96,8 @@ xfs_efi_item_format( | |||
| 74 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 96 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
| 75 | uint size; | 97 | uint size; |
| 76 | 98 | ||
| 77 | ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); | 99 | ASSERT(atomic_read(&efip->efi_next_extent) == |
| 100 | efip->efi_format.efi_nextents); | ||
| 78 | 101 | ||
| 79 | efip->efi_format.efi_type = XFS_LI_EFI; | 102 | efip->efi_format.efi_type = XFS_LI_EFI; |
| 80 | 103 | ||
| @@ -99,10 +122,12 @@ xfs_efi_item_pin( | |||
| 99 | } | 122 | } |
| 100 | 123 | ||
| 101 | /* | 124 | /* |
| 102 | * While EFIs cannot really be pinned, the unpin operation is the | 125 | * While EFIs cannot really be pinned, the unpin operation is the last place at |
| 103 | * last place at which the EFI is manipulated during a transaction. | 126 | * which the EFI is manipulated during a transaction. If we are being asked to |
| 104 | * Here we coordinate with xfs_efi_cancel() to determine who gets to | 127 | * remove the EFI it's because the transaction has been cancelled and by |
| 105 | * free the EFI. | 128 | * definition that means the EFI cannot be in the AIL so remove it from the |
| 129 | * transaction and free it. Otherwise coordinate with xfs_efi_release() (via | ||
| 130 | * XFS_EFI_COMMITTED) to determine who gets to free the EFI. | ||
| 106 | */ | 131 | */ |
| 107 | STATIC void | 132 | STATIC void |
| 108 | xfs_efi_item_unpin( | 133 | xfs_efi_item_unpin( |
| @@ -110,20 +135,14 @@ xfs_efi_item_unpin( | |||
| 110 | int remove) | 135 | int remove) |
| 111 | { | 136 | { |
| 112 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 137 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
| 113 | struct xfs_ail *ailp = lip->li_ailp; | ||
| 114 | |||
| 115 | spin_lock(&ailp->xa_lock); | ||
| 116 | if (efip->efi_flags & XFS_EFI_CANCELED) { | ||
| 117 | if (remove) | ||
| 118 | xfs_trans_del_item(lip); | ||
| 119 | 138 | ||
| 120 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 139 | if (remove) { |
| 121 | xfs_trans_ail_delete(ailp, lip); | 140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); |
| 141 | xfs_trans_del_item(lip); | ||
| 122 | xfs_efi_item_free(efip); | 142 | xfs_efi_item_free(efip); |
| 123 | } else { | 143 | return; |
| 124 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
| 125 | spin_unlock(&ailp->xa_lock); | ||
| 126 | } | 144 | } |
| 145 | __xfs_efi_release(efip); | ||
| 127 | } | 146 | } |
| 128 | 147 | ||
| 129 | /* | 148 | /* |
| @@ -152,16 +171,20 @@ xfs_efi_item_unlock( | |||
| 152 | } | 171 | } |
| 153 | 172 | ||
| 154 | /* | 173 | /* |
| 155 | * The EFI is logged only once and cannot be moved in the log, so | 174 | * The EFI is logged only once and cannot be moved in the log, so simply return |
| 156 | * simply return the lsn at which it's been logged. The canceled | 175 | * the lsn at which it's been logged. For bulk transaction committed |
| 157 | * flag is not paid any attention here. Checking for that is delayed | 176 | * processing, the EFI may be processed but not yet unpinned prior to the EFD |
| 158 | * until the EFI is unpinned. | 177 | * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected |
| 178 | * when processing the EFD. | ||
| 159 | */ | 179 | */ |
| 160 | STATIC xfs_lsn_t | 180 | STATIC xfs_lsn_t |
| 161 | xfs_efi_item_committed( | 181 | xfs_efi_item_committed( |
| 162 | struct xfs_log_item *lip, | 182 | struct xfs_log_item *lip, |
| 163 | xfs_lsn_t lsn) | 183 | xfs_lsn_t lsn) |
| 164 | { | 184 | { |
| 185 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | ||
| 186 | |||
| 187 | set_bit(XFS_EFI_COMMITTED, &efip->efi_flags); | ||
| 165 | return lsn; | 188 | return lsn; |
| 166 | } | 189 | } |
| 167 | 190 | ||
| @@ -230,6 +253,7 @@ xfs_efi_init( | |||
| 230 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); | 253 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); |
| 231 | efip->efi_format.efi_nextents = nextents; | 254 | efip->efi_format.efi_nextents = nextents; |
| 232 | efip->efi_format.efi_id = (__psint_t)(void*)efip; | 255 | efip->efi_format.efi_id = (__psint_t)(void*)efip; |
| 256 | atomic_set(&efip->efi_next_extent, 0); | ||
| 233 | 257 | ||
| 234 | return efip; | 258 | return efip; |
| 235 | } | 259 | } |
| @@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) | |||
| 289 | } | 313 | } |
| 290 | 314 | ||
| 291 | /* | 315 | /* |
| 292 | * This is called by the efd item code below to release references to | 316 | * This is called by the efd item code below to release references to the given |
| 293 | * the given efi item. Each efd calls this with the number of | 317 | * efi item. Each efd calls this with the number of extents that it has |
| 294 | * extents that it has logged, and when the sum of these reaches | 318 | * logged, and when the sum of these reaches the total number of extents logged |
| 295 | * the total number of extents logged by this efi item we can free | 319 | * by this efi item we can free the efi item. |
| 296 | * the efi item. | ||
| 297 | * | ||
| 298 | * Freeing the efi item requires that we remove it from the AIL. | ||
| 299 | * We'll use the AIL lock to protect our counters as well as | ||
| 300 | * the removal from the AIL. | ||
| 301 | */ | 320 | */ |
| 302 | void | 321 | void |
| 303 | xfs_efi_release(xfs_efi_log_item_t *efip, | 322 | xfs_efi_release(xfs_efi_log_item_t *efip, |
| 304 | uint nextents) | 323 | uint nextents) |
| 305 | { | 324 | { |
| 306 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | 325 | ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); |
| 307 | int extents_left; | 326 | if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) |
| 308 | 327 | __xfs_efi_release(efip); | |
| 309 | ASSERT(efip->efi_next_extent > 0); | ||
| 310 | ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); | ||
| 311 | |||
| 312 | spin_lock(&ailp->xa_lock); | ||
| 313 | ASSERT(efip->efi_next_extent >= nextents); | ||
| 314 | efip->efi_next_extent -= nextents; | ||
| 315 | extents_left = efip->efi_next_extent; | ||
| 316 | if (extents_left == 0) { | ||
| 317 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
| 318 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); | ||
| 319 | xfs_efi_item_free(efip); | ||
| 320 | } else { | ||
| 321 | spin_unlock(&ailp->xa_lock); | ||
| 322 | } | ||
| 323 | } | 328 | } |
| 324 | 329 | ||
| 325 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) | 330 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) |
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 0d22c56fdf64..375f68e42531 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h | |||
| @@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 { | |||
| 111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 | 111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 |
| 112 | 112 | ||
| 113 | /* | 113 | /* |
| 114 | * Define EFI flags. | 114 | * Define EFI flag bits. Manipulated by set/clear/test_bit operators. |
| 115 | */ | 115 | */ |
| 116 | #define XFS_EFI_RECOVERED 0x1 | 116 | #define XFS_EFI_RECOVERED 1 |
| 117 | #define XFS_EFI_COMMITTED 0x2 | 117 | #define XFS_EFI_COMMITTED 2 |
| 118 | #define XFS_EFI_CANCELED 0x4 | ||
| 119 | 118 | ||
| 120 | /* | 119 | /* |
| 121 | * This is the "extent free intention" log item. It is used | 120 | * This is the "extent free intention" log item. It is used |
| @@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 { | |||
| 125 | */ | 124 | */ |
| 126 | typedef struct xfs_efi_log_item { | 125 | typedef struct xfs_efi_log_item { |
| 127 | xfs_log_item_t efi_item; | 126 | xfs_log_item_t efi_item; |
| 128 | uint efi_flags; /* misc flags */ | 127 | atomic_t efi_next_extent; |
| 129 | uint efi_next_extent; | 128 | unsigned long efi_flags; /* misc flags */ |
| 130 | xfs_efi_log_format_t efi_format; | 129 | xfs_efi_log_format_t efi_format; |
| 131 | } xfs_efi_log_item_t; | 130 | } xfs_efi_log_item_t; |
| 132 | 131 | ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a7c116e814af..f56d30e8040c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
| @@ -374,6 +374,7 @@ xfs_growfs_data_private( | |||
| 374 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; | 374 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; |
| 375 | } else | 375 | } else |
| 376 | mp->m_maxicount = 0; | 376 | mp->m_maxicount = 0; |
| 377 | xfs_set_low_space_thresholds(mp); | ||
| 377 | 378 | ||
| 378 | /* update secondary superblocks. */ | 379 | /* update secondary superblocks. */ |
| 379 | for (agno = 1; agno < nagcount; agno++) { | 380 | for (agno = 1; agno < nagcount; agno++) { |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d7de5a3f7867..cb9b6d1469f7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
| @@ -43,6 +43,17 @@ | |||
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | /* | 45 | /* |
| 46 | * Define xfs inode iolock lockdep classes. We need to ensure that all active | ||
| 47 | * inodes are considered the same for lockdep purposes, including inodes that | ||
| 48 | * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to | ||
| 49 | * guarantee the locks are considered the same when there are multiple lock | ||
| 50 | * initialisation siteѕ. Also, define a reclaimable inode class so it is | ||
| 51 | * obvious in lockdep reports which class the report is against. | ||
| 52 | */ | ||
| 53 | static struct lock_class_key xfs_iolock_active; | ||
| 54 | struct lock_class_key xfs_iolock_reclaimable; | ||
| 55 | |||
| 56 | /* | ||
| 46 | * Allocate and initialise an xfs_inode. | 57 | * Allocate and initialise an xfs_inode. |
| 47 | */ | 58 | */ |
| 48 | STATIC struct xfs_inode * | 59 | STATIC struct xfs_inode * |
| @@ -69,8 +80,11 @@ xfs_inode_alloc( | |||
| 69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
| 70 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
| 71 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
| 83 | ASSERT(ip->i_ino == 0); | ||
| 72 | 84 | ||
| 73 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
| 86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
| 87 | &xfs_iolock_active, "xfs_iolock_active"); | ||
| 74 | 88 | ||
| 75 | /* initialise the xfs inode */ | 89 | /* initialise the xfs inode */ |
| 76 | ip->i_ino = ino; | 90 | ip->i_ino = ino; |
| @@ -85,9 +99,6 @@ xfs_inode_alloc( | |||
| 85 | ip->i_size = 0; | 99 | ip->i_size = 0; |
| 86 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
| 87 | 101 | ||
| 88 | /* prevent anyone from using this yet */ | ||
| 89 | VFS_I(ip)->i_state = I_NEW; | ||
| 90 | |||
| 91 | return ip; | 102 | return ip; |
| 92 | } | 103 | } |
| 93 | 104 | ||
| @@ -145,7 +156,18 @@ xfs_inode_free( | |||
| 145 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 156 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
| 146 | ASSERT(completion_done(&ip->i_flush)); | 157 | ASSERT(completion_done(&ip->i_flush)); |
| 147 | 158 | ||
| 148 | call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); | 159 | /* |
| 160 | * Because we use RCU freeing we need to ensure the inode always | ||
| 161 | * appears to be reclaimed with an invalid inode number when in the | ||
| 162 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
| 163 | * races. | ||
| 164 | */ | ||
| 165 | spin_lock(&ip->i_flags_lock); | ||
| 166 | ip->i_flags = XFS_IRECLAIM; | ||
| 167 | ip->i_ino = 0; | ||
| 168 | spin_unlock(&ip->i_flags_lock); | ||
| 169 | |||
| 170 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
| 149 | } | 171 | } |
| 150 | 172 | ||
| 151 | /* | 173 | /* |
| @@ -155,14 +177,29 @@ static int | |||
| 155 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
| 156 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
| 157 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
| 180 | xfs_ino_t ino, | ||
| 158 | int flags, | 181 | int flags, |
| 159 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
| 160 | { | 183 | { |
| 161 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
| 162 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
| 163 | int error; | 186 | int error; |
| 164 | 187 | ||
| 188 | /* | ||
| 189 | * check for re-use of an inode within an RCU grace period due to the | ||
| 190 | * radix tree nodes not being updated yet. We monitor for this by | ||
| 191 | * setting the inode number to zero before freeing the inode structure. | ||
| 192 | * If the inode has been reallocated and set up, then the inode number | ||
| 193 | * will not match, so check for that, too. | ||
| 194 | */ | ||
| 165 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
| 196 | if (ip->i_ino != ino) { | ||
| 197 | trace_xfs_iget_skip(ip); | ||
| 198 | XFS_STATS_INC(xs_ig_frecycle); | ||
| 199 | error = EAGAIN; | ||
| 200 | goto out_error; | ||
| 201 | } | ||
| 202 | |||
| 166 | 203 | ||
| 167 | /* | 204 | /* |
| 168 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
| @@ -205,7 +242,7 @@ xfs_iget_cache_hit( | |||
| 205 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
| 206 | 243 | ||
| 207 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
| 208 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
| 209 | 246 | ||
| 210 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
| 211 | if (error) { | 248 | if (error) { |
| @@ -213,7 +250,7 @@ xfs_iget_cache_hit( | |||
| 213 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
| 214 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
| 215 | */ | 252 | */ |
| 216 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
| 217 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
| 218 | 255 | ||
| 219 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~XFS_INEW; |
| @@ -223,14 +260,20 @@ xfs_iget_cache_hit( | |||
| 223 | goto out_error; | 260 | goto out_error; |
| 224 | } | 261 | } |
| 225 | 262 | ||
| 226 | write_lock(&pag->pag_ici_lock); | 263 | spin_lock(&pag->pag_ici_lock); |
| 227 | spin_lock(&ip->i_flags_lock); | 264 | spin_lock(&ip->i_flags_lock); |
| 228 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); | 265 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); |
| 229 | ip->i_flags |= XFS_INEW; | 266 | ip->i_flags |= XFS_INEW; |
| 230 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 267 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
| 231 | inode->i_state = I_NEW; | 268 | inode->i_state = I_NEW; |
| 269 | |||
| 270 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
| 271 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
| 272 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
| 273 | &xfs_iolock_active, "xfs_iolock_active"); | ||
| 274 | |||
| 232 | spin_unlock(&ip->i_flags_lock); | 275 | spin_unlock(&ip->i_flags_lock); |
| 233 | write_unlock(&pag->pag_ici_lock); | 276 | spin_unlock(&pag->pag_ici_lock); |
| 234 | } else { | 277 | } else { |
| 235 | /* If the VFS inode is being torn down, pause and try again. */ | 278 | /* If the VFS inode is being torn down, pause and try again. */ |
| 236 | if (!igrab(inode)) { | 279 | if (!igrab(inode)) { |
| @@ -241,7 +284,7 @@ xfs_iget_cache_hit( | |||
| 241 | 284 | ||
| 242 | /* We've got a live one. */ | 285 | /* We've got a live one. */ |
| 243 | spin_unlock(&ip->i_flags_lock); | 286 | spin_unlock(&ip->i_flags_lock); |
| 244 | read_unlock(&pag->pag_ici_lock); | 287 | rcu_read_unlock(); |
| 245 | trace_xfs_iget_hit(ip); | 288 | trace_xfs_iget_hit(ip); |
| 246 | } | 289 | } |
| 247 | 290 | ||
| @@ -255,7 +298,7 @@ xfs_iget_cache_hit( | |||
| 255 | 298 | ||
| 256 | out_error: | 299 | out_error: |
| 257 | spin_unlock(&ip->i_flags_lock); | 300 | spin_unlock(&ip->i_flags_lock); |
| 258 | read_unlock(&pag->pag_ici_lock); | 301 | rcu_read_unlock(); |
| 259 | return error; | 302 | return error; |
| 260 | } | 303 | } |
| 261 | 304 | ||
| @@ -308,7 +351,7 @@ xfs_iget_cache_miss( | |||
| 308 | BUG(); | 351 | BUG(); |
| 309 | } | 352 | } |
| 310 | 353 | ||
| 311 | write_lock(&pag->pag_ici_lock); | 354 | spin_lock(&pag->pag_ici_lock); |
| 312 | 355 | ||
| 313 | /* insert the new inode */ | 356 | /* insert the new inode */ |
| 314 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 357 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
| @@ -323,14 +366,14 @@ xfs_iget_cache_miss( | |||
| 323 | ip->i_udquot = ip->i_gdquot = NULL; | 366 | ip->i_udquot = ip->i_gdquot = NULL; |
| 324 | xfs_iflags_set(ip, XFS_INEW); | 367 | xfs_iflags_set(ip, XFS_INEW); |
| 325 | 368 | ||
| 326 | write_unlock(&pag->pag_ici_lock); | 369 | spin_unlock(&pag->pag_ici_lock); |
| 327 | radix_tree_preload_end(); | 370 | radix_tree_preload_end(); |
| 328 | 371 | ||
| 329 | *ipp = ip; | 372 | *ipp = ip; |
| 330 | return 0; | 373 | return 0; |
| 331 | 374 | ||
| 332 | out_preload_end: | 375 | out_preload_end: |
| 333 | write_unlock(&pag->pag_ici_lock); | 376 | spin_unlock(&pag->pag_ici_lock); |
| 334 | radix_tree_preload_end(); | 377 | radix_tree_preload_end(); |
| 335 | if (lock_flags) | 378 | if (lock_flags) |
| 336 | xfs_iunlock(ip, lock_flags); | 379 | xfs_iunlock(ip, lock_flags); |
| @@ -377,7 +420,7 @@ xfs_iget( | |||
| 377 | xfs_agino_t agino; | 420 | xfs_agino_t agino; |
| 378 | 421 | ||
| 379 | /* reject inode numbers outside existing AGs */ | 422 | /* reject inode numbers outside existing AGs */ |
| 380 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 423 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
| 381 | return EINVAL; | 424 | return EINVAL; |
| 382 | 425 | ||
| 383 | /* get the perag structure and ensure that it's inode capable */ | 426 | /* get the perag structure and ensure that it's inode capable */ |
| @@ -386,15 +429,15 @@ xfs_iget( | |||
| 386 | 429 | ||
| 387 | again: | 430 | again: |
| 388 | error = 0; | 431 | error = 0; |
| 389 | read_lock(&pag->pag_ici_lock); | 432 | rcu_read_lock(); |
| 390 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 433 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
| 391 | 434 | ||
| 392 | if (ip) { | 435 | if (ip) { |
| 393 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 436 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
| 394 | if (error) | 437 | if (error) |
| 395 | goto out_error_or_again; | 438 | goto out_error_or_again; |
| 396 | } else { | 439 | } else { |
| 397 | read_unlock(&pag->pag_ici_lock); | 440 | rcu_read_unlock(); |
| 398 | XFS_STATS_INC(xs_ig_missed); | 441 | XFS_STATS_INC(xs_ig_missed); |
| 399 | 442 | ||
| 400 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 443 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 108c7a085f94..be7cf625421f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -887,7 +887,7 @@ xfs_iread( | |||
| 887 | * around for a while. This helps to keep recently accessed | 887 | * around for a while. This helps to keep recently accessed |
| 888 | * meta-data in-core longer. | 888 | * meta-data in-core longer. |
| 889 | */ | 889 | */ |
| 890 | XFS_BUF_SET_REF(bp, XFS_INO_REF); | 890 | xfs_buf_set_ref(bp, XFS_INO_REF); |
| 891 | 891 | ||
| 892 | /* | 892 | /* |
| 893 | * Use xfs_trans_brelse() to release the buffer containing the | 893 | * Use xfs_trans_brelse() to release the buffer containing the |
| @@ -2000,17 +2000,33 @@ xfs_ifree_cluster( | |||
| 2000 | */ | 2000 | */ |
| 2001 | for (i = 0; i < ninodes; i++) { | 2001 | for (i = 0; i < ninodes; i++) { |
| 2002 | retry: | 2002 | retry: |
| 2003 | read_lock(&pag->pag_ici_lock); | 2003 | rcu_read_lock(); |
| 2004 | ip = radix_tree_lookup(&pag->pag_ici_root, | 2004 | ip = radix_tree_lookup(&pag->pag_ici_root, |
| 2005 | XFS_INO_TO_AGINO(mp, (inum + i))); | 2005 | XFS_INO_TO_AGINO(mp, (inum + i))); |
| 2006 | 2006 | ||
| 2007 | /* Inode not in memory or stale, nothing to do */ | 2007 | /* Inode not in memory, nothing to do */ |
| 2008 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { | 2008 | if (!ip) { |
| 2009 | read_unlock(&pag->pag_ici_lock); | 2009 | rcu_read_unlock(); |
| 2010 | continue; | 2010 | continue; |
| 2011 | } | 2011 | } |
| 2012 | 2012 | ||
| 2013 | /* | 2013 | /* |
| 2014 | * because this is an RCU protected lookup, we could | ||
| 2015 | * find a recently freed or even reallocated inode | ||
| 2016 | * during the lookup. We need to check under the | ||
| 2017 | * i_flags_lock for a valid inode here. Skip it if it | ||
| 2018 | * is not valid, the wrong inode or stale. | ||
| 2019 | */ | ||
| 2020 | spin_lock(&ip->i_flags_lock); | ||
| 2021 | if (ip->i_ino != inum + i || | ||
| 2022 | __xfs_iflags_test(ip, XFS_ISTALE)) { | ||
| 2023 | spin_unlock(&ip->i_flags_lock); | ||
| 2024 | rcu_read_unlock(); | ||
| 2025 | continue; | ||
| 2026 | } | ||
| 2027 | spin_unlock(&ip->i_flags_lock); | ||
| 2028 | |||
| 2029 | /* | ||
| 2014 | * Don't try to lock/unlock the current inode, but we | 2030 | * Don't try to lock/unlock the current inode, but we |
| 2015 | * _cannot_ skip the other inodes that we did not find | 2031 | * _cannot_ skip the other inodes that we did not find |
| 2016 | * in the list attached to the buffer and are not | 2032 | * in the list attached to the buffer and are not |
| @@ -2019,11 +2035,11 @@ retry: | |||
| 2019 | */ | 2035 | */ |
| 2020 | if (ip != free_ip && | 2036 | if (ip != free_ip && |
| 2021 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2037 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
| 2022 | read_unlock(&pag->pag_ici_lock); | 2038 | rcu_read_unlock(); |
| 2023 | delay(1); | 2039 | delay(1); |
| 2024 | goto retry; | 2040 | goto retry; |
| 2025 | } | 2041 | } |
| 2026 | read_unlock(&pag->pag_ici_lock); | 2042 | rcu_read_unlock(); |
| 2027 | 2043 | ||
| 2028 | xfs_iflock(ip); | 2044 | xfs_iflock(ip); |
| 2029 | xfs_iflags_set(ip, XFS_ISTALE); | 2045 | xfs_iflags_set(ip, XFS_ISTALE); |
| @@ -2629,7 +2645,7 @@ xfs_iflush_cluster( | |||
| 2629 | 2645 | ||
| 2630 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2646 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
| 2631 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2647 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
| 2632 | read_lock(&pag->pag_ici_lock); | 2648 | rcu_read_lock(); |
| 2633 | /* really need a gang lookup range call here */ | 2649 | /* really need a gang lookup range call here */ |
| 2634 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | 2650 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, |
| 2635 | first_index, inodes_per_cluster); | 2651 | first_index, inodes_per_cluster); |
| @@ -2640,9 +2656,21 @@ xfs_iflush_cluster( | |||
| 2640 | iq = ilist[i]; | 2656 | iq = ilist[i]; |
| 2641 | if (iq == ip) | 2657 | if (iq == ip) |
| 2642 | continue; | 2658 | continue; |
| 2643 | /* if the inode lies outside this cluster, we're done. */ | 2659 | |
| 2644 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | 2660 | /* |
| 2645 | break; | 2661 | * because this is an RCU protected lookup, we could find a |
| 2662 | * recently freed or even reallocated inode during the lookup. | ||
| 2663 | * We need to check under the i_flags_lock for a valid inode | ||
| 2664 | * here. Skip it if it is not valid or the wrong inode. | ||
| 2665 | */ | ||
| 2666 | spin_lock(&ip->i_flags_lock); | ||
| 2667 | if (!ip->i_ino || | ||
| 2668 | (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { | ||
| 2669 | spin_unlock(&ip->i_flags_lock); | ||
| 2670 | continue; | ||
| 2671 | } | ||
| 2672 | spin_unlock(&ip->i_flags_lock); | ||
| 2673 | |||
| 2646 | /* | 2674 | /* |
| 2647 | * Do an un-protected check to see if the inode is dirty and | 2675 | * Do an un-protected check to see if the inode is dirty and |
| 2648 | * is a candidate for flushing. These checks will be repeated | 2676 | * is a candidate for flushing. These checks will be repeated |
| @@ -2692,7 +2720,7 @@ xfs_iflush_cluster( | |||
| 2692 | } | 2720 | } |
| 2693 | 2721 | ||
| 2694 | out_free: | 2722 | out_free: |
| 2695 | read_unlock(&pag->pag_ici_lock); | 2723 | rcu_read_unlock(); |
| 2696 | kmem_free(ilist); | 2724 | kmem_free(ilist); |
| 2697 | out_put: | 2725 | out_put: |
| 2698 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
| @@ -2704,7 +2732,7 @@ cluster_corrupt_out: | |||
| 2704 | * Corruption detected in the clustering loop. Invalidate the | 2732 | * Corruption detected in the clustering loop. Invalidate the |
| 2705 | * inode buffer and shut down the filesystem. | 2733 | * inode buffer and shut down the filesystem. |
| 2706 | */ | 2734 | */ |
| 2707 | read_unlock(&pag->pag_ici_lock); | 2735 | rcu_read_unlock(); |
| 2708 | /* | 2736 | /* |
| 2709 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2737 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
| 2710 | * brelse can handle it with no problems. If not, shut down the | 2738 | * brelse can handle it with no problems. If not, shut down the |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fb2ca2e4cdc9..5c95fa8ec11d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
| @@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
| 376 | /* | 376 | /* |
| 377 | * In-core inode flags. | 377 | * In-core inode flags. |
| 378 | */ | 378 | */ |
| 379 | #define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ | 379 | #define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ |
| 380 | #define XFS_ISTALE 0x0002 /* inode has been staled */ | 380 | #define XFS_ISTALE 0x0002 /* inode has been staled */ |
| 381 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ | 381 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ |
| 382 | #define XFS_INEW 0x0008 /* inode has just been allocated */ | 382 | #define XFS_INEW 0x0008 /* inode has just been allocated */ |
| 383 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ | 383 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ |
| 384 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ | 384 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ |
| 385 | #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ | ||
| 385 | 386 | ||
| 386 | /* | 387 | /* |
| 387 | * Flags for inode locking. | 388 | * Flags for inode locking. |
| @@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
| 438 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) | 439 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) |
| 439 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) | 440 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) |
| 440 | 441 | ||
| 442 | extern struct lock_class_key xfs_iolock_reclaimable; | ||
| 443 | |||
| 441 | /* | 444 | /* |
| 442 | * Flags for xfs_itruncate_start(). | 445 | * Flags for xfs_itruncate_start(). |
| 443 | */ | 446 | */ |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c8d30c453c3..fd4f398bd6f1 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
| @@ -842,15 +842,64 @@ xfs_inode_item_destroy( | |||
| 842 | * flushed to disk. It is responsible for removing the inode item | 842 | * flushed to disk. It is responsible for removing the inode item |
| 843 | * from the AIL if it has not been re-logged, and unlocking the inode's | 843 | * from the AIL if it has not been re-logged, and unlocking the inode's |
| 844 | * flush lock. | 844 | * flush lock. |
| 845 | * | ||
| 846 | * To reduce AIL lock traffic as much as possible, we scan the buffer log item | ||
| 847 | * list for other inodes that will run this function. We remove them from the | ||
| 848 | * buffer list so we can process all the inode IO completions in one AIL lock | ||
| 849 | * traversal. | ||
| 845 | */ | 850 | */ |
| 846 | void | 851 | void |
| 847 | xfs_iflush_done( | 852 | xfs_iflush_done( |
| 848 | struct xfs_buf *bp, | 853 | struct xfs_buf *bp, |
| 849 | struct xfs_log_item *lip) | 854 | struct xfs_log_item *lip) |
| 850 | { | 855 | { |
| 851 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 856 | struct xfs_inode_log_item *iip; |
| 852 | xfs_inode_t *ip = iip->ili_inode; | 857 | struct xfs_log_item *blip; |
| 858 | struct xfs_log_item *next; | ||
| 859 | struct xfs_log_item *prev; | ||
| 853 | struct xfs_ail *ailp = lip->li_ailp; | 860 | struct xfs_ail *ailp = lip->li_ailp; |
| 861 | int need_ail = 0; | ||
| 862 | |||
| 863 | /* | ||
| 864 | * Scan the buffer IO completions for other inodes being completed and | ||
| 865 | * attach them to the current inode log item. | ||
| 866 | */ | ||
| 867 | blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
| 868 | prev = NULL; | ||
| 869 | while (blip != NULL) { | ||
| 870 | if (lip->li_cb != xfs_iflush_done) { | ||
| 871 | prev = blip; | ||
| 872 | blip = blip->li_bio_list; | ||
| 873 | continue; | ||
| 874 | } | ||
| 875 | |||
| 876 | /* remove from list */ | ||
| 877 | next = blip->li_bio_list; | ||
| 878 | if (!prev) { | ||
| 879 | XFS_BUF_SET_FSPRIVATE(bp, next); | ||
| 880 | } else { | ||
| 881 | prev->li_bio_list = next; | ||
| 882 | } | ||
| 883 | |||
| 884 | /* add to current list */ | ||
| 885 | blip->li_bio_list = lip->li_bio_list; | ||
| 886 | lip->li_bio_list = blip; | ||
| 887 | |||
| 888 | /* | ||
| 889 | * while we have the item, do the unlocked check for needing | ||
| 890 | * the AIL lock. | ||
| 891 | */ | ||
| 892 | iip = INODE_ITEM(blip); | ||
| 893 | if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) | ||
| 894 | need_ail++; | ||
| 895 | |||
| 896 | blip = next; | ||
| 897 | } | ||
| 898 | |||
| 899 | /* make sure we capture the state of the initial inode. */ | ||
| 900 | iip = INODE_ITEM(lip); | ||
| 901 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) | ||
| 902 | need_ail++; | ||
| 854 | 903 | ||
| 855 | /* | 904 | /* |
| 856 | * We only want to pull the item from the AIL if it is | 905 | * We only want to pull the item from the AIL if it is |
| @@ -861,28 +910,37 @@ xfs_iflush_done( | |||
| 861 | * the lock since it's cheaper, and then we recheck while | 910 | * the lock since it's cheaper, and then we recheck while |
| 862 | * holding the lock before removing the inode from the AIL. | 911 | * holding the lock before removing the inode from the AIL. |
| 863 | */ | 912 | */ |
| 864 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { | 913 | if (need_ail) { |
| 914 | struct xfs_log_item *log_items[need_ail]; | ||
| 915 | int i = 0; | ||
| 865 | spin_lock(&ailp->xa_lock); | 916 | spin_lock(&ailp->xa_lock); |
| 866 | if (lip->li_lsn == iip->ili_flush_lsn) { | 917 | for (blip = lip; blip; blip = blip->li_bio_list) { |
| 867 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 918 | iip = INODE_ITEM(blip); |
| 868 | xfs_trans_ail_delete(ailp, lip); | 919 | if (iip->ili_logged && |
| 869 | } else { | 920 | blip->li_lsn == iip->ili_flush_lsn) { |
| 870 | spin_unlock(&ailp->xa_lock); | 921 | log_items[i++] = blip; |
| 922 | } | ||
| 923 | ASSERT(i <= need_ail); | ||
| 871 | } | 924 | } |
| 925 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ | ||
| 926 | xfs_trans_ail_delete_bulk(ailp, log_items, i); | ||
| 872 | } | 927 | } |
| 873 | 928 | ||
| 874 | iip->ili_logged = 0; | ||
| 875 | 929 | ||
| 876 | /* | 930 | /* |
| 877 | * Clear the ili_last_fields bits now that we know that the | 931 | * clean up and unlock the flush lock now we are done. We can clear the |
| 878 | * data corresponding to them is safely on disk. | 932 | * ili_last_fields bits now that we know that the data corresponding to |
| 933 | * them is safely on disk. | ||
| 879 | */ | 934 | */ |
| 880 | iip->ili_last_fields = 0; | 935 | for (blip = lip; blip; blip = next) { |
| 936 | next = blip->li_bio_list; | ||
| 937 | blip->li_bio_list = NULL; | ||
| 881 | 938 | ||
| 882 | /* | 939 | iip = INODE_ITEM(blip); |
| 883 | * Release the inode's flush lock since we're done with it. | 940 | iip->ili_logged = 0; |
| 884 | */ | 941 | iip->ili_last_fields = 0; |
| 885 | xfs_ifunlock(ip); | 942 | xfs_ifunlock(iip->ili_inode); |
| 943 | } | ||
| 886 | } | 944 | } |
| 887 | 945 | ||
| 888 | /* | 946 | /* |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 20576146369f..55582bd66659 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -47,127 +47,8 @@ | |||
| 47 | 47 | ||
| 48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ | 48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ |
| 49 | << mp->m_writeio_log) | 49 | << mp->m_writeio_log) |
| 50 | #define XFS_STRAT_WRITE_IMAPS 2 | ||
| 51 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP | 50 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP |
| 52 | 51 | ||
| 53 | STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, | ||
| 54 | int, struct xfs_bmbt_irec *, int *); | ||
| 55 | STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, | ||
| 56 | struct xfs_bmbt_irec *, int *); | ||
| 57 | STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
| 58 | struct xfs_bmbt_irec *, int *); | ||
| 59 | |||
| 60 | int | ||
| 61 | xfs_iomap( | ||
| 62 | struct xfs_inode *ip, | ||
| 63 | xfs_off_t offset, | ||
| 64 | ssize_t count, | ||
| 65 | int flags, | ||
| 66 | struct xfs_bmbt_irec *imap, | ||
| 67 | int *nimaps, | ||
| 68 | int *new) | ||
| 69 | { | ||
| 70 | struct xfs_mount *mp = ip->i_mount; | ||
| 71 | xfs_fileoff_t offset_fsb, end_fsb; | ||
| 72 | int error = 0; | ||
| 73 | int lockmode = 0; | ||
| 74 | int bmapi_flags = 0; | ||
| 75 | |||
| 76 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
| 77 | |||
| 78 | *new = 0; | ||
| 79 | |||
| 80 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 81 | return XFS_ERROR(EIO); | ||
| 82 | |||
| 83 | trace_xfs_iomap_enter(ip, offset, count, flags, NULL); | ||
| 84 | |||
| 85 | switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { | ||
| 86 | case BMAPI_READ: | ||
| 87 | lockmode = xfs_ilock_map_shared(ip); | ||
| 88 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
| 89 | break; | ||
| 90 | case BMAPI_WRITE: | ||
| 91 | lockmode = XFS_ILOCK_EXCL; | ||
| 92 | if (flags & BMAPI_IGNSTATE) | ||
| 93 | bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; | ||
| 94 | xfs_ilock(ip, lockmode); | ||
| 95 | break; | ||
| 96 | case BMAPI_ALLOCATE: | ||
| 97 | lockmode = XFS_ILOCK_SHARED; | ||
| 98 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
| 99 | |||
| 100 | /* Attempt non-blocking lock */ | ||
| 101 | if (flags & BMAPI_TRYLOCK) { | ||
| 102 | if (!xfs_ilock_nowait(ip, lockmode)) | ||
| 103 | return XFS_ERROR(EAGAIN); | ||
| 104 | } else { | ||
| 105 | xfs_ilock(ip, lockmode); | ||
| 106 | } | ||
| 107 | break; | ||
| 108 | default: | ||
| 109 | BUG(); | ||
| 110 | } | ||
| 111 | |||
| 112 | ASSERT(offset <= mp->m_maxioffset); | ||
| 113 | if ((xfs_fsize_t)offset + count > mp->m_maxioffset) | ||
| 114 | count = mp->m_maxioffset - offset; | ||
| 115 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
| 116 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
| 117 | |||
| 118 | error = xfs_bmapi(NULL, ip, offset_fsb, | ||
| 119 | (xfs_filblks_t)(end_fsb - offset_fsb), | ||
| 120 | bmapi_flags, NULL, 0, imap, | ||
| 121 | nimaps, NULL); | ||
| 122 | |||
| 123 | if (error) | ||
| 124 | goto out; | ||
| 125 | |||
| 126 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { | ||
| 127 | case BMAPI_WRITE: | ||
| 128 | /* If we found an extent, return it */ | ||
| 129 | if (*nimaps && | ||
| 130 | (imap->br_startblock != HOLESTARTBLOCK) && | ||
| 131 | (imap->br_startblock != DELAYSTARTBLOCK)) { | ||
| 132 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | |||
| 136 | if (flags & BMAPI_DIRECT) { | ||
| 137 | error = xfs_iomap_write_direct(ip, offset, count, flags, | ||
| 138 | imap, nimaps); | ||
| 139 | } else { | ||
| 140 | error = xfs_iomap_write_delay(ip, offset, count, flags, | ||
| 141 | imap, nimaps); | ||
| 142 | } | ||
| 143 | if (!error) { | ||
| 144 | trace_xfs_iomap_alloc(ip, offset, count, flags, imap); | ||
| 145 | } | ||
| 146 | *new = 1; | ||
| 147 | break; | ||
| 148 | case BMAPI_ALLOCATE: | ||
| 149 | /* If we found an extent, return it */ | ||
| 150 | xfs_iunlock(ip, lockmode); | ||
| 151 | lockmode = 0; | ||
| 152 | |||
| 153 | if (*nimaps && !isnullstartblock(imap->br_startblock)) { | ||
| 154 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
| 155 | break; | ||
| 156 | } | ||
| 157 | |||
| 158 | error = xfs_iomap_write_allocate(ip, offset, count, | ||
| 159 | imap, nimaps); | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | |||
| 163 | ASSERT(*nimaps <= 1); | ||
| 164 | |||
| 165 | out: | ||
| 166 | if (lockmode) | ||
| 167 | xfs_iunlock(ip, lockmode); | ||
| 168 | return XFS_ERROR(error); | ||
| 169 | } | ||
| 170 | |||
| 171 | STATIC int | 52 | STATIC int |
| 172 | xfs_iomap_eof_align_last_fsb( | 53 | xfs_iomap_eof_align_last_fsb( |
| 173 | xfs_mount_t *mp, | 54 | xfs_mount_t *mp, |
| @@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero( | |||
| 236 | return EFSCORRUPTED; | 117 | return EFSCORRUPTED; |
| 237 | } | 118 | } |
| 238 | 119 | ||
| 239 | STATIC int | 120 | int |
| 240 | xfs_iomap_write_direct( | 121 | xfs_iomap_write_direct( |
| 241 | xfs_inode_t *ip, | 122 | xfs_inode_t *ip, |
| 242 | xfs_off_t offset, | 123 | xfs_off_t offset, |
| 243 | size_t count, | 124 | size_t count, |
| 244 | int flags, | ||
| 245 | xfs_bmbt_irec_t *imap, | 125 | xfs_bmbt_irec_t *imap, |
| 246 | int *nmaps) | 126 | int nmaps) |
| 247 | { | 127 | { |
| 248 | xfs_mount_t *mp = ip->i_mount; | 128 | xfs_mount_t *mp = ip->i_mount; |
| 249 | xfs_fileoff_t offset_fsb; | 129 | xfs_fileoff_t offset_fsb; |
| @@ -279,7 +159,7 @@ xfs_iomap_write_direct( | |||
| 279 | if (error) | 159 | if (error) |
| 280 | goto error_out; | 160 | goto error_out; |
| 281 | } else { | 161 | } else { |
| 282 | if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) | 162 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) |
| 283 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 163 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
| 284 | imap->br_blockcount + | 164 | imap->br_blockcount + |
| 285 | imap->br_startoff); | 165 | imap->br_startoff); |
| @@ -331,7 +211,7 @@ xfs_iomap_write_direct( | |||
| 331 | xfs_trans_ijoin(tp, ip); | 211 | xfs_trans_ijoin(tp, ip); |
| 332 | 212 | ||
| 333 | bmapi_flag = XFS_BMAPI_WRITE; | 213 | bmapi_flag = XFS_BMAPI_WRITE; |
| 334 | if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) | 214 | if (offset < ip->i_size || extsz) |
| 335 | bmapi_flag |= XFS_BMAPI_PREALLOC; | 215 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
| 336 | 216 | ||
| 337 | /* | 217 | /* |
| @@ -370,7 +250,6 @@ xfs_iomap_write_direct( | |||
| 370 | goto error_out; | 250 | goto error_out; |
| 371 | } | 251 | } |
| 372 | 252 | ||
| 373 | *nmaps = 1; | ||
| 374 | return 0; | 253 | return 0; |
| 375 | 254 | ||
| 376 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 255 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
| @@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | |||
| 379 | 258 | ||
| 380 | error1: /* Just cancel transaction */ | 259 | error1: /* Just cancel transaction */ |
| 381 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 260 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
| 382 | *nmaps = 0; /* nothing set-up here */ | ||
| 383 | 261 | ||
| 384 | error_out: | 262 | error_out: |
| 385 | return XFS_ERROR(error); | 263 | return XFS_ERROR(error); |
| @@ -389,6 +267,9 @@ error_out: | |||
| 389 | * If the caller is doing a write at the end of the file, then extend the | 267 | * If the caller is doing a write at the end of the file, then extend the |
| 390 | * allocation out to the file system's write iosize. We clean up any extra | 268 | * allocation out to the file system's write iosize. We clean up any extra |
| 391 | * space left over when the file is closed in xfs_inactive(). | 269 | * space left over when the file is closed in xfs_inactive(). |
| 270 | * | ||
| 271 | * If we find we already have delalloc preallocation beyond EOF, don't do more | ||
| 272 | * preallocation as it it not needed. | ||
| 392 | */ | 273 | */ |
| 393 | STATIC int | 274 | STATIC int |
| 394 | xfs_iomap_eof_want_preallocate( | 275 | xfs_iomap_eof_want_preallocate( |
| @@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate( | |||
| 396 | xfs_inode_t *ip, | 277 | xfs_inode_t *ip, |
| 397 | xfs_off_t offset, | 278 | xfs_off_t offset, |
| 398 | size_t count, | 279 | size_t count, |
| 399 | int ioflag, | ||
| 400 | xfs_bmbt_irec_t *imap, | 280 | xfs_bmbt_irec_t *imap, |
| 401 | int nimaps, | 281 | int nimaps, |
| 402 | int *prealloc) | 282 | int *prealloc) |
| @@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate( | |||
| 405 | xfs_filblks_t count_fsb; | 285 | xfs_filblks_t count_fsb; |
| 406 | xfs_fsblock_t firstblock; | 286 | xfs_fsblock_t firstblock; |
| 407 | int n, error, imaps; | 287 | int n, error, imaps; |
| 288 | int found_delalloc = 0; | ||
| 408 | 289 | ||
| 409 | *prealloc = 0; | 290 | *prealloc = 0; |
| 410 | if ((offset + count) <= ip->i_size) | 291 | if ((offset + count) <= ip->i_size) |
| @@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate( | |||
| 429 | return 0; | 310 | return 0; |
| 430 | start_fsb += imap[n].br_blockcount; | 311 | start_fsb += imap[n].br_blockcount; |
| 431 | count_fsb -= imap[n].br_blockcount; | 312 | count_fsb -= imap[n].br_blockcount; |
| 313 | |||
| 314 | if (imap[n].br_startblock == DELAYSTARTBLOCK) | ||
| 315 | found_delalloc = 1; | ||
| 432 | } | 316 | } |
| 433 | } | 317 | } |
| 434 | *prealloc = 1; | 318 | if (!found_delalloc) |
| 319 | *prealloc = 1; | ||
| 435 | return 0; | 320 | return 0; |
| 436 | } | 321 | } |
| 437 | 322 | ||
| 438 | STATIC int | 323 | /* |
| 324 | * If we don't have a user specified preallocation size, dynamically increase | ||
| 325 | * the preallocation size as the size of the file grows. Cap the maximum size | ||
| 326 | * at a single extent or less if the filesystem is near full. The closer the | ||
| 327 | * filesystem is to full, the smaller the maximum prealocation. | ||
| 328 | */ | ||
| 329 | STATIC xfs_fsblock_t | ||
| 330 | xfs_iomap_prealloc_size( | ||
| 331 | struct xfs_mount *mp, | ||
| 332 | struct xfs_inode *ip) | ||
| 333 | { | ||
| 334 | xfs_fsblock_t alloc_blocks = 0; | ||
| 335 | |||
| 336 | if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { | ||
| 337 | int shift = 0; | ||
| 338 | int64_t freesp; | ||
| 339 | |||
| 340 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); | ||
| 341 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, | ||
| 342 | rounddown_pow_of_two(alloc_blocks)); | ||
| 343 | |||
| 344 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | ||
| 345 | freesp = mp->m_sb.sb_fdblocks; | ||
| 346 | if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { | ||
| 347 | shift = 2; | ||
| 348 | if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) | ||
| 349 | shift++; | ||
| 350 | if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) | ||
| 351 | shift++; | ||
| 352 | if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) | ||
| 353 | shift++; | ||
| 354 | if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) | ||
| 355 | shift++; | ||
| 356 | } | ||
| 357 | if (shift) | ||
| 358 | alloc_blocks >>= shift; | ||
| 359 | } | ||
| 360 | |||
| 361 | if (alloc_blocks < mp->m_writeio_blocks) | ||
| 362 | alloc_blocks = mp->m_writeio_blocks; | ||
| 363 | |||
| 364 | return alloc_blocks; | ||
| 365 | } | ||
| 366 | |||
| 367 | int | ||
| 439 | xfs_iomap_write_delay( | 368 | xfs_iomap_write_delay( |
| 440 | xfs_inode_t *ip, | 369 | xfs_inode_t *ip, |
| 441 | xfs_off_t offset, | 370 | xfs_off_t offset, |
| 442 | size_t count, | 371 | size_t count, |
| 443 | int ioflag, | 372 | xfs_bmbt_irec_t *ret_imap) |
| 444 | xfs_bmbt_irec_t *ret_imap, | ||
| 445 | int *nmaps) | ||
| 446 | { | 373 | { |
| 447 | xfs_mount_t *mp = ip->i_mount; | 374 | xfs_mount_t *mp = ip->i_mount; |
| 448 | xfs_fileoff_t offset_fsb; | 375 | xfs_fileoff_t offset_fsb; |
| @@ -469,16 +396,19 @@ xfs_iomap_write_delay( | |||
| 469 | extsz = xfs_get_extsz_hint(ip); | 396 | extsz = xfs_get_extsz_hint(ip); |
| 470 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 397 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
| 471 | 398 | ||
| 399 | |||
| 472 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, | 400 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, |
| 473 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); | 401 | imap, XFS_WRITE_IMAPS, &prealloc); |
| 474 | if (error) | 402 | if (error) |
| 475 | return error; | 403 | return error; |
| 476 | 404 | ||
| 477 | retry: | 405 | retry: |
| 478 | if (prealloc) { | 406 | if (prealloc) { |
| 407 | xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); | ||
| 408 | |||
| 479 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); | 409 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); |
| 480 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); | 410 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); |
| 481 | last_fsb = ioalign + mp->m_writeio_blocks; | 411 | last_fsb = ioalign + alloc_blocks; |
| 482 | } else { | 412 | } else { |
| 483 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); | 413 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
| 484 | } | 414 | } |
| @@ -496,22 +426,31 @@ retry: | |||
| 496 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | | 426 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | |
| 497 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, | 427 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, |
| 498 | &nimaps, NULL); | 428 | &nimaps, NULL); |
| 499 | if (error && (error != ENOSPC)) | 429 | switch (error) { |
| 430 | case 0: | ||
| 431 | case ENOSPC: | ||
| 432 | case EDQUOT: | ||
| 433 | break; | ||
| 434 | default: | ||
| 500 | return XFS_ERROR(error); | 435 | return XFS_ERROR(error); |
| 436 | } | ||
| 501 | 437 | ||
| 502 | /* | 438 | /* |
| 503 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, | 439 | * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For |
| 504 | * then we must have run out of space - flush all other inodes with | 440 | * ENOSPC, * flush all other inodes with delalloc blocks to free up |
| 505 | * delalloc blocks and retry without EOF preallocation. | 441 | * some of the excess reserved metadata space. For both cases, retry |
| 442 | * without EOF preallocation. | ||
| 506 | */ | 443 | */ |
| 507 | if (nimaps == 0) { | 444 | if (nimaps == 0) { |
| 508 | trace_xfs_delalloc_enospc(ip, offset, count); | 445 | trace_xfs_delalloc_enospc(ip, offset, count); |
| 509 | if (flushed) | 446 | if (flushed) |
| 510 | return XFS_ERROR(ENOSPC); | 447 | return XFS_ERROR(error ? error : ENOSPC); |
| 511 | 448 | ||
| 512 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 449 | if (error == ENOSPC) { |
| 513 | xfs_flush_inodes(ip); | 450 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 514 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 451 | xfs_flush_inodes(ip); |
| 452 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 453 | } | ||
| 515 | 454 | ||
| 516 | flushed = 1; | 455 | flushed = 1; |
| 517 | error = 0; | 456 | error = 0; |
| @@ -523,8 +462,6 @@ retry: | |||
| 523 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); | 462 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); |
| 524 | 463 | ||
| 525 | *ret_imap = imap[0]; | 464 | *ret_imap = imap[0]; |
| 526 | *nmaps = 1; | ||
| 527 | |||
| 528 | return 0; | 465 | return 0; |
| 529 | } | 466 | } |
| 530 | 467 | ||
| @@ -538,13 +475,12 @@ retry: | |||
| 538 | * We no longer bother to look at the incoming map - all we have to | 475 | * We no longer bother to look at the incoming map - all we have to |
| 539 | * guarantee is that whatever we allocate fills the required range. | 476 | * guarantee is that whatever we allocate fills the required range. |
| 540 | */ | 477 | */ |
| 541 | STATIC int | 478 | int |
| 542 | xfs_iomap_write_allocate( | 479 | xfs_iomap_write_allocate( |
| 543 | xfs_inode_t *ip, | 480 | xfs_inode_t *ip, |
| 544 | xfs_off_t offset, | 481 | xfs_off_t offset, |
| 545 | size_t count, | 482 | size_t count, |
| 546 | xfs_bmbt_irec_t *imap, | 483 | xfs_bmbt_irec_t *imap) |
| 547 | int *retmap) | ||
| 548 | { | 484 | { |
| 549 | xfs_mount_t *mp = ip->i_mount; | 485 | xfs_mount_t *mp = ip->i_mount; |
| 550 | xfs_fileoff_t offset_fsb, last_block; | 486 | xfs_fileoff_t offset_fsb, last_block; |
| @@ -557,8 +493,6 @@ xfs_iomap_write_allocate( | |||
| 557 | int error = 0; | 493 | int error = 0; |
| 558 | int nres; | 494 | int nres; |
| 559 | 495 | ||
| 560 | *retmap = 0; | ||
| 561 | |||
| 562 | /* | 496 | /* |
| 563 | * Make sure that the dquots are there. | 497 | * Make sure that the dquots are there. |
| 564 | */ | 498 | */ |
| @@ -680,7 +614,6 @@ xfs_iomap_write_allocate( | |||
| 680 | if ((offset_fsb >= imap->br_startoff) && | 614 | if ((offset_fsb >= imap->br_startoff) && |
| 681 | (offset_fsb < (imap->br_startoff + | 615 | (offset_fsb < (imap->br_startoff + |
| 682 | imap->br_blockcount))) { | 616 | imap->br_blockcount))) { |
| 683 | *retmap = 1; | ||
| 684 | XFS_STATS_INC(xs_xstrat_quick); | 617 | XFS_STATS_INC(xs_xstrat_quick); |
| 685 | return 0; | 618 | return 0; |
| 686 | } | 619 | } |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7748a430f50d..80615760959a 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
| @@ -18,30 +18,15 @@ | |||
| 18 | #ifndef __XFS_IOMAP_H__ | 18 | #ifndef __XFS_IOMAP_H__ |
| 19 | #define __XFS_IOMAP_H__ | 19 | #define __XFS_IOMAP_H__ |
| 20 | 20 | ||
| 21 | /* base extent manipulation calls */ | ||
| 22 | #define BMAPI_READ (1 << 0) /* read extents */ | ||
| 23 | #define BMAPI_WRITE (1 << 1) /* create extents */ | ||
| 24 | #define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */ | ||
| 25 | |||
| 26 | /* modifiers */ | ||
| 27 | #define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */ | ||
| 28 | #define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */ | ||
| 29 | #define BMAPI_MMA (1 << 6) /* allocate for mmap write */ | ||
| 30 | #define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */ | ||
| 31 | |||
| 32 | #define BMAPI_FLAGS \ | ||
| 33 | { BMAPI_READ, "READ" }, \ | ||
| 34 | { BMAPI_WRITE, "WRITE" }, \ | ||
| 35 | { BMAPI_ALLOCATE, "ALLOCATE" }, \ | ||
| 36 | { BMAPI_IGNSTATE, "IGNSTATE" }, \ | ||
| 37 | { BMAPI_DIRECT, "DIRECT" }, \ | ||
| 38 | { BMAPI_TRYLOCK, "TRYLOCK" } | ||
| 39 | |||
| 40 | struct xfs_inode; | 21 | struct xfs_inode; |
| 41 | struct xfs_bmbt_irec; | 22 | struct xfs_bmbt_irec; |
| 42 | 23 | ||
| 43 | extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, | 24 | extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, |
| 44 | struct xfs_bmbt_irec *, int *, int *); | 25 | struct xfs_bmbt_irec *, int); |
| 26 | extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, | ||
| 27 | struct xfs_bmbt_irec *); | ||
| 28 | extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
| 29 | struct xfs_bmbt_irec *); | ||
| 45 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); | 30 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); |
| 46 | 31 | ||
| 47 | #endif /* __XFS_IOMAP_H__*/ | 32 | #endif /* __XFS_IOMAP_H__*/ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index cee4ab9f8a9e..0bf24b11d0c4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
| 47 | xfs_buftarg_t *log_target, | 47 | xfs_buftarg_t *log_target, |
| 48 | xfs_daddr_t blk_offset, | 48 | xfs_daddr_t blk_offset, |
| 49 | int num_bblks); | 49 | int num_bblks); |
| 50 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 50 | STATIC int xlog_space_left(struct log *log, atomic64_t *head); |
| 51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
| 52 | STATIC void xlog_dealloc_log(xlog_t *log); | 52 | STATIC void xlog_dealloc_log(xlog_t *log); |
| 53 | 53 | ||
| @@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | |||
| 70 | /* local functions to manipulate grant head */ | 70 | /* local functions to manipulate grant head */ |
| 71 | STATIC int xlog_grant_log_space(xlog_t *log, | 71 | STATIC int xlog_grant_log_space(xlog_t *log, |
| 72 | xlog_ticket_t *xtic); | 72 | xlog_ticket_t *xtic); |
| 73 | STATIC void xlog_grant_push_ail(xfs_mount_t *mp, | 73 | STATIC void xlog_grant_push_ail(struct log *log, |
| 74 | int need_bytes); | 74 | int need_bytes); |
| 75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, | 75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, |
| 76 | xlog_ticket_t *ticket); | 76 | xlog_ticket_t *ticket); |
| @@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
| 81 | 81 | ||
| 82 | #if defined(DEBUG) | 82 | #if defined(DEBUG) |
| 83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); | 83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
| 84 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 84 | STATIC void xlog_verify_grant_tail(struct log *log); |
| 85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, | 85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, |
| 86 | int count, boolean_t syncing); | 86 | int count, boolean_t syncing); |
| 87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | 87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, |
| 88 | xfs_lsn_t tail_lsn); | 88 | xfs_lsn_t tail_lsn); |
| 89 | #else | 89 | #else |
| 90 | #define xlog_verify_dest_ptr(a,b) | 90 | #define xlog_verify_dest_ptr(a,b) |
| 91 | #define xlog_verify_grant_head(a,b) | 91 | #define xlog_verify_grant_tail(a) |
| 92 | #define xlog_verify_iclog(a,b,c,d) | 92 | #define xlog_verify_iclog(a,b,c,d) |
| 93 | #define xlog_verify_tail_lsn(a,b,c) | 93 | #define xlog_verify_tail_lsn(a,b,c) |
| 94 | #endif | 94 | #endif |
| 95 | 95 | ||
| 96 | STATIC int xlog_iclogs_empty(xlog_t *log); | 96 | STATIC int xlog_iclogs_empty(xlog_t *log); |
| 97 | 97 | ||
| 98 | |||
| 99 | static void | 98 | static void |
| 100 | xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | 99 | xlog_grant_sub_space( |
| 100 | struct log *log, | ||
| 101 | atomic64_t *head, | ||
| 102 | int bytes) | ||
| 101 | { | 103 | { |
| 102 | if (*qp) { | 104 | int64_t head_val = atomic64_read(head); |
| 103 | tic->t_next = (*qp); | 105 | int64_t new, old; |
| 104 | tic->t_prev = (*qp)->t_prev; | ||
| 105 | (*qp)->t_prev->t_next = tic; | ||
| 106 | (*qp)->t_prev = tic; | ||
| 107 | } else { | ||
| 108 | tic->t_prev = tic->t_next = tic; | ||
| 109 | *qp = tic; | ||
| 110 | } | ||
| 111 | 106 | ||
| 112 | tic->t_flags |= XLOG_TIC_IN_Q; | 107 | do { |
| 113 | } | 108 | int cycle, space; |
| 114 | 109 | ||
| 115 | static void | 110 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
| 116 | xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | ||
| 117 | { | ||
| 118 | if (tic == tic->t_next) { | ||
| 119 | *qp = NULL; | ||
| 120 | } else { | ||
| 121 | *qp = tic->t_next; | ||
| 122 | tic->t_next->t_prev = tic->t_prev; | ||
| 123 | tic->t_prev->t_next = tic->t_next; | ||
| 124 | } | ||
| 125 | 111 | ||
| 126 | tic->t_next = tic->t_prev = NULL; | 112 | space -= bytes; |
| 127 | tic->t_flags &= ~XLOG_TIC_IN_Q; | 113 | if (space < 0) { |
| 114 | space += log->l_logsize; | ||
| 115 | cycle--; | ||
| 116 | } | ||
| 117 | |||
| 118 | old = head_val; | ||
| 119 | new = xlog_assign_grant_head_val(cycle, space); | ||
| 120 | head_val = atomic64_cmpxchg(head, old, new); | ||
| 121 | } while (head_val != old); | ||
| 128 | } | 122 | } |
| 129 | 123 | ||
| 130 | static void | 124 | static void |
| 131 | xlog_grant_sub_space(struct log *log, int bytes) | 125 | xlog_grant_add_space( |
| 126 | struct log *log, | ||
| 127 | atomic64_t *head, | ||
| 128 | int bytes) | ||
| 132 | { | 129 | { |
| 133 | log->l_grant_write_bytes -= bytes; | 130 | int64_t head_val = atomic64_read(head); |
| 134 | if (log->l_grant_write_bytes < 0) { | 131 | int64_t new, old; |
| 135 | log->l_grant_write_bytes += log->l_logsize; | ||
| 136 | log->l_grant_write_cycle--; | ||
| 137 | } | ||
| 138 | |||
| 139 | log->l_grant_reserve_bytes -= bytes; | ||
| 140 | if ((log)->l_grant_reserve_bytes < 0) { | ||
| 141 | log->l_grant_reserve_bytes += log->l_logsize; | ||
| 142 | log->l_grant_reserve_cycle--; | ||
| 143 | } | ||
| 144 | 132 | ||
| 145 | } | 133 | do { |
| 134 | int tmp; | ||
| 135 | int cycle, space; | ||
| 146 | 136 | ||
| 147 | static void | 137 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
| 148 | xlog_grant_add_space_write(struct log *log, int bytes) | ||
| 149 | { | ||
| 150 | int tmp = log->l_logsize - log->l_grant_write_bytes; | ||
| 151 | if (tmp > bytes) | ||
| 152 | log->l_grant_write_bytes += bytes; | ||
| 153 | else { | ||
| 154 | log->l_grant_write_cycle++; | ||
| 155 | log->l_grant_write_bytes = bytes - tmp; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | 138 | ||
| 159 | static void | 139 | tmp = log->l_logsize - space; |
| 160 | xlog_grant_add_space_reserve(struct log *log, int bytes) | 140 | if (tmp > bytes) |
| 161 | { | 141 | space += bytes; |
| 162 | int tmp = log->l_logsize - log->l_grant_reserve_bytes; | 142 | else { |
| 163 | if (tmp > bytes) | 143 | space = bytes - tmp; |
| 164 | log->l_grant_reserve_bytes += bytes; | 144 | cycle++; |
| 165 | else { | 145 | } |
| 166 | log->l_grant_reserve_cycle++; | ||
| 167 | log->l_grant_reserve_bytes = bytes - tmp; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | 146 | ||
| 171 | static inline void | 147 | old = head_val; |
| 172 | xlog_grant_add_space(struct log *log, int bytes) | 148 | new = xlog_assign_grant_head_val(cycle, space); |
| 173 | { | 149 | head_val = atomic64_cmpxchg(head, old, new); |
| 174 | xlog_grant_add_space_write(log, bytes); | 150 | } while (head_val != old); |
| 175 | xlog_grant_add_space_reserve(log, bytes); | ||
| 176 | } | 151 | } |
| 177 | 152 | ||
| 178 | static void | 153 | static void |
| @@ -355,7 +330,7 @@ xfs_log_reserve( | |||
| 355 | 330 | ||
| 356 | trace_xfs_log_reserve(log, internal_ticket); | 331 | trace_xfs_log_reserve(log, internal_ticket); |
| 357 | 332 | ||
| 358 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 333 | xlog_grant_push_ail(log, internal_ticket->t_unit_res); |
| 359 | retval = xlog_regrant_write_log_space(log, internal_ticket); | 334 | retval = xlog_regrant_write_log_space(log, internal_ticket); |
| 360 | } else { | 335 | } else { |
| 361 | /* may sleep if need to allocate more tickets */ | 336 | /* may sleep if need to allocate more tickets */ |
| @@ -369,7 +344,7 @@ xfs_log_reserve( | |||
| 369 | 344 | ||
| 370 | trace_xfs_log_reserve(log, internal_ticket); | 345 | trace_xfs_log_reserve(log, internal_ticket); |
| 371 | 346 | ||
| 372 | xlog_grant_push_ail(mp, | 347 | xlog_grant_push_ail(log, |
| 373 | (internal_ticket->t_unit_res * | 348 | (internal_ticket->t_unit_res * |
| 374 | internal_ticket->t_cnt)); | 349 | internal_ticket->t_cnt)); |
| 375 | retval = xlog_grant_log_space(log, internal_ticket); | 350 | retval = xlog_grant_log_space(log, internal_ticket); |
| @@ -584,8 +559,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 584 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 559 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
| 585 | iclog->ic_state == XLOG_STATE_DIRTY)) { | 560 | iclog->ic_state == XLOG_STATE_DIRTY)) { |
| 586 | if (!XLOG_FORCED_SHUTDOWN(log)) { | 561 | if (!XLOG_FORCED_SHUTDOWN(log)) { |
| 587 | sv_wait(&iclog->ic_force_wait, PMEM, | 562 | xlog_wait(&iclog->ic_force_wait, |
| 588 | &log->l_icloglock, s); | 563 | &log->l_icloglock); |
| 589 | } else { | 564 | } else { |
| 590 | spin_unlock(&log->l_icloglock); | 565 | spin_unlock(&log->l_icloglock); |
| 591 | } | 566 | } |
| @@ -625,8 +600,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 625 | || iclog->ic_state == XLOG_STATE_DIRTY | 600 | || iclog->ic_state == XLOG_STATE_DIRTY |
| 626 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { | 601 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { |
| 627 | 602 | ||
| 628 | sv_wait(&iclog->ic_force_wait, PMEM, | 603 | xlog_wait(&iclog->ic_force_wait, |
| 629 | &log->l_icloglock, s); | 604 | &log->l_icloglock); |
| 630 | } else { | 605 | } else { |
| 631 | spin_unlock(&log->l_icloglock); | 606 | spin_unlock(&log->l_icloglock); |
| 632 | } | 607 | } |
| @@ -703,55 +678,46 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
| 703 | { | 678 | { |
| 704 | xlog_ticket_t *tic; | 679 | xlog_ticket_t *tic; |
| 705 | xlog_t *log = mp->m_log; | 680 | xlog_t *log = mp->m_log; |
| 706 | int need_bytes, free_bytes, cycle, bytes; | 681 | int need_bytes, free_bytes; |
| 707 | 682 | ||
| 708 | if (XLOG_FORCED_SHUTDOWN(log)) | 683 | if (XLOG_FORCED_SHUTDOWN(log)) |
| 709 | return; | 684 | return; |
| 710 | 685 | ||
| 711 | if (tail_lsn == 0) { | 686 | if (tail_lsn == 0) |
| 712 | /* needed since sync_lsn is 64 bits */ | 687 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
| 713 | spin_lock(&log->l_icloglock); | ||
| 714 | tail_lsn = log->l_last_sync_lsn; | ||
| 715 | spin_unlock(&log->l_icloglock); | ||
| 716 | } | ||
| 717 | |||
| 718 | spin_lock(&log->l_grant_lock); | ||
| 719 | 688 | ||
| 720 | /* Also an invalid lsn. 1 implies that we aren't passing in a valid | 689 | /* tail_lsn == 1 implies that we weren't passed a valid value. */ |
| 721 | * tail_lsn. | 690 | if (tail_lsn != 1) |
| 722 | */ | 691 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
| 723 | if (tail_lsn != 1) { | ||
| 724 | log->l_tail_lsn = tail_lsn; | ||
| 725 | } | ||
| 726 | 692 | ||
| 727 | if ((tic = log->l_write_headq)) { | 693 | if (!list_empty_careful(&log->l_writeq)) { |
| 728 | #ifdef DEBUG | 694 | #ifdef DEBUG |
| 729 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 695 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
| 730 | panic("Recovery problem"); | 696 | panic("Recovery problem"); |
| 731 | #endif | 697 | #endif |
| 732 | cycle = log->l_grant_write_cycle; | 698 | spin_lock(&log->l_grant_write_lock); |
| 733 | bytes = log->l_grant_write_bytes; | 699 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
| 734 | free_bytes = xlog_space_left(log, cycle, bytes); | 700 | list_for_each_entry(tic, &log->l_writeq, t_queue) { |
| 735 | do { | ||
| 736 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | 701 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); |
| 737 | 702 | ||
| 738 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) | 703 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) |
| 739 | break; | 704 | break; |
| 740 | tail_lsn = 0; | 705 | tail_lsn = 0; |
| 741 | free_bytes -= tic->t_unit_res; | 706 | free_bytes -= tic->t_unit_res; |
| 742 | sv_signal(&tic->t_wait); | 707 | trace_xfs_log_regrant_write_wake_up(log, tic); |
| 743 | tic = tic->t_next; | 708 | wake_up(&tic->t_wait); |
| 744 | } while (tic != log->l_write_headq); | 709 | } |
| 710 | spin_unlock(&log->l_grant_write_lock); | ||
| 745 | } | 711 | } |
| 746 | if ((tic = log->l_reserve_headq)) { | 712 | |
| 713 | if (!list_empty_careful(&log->l_reserveq)) { | ||
| 747 | #ifdef DEBUG | 714 | #ifdef DEBUG |
| 748 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 715 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
| 749 | panic("Recovery problem"); | 716 | panic("Recovery problem"); |
| 750 | #endif | 717 | #endif |
| 751 | cycle = log->l_grant_reserve_cycle; | 718 | spin_lock(&log->l_grant_reserve_lock); |
| 752 | bytes = log->l_grant_reserve_bytes; | 719 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
| 753 | free_bytes = xlog_space_left(log, cycle, bytes); | 720 | list_for_each_entry(tic, &log->l_reserveq, t_queue) { |
| 754 | do { | ||
| 755 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) | 721 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) |
| 756 | need_bytes = tic->t_unit_res*tic->t_cnt; | 722 | need_bytes = tic->t_unit_res*tic->t_cnt; |
| 757 | else | 723 | else |
| @@ -760,12 +726,12 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
| 760 | break; | 726 | break; |
| 761 | tail_lsn = 0; | 727 | tail_lsn = 0; |
| 762 | free_bytes -= need_bytes; | 728 | free_bytes -= need_bytes; |
| 763 | sv_signal(&tic->t_wait); | 729 | trace_xfs_log_grant_wake_up(log, tic); |
| 764 | tic = tic->t_next; | 730 | wake_up(&tic->t_wait); |
| 765 | } while (tic != log->l_reserve_headq); | 731 | } |
| 732 | spin_unlock(&log->l_grant_reserve_lock); | ||
| 766 | } | 733 | } |
| 767 | spin_unlock(&log->l_grant_lock); | 734 | } |
| 768 | } /* xfs_log_move_tail */ | ||
| 769 | 735 | ||
| 770 | /* | 736 | /* |
| 771 | * Determine if we have a transaction that has gone to disk | 737 | * Determine if we have a transaction that has gone to disk |
| @@ -831,23 +797,19 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
| 831 | * We may be holding the log iclog lock upon entering this routine. | 797 | * We may be holding the log iclog lock upon entering this routine. |
| 832 | */ | 798 | */ |
| 833 | xfs_lsn_t | 799 | xfs_lsn_t |
| 834 | xlog_assign_tail_lsn(xfs_mount_t *mp) | 800 | xlog_assign_tail_lsn( |
| 801 | struct xfs_mount *mp) | ||
| 835 | { | 802 | { |
| 836 | xfs_lsn_t tail_lsn; | 803 | xfs_lsn_t tail_lsn; |
| 837 | xlog_t *log = mp->m_log; | 804 | struct log *log = mp->m_log; |
| 838 | 805 | ||
| 839 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); | 806 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); |
| 840 | spin_lock(&log->l_grant_lock); | 807 | if (!tail_lsn) |
| 841 | if (tail_lsn != 0) { | 808 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
| 842 | log->l_tail_lsn = tail_lsn; | ||
| 843 | } else { | ||
| 844 | tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; | ||
| 845 | } | ||
| 846 | spin_unlock(&log->l_grant_lock); | ||
| 847 | 809 | ||
| 810 | atomic64_set(&log->l_tail_lsn, tail_lsn); | ||
| 848 | return tail_lsn; | 811 | return tail_lsn; |
| 849 | } /* xlog_assign_tail_lsn */ | 812 | } |
| 850 | |||
| 851 | 813 | ||
| 852 | /* | 814 | /* |
| 853 | * Return the space in the log between the tail and the head. The head | 815 | * Return the space in the log between the tail and the head. The head |
| @@ -864,21 +826,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) | |||
| 864 | * result is that we return the size of the log as the amount of space left. | 826 | * result is that we return the size of the log as the amount of space left. |
| 865 | */ | 827 | */ |
| 866 | STATIC int | 828 | STATIC int |
| 867 | xlog_space_left(xlog_t *log, int cycle, int bytes) | 829 | xlog_space_left( |
| 868 | { | 830 | struct log *log, |
| 869 | int free_bytes; | 831 | atomic64_t *head) |
| 870 | int tail_bytes; | 832 | { |
| 871 | int tail_cycle; | 833 | int free_bytes; |
| 872 | 834 | int tail_bytes; | |
| 873 | tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); | 835 | int tail_cycle; |
| 874 | tail_cycle = CYCLE_LSN(log->l_tail_lsn); | 836 | int head_cycle; |
| 875 | if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { | 837 | int head_bytes; |
| 876 | free_bytes = log->l_logsize - (bytes - tail_bytes); | 838 | |
| 877 | } else if ((tail_cycle + 1) < cycle) { | 839 | xlog_crack_grant_head(head, &head_cycle, &head_bytes); |
| 840 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); | ||
| 841 | tail_bytes = BBTOB(tail_bytes); | ||
| 842 | if (tail_cycle == head_cycle && head_bytes >= tail_bytes) | ||
| 843 | free_bytes = log->l_logsize - (head_bytes - tail_bytes); | ||
| 844 | else if (tail_cycle + 1 < head_cycle) | ||
| 878 | return 0; | 845 | return 0; |
| 879 | } else if (tail_cycle < cycle) { | 846 | else if (tail_cycle < head_cycle) { |
| 880 | ASSERT(tail_cycle == (cycle - 1)); | 847 | ASSERT(tail_cycle == (head_cycle - 1)); |
| 881 | free_bytes = tail_bytes - bytes; | 848 | free_bytes = tail_bytes - head_bytes; |
| 882 | } else { | 849 | } else { |
| 883 | /* | 850 | /* |
| 884 | * The reservation head is behind the tail. | 851 | * The reservation head is behind the tail. |
| @@ -889,12 +856,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes) | |||
| 889 | "xlog_space_left: head behind tail\n" | 856 | "xlog_space_left: head behind tail\n" |
| 890 | " tail_cycle = %d, tail_bytes = %d\n" | 857 | " tail_cycle = %d, tail_bytes = %d\n" |
| 891 | " GH cycle = %d, GH bytes = %d", | 858 | " GH cycle = %d, GH bytes = %d", |
| 892 | tail_cycle, tail_bytes, cycle, bytes); | 859 | tail_cycle, tail_bytes, head_cycle, head_bytes); |
| 893 | ASSERT(0); | 860 | ASSERT(0); |
| 894 | free_bytes = log->l_logsize; | 861 | free_bytes = log->l_logsize; |
| 895 | } | 862 | } |
| 896 | return free_bytes; | 863 | return free_bytes; |
| 897 | } /* xlog_space_left */ | 864 | } |
| 898 | 865 | ||
| 899 | 866 | ||
| 900 | /* | 867 | /* |
| @@ -1047,12 +1014,16 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1047 | log->l_flags |= XLOG_ACTIVE_RECOVERY; | 1014 | log->l_flags |= XLOG_ACTIVE_RECOVERY; |
| 1048 | 1015 | ||
| 1049 | log->l_prev_block = -1; | 1016 | log->l_prev_block = -1; |
| 1050 | log->l_tail_lsn = xlog_assign_lsn(1, 0); | ||
| 1051 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ | 1017 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ |
| 1052 | log->l_last_sync_lsn = log->l_tail_lsn; | 1018 | xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); |
| 1019 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); | ||
| 1053 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ | 1020 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ |
| 1054 | log->l_grant_reserve_cycle = 1; | 1021 | xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); |
| 1055 | log->l_grant_write_cycle = 1; | 1022 | xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); |
| 1023 | INIT_LIST_HEAD(&log->l_reserveq); | ||
| 1024 | INIT_LIST_HEAD(&log->l_writeq); | ||
| 1025 | spin_lock_init(&log->l_grant_reserve_lock); | ||
| 1026 | spin_lock_init(&log->l_grant_write_lock); | ||
| 1056 | 1027 | ||
| 1057 | error = EFSCORRUPTED; | 1028 | error = EFSCORRUPTED; |
| 1058 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1029 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
| @@ -1094,8 +1065,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1094 | log->l_xbuf = bp; | 1065 | log->l_xbuf = bp; |
| 1095 | 1066 | ||
| 1096 | spin_lock_init(&log->l_icloglock); | 1067 | spin_lock_init(&log->l_icloglock); |
| 1097 | spin_lock_init(&log->l_grant_lock); | 1068 | init_waitqueue_head(&log->l_flush_wait); |
| 1098 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | ||
| 1099 | 1069 | ||
| 1100 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1070 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
| 1101 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1071 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
| @@ -1151,8 +1121,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1151 | 1121 | ||
| 1152 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); | 1122 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); |
| 1153 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); | 1123 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); |
| 1154 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1124 | init_waitqueue_head(&iclog->ic_force_wait); |
| 1155 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1125 | init_waitqueue_head(&iclog->ic_write_wait); |
| 1156 | 1126 | ||
| 1157 | iclogp = &iclog->ic_next; | 1127 | iclogp = &iclog->ic_next; |
| 1158 | } | 1128 | } |
| @@ -1167,15 +1137,11 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1167 | out_free_iclog: | 1137 | out_free_iclog: |
| 1168 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { | 1138 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { |
| 1169 | prev_iclog = iclog->ic_next; | 1139 | prev_iclog = iclog->ic_next; |
| 1170 | if (iclog->ic_bp) { | 1140 | if (iclog->ic_bp) |
| 1171 | sv_destroy(&iclog->ic_force_wait); | ||
| 1172 | sv_destroy(&iclog->ic_write_wait); | ||
| 1173 | xfs_buf_free(iclog->ic_bp); | 1141 | xfs_buf_free(iclog->ic_bp); |
| 1174 | } | ||
| 1175 | kmem_free(iclog); | 1142 | kmem_free(iclog); |
| 1176 | } | 1143 | } |
| 1177 | spinlock_destroy(&log->l_icloglock); | 1144 | spinlock_destroy(&log->l_icloglock); |
| 1178 | spinlock_destroy(&log->l_grant_lock); | ||
| 1179 | xfs_buf_free(log->l_xbuf); | 1145 | xfs_buf_free(log->l_xbuf); |
| 1180 | out_free_log: | 1146 | out_free_log: |
| 1181 | kmem_free(log); | 1147 | kmem_free(log); |
| @@ -1223,61 +1189,60 @@ xlog_commit_record( | |||
| 1223 | * water mark. In this manner, we would be creating a low water mark. | 1189 | * water mark. In this manner, we would be creating a low water mark. |
| 1224 | */ | 1190 | */ |
| 1225 | STATIC void | 1191 | STATIC void |
| 1226 | xlog_grant_push_ail(xfs_mount_t *mp, | 1192 | xlog_grant_push_ail( |
| 1227 | int need_bytes) | 1193 | struct log *log, |
| 1194 | int need_bytes) | ||
| 1228 | { | 1195 | { |
| 1229 | xlog_t *log = mp->m_log; /* pointer to the log */ | 1196 | xfs_lsn_t threshold_lsn = 0; |
| 1230 | xfs_lsn_t tail_lsn; /* lsn of the log tail */ | 1197 | xfs_lsn_t last_sync_lsn; |
| 1231 | xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ | 1198 | int free_blocks; |
| 1232 | int free_blocks; /* free blocks left to write to */ | 1199 | int free_bytes; |
| 1233 | int free_bytes; /* free bytes left to write to */ | 1200 | int threshold_block; |
| 1234 | int threshold_block; /* block in lsn we'd like to be at */ | 1201 | int threshold_cycle; |
| 1235 | int threshold_cycle; /* lsn cycle we'd like to be at */ | 1202 | int free_threshold; |
| 1236 | int free_threshold; | 1203 | |
| 1237 | 1204 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | |
| 1238 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | 1205 | |
| 1239 | 1206 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | |
| 1240 | spin_lock(&log->l_grant_lock); | 1207 | free_blocks = BTOBBT(free_bytes); |
| 1241 | free_bytes = xlog_space_left(log, | 1208 | |
| 1242 | log->l_grant_reserve_cycle, | 1209 | /* |
| 1243 | log->l_grant_reserve_bytes); | 1210 | * Set the threshold for the minimum number of free blocks in the |
| 1244 | tail_lsn = log->l_tail_lsn; | 1211 | * log to the maximum of what the caller needs, one quarter of the |
| 1245 | free_blocks = BTOBBT(free_bytes); | 1212 | * log, and 256 blocks. |
| 1246 | 1213 | */ | |
| 1247 | /* | 1214 | free_threshold = BTOBB(need_bytes); |
| 1248 | * Set the threshold for the minimum number of free blocks in the | 1215 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); |
| 1249 | * log to the maximum of what the caller needs, one quarter of the | 1216 | free_threshold = MAX(free_threshold, 256); |
| 1250 | * log, and 256 blocks. | 1217 | if (free_blocks >= free_threshold) |
| 1251 | */ | 1218 | return; |
| 1252 | free_threshold = BTOBB(need_bytes); | 1219 | |
| 1253 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); | 1220 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, |
| 1254 | free_threshold = MAX(free_threshold, 256); | 1221 | &threshold_block); |
| 1255 | if (free_blocks < free_threshold) { | 1222 | threshold_block += free_threshold; |
| 1256 | threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; | ||
| 1257 | threshold_cycle = CYCLE_LSN(tail_lsn); | ||
| 1258 | if (threshold_block >= log->l_logBBsize) { | 1223 | if (threshold_block >= log->l_logBBsize) { |
| 1259 | threshold_block -= log->l_logBBsize; | 1224 | threshold_block -= log->l_logBBsize; |
| 1260 | threshold_cycle += 1; | 1225 | threshold_cycle += 1; |
| 1261 | } | 1226 | } |
| 1262 | threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); | 1227 | threshold_lsn = xlog_assign_lsn(threshold_cycle, |
| 1228 | threshold_block); | ||
| 1229 | /* | ||
| 1230 | * Don't pass in an lsn greater than the lsn of the last | ||
| 1231 | * log record known to be on disk. Use a snapshot of the last sync lsn | ||
| 1232 | * so that it doesn't change between the compare and the set. | ||
| 1233 | */ | ||
| 1234 | last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); | ||
| 1235 | if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) | ||
| 1236 | threshold_lsn = last_sync_lsn; | ||
| 1263 | 1237 | ||
| 1264 | /* Don't pass in an lsn greater than the lsn of the last | 1238 | /* |
| 1265 | * log record known to be on disk. | 1239 | * Get the transaction layer to kick the dirty buffers out to |
| 1240 | * disk asynchronously. No point in trying to do this if | ||
| 1241 | * the filesystem is shutting down. | ||
| 1266 | */ | 1242 | */ |
| 1267 | if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) | 1243 | if (!XLOG_FORCED_SHUTDOWN(log)) |
| 1268 | threshold_lsn = log->l_last_sync_lsn; | 1244 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); |
| 1269 | } | 1245 | } |
| 1270 | spin_unlock(&log->l_grant_lock); | ||
| 1271 | |||
| 1272 | /* | ||
| 1273 | * Get the transaction layer to kick the dirty buffers out to | ||
| 1274 | * disk asynchronously. No point in trying to do this if | ||
| 1275 | * the filesystem is shutting down. | ||
| 1276 | */ | ||
| 1277 | if (threshold_lsn && | ||
| 1278 | !XLOG_FORCED_SHUTDOWN(log)) | ||
| 1279 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | ||
| 1280 | } /* xlog_grant_push_ail */ | ||
| 1281 | 1246 | ||
| 1282 | /* | 1247 | /* |
| 1283 | * The bdstrat callback function for log bufs. This gives us a central | 1248 | * The bdstrat callback function for log bufs. This gives us a central |
| @@ -1372,9 +1337,8 @@ xlog_sync(xlog_t *log, | |||
| 1372 | roundoff < BBTOB(1))); | 1337 | roundoff < BBTOB(1))); |
| 1373 | 1338 | ||
| 1374 | /* move grant heads by roundoff in sync */ | 1339 | /* move grant heads by roundoff in sync */ |
| 1375 | spin_lock(&log->l_grant_lock); | 1340 | xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); |
| 1376 | xlog_grant_add_space(log, roundoff); | 1341 | xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); |
| 1377 | spin_unlock(&log->l_grant_lock); | ||
| 1378 | 1342 | ||
| 1379 | /* put cycle number in every block */ | 1343 | /* put cycle number in every block */ |
| 1380 | xlog_pack_data(log, iclog, roundoff); | 1344 | xlog_pack_data(log, iclog, roundoff); |
| @@ -1489,15 +1453,12 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1489 | 1453 | ||
| 1490 | iclog = log->l_iclog; | 1454 | iclog = log->l_iclog; |
| 1491 | for (i=0; i<log->l_iclog_bufs; i++) { | 1455 | for (i=0; i<log->l_iclog_bufs; i++) { |
| 1492 | sv_destroy(&iclog->ic_force_wait); | ||
| 1493 | sv_destroy(&iclog->ic_write_wait); | ||
| 1494 | xfs_buf_free(iclog->ic_bp); | 1456 | xfs_buf_free(iclog->ic_bp); |
| 1495 | next_iclog = iclog->ic_next; | 1457 | next_iclog = iclog->ic_next; |
| 1496 | kmem_free(iclog); | 1458 | kmem_free(iclog); |
| 1497 | iclog = next_iclog; | 1459 | iclog = next_iclog; |
| 1498 | } | 1460 | } |
| 1499 | spinlock_destroy(&log->l_icloglock); | 1461 | spinlock_destroy(&log->l_icloglock); |
| 1500 | spinlock_destroy(&log->l_grant_lock); | ||
| 1501 | 1462 | ||
| 1502 | xfs_buf_free(log->l_xbuf); | 1463 | xfs_buf_free(log->l_xbuf); |
| 1503 | log->l_mp->m_log = NULL; | 1464 | log->l_mp->m_log = NULL; |
| @@ -2232,7 +2193,7 @@ xlog_state_do_callback( | |||
| 2232 | lowest_lsn = xlog_get_lowest_lsn(log); | 2193 | lowest_lsn = xlog_get_lowest_lsn(log); |
| 2233 | if (lowest_lsn && | 2194 | if (lowest_lsn && |
| 2234 | XFS_LSN_CMP(lowest_lsn, | 2195 | XFS_LSN_CMP(lowest_lsn, |
| 2235 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { | 2196 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { |
| 2236 | iclog = iclog->ic_next; | 2197 | iclog = iclog->ic_next; |
| 2237 | continue; /* Leave this iclog for | 2198 | continue; /* Leave this iclog for |
| 2238 | * another thread */ | 2199 | * another thread */ |
| @@ -2240,23 +2201,21 @@ xlog_state_do_callback( | |||
| 2240 | 2201 | ||
| 2241 | iclog->ic_state = XLOG_STATE_CALLBACK; | 2202 | iclog->ic_state = XLOG_STATE_CALLBACK; |
| 2242 | 2203 | ||
| 2243 | spin_unlock(&log->l_icloglock); | ||
| 2244 | 2204 | ||
| 2245 | /* l_last_sync_lsn field protected by | 2205 | /* |
| 2246 | * l_grant_lock. Don't worry about iclog's lsn. | 2206 | * update the last_sync_lsn before we drop the |
| 2247 | * No one else can be here except us. | 2207 | * icloglock to ensure we are the only one that |
| 2208 | * can update it. | ||
| 2248 | */ | 2209 | */ |
| 2249 | spin_lock(&log->l_grant_lock); | 2210 | ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), |
| 2250 | ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, | 2211 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); |
| 2251 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); | 2212 | atomic64_set(&log->l_last_sync_lsn, |
| 2252 | log->l_last_sync_lsn = | 2213 | be64_to_cpu(iclog->ic_header.h_lsn)); |
| 2253 | be64_to_cpu(iclog->ic_header.h_lsn); | ||
| 2254 | spin_unlock(&log->l_grant_lock); | ||
| 2255 | 2214 | ||
| 2256 | } else { | 2215 | } else |
| 2257 | spin_unlock(&log->l_icloglock); | ||
| 2258 | ioerrors++; | 2216 | ioerrors++; |
| 2259 | } | 2217 | |
| 2218 | spin_unlock(&log->l_icloglock); | ||
| 2260 | 2219 | ||
| 2261 | /* | 2220 | /* |
| 2262 | * Keep processing entries in the callback list until | 2221 | * Keep processing entries in the callback list until |
| @@ -2297,7 +2256,7 @@ xlog_state_do_callback( | |||
| 2297 | xlog_state_clean_log(log); | 2256 | xlog_state_clean_log(log); |
| 2298 | 2257 | ||
| 2299 | /* wake up threads waiting in xfs_log_force() */ | 2258 | /* wake up threads waiting in xfs_log_force() */ |
| 2300 | sv_broadcast(&iclog->ic_force_wait); | 2259 | wake_up_all(&iclog->ic_force_wait); |
| 2301 | 2260 | ||
| 2302 | iclog = iclog->ic_next; | 2261 | iclog = iclog->ic_next; |
| 2303 | } while (first_iclog != iclog); | 2262 | } while (first_iclog != iclog); |
| @@ -2344,7 +2303,7 @@ xlog_state_do_callback( | |||
| 2344 | spin_unlock(&log->l_icloglock); | 2303 | spin_unlock(&log->l_icloglock); |
| 2345 | 2304 | ||
| 2346 | if (wake) | 2305 | if (wake) |
| 2347 | sv_broadcast(&log->l_flush_wait); | 2306 | wake_up_all(&log->l_flush_wait); |
| 2348 | } | 2307 | } |
| 2349 | 2308 | ||
| 2350 | 2309 | ||
| @@ -2395,7 +2354,7 @@ xlog_state_done_syncing( | |||
| 2395 | * iclog buffer, we wake them all, one will get to do the | 2354 | * iclog buffer, we wake them all, one will get to do the |
| 2396 | * I/O, the others get to wait for the result. | 2355 | * I/O, the others get to wait for the result. |
| 2397 | */ | 2356 | */ |
| 2398 | sv_broadcast(&iclog->ic_write_wait); | 2357 | wake_up_all(&iclog->ic_write_wait); |
| 2399 | spin_unlock(&log->l_icloglock); | 2358 | spin_unlock(&log->l_icloglock); |
| 2400 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ | 2359 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ |
| 2401 | } /* xlog_state_done_syncing */ | 2360 | } /* xlog_state_done_syncing */ |
| @@ -2444,7 +2403,7 @@ restart: | |||
| 2444 | XFS_STATS_INC(xs_log_noiclogs); | 2403 | XFS_STATS_INC(xs_log_noiclogs); |
| 2445 | 2404 | ||
| 2446 | /* Wait for log writes to have flushed */ | 2405 | /* Wait for log writes to have flushed */ |
| 2447 | sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); | 2406 | xlog_wait(&log->l_flush_wait, &log->l_icloglock); |
| 2448 | goto restart; | 2407 | goto restart; |
| 2449 | } | 2408 | } |
| 2450 | 2409 | ||
| @@ -2527,6 +2486,18 @@ restart: | |||
| 2527 | * | 2486 | * |
| 2528 | * Once a ticket gets put onto the reserveq, it will only return after | 2487 | * Once a ticket gets put onto the reserveq, it will only return after |
| 2529 | * the needed reservation is satisfied. | 2488 | * the needed reservation is satisfied. |
| 2489 | * | ||
| 2490 | * This function is structured so that it has a lock free fast path. This is | ||
| 2491 | * necessary because every new transaction reservation will come through this | ||
| 2492 | * path. Hence any lock will be globally hot if we take it unconditionally on | ||
| 2493 | * every pass. | ||
| 2494 | * | ||
| 2495 | * As tickets are only ever moved on and off the reserveq under the | ||
| 2496 | * l_grant_reserve_lock, we only need to take that lock if we are going | ||
| 2497 | * to add the ticket to the queue and sleep. We can avoid taking the lock if the | ||
| 2498 | * ticket was never added to the reserveq because the t_queue list head will be | ||
| 2499 | * empty and we hold the only reference to it so it can safely be checked | ||
| 2500 | * unlocked. | ||
| 2530 | */ | 2501 | */ |
| 2531 | STATIC int | 2502 | STATIC int |
| 2532 | xlog_grant_log_space(xlog_t *log, | 2503 | xlog_grant_log_space(xlog_t *log, |
| @@ -2534,24 +2505,27 @@ xlog_grant_log_space(xlog_t *log, | |||
| 2534 | { | 2505 | { |
| 2535 | int free_bytes; | 2506 | int free_bytes; |
| 2536 | int need_bytes; | 2507 | int need_bytes; |
| 2537 | #ifdef DEBUG | ||
| 2538 | xfs_lsn_t tail_lsn; | ||
| 2539 | #endif | ||
| 2540 | |||
| 2541 | 2508 | ||
| 2542 | #ifdef DEBUG | 2509 | #ifdef DEBUG |
| 2543 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 2510 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
| 2544 | panic("grant Recovery problem"); | 2511 | panic("grant Recovery problem"); |
| 2545 | #endif | 2512 | #endif |
| 2546 | 2513 | ||
| 2547 | /* Is there space or do we need to sleep? */ | ||
| 2548 | spin_lock(&log->l_grant_lock); | ||
| 2549 | |||
| 2550 | trace_xfs_log_grant_enter(log, tic); | 2514 | trace_xfs_log_grant_enter(log, tic); |
| 2551 | 2515 | ||
| 2516 | need_bytes = tic->t_unit_res; | ||
| 2517 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
| 2518 | need_bytes *= tic->t_ocnt; | ||
| 2519 | |||
| 2552 | /* something is already sleeping; insert new transaction at end */ | 2520 | /* something is already sleeping; insert new transaction at end */ |
| 2553 | if (log->l_reserve_headq) { | 2521 | if (!list_empty_careful(&log->l_reserveq)) { |
| 2554 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2522 | spin_lock(&log->l_grant_reserve_lock); |
| 2523 | /* recheck the queue now we are locked */ | ||
| 2524 | if (list_empty(&log->l_reserveq)) { | ||
| 2525 | spin_unlock(&log->l_grant_reserve_lock); | ||
| 2526 | goto redo; | ||
| 2527 | } | ||
| 2528 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
| 2555 | 2529 | ||
| 2556 | trace_xfs_log_grant_sleep1(log, tic); | 2530 | trace_xfs_log_grant_sleep1(log, tic); |
| 2557 | 2531 | ||
| @@ -2563,72 +2537,57 @@ xlog_grant_log_space(xlog_t *log, | |||
| 2563 | goto error_return; | 2537 | goto error_return; |
| 2564 | 2538 | ||
| 2565 | XFS_STATS_INC(xs_sleep_logspace); | 2539 | XFS_STATS_INC(xs_sleep_logspace); |
| 2566 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2540 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); |
| 2541 | |||
| 2567 | /* | 2542 | /* |
| 2568 | * If we got an error, and the filesystem is shutting down, | 2543 | * If we got an error, and the filesystem is shutting down, |
| 2569 | * we'll catch it down below. So just continue... | 2544 | * we'll catch it down below. So just continue... |
| 2570 | */ | 2545 | */ |
| 2571 | trace_xfs_log_grant_wake1(log, tic); | 2546 | trace_xfs_log_grant_wake1(log, tic); |
| 2572 | spin_lock(&log->l_grant_lock); | ||
| 2573 | } | 2547 | } |
| 2574 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
| 2575 | need_bytes = tic->t_unit_res*tic->t_ocnt; | ||
| 2576 | else | ||
| 2577 | need_bytes = tic->t_unit_res; | ||
| 2578 | 2548 | ||
| 2579 | redo: | 2549 | redo: |
| 2580 | if (XLOG_FORCED_SHUTDOWN(log)) | 2550 | if (XLOG_FORCED_SHUTDOWN(log)) |
| 2581 | goto error_return; | 2551 | goto error_return_unlocked; |
| 2582 | 2552 | ||
| 2583 | free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, | 2553 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
| 2584 | log->l_grant_reserve_bytes); | ||
| 2585 | if (free_bytes < need_bytes) { | 2554 | if (free_bytes < need_bytes) { |
| 2586 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2555 | spin_lock(&log->l_grant_reserve_lock); |
| 2587 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2556 | if (list_empty(&tic->t_queue)) |
| 2557 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
| 2588 | 2558 | ||
| 2589 | trace_xfs_log_grant_sleep2(log, tic); | 2559 | trace_xfs_log_grant_sleep2(log, tic); |
| 2590 | 2560 | ||
| 2591 | spin_unlock(&log->l_grant_lock); | ||
| 2592 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
| 2593 | spin_lock(&log->l_grant_lock); | ||
| 2594 | |||
| 2595 | XFS_STATS_INC(xs_sleep_logspace); | ||
| 2596 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
| 2597 | |||
| 2598 | spin_lock(&log->l_grant_lock); | ||
| 2599 | if (XLOG_FORCED_SHUTDOWN(log)) | 2561 | if (XLOG_FORCED_SHUTDOWN(log)) |
| 2600 | goto error_return; | 2562 | goto error_return; |
| 2601 | 2563 | ||
| 2602 | trace_xfs_log_grant_wake2(log, tic); | 2564 | xlog_grant_push_ail(log, need_bytes); |
| 2565 | |||
| 2566 | XFS_STATS_INC(xs_sleep_logspace); | ||
| 2567 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); | ||
| 2603 | 2568 | ||
| 2569 | trace_xfs_log_grant_wake2(log, tic); | ||
| 2604 | goto redo; | 2570 | goto redo; |
| 2605 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2571 | } |
| 2606 | xlog_del_ticketq(&log->l_reserve_headq, tic); | ||
| 2607 | 2572 | ||
| 2608 | /* we've got enough space */ | 2573 | if (!list_empty(&tic->t_queue)) { |
| 2609 | xlog_grant_add_space(log, need_bytes); | 2574 | spin_lock(&log->l_grant_reserve_lock); |
| 2610 | #ifdef DEBUG | 2575 | list_del_init(&tic->t_queue); |
| 2611 | tail_lsn = log->l_tail_lsn; | 2576 | spin_unlock(&log->l_grant_reserve_lock); |
| 2612 | /* | ||
| 2613 | * Check to make sure the grant write head didn't just over lap the | ||
| 2614 | * tail. If the cycles are the same, we can't be overlapping. | ||
| 2615 | * Otherwise, make sure that the cycles differ by exactly one and | ||
| 2616 | * check the byte count. | ||
| 2617 | */ | ||
| 2618 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
| 2619 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
| 2620 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
| 2621 | } | 2577 | } |
| 2622 | #endif | 2578 | |
| 2579 | /* we've got enough space */ | ||
| 2580 | xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); | ||
| 2581 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
| 2623 | trace_xfs_log_grant_exit(log, tic); | 2582 | trace_xfs_log_grant_exit(log, tic); |
| 2624 | xlog_verify_grant_head(log, 1); | 2583 | xlog_verify_grant_tail(log); |
| 2625 | spin_unlock(&log->l_grant_lock); | ||
| 2626 | return 0; | 2584 | return 0; |
| 2627 | 2585 | ||
| 2628 | error_return: | 2586 | error_return_unlocked: |
| 2629 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2587 | spin_lock(&log->l_grant_reserve_lock); |
| 2630 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2588 | error_return: |
| 2631 | 2589 | list_del_init(&tic->t_queue); | |
| 2590 | spin_unlock(&log->l_grant_reserve_lock); | ||
| 2632 | trace_xfs_log_grant_error(log, tic); | 2591 | trace_xfs_log_grant_error(log, tic); |
| 2633 | 2592 | ||
| 2634 | /* | 2593 | /* |
| @@ -2638,7 +2597,6 @@ redo: | |||
| 2638 | */ | 2597 | */ |
| 2639 | tic->t_curr_res = 0; | 2598 | tic->t_curr_res = 0; |
| 2640 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2599 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
| 2641 | spin_unlock(&log->l_grant_lock); | ||
| 2642 | return XFS_ERROR(EIO); | 2600 | return XFS_ERROR(EIO); |
| 2643 | } /* xlog_grant_log_space */ | 2601 | } /* xlog_grant_log_space */ |
| 2644 | 2602 | ||
| @@ -2646,17 +2604,14 @@ redo: | |||
| 2646 | /* | 2604 | /* |
| 2647 | * Replenish the byte reservation required by moving the grant write head. | 2605 | * Replenish the byte reservation required by moving the grant write head. |
| 2648 | * | 2606 | * |
| 2649 | * | 2607 | * Similar to xlog_grant_log_space, the function is structured to have a lock |
| 2608 | * free fast path. | ||
| 2650 | */ | 2609 | */ |
| 2651 | STATIC int | 2610 | STATIC int |
| 2652 | xlog_regrant_write_log_space(xlog_t *log, | 2611 | xlog_regrant_write_log_space(xlog_t *log, |
| 2653 | xlog_ticket_t *tic) | 2612 | xlog_ticket_t *tic) |
| 2654 | { | 2613 | { |
| 2655 | int free_bytes, need_bytes; | 2614 | int free_bytes, need_bytes; |
| 2656 | xlog_ticket_t *ntic; | ||
| 2657 | #ifdef DEBUG | ||
| 2658 | xfs_lsn_t tail_lsn; | ||
| 2659 | #endif | ||
| 2660 | 2615 | ||
| 2661 | tic->t_curr_res = tic->t_unit_res; | 2616 | tic->t_curr_res = tic->t_unit_res; |
| 2662 | xlog_tic_reset_res(tic); | 2617 | xlog_tic_reset_res(tic); |
| @@ -2669,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
| 2669 | panic("regrant Recovery problem"); | 2624 | panic("regrant Recovery problem"); |
| 2670 | #endif | 2625 | #endif |
| 2671 | 2626 | ||
| 2672 | spin_lock(&log->l_grant_lock); | ||
| 2673 | |||
| 2674 | trace_xfs_log_regrant_write_enter(log, tic); | 2627 | trace_xfs_log_regrant_write_enter(log, tic); |
| 2675 | |||
| 2676 | if (XLOG_FORCED_SHUTDOWN(log)) | 2628 | if (XLOG_FORCED_SHUTDOWN(log)) |
| 2677 | goto error_return; | 2629 | goto error_return_unlocked; |
| 2678 | 2630 | ||
| 2679 | /* If there are other waiters on the queue then give them a | 2631 | /* If there are other waiters on the queue then give them a |
| 2680 | * chance at logspace before us. Wake up the first waiters, | 2632 | * chance at logspace before us. Wake up the first waiters, |
| @@ -2683,92 +2635,76 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
| 2683 | * this transaction. | 2635 | * this transaction. |
| 2684 | */ | 2636 | */ |
| 2685 | need_bytes = tic->t_unit_res; | 2637 | need_bytes = tic->t_unit_res; |
| 2686 | if ((ntic = log->l_write_headq)) { | 2638 | if (!list_empty_careful(&log->l_writeq)) { |
| 2687 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2639 | struct xlog_ticket *ntic; |
| 2688 | log->l_grant_write_bytes); | 2640 | |
| 2689 | do { | 2641 | spin_lock(&log->l_grant_write_lock); |
| 2642 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); | ||
| 2643 | list_for_each_entry(ntic, &log->l_writeq, t_queue) { | ||
| 2690 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); | 2644 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); |
| 2691 | 2645 | ||
| 2692 | if (free_bytes < ntic->t_unit_res) | 2646 | if (free_bytes < ntic->t_unit_res) |
| 2693 | break; | 2647 | break; |
| 2694 | free_bytes -= ntic->t_unit_res; | 2648 | free_bytes -= ntic->t_unit_res; |
| 2695 | sv_signal(&ntic->t_wait); | 2649 | wake_up(&ntic->t_wait); |
| 2696 | ntic = ntic->t_next; | 2650 | } |
| 2697 | } while (ntic != log->l_write_headq); | ||
| 2698 | |||
| 2699 | if (ntic != log->l_write_headq) { | ||
| 2700 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | ||
| 2701 | xlog_ins_ticketq(&log->l_write_headq, tic); | ||
| 2702 | 2651 | ||
| 2652 | if (ntic != list_first_entry(&log->l_writeq, | ||
| 2653 | struct xlog_ticket, t_queue)) { | ||
| 2654 | if (list_empty(&tic->t_queue)) | ||
| 2655 | list_add_tail(&tic->t_queue, &log->l_writeq); | ||
| 2703 | trace_xfs_log_regrant_write_sleep1(log, tic); | 2656 | trace_xfs_log_regrant_write_sleep1(log, tic); |
| 2704 | 2657 | ||
| 2705 | spin_unlock(&log->l_grant_lock); | 2658 | xlog_grant_push_ail(log, need_bytes); |
| 2706 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
| 2707 | spin_lock(&log->l_grant_lock); | ||
| 2708 | 2659 | ||
| 2709 | XFS_STATS_INC(xs_sleep_logspace); | 2660 | XFS_STATS_INC(xs_sleep_logspace); |
| 2710 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, | 2661 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); |
| 2711 | &log->l_grant_lock, s); | ||
| 2712 | |||
| 2713 | /* If we're shutting down, this tic is already | ||
| 2714 | * off the queue */ | ||
| 2715 | spin_lock(&log->l_grant_lock); | ||
| 2716 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
| 2717 | goto error_return; | ||
| 2718 | |||
| 2719 | trace_xfs_log_regrant_write_wake1(log, tic); | 2662 | trace_xfs_log_regrant_write_wake1(log, tic); |
| 2720 | } | 2663 | } else |
| 2664 | spin_unlock(&log->l_grant_write_lock); | ||
| 2721 | } | 2665 | } |
| 2722 | 2666 | ||
| 2723 | redo: | 2667 | redo: |
| 2724 | if (XLOG_FORCED_SHUTDOWN(log)) | 2668 | if (XLOG_FORCED_SHUTDOWN(log)) |
| 2725 | goto error_return; | 2669 | goto error_return_unlocked; |
| 2726 | 2670 | ||
| 2727 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2671 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
| 2728 | log->l_grant_write_bytes); | ||
| 2729 | if (free_bytes < need_bytes) { | 2672 | if (free_bytes < need_bytes) { |
| 2730 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2673 | spin_lock(&log->l_grant_write_lock); |
| 2731 | xlog_ins_ticketq(&log->l_write_headq, tic); | 2674 | if (list_empty(&tic->t_queue)) |
| 2732 | spin_unlock(&log->l_grant_lock); | 2675 | list_add_tail(&tic->t_queue, &log->l_writeq); |
| 2733 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
| 2734 | spin_lock(&log->l_grant_lock); | ||
| 2735 | |||
| 2736 | XFS_STATS_INC(xs_sleep_logspace); | ||
| 2737 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
| 2738 | |||
| 2739 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
| 2740 | 2676 | ||
| 2741 | /* If we're shutting down, this tic is already off the queue */ | ||
| 2742 | spin_lock(&log->l_grant_lock); | ||
| 2743 | if (XLOG_FORCED_SHUTDOWN(log)) | 2677 | if (XLOG_FORCED_SHUTDOWN(log)) |
| 2744 | goto error_return; | 2678 | goto error_return; |
| 2745 | 2679 | ||
| 2680 | xlog_grant_push_ail(log, need_bytes); | ||
| 2681 | |||
| 2682 | XFS_STATS_INC(xs_sleep_logspace); | ||
| 2683 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
| 2684 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); | ||
| 2685 | |||
| 2746 | trace_xfs_log_regrant_write_wake2(log, tic); | 2686 | trace_xfs_log_regrant_write_wake2(log, tic); |
| 2747 | goto redo; | 2687 | goto redo; |
| 2748 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2688 | } |
| 2749 | xlog_del_ticketq(&log->l_write_headq, tic); | ||
| 2750 | 2689 | ||
| 2751 | /* we've got enough space */ | 2690 | if (!list_empty(&tic->t_queue)) { |
| 2752 | xlog_grant_add_space_write(log, need_bytes); | 2691 | spin_lock(&log->l_grant_write_lock); |
| 2753 | #ifdef DEBUG | 2692 | list_del_init(&tic->t_queue); |
| 2754 | tail_lsn = log->l_tail_lsn; | 2693 | spin_unlock(&log->l_grant_write_lock); |
| 2755 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
| 2756 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
| 2757 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
| 2758 | } | 2694 | } |
| 2759 | #endif | ||
| 2760 | 2695 | ||
| 2696 | /* we've got enough space */ | ||
| 2697 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
| 2761 | trace_xfs_log_regrant_write_exit(log, tic); | 2698 | trace_xfs_log_regrant_write_exit(log, tic); |
| 2762 | 2699 | xlog_verify_grant_tail(log); | |
| 2763 | xlog_verify_grant_head(log, 1); | ||
| 2764 | spin_unlock(&log->l_grant_lock); | ||
| 2765 | return 0; | 2700 | return 0; |
| 2766 | 2701 | ||
| 2767 | 2702 | ||
| 2703 | error_return_unlocked: | ||
| 2704 | spin_lock(&log->l_grant_write_lock); | ||
| 2768 | error_return: | 2705 | error_return: |
| 2769 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2706 | list_del_init(&tic->t_queue); |
| 2770 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2707 | spin_unlock(&log->l_grant_write_lock); |
| 2771 | |||
| 2772 | trace_xfs_log_regrant_write_error(log, tic); | 2708 | trace_xfs_log_regrant_write_error(log, tic); |
| 2773 | 2709 | ||
| 2774 | /* | 2710 | /* |
| @@ -2778,7 +2714,6 @@ redo: | |||
| 2778 | */ | 2714 | */ |
| 2779 | tic->t_curr_res = 0; | 2715 | tic->t_curr_res = 0; |
| 2780 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2716 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
| 2781 | spin_unlock(&log->l_grant_lock); | ||
| 2782 | return XFS_ERROR(EIO); | 2717 | return XFS_ERROR(EIO); |
| 2783 | } /* xlog_regrant_write_log_space */ | 2718 | } /* xlog_regrant_write_log_space */ |
| 2784 | 2719 | ||
| @@ -2799,27 +2734,24 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
| 2799 | if (ticket->t_cnt > 0) | 2734 | if (ticket->t_cnt > 0) |
| 2800 | ticket->t_cnt--; | 2735 | ticket->t_cnt--; |
| 2801 | 2736 | ||
| 2802 | spin_lock(&log->l_grant_lock); | 2737 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, |
| 2803 | xlog_grant_sub_space(log, ticket->t_curr_res); | 2738 | ticket->t_curr_res); |
| 2739 | xlog_grant_sub_space(log, &log->l_grant_write_head, | ||
| 2740 | ticket->t_curr_res); | ||
| 2804 | ticket->t_curr_res = ticket->t_unit_res; | 2741 | ticket->t_curr_res = ticket->t_unit_res; |
| 2805 | xlog_tic_reset_res(ticket); | 2742 | xlog_tic_reset_res(ticket); |
| 2806 | 2743 | ||
| 2807 | trace_xfs_log_regrant_reserve_sub(log, ticket); | 2744 | trace_xfs_log_regrant_reserve_sub(log, ticket); |
| 2808 | 2745 | ||
| 2809 | xlog_verify_grant_head(log, 1); | ||
| 2810 | |||
| 2811 | /* just return if we still have some of the pre-reserved space */ | 2746 | /* just return if we still have some of the pre-reserved space */ |
| 2812 | if (ticket->t_cnt > 0) { | 2747 | if (ticket->t_cnt > 0) |
| 2813 | spin_unlock(&log->l_grant_lock); | ||
| 2814 | return; | 2748 | return; |
| 2815 | } | ||
| 2816 | 2749 | ||
| 2817 | xlog_grant_add_space_reserve(log, ticket->t_unit_res); | 2750 | xlog_grant_add_space(log, &log->l_grant_reserve_head, |
| 2751 | ticket->t_unit_res); | ||
| 2818 | 2752 | ||
| 2819 | trace_xfs_log_regrant_reserve_exit(log, ticket); | 2753 | trace_xfs_log_regrant_reserve_exit(log, ticket); |
| 2820 | 2754 | ||
| 2821 | xlog_verify_grant_head(log, 0); | ||
| 2822 | spin_unlock(&log->l_grant_lock); | ||
| 2823 | ticket->t_curr_res = ticket->t_unit_res; | 2755 | ticket->t_curr_res = ticket->t_unit_res; |
| 2824 | xlog_tic_reset_res(ticket); | 2756 | xlog_tic_reset_res(ticket); |
| 2825 | } /* xlog_regrant_reserve_log_space */ | 2757 | } /* xlog_regrant_reserve_log_space */ |
| @@ -2843,28 +2775,29 @@ STATIC void | |||
| 2843 | xlog_ungrant_log_space(xlog_t *log, | 2775 | xlog_ungrant_log_space(xlog_t *log, |
| 2844 | xlog_ticket_t *ticket) | 2776 | xlog_ticket_t *ticket) |
| 2845 | { | 2777 | { |
| 2778 | int bytes; | ||
| 2779 | |||
| 2846 | if (ticket->t_cnt > 0) | 2780 | if (ticket->t_cnt > 0) |
| 2847 | ticket->t_cnt--; | 2781 | ticket->t_cnt--; |
| 2848 | 2782 | ||
| 2849 | spin_lock(&log->l_grant_lock); | ||
| 2850 | trace_xfs_log_ungrant_enter(log, ticket); | 2783 | trace_xfs_log_ungrant_enter(log, ticket); |
| 2851 | |||
| 2852 | xlog_grant_sub_space(log, ticket->t_curr_res); | ||
| 2853 | |||
| 2854 | trace_xfs_log_ungrant_sub(log, ticket); | 2784 | trace_xfs_log_ungrant_sub(log, ticket); |
| 2855 | 2785 | ||
| 2856 | /* If this is a permanent reservation ticket, we may be able to free | 2786 | /* |
| 2787 | * If this is a permanent reservation ticket, we may be able to free | ||
| 2857 | * up more space based on the remaining count. | 2788 | * up more space based on the remaining count. |
| 2858 | */ | 2789 | */ |
| 2790 | bytes = ticket->t_curr_res; | ||
| 2859 | if (ticket->t_cnt > 0) { | 2791 | if (ticket->t_cnt > 0) { |
| 2860 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); | 2792 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); |
| 2861 | xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); | 2793 | bytes += ticket->t_unit_res*ticket->t_cnt; |
| 2862 | } | 2794 | } |
| 2863 | 2795 | ||
| 2796 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); | ||
| 2797 | xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); | ||
| 2798 | |||
| 2864 | trace_xfs_log_ungrant_exit(log, ticket); | 2799 | trace_xfs_log_ungrant_exit(log, ticket); |
| 2865 | 2800 | ||
| 2866 | xlog_verify_grant_head(log, 1); | ||
| 2867 | spin_unlock(&log->l_grant_lock); | ||
| 2868 | xfs_log_move_tail(log->l_mp, 1); | 2801 | xfs_log_move_tail(log->l_mp, 1); |
| 2869 | } /* xlog_ungrant_log_space */ | 2802 | } /* xlog_ungrant_log_space */ |
| 2870 | 2803 | ||
| @@ -2901,11 +2834,11 @@ xlog_state_release_iclog( | |||
| 2901 | 2834 | ||
| 2902 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { | 2835 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
| 2903 | /* update tail before writing to iclog */ | 2836 | /* update tail before writing to iclog */ |
| 2904 | xlog_assign_tail_lsn(log->l_mp); | 2837 | xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); |
| 2905 | sync++; | 2838 | sync++; |
| 2906 | iclog->ic_state = XLOG_STATE_SYNCING; | 2839 | iclog->ic_state = XLOG_STATE_SYNCING; |
| 2907 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); | 2840 | iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); |
| 2908 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); | 2841 | xlog_verify_tail_lsn(log, iclog, tail_lsn); |
| 2909 | /* cycle incremented when incrementing curr_block */ | 2842 | /* cycle incremented when incrementing curr_block */ |
| 2910 | } | 2843 | } |
| 2911 | spin_unlock(&log->l_icloglock); | 2844 | spin_unlock(&log->l_icloglock); |
| @@ -3088,7 +3021,7 @@ maybe_sleep: | |||
| 3088 | return XFS_ERROR(EIO); | 3021 | return XFS_ERROR(EIO); |
| 3089 | } | 3022 | } |
| 3090 | XFS_STATS_INC(xs_log_force_sleep); | 3023 | XFS_STATS_INC(xs_log_force_sleep); |
| 3091 | sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); | 3024 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
| 3092 | /* | 3025 | /* |
| 3093 | * No need to grab the log lock here since we're | 3026 | * No need to grab the log lock here since we're |
| 3094 | * only deciding whether or not to return EIO | 3027 | * only deciding whether or not to return EIO |
| @@ -3206,8 +3139,8 @@ try_again: | |||
| 3206 | 3139 | ||
| 3207 | XFS_STATS_INC(xs_log_force_sleep); | 3140 | XFS_STATS_INC(xs_log_force_sleep); |
| 3208 | 3141 | ||
| 3209 | sv_wait(&iclog->ic_prev->ic_write_wait, | 3142 | xlog_wait(&iclog->ic_prev->ic_write_wait, |
| 3210 | PSWP, &log->l_icloglock, s); | 3143 | &log->l_icloglock); |
| 3211 | if (log_flushed) | 3144 | if (log_flushed) |
| 3212 | *log_flushed = 1; | 3145 | *log_flushed = 1; |
| 3213 | already_slept = 1; | 3146 | already_slept = 1; |
| @@ -3235,7 +3168,7 @@ try_again: | |||
| 3235 | return XFS_ERROR(EIO); | 3168 | return XFS_ERROR(EIO); |
| 3236 | } | 3169 | } |
| 3237 | XFS_STATS_INC(xs_log_force_sleep); | 3170 | XFS_STATS_INC(xs_log_force_sleep); |
| 3238 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | 3171 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
| 3239 | /* | 3172 | /* |
| 3240 | * No need to grab the log lock here since we're | 3173 | * No need to grab the log lock here since we're |
| 3241 | * only deciding whether or not to return EIO | 3174 | * only deciding whether or not to return EIO |
| @@ -3310,10 +3243,8 @@ xfs_log_ticket_put( | |||
| 3310 | xlog_ticket_t *ticket) | 3243 | xlog_ticket_t *ticket) |
| 3311 | { | 3244 | { |
| 3312 | ASSERT(atomic_read(&ticket->t_ref) > 0); | 3245 | ASSERT(atomic_read(&ticket->t_ref) > 0); |
| 3313 | if (atomic_dec_and_test(&ticket->t_ref)) { | 3246 | if (atomic_dec_and_test(&ticket->t_ref)) |
| 3314 | sv_destroy(&ticket->t_wait); | ||
| 3315 | kmem_zone_free(xfs_log_ticket_zone, ticket); | 3247 | kmem_zone_free(xfs_log_ticket_zone, ticket); |
| 3316 | } | ||
| 3317 | } | 3248 | } |
| 3318 | 3249 | ||
| 3319 | xlog_ticket_t * | 3250 | xlog_ticket_t * |
| @@ -3435,6 +3366,7 @@ xlog_ticket_alloc( | |||
| 3435 | } | 3366 | } |
| 3436 | 3367 | ||
| 3437 | atomic_set(&tic->t_ref, 1); | 3368 | atomic_set(&tic->t_ref, 1); |
| 3369 | INIT_LIST_HEAD(&tic->t_queue); | ||
| 3438 | tic->t_unit_res = unit_bytes; | 3370 | tic->t_unit_res = unit_bytes; |
| 3439 | tic->t_curr_res = unit_bytes; | 3371 | tic->t_curr_res = unit_bytes; |
| 3440 | tic->t_cnt = cnt; | 3372 | tic->t_cnt = cnt; |
| @@ -3445,7 +3377,7 @@ xlog_ticket_alloc( | |||
| 3445 | tic->t_trans_type = 0; | 3377 | tic->t_trans_type = 0; |
| 3446 | if (xflags & XFS_LOG_PERM_RESERV) | 3378 | if (xflags & XFS_LOG_PERM_RESERV) |
| 3447 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3379 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
| 3448 | sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); | 3380 | init_waitqueue_head(&tic->t_wait); |
| 3449 | 3381 | ||
| 3450 | xlog_tic_reset_res(tic); | 3382 | xlog_tic_reset_res(tic); |
| 3451 | 3383 | ||
| @@ -3484,18 +3416,25 @@ xlog_verify_dest_ptr( | |||
| 3484 | } | 3416 | } |
| 3485 | 3417 | ||
| 3486 | STATIC void | 3418 | STATIC void |
| 3487 | xlog_verify_grant_head(xlog_t *log, int equals) | 3419 | xlog_verify_grant_tail( |
| 3420 | struct log *log) | ||
| 3488 | { | 3421 | { |
| 3489 | if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { | 3422 | int tail_cycle, tail_blocks; |
| 3490 | if (equals) | 3423 | int cycle, space; |
| 3491 | ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); | 3424 | |
| 3492 | else | 3425 | /* |
| 3493 | ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); | 3426 | * Check to make sure the grant write head didn't just over lap the |
| 3494 | } else { | 3427 | * tail. If the cycles are the same, we can't be overlapping. |
| 3495 | ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); | 3428 | * Otherwise, make sure that the cycles differ by exactly one and |
| 3496 | ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); | 3429 | * check the byte count. |
| 3497 | } | 3430 | */ |
| 3498 | } /* xlog_verify_grant_head */ | 3431 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); |
| 3432 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); | ||
| 3433 | if (tail_cycle != cycle) { | ||
| 3434 | ASSERT(cycle - 1 == tail_cycle); | ||
| 3435 | ASSERT(space <= BBTOB(tail_blocks)); | ||
| 3436 | } | ||
| 3437 | } | ||
| 3499 | 3438 | ||
| 3500 | /* check if it will fit */ | 3439 | /* check if it will fit */ |
| 3501 | STATIC void | 3440 | STATIC void |
| @@ -3716,12 +3655,10 @@ xfs_log_force_umount( | |||
| 3716 | xlog_cil_force(log); | 3655 | xlog_cil_force(log); |
| 3717 | 3656 | ||
| 3718 | /* | 3657 | /* |
| 3719 | * We must hold both the GRANT lock and the LOG lock, | 3658 | * mark the filesystem and the as in a shutdown state and wake |
| 3720 | * before we mark the filesystem SHUTDOWN and wake | 3659 | * everybody up to tell them the bad news. |
| 3721 | * everybody up to tell the bad news. | ||
| 3722 | */ | 3660 | */ |
| 3723 | spin_lock(&log->l_icloglock); | 3661 | spin_lock(&log->l_icloglock); |
| 3724 | spin_lock(&log->l_grant_lock); | ||
| 3725 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; | 3662 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; |
| 3726 | if (mp->m_sb_bp) | 3663 | if (mp->m_sb_bp) |
| 3727 | XFS_BUF_DONE(mp->m_sb_bp); | 3664 | XFS_BUF_DONE(mp->m_sb_bp); |
| @@ -3742,27 +3679,21 @@ xfs_log_force_umount( | |||
| 3742 | spin_unlock(&log->l_icloglock); | 3679 | spin_unlock(&log->l_icloglock); |
| 3743 | 3680 | ||
| 3744 | /* | 3681 | /* |
| 3745 | * We don't want anybody waiting for log reservations | 3682 | * We don't want anybody waiting for log reservations after this. That |
| 3746 | * after this. That means we have to wake up everybody | 3683 | * means we have to wake up everybody queued up on reserveq as well as |
| 3747 | * queued up on reserve_headq as well as write_headq. | 3684 | * writeq. In addition, we make sure in xlog_{re}grant_log_space that |
| 3748 | * In addition, we make sure in xlog_{re}grant_log_space | 3685 | * we don't enqueue anything once the SHUTDOWN flag is set, and this |
| 3749 | * that we don't enqueue anything once the SHUTDOWN flag | 3686 | * action is protected by the grant locks. |
| 3750 | * is set, and this action is protected by the GRANTLOCK. | ||
| 3751 | */ | 3687 | */ |
| 3752 | if ((tic = log->l_reserve_headq)) { | 3688 | spin_lock(&log->l_grant_reserve_lock); |
| 3753 | do { | 3689 | list_for_each_entry(tic, &log->l_reserveq, t_queue) |
| 3754 | sv_signal(&tic->t_wait); | 3690 | wake_up(&tic->t_wait); |
| 3755 | tic = tic->t_next; | 3691 | spin_unlock(&log->l_grant_reserve_lock); |
| 3756 | } while (tic != log->l_reserve_headq); | 3692 | |
| 3757 | } | 3693 | spin_lock(&log->l_grant_write_lock); |
| 3758 | 3694 | list_for_each_entry(tic, &log->l_writeq, t_queue) | |
| 3759 | if ((tic = log->l_write_headq)) { | 3695 | wake_up(&tic->t_wait); |
| 3760 | do { | 3696 | spin_unlock(&log->l_grant_write_lock); |
| 3761 | sv_signal(&tic->t_wait); | ||
| 3762 | tic = tic->t_next; | ||
| 3763 | } while (tic != log->l_write_headq); | ||
| 3764 | } | ||
| 3765 | spin_unlock(&log->l_grant_lock); | ||
| 3766 | 3697 | ||
| 3767 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3698 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
| 3768 | ASSERT(!logerror); | 3699 | ASSERT(!logerror); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 23d6ceb5e97b..9dc8125d04e5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
| @@ -61,7 +61,7 @@ xlog_cil_init( | |||
| 61 | INIT_LIST_HEAD(&cil->xc_committing); | 61 | INIT_LIST_HEAD(&cil->xc_committing); |
| 62 | spin_lock_init(&cil->xc_cil_lock); | 62 | spin_lock_init(&cil->xc_cil_lock); |
| 63 | init_rwsem(&cil->xc_ctx_lock); | 63 | init_rwsem(&cil->xc_ctx_lock); |
| 64 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | 64 | init_waitqueue_head(&cil->xc_commit_wait); |
| 65 | 65 | ||
| 66 | INIT_LIST_HEAD(&ctx->committing); | 66 | INIT_LIST_HEAD(&ctx->committing); |
| 67 | INIT_LIST_HEAD(&ctx->busy_extents); | 67 | INIT_LIST_HEAD(&ctx->busy_extents); |
| @@ -361,15 +361,10 @@ xlog_cil_committed( | |||
| 361 | int abort) | 361 | int abort) |
| 362 | { | 362 | { |
| 363 | struct xfs_cil_ctx *ctx = args; | 363 | struct xfs_cil_ctx *ctx = args; |
| 364 | struct xfs_log_vec *lv; | ||
| 365 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
| 366 | struct xfs_busy_extent *busyp, *n; | 364 | struct xfs_busy_extent *busyp, *n; |
| 367 | 365 | ||
| 368 | /* unpin all the log items */ | 366 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
| 369 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { | 367 | ctx->start_lsn, abort); |
| 370 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
| 371 | abortflag); | ||
| 372 | } | ||
| 373 | 368 | ||
| 374 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | 369 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) |
| 375 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | 370 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); |
| @@ -568,7 +563,7 @@ restart: | |||
| 568 | * It is still being pushed! Wait for the push to | 563 | * It is still being pushed! Wait for the push to |
| 569 | * complete, then start again from the beginning. | 564 | * complete, then start again from the beginning. |
| 570 | */ | 565 | */ |
| 571 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 566 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
| 572 | goto restart; | 567 | goto restart; |
| 573 | } | 568 | } |
| 574 | } | 569 | } |
| @@ -592,7 +587,7 @@ restart: | |||
| 592 | */ | 587 | */ |
| 593 | spin_lock(&cil->xc_cil_lock); | 588 | spin_lock(&cil->xc_cil_lock); |
| 594 | ctx->commit_lsn = commit_lsn; | 589 | ctx->commit_lsn = commit_lsn; |
| 595 | sv_broadcast(&cil->xc_commit_wait); | 590 | wake_up_all(&cil->xc_commit_wait); |
| 596 | spin_unlock(&cil->xc_cil_lock); | 591 | spin_unlock(&cil->xc_cil_lock); |
| 597 | 592 | ||
| 598 | /* release the hounds! */ | 593 | /* release the hounds! */ |
| @@ -757,7 +752,7 @@ restart: | |||
| 757 | * It is still being pushed! Wait for the push to | 752 | * It is still being pushed! Wait for the push to |
| 758 | * complete, then start again from the beginning. | 753 | * complete, then start again from the beginning. |
| 759 | */ | 754 | */ |
| 760 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 755 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
| 761 | goto restart; | 756 | goto restart; |
| 762 | } | 757 | } |
| 763 | if (ctx->sequence != sequence) | 758 | if (ctx->sequence != sequence) |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index edcdfe01617f..d5f8be8f4bf6 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | struct xfs_buf; | 21 | struct xfs_buf; |
| 22 | struct log; | 22 | struct log; |
| 23 | struct xlog_ticket; | 23 | struct xlog_ticket; |
| 24 | struct xfs_buf_cancel; | ||
| 25 | struct xfs_mount; | 24 | struct xfs_mount; |
| 26 | 25 | ||
| 27 | /* | 26 | /* |
| @@ -54,7 +53,6 @@ struct xfs_mount; | |||
| 54 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ | 53 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ |
| 55 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) | 54 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) |
| 56 | 55 | ||
| 57 | |||
| 58 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) | 56 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) |
| 59 | { | 57 | { |
| 60 | return ((xfs_lsn_t)cycle << 32) | block; | 58 | return ((xfs_lsn_t)cycle << 32) | block; |
| @@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i) | |||
| 133 | */ | 131 | */ |
| 134 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ | 132 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ |
| 135 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ | 133 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ |
| 136 | #define XLOG_TIC_IN_Q 0x4 | ||
| 137 | 134 | ||
| 138 | #define XLOG_TIC_FLAGS \ | 135 | #define XLOG_TIC_FLAGS \ |
| 139 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ | 136 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ |
| 140 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ | 137 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } |
| 141 | { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" } | ||
| 142 | 138 | ||
| 143 | #endif /* __KERNEL__ */ | 139 | #endif /* __KERNEL__ */ |
| 144 | 140 | ||
| @@ -244,9 +240,8 @@ typedef struct xlog_res { | |||
| 244 | } xlog_res_t; | 240 | } xlog_res_t; |
| 245 | 241 | ||
| 246 | typedef struct xlog_ticket { | 242 | typedef struct xlog_ticket { |
| 247 | sv_t t_wait; /* ticket wait queue : 20 */ | 243 | wait_queue_head_t t_wait; /* ticket wait queue */ |
| 248 | struct xlog_ticket *t_next; /* :4|8 */ | 244 | struct list_head t_queue; /* reserve/write queue */ |
| 249 | struct xlog_ticket *t_prev; /* :4|8 */ | ||
| 250 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 245 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
| 251 | atomic_t t_ref; /* ticket reference count : 4 */ | 246 | atomic_t t_ref; /* ticket reference count : 4 */ |
| 252 | int t_curr_res; /* current reservation in bytes : 4 */ | 247 | int t_curr_res; /* current reservation in bytes : 4 */ |
| @@ -353,8 +348,8 @@ typedef union xlog_in_core2 { | |||
| 353 | * and move everything else out to subsequent cachelines. | 348 | * and move everything else out to subsequent cachelines. |
| 354 | */ | 349 | */ |
| 355 | typedef struct xlog_in_core { | 350 | typedef struct xlog_in_core { |
| 356 | sv_t ic_force_wait; | 351 | wait_queue_head_t ic_force_wait; |
| 357 | sv_t ic_write_wait; | 352 | wait_queue_head_t ic_write_wait; |
| 358 | struct xlog_in_core *ic_next; | 353 | struct xlog_in_core *ic_next; |
| 359 | struct xlog_in_core *ic_prev; | 354 | struct xlog_in_core *ic_prev; |
| 360 | struct xfs_buf *ic_bp; | 355 | struct xfs_buf *ic_bp; |
| @@ -421,7 +416,7 @@ struct xfs_cil { | |||
| 421 | struct xfs_cil_ctx *xc_ctx; | 416 | struct xfs_cil_ctx *xc_ctx; |
| 422 | struct rw_semaphore xc_ctx_lock; | 417 | struct rw_semaphore xc_ctx_lock; |
| 423 | struct list_head xc_committing; | 418 | struct list_head xc_committing; |
| 424 | sv_t xc_commit_wait; | 419 | wait_queue_head_t xc_commit_wait; |
| 425 | xfs_lsn_t xc_current_sequence; | 420 | xfs_lsn_t xc_current_sequence; |
| 426 | }; | 421 | }; |
| 427 | 422 | ||
| @@ -491,7 +486,7 @@ typedef struct log { | |||
| 491 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 486 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
| 492 | uint l_flags; | 487 | uint l_flags; |
| 493 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 488 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ |
| 494 | struct xfs_buf_cancel **l_buf_cancel_table; | 489 | struct list_head *l_buf_cancel_table; |
| 495 | int l_iclog_hsize; /* size of iclog header */ | 490 | int l_iclog_hsize; /* size of iclog header */ |
| 496 | int l_iclog_heads; /* # of iclog header sectors */ | 491 | int l_iclog_heads; /* # of iclog header sectors */ |
| 497 | uint l_sectBBsize; /* sector size in BBs (2^n) */ | 492 | uint l_sectBBsize; /* sector size in BBs (2^n) */ |
| @@ -503,29 +498,40 @@ typedef struct log { | |||
| 503 | int l_logBBsize; /* size of log in BB chunks */ | 498 | int l_logBBsize; /* size of log in BB chunks */ |
| 504 | 499 | ||
| 505 | /* The following block of fields are changed while holding icloglock */ | 500 | /* The following block of fields are changed while holding icloglock */ |
| 506 | sv_t l_flush_wait ____cacheline_aligned_in_smp; | 501 | wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; |
| 507 | /* waiting for iclog flush */ | 502 | /* waiting for iclog flush */ |
| 508 | int l_covered_state;/* state of "covering disk | 503 | int l_covered_state;/* state of "covering disk |
| 509 | * log entries" */ | 504 | * log entries" */ |
| 510 | xlog_in_core_t *l_iclog; /* head log queue */ | 505 | xlog_in_core_t *l_iclog; /* head log queue */ |
| 511 | spinlock_t l_icloglock; /* grab to change iclog state */ | 506 | spinlock_t l_icloglock; /* grab to change iclog state */ |
| 512 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | ||
| 513 | * buffers */ | ||
| 514 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ | ||
| 515 | int l_curr_cycle; /* Cycle number of log writes */ | 507 | int l_curr_cycle; /* Cycle number of log writes */ |
| 516 | int l_prev_cycle; /* Cycle number before last | 508 | int l_prev_cycle; /* Cycle number before last |
| 517 | * block increment */ | 509 | * block increment */ |
| 518 | int l_curr_block; /* current logical log block */ | 510 | int l_curr_block; /* current logical log block */ |
| 519 | int l_prev_block; /* previous logical log block */ | 511 | int l_prev_block; /* previous logical log block */ |
| 520 | 512 | ||
| 521 | /* The following block of fields are changed while holding grant_lock */ | 513 | /* |
| 522 | spinlock_t l_grant_lock ____cacheline_aligned_in_smp; | 514 | * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and |
| 523 | xlog_ticket_t *l_reserve_headq; | 515 | * read without needing to hold specific locks. To avoid operations |
| 524 | xlog_ticket_t *l_write_headq; | 516 | * contending with other hot objects, place each of them on a separate |
| 525 | int l_grant_reserve_cycle; | 517 | * cacheline. |
| 526 | int l_grant_reserve_bytes; | 518 | */ |
| 527 | int l_grant_write_cycle; | 519 | /* lsn of last LR on disk */ |
| 528 | int l_grant_write_bytes; | 520 | atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; |
| 521 | /* lsn of 1st LR with unflushed buffers */ | ||
| 522 | atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; | ||
| 523 | |||
| 524 | /* | ||
| 525 | * ticket grant locks, queues and accounting have their own cachelines | ||
| 526 | * as these are quite hot and can be operated on concurrently. | ||
| 527 | */ | ||
| 528 | spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; | ||
| 529 | struct list_head l_reserveq; | ||
| 530 | atomic64_t l_grant_reserve_head; | ||
| 531 | |||
| 532 | spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; | ||
| 533 | struct list_head l_writeq; | ||
| 534 | atomic64_t l_grant_write_head; | ||
| 529 | 535 | ||
| 530 | /* The following field are used for debugging; need to hold icloglock */ | 536 | /* The following field are used for debugging; need to hold icloglock */ |
| 531 | #ifdef DEBUG | 537 | #ifdef DEBUG |
| @@ -534,6 +540,9 @@ typedef struct log { | |||
| 534 | 540 | ||
| 535 | } xlog_t; | 541 | } xlog_t; |
| 536 | 542 | ||
| 543 | #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ | ||
| 544 | ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) | ||
| 545 | |||
| 537 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 546 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
| 538 | 547 | ||
| 539 | /* common routines */ | 548 | /* common routines */ |
| @@ -562,6 +571,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | |||
| 562 | xlog_in_core_t **commit_iclog, uint flags); | 571 | xlog_in_core_t **commit_iclog, uint flags); |
| 563 | 572 | ||
| 564 | /* | 573 | /* |
| 574 | * When we crack an atomic LSN, we sample it first so that the value will not | ||
| 575 | * change while we are cracking it into the component values. This means we | ||
| 576 | * will always get consistent component values to work from. This should always | ||
| 577 | be used to sample and crack LSNs that are stored and updated in atomic | ||
| 578 | * variables. | ||
| 579 | */ | ||
| 580 | static inline void | ||
| 581 | xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) | ||
| 582 | { | ||
| 583 | xfs_lsn_t val = atomic64_read(lsn); | ||
| 584 | |||
| 585 | *cycle = CYCLE_LSN(val); | ||
| 586 | *block = BLOCK_LSN(val); | ||
| 587 | } | ||
| 588 | |||
| 589 | /* | ||
| 590 | * Calculate and assign a value to an atomic LSN variable from component pieces. | ||
| 591 | */ | ||
| 592 | static inline void | ||
| 593 | xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) | ||
| 594 | { | ||
| 595 | atomic64_set(lsn, xlog_assign_lsn(cycle, block)); | ||
| 596 | } | ||
| 597 | |||
| 598 | /* | ||
| 599 | * When we crack the grant head, we sample it first so that the value will not | ||
| 600 | * change while we are cracking it into the component values. This means we | ||
| 601 | * will always get consistent component values to work from. | ||
| 602 | */ | ||
| 603 | static inline void | ||
| 604 | xlog_crack_grant_head_val(int64_t val, int *cycle, int *space) | ||
| 605 | { | ||
| 606 | *cycle = val >> 32; | ||
| 607 | *space = val & 0xffffffff; | ||
| 608 | } | ||
| 609 | |||
| 610 | static inline void | ||
| 611 | xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) | ||
| 612 | { | ||
| 613 | xlog_crack_grant_head_val(atomic64_read(head), cycle, space); | ||
| 614 | } | ||
| 615 | |||
| 616 | static inline int64_t | ||
| 617 | xlog_assign_grant_head_val(int cycle, int space) | ||
| 618 | { | ||
| 619 | return ((int64_t)cycle << 32) | space; | ||
| 620 | } | ||
| 621 | |||
| 622 | static inline void | ||
| 623 | xlog_assign_grant_head(atomic64_t *head, int cycle, int space) | ||
| 624 | { | ||
| 625 | atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); | ||
| 626 | } | ||
| 627 | |||
| 628 | /* | ||
| 565 | * Committed Item List interfaces | 629 | * Committed Item List interfaces |
| 566 | */ | 630 | */ |
| 567 | int xlog_cil_init(struct log *log); | 631 | int xlog_cil_init(struct log *log); |
| @@ -585,6 +649,21 @@ xlog_cil_force(struct log *log) | |||
| 585 | */ | 649 | */ |
| 586 | #define XLOG_UNMOUNT_REC_TYPE (-1U) | 650 | #define XLOG_UNMOUNT_REC_TYPE (-1U) |
| 587 | 651 | ||
| 652 | /* | ||
| 653 | * Wrapper function for waiting on a wait queue serialised against wakeups | ||
| 654 | * by a spinlock. This matches the semantics of all the wait queues used in the | ||
| 655 | * log code. | ||
| 656 | */ | ||
| 657 | static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) | ||
| 658 | { | ||
| 659 | DECLARE_WAITQUEUE(wait, current); | ||
| 660 | |||
| 661 | add_wait_queue_exclusive(wq, &wait); | ||
| 662 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 663 | spin_unlock(lock); | ||
| 664 | schedule(); | ||
| 665 | remove_wait_queue(wq, &wait); | ||
| 666 | } | ||
| 588 | #endif /* __KERNEL__ */ | 667 | #endif /* __KERNEL__ */ |
| 589 | 668 | ||
| 590 | #endif /* __XFS_LOG_PRIV_H__ */ | 669 | #endif /* __XFS_LOG_PRIV_H__ */ |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 966d3f97458c..204d8e5fa7fa 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
| @@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *); | |||
| 53 | #endif | 53 | #endif |
| 54 | 54 | ||
| 55 | /* | 55 | /* |
| 56 | * This structure is used during recovery to record the buf log items which | ||
| 57 | * have been canceled and should not be replayed. | ||
| 58 | */ | ||
| 59 | struct xfs_buf_cancel { | ||
| 60 | xfs_daddr_t bc_blkno; | ||
| 61 | uint bc_len; | ||
| 62 | int bc_refcount; | ||
| 63 | struct list_head bc_list; | ||
| 64 | }; | ||
| 65 | |||
| 66 | /* | ||
| 56 | * Sector aligned buffer routines for buffer create/read/write/access | 67 | * Sector aligned buffer routines for buffer create/read/write/access |
| 57 | */ | 68 | */ |
| 58 | 69 | ||
| @@ -925,12 +936,12 @@ xlog_find_tail( | |||
| 925 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 936 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
| 926 | if (found == 2) | 937 | if (found == 2) |
| 927 | log->l_curr_cycle++; | 938 | log->l_curr_cycle++; |
| 928 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); | 939 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); |
| 929 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); | 940 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); |
| 930 | log->l_grant_reserve_cycle = log->l_curr_cycle; | 941 | xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, |
| 931 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | 942 | BBTOB(log->l_curr_block)); |
| 932 | log->l_grant_write_cycle = log->l_curr_cycle; | 943 | xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, |
| 933 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | 944 | BBTOB(log->l_curr_block)); |
| 934 | 945 | ||
| 935 | /* | 946 | /* |
| 936 | * Look for unmount record. If we find it, then we know there | 947 | * Look for unmount record. If we find it, then we know there |
| @@ -960,7 +971,7 @@ xlog_find_tail( | |||
| 960 | } | 971 | } |
| 961 | after_umount_blk = (i + hblks + (int) | 972 | after_umount_blk = (i + hblks + (int) |
| 962 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 973 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
| 963 | tail_lsn = log->l_tail_lsn; | 974 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
| 964 | if (*head_blk == after_umount_blk && | 975 | if (*head_blk == after_umount_blk && |
| 965 | be32_to_cpu(rhead->h_num_logops) == 1) { | 976 | be32_to_cpu(rhead->h_num_logops) == 1) { |
| 966 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 977 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
| @@ -975,12 +986,10 @@ xlog_find_tail( | |||
| 975 | * log records will point recovery to after the | 986 | * log records will point recovery to after the |
| 976 | * current unmount record. | 987 | * current unmount record. |
| 977 | */ | 988 | */ |
| 978 | log->l_tail_lsn = | 989 | xlog_assign_atomic_lsn(&log->l_tail_lsn, |
| 979 | xlog_assign_lsn(log->l_curr_cycle, | 990 | log->l_curr_cycle, after_umount_blk); |
| 980 | after_umount_blk); | 991 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, |
| 981 | log->l_last_sync_lsn = | 992 | log->l_curr_cycle, after_umount_blk); |
| 982 | xlog_assign_lsn(log->l_curr_cycle, | ||
| 983 | after_umount_blk); | ||
| 984 | *tail_blk = after_umount_blk; | 993 | *tail_blk = after_umount_blk; |
| 985 | 994 | ||
| 986 | /* | 995 | /* |
| @@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans( | |||
| 1605 | * record in the table to tell us how many times we expect to see this | 1614 | * record in the table to tell us how many times we expect to see this |
| 1606 | * record during the second pass. | 1615 | * record during the second pass. |
| 1607 | */ | 1616 | */ |
| 1608 | STATIC void | 1617 | STATIC int |
| 1609 | xlog_recover_do_buffer_pass1( | 1618 | xlog_recover_buffer_pass1( |
| 1610 | xlog_t *log, | 1619 | struct log *log, |
| 1611 | xfs_buf_log_format_t *buf_f) | 1620 | xlog_recover_item_t *item) |
| 1612 | { | 1621 | { |
| 1613 | xfs_buf_cancel_t *bcp; | 1622 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
| 1614 | xfs_buf_cancel_t *nextp; | 1623 | struct list_head *bucket; |
| 1615 | xfs_buf_cancel_t *prevp; | 1624 | struct xfs_buf_cancel *bcp; |
| 1616 | xfs_buf_cancel_t **bucket; | ||
| 1617 | xfs_daddr_t blkno = 0; | ||
| 1618 | uint len = 0; | ||
| 1619 | ushort flags = 0; | ||
| 1620 | |||
| 1621 | switch (buf_f->blf_type) { | ||
| 1622 | case XFS_LI_BUF: | ||
| 1623 | blkno = buf_f->blf_blkno; | ||
| 1624 | len = buf_f->blf_len; | ||
| 1625 | flags = buf_f->blf_flags; | ||
| 1626 | break; | ||
| 1627 | } | ||
| 1628 | 1625 | ||
| 1629 | /* | 1626 | /* |
| 1630 | * If this isn't a cancel buffer item, then just return. | 1627 | * If this isn't a cancel buffer item, then just return. |
| 1631 | */ | 1628 | */ |
| 1632 | if (!(flags & XFS_BLF_CANCEL)) { | 1629 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
| 1633 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1630 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
| 1634 | return; | 1631 | return 0; |
| 1635 | } | ||
| 1636 | |||
| 1637 | /* | ||
| 1638 | * Insert an xfs_buf_cancel record into the hash table of | ||
| 1639 | * them. If there is already an identical record, bump | ||
| 1640 | * its reference count. | ||
| 1641 | */ | ||
| 1642 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
| 1643 | XLOG_BC_TABLE_SIZE]; | ||
| 1644 | /* | ||
| 1645 | * If the hash bucket is empty then just insert a new record into | ||
| 1646 | * the bucket. | ||
| 1647 | */ | ||
| 1648 | if (*bucket == NULL) { | ||
| 1649 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | ||
| 1650 | KM_SLEEP); | ||
| 1651 | bcp->bc_blkno = blkno; | ||
| 1652 | bcp->bc_len = len; | ||
| 1653 | bcp->bc_refcount = 1; | ||
| 1654 | bcp->bc_next = NULL; | ||
| 1655 | *bucket = bcp; | ||
| 1656 | return; | ||
| 1657 | } | 1632 | } |
| 1658 | 1633 | ||
| 1659 | /* | 1634 | /* |
| 1660 | * The hash bucket is not empty, so search for duplicates of our | 1635 | * Insert an xfs_buf_cancel record into the hash table of them. |
| 1661 | * record. If we find one them just bump its refcount. If not | 1636 | * If there is already an identical record, bump its reference count. |
| 1662 | * then add us at the end of the list. | ||
| 1663 | */ | 1637 | */ |
| 1664 | prevp = NULL; | 1638 | bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
| 1665 | nextp = *bucket; | 1639 | list_for_each_entry(bcp, bucket, bc_list) { |
| 1666 | while (nextp != NULL) { | 1640 | if (bcp->bc_blkno == buf_f->blf_blkno && |
| 1667 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { | 1641 | bcp->bc_len == buf_f->blf_len) { |
| 1668 | nextp->bc_refcount++; | 1642 | bcp->bc_refcount++; |
| 1669 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); | 1643 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
| 1670 | return; | 1644 | return 0; |
| 1671 | } | 1645 | } |
| 1672 | prevp = nextp; | 1646 | } |
| 1673 | nextp = nextp->bc_next; | 1647 | |
| 1674 | } | 1648 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
| 1675 | ASSERT(prevp != NULL); | 1649 | bcp->bc_blkno = buf_f->blf_blkno; |
| 1676 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | 1650 | bcp->bc_len = buf_f->blf_len; |
| 1677 | KM_SLEEP); | ||
| 1678 | bcp->bc_blkno = blkno; | ||
| 1679 | bcp->bc_len = len; | ||
| 1680 | bcp->bc_refcount = 1; | 1651 | bcp->bc_refcount = 1; |
| 1681 | bcp->bc_next = NULL; | 1652 | list_add_tail(&bcp->bc_list, bucket); |
| 1682 | prevp->bc_next = bcp; | 1653 | |
| 1683 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); | 1654 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
| 1655 | return 0; | ||
| 1684 | } | 1656 | } |
| 1685 | 1657 | ||
| 1686 | /* | 1658 | /* |
| @@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1( | |||
| 1698 | */ | 1670 | */ |
| 1699 | STATIC int | 1671 | STATIC int |
| 1700 | xlog_check_buffer_cancelled( | 1672 | xlog_check_buffer_cancelled( |
| 1701 | xlog_t *log, | 1673 | struct log *log, |
| 1702 | xfs_daddr_t blkno, | 1674 | xfs_daddr_t blkno, |
| 1703 | uint len, | 1675 | uint len, |
| 1704 | ushort flags) | 1676 | ushort flags) |
| 1705 | { | 1677 | { |
| 1706 | xfs_buf_cancel_t *bcp; | 1678 | struct list_head *bucket; |
| 1707 | xfs_buf_cancel_t *prevp; | 1679 | struct xfs_buf_cancel *bcp; |
| 1708 | xfs_buf_cancel_t **bucket; | ||
| 1709 | 1680 | ||
| 1710 | if (log->l_buf_cancel_table == NULL) { | 1681 | if (log->l_buf_cancel_table == NULL) { |
| 1711 | /* | 1682 | /* |
| @@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled( | |||
| 1716 | return 0; | 1687 | return 0; |
| 1717 | } | 1688 | } |
| 1718 | 1689 | ||
| 1719 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
| 1720 | XLOG_BC_TABLE_SIZE]; | ||
| 1721 | bcp = *bucket; | ||
| 1722 | if (bcp == NULL) { | ||
| 1723 | /* | ||
| 1724 | * There is no corresponding entry in the table built | ||
| 1725 | * in pass one, so this buffer has not been cancelled. | ||
| 1726 | */ | ||
| 1727 | ASSERT(!(flags & XFS_BLF_CANCEL)); | ||
| 1728 | return 0; | ||
| 1729 | } | ||
| 1730 | |||
| 1731 | /* | 1690 | /* |
| 1732 | * Search for an entry in the buffer cancel table that | 1691 | * Search for an entry in the cancel table that matches our buffer. |
| 1733 | * matches our buffer. | ||
| 1734 | */ | 1692 | */ |
| 1735 | prevp = NULL; | 1693 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
| 1736 | while (bcp != NULL) { | 1694 | list_for_each_entry(bcp, bucket, bc_list) { |
| 1737 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) { | 1695 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
| 1738 | /* | 1696 | goto found; |
| 1739 | * We've got a match, so return 1 so that the | ||
| 1740 | * recovery of this buffer is cancelled. | ||
| 1741 | * If this buffer is actually a buffer cancel | ||
| 1742 | * log item, then decrement the refcount on the | ||
| 1743 | * one in the table and remove it if this is the | ||
| 1744 | * last reference. | ||
| 1745 | */ | ||
| 1746 | if (flags & XFS_BLF_CANCEL) { | ||
| 1747 | bcp->bc_refcount--; | ||
| 1748 | if (bcp->bc_refcount == 0) { | ||
| 1749 | if (prevp == NULL) { | ||
| 1750 | *bucket = bcp->bc_next; | ||
| 1751 | } else { | ||
| 1752 | prevp->bc_next = bcp->bc_next; | ||
| 1753 | } | ||
| 1754 | kmem_free(bcp); | ||
| 1755 | } | ||
| 1756 | } | ||
| 1757 | return 1; | ||
| 1758 | } | ||
| 1759 | prevp = bcp; | ||
| 1760 | bcp = bcp->bc_next; | ||
| 1761 | } | 1697 | } |
| 1698 | |||
| 1762 | /* | 1699 | /* |
| 1763 | * We didn't find a corresponding entry in the table, so | 1700 | * We didn't find a corresponding entry in the table, so return 0 so |
| 1764 | * return 0 so that the buffer is NOT cancelled. | 1701 | * that the buffer is NOT cancelled. |
| 1765 | */ | 1702 | */ |
| 1766 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1703 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
| 1767 | return 0; | 1704 | return 0; |
| 1768 | } | ||
| 1769 | 1705 | ||
| 1770 | STATIC int | 1706 | found: |
| 1771 | xlog_recover_do_buffer_pass2( | 1707 | /* |
| 1772 | xlog_t *log, | 1708 | * We've go a match, so return 1 so that the recovery of this buffer |
| 1773 | xfs_buf_log_format_t *buf_f) | 1709 | * is cancelled. If this buffer is actually a buffer cancel log |
| 1774 | { | 1710 | * item, then decrement the refcount on the one in the table and |
| 1775 | xfs_daddr_t blkno = 0; | 1711 | * remove it if this is the last reference. |
| 1776 | ushort flags = 0; | 1712 | */ |
| 1777 | uint len = 0; | 1713 | if (flags & XFS_BLF_CANCEL) { |
| 1778 | 1714 | if (--bcp->bc_refcount == 0) { | |
| 1779 | switch (buf_f->blf_type) { | 1715 | list_del(&bcp->bc_list); |
| 1780 | case XFS_LI_BUF: | 1716 | kmem_free(bcp); |
| 1781 | blkno = buf_f->blf_blkno; | 1717 | } |
| 1782 | flags = buf_f->blf_flags; | ||
| 1783 | len = buf_f->blf_len; | ||
| 1784 | break; | ||
| 1785 | } | 1718 | } |
| 1786 | 1719 | return 1; | |
| 1787 | return xlog_check_buffer_cancelled(log, blkno, len, flags); | ||
| 1788 | } | 1720 | } |
| 1789 | 1721 | ||
| 1790 | /* | 1722 | /* |
| 1791 | * Perform recovery for a buffer full of inodes. In these buffers, | 1723 | * Perform recovery for a buffer full of inodes. In these buffers, the only |
| 1792 | * the only data which should be recovered is that which corresponds | 1724 | * data which should be recovered is that which corresponds to the |
| 1793 | * to the di_next_unlinked pointers in the on disk inode structures. | 1725 | * di_next_unlinked pointers in the on disk inode structures. The rest of the |
| 1794 | * The rest of the data for the inodes is always logged through the | 1726 | * data for the inodes is always logged through the inodes themselves rather |
| 1795 | * inodes themselves rather than the inode buffer and is recovered | 1727 | * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
| 1796 | * in xlog_recover_do_inode_trans(). | ||
| 1797 | * | 1728 | * |
| 1798 | * The only time when buffers full of inodes are fully recovered is | 1729 | * The only time when buffers full of inodes are fully recovered is when the |
| 1799 | * when the buffer is full of newly allocated inodes. In this case | 1730 | * buffer is full of newly allocated inodes. In this case the buffer will |
| 1800 | * the buffer will not be marked as an inode buffer and so will be | 1731 | * not be marked as an inode buffer and so will be sent to |
| 1801 | * sent to xlog_recover_do_reg_buffer() below during recovery. | 1732 | * xlog_recover_do_reg_buffer() below during recovery. |
| 1802 | */ | 1733 | */ |
| 1803 | STATIC int | 1734 | STATIC int |
| 1804 | xlog_recover_do_inode_buffer( | 1735 | xlog_recover_do_inode_buffer( |
| 1805 | xfs_mount_t *mp, | 1736 | struct xfs_mount *mp, |
| 1806 | xlog_recover_item_t *item, | 1737 | xlog_recover_item_t *item, |
| 1807 | xfs_buf_t *bp, | 1738 | struct xfs_buf *bp, |
| 1808 | xfs_buf_log_format_t *buf_f) | 1739 | xfs_buf_log_format_t *buf_f) |
| 1809 | { | 1740 | { |
| 1810 | int i; | 1741 | int i; |
| 1811 | int item_index; | 1742 | int item_index = 0; |
| 1812 | int bit; | 1743 | int bit = 0; |
| 1813 | int nbits; | 1744 | int nbits = 0; |
| 1814 | int reg_buf_offset; | 1745 | int reg_buf_offset = 0; |
| 1815 | int reg_buf_bytes; | 1746 | int reg_buf_bytes = 0; |
| 1816 | int next_unlinked_offset; | 1747 | int next_unlinked_offset; |
| 1817 | int inodes_per_buf; | 1748 | int inodes_per_buf; |
| 1818 | xfs_agino_t *logged_nextp; | 1749 | xfs_agino_t *logged_nextp; |
| 1819 | xfs_agino_t *buffer_nextp; | 1750 | xfs_agino_t *buffer_nextp; |
| 1820 | unsigned int *data_map = NULL; | ||
| 1821 | unsigned int map_size = 0; | ||
| 1822 | 1751 | ||
| 1823 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1752 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
| 1824 | 1753 | ||
| 1825 | switch (buf_f->blf_type) { | ||
| 1826 | case XFS_LI_BUF: | ||
| 1827 | data_map = buf_f->blf_data_map; | ||
| 1828 | map_size = buf_f->blf_map_size; | ||
| 1829 | break; | ||
| 1830 | } | ||
| 1831 | /* | ||
| 1832 | * Set the variables corresponding to the current region to | ||
| 1833 | * 0 so that we'll initialize them on the first pass through | ||
| 1834 | * the loop. | ||
| 1835 | */ | ||
| 1836 | reg_buf_offset = 0; | ||
| 1837 | reg_buf_bytes = 0; | ||
| 1838 | bit = 0; | ||
| 1839 | nbits = 0; | ||
| 1840 | item_index = 0; | ||
| 1841 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1754 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; |
| 1842 | for (i = 0; i < inodes_per_buf; i++) { | 1755 | for (i = 0; i < inodes_per_buf; i++) { |
| 1843 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1756 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
| @@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer( | |||
| 1852 | * the current di_next_unlinked field. | 1765 | * the current di_next_unlinked field. |
| 1853 | */ | 1766 | */ |
| 1854 | bit += nbits; | 1767 | bit += nbits; |
| 1855 | bit = xfs_next_bit(data_map, map_size, bit); | 1768 | bit = xfs_next_bit(buf_f->blf_data_map, |
| 1769 | buf_f->blf_map_size, bit); | ||
| 1856 | 1770 | ||
| 1857 | /* | 1771 | /* |
| 1858 | * If there are no more logged regions in the | 1772 | * If there are no more logged regions in the |
| 1859 | * buffer, then we're done. | 1773 | * buffer, then we're done. |
| 1860 | */ | 1774 | */ |
| 1861 | if (bit == -1) { | 1775 | if (bit == -1) |
| 1862 | return 0; | 1776 | return 0; |
| 1863 | } | ||
| 1864 | 1777 | ||
| 1865 | nbits = xfs_contig_bits(data_map, map_size, | 1778 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
| 1866 | bit); | 1779 | buf_f->blf_map_size, bit); |
| 1867 | ASSERT(nbits > 0); | 1780 | ASSERT(nbits > 0); |
| 1868 | reg_buf_offset = bit << XFS_BLF_SHIFT; | 1781 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
| 1869 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; | 1782 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
| @@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer( | |||
| 1875 | * di_next_unlinked field, then move on to the next | 1788 | * di_next_unlinked field, then move on to the next |
| 1876 | * di_next_unlinked field. | 1789 | * di_next_unlinked field. |
| 1877 | */ | 1790 | */ |
| 1878 | if (next_unlinked_offset < reg_buf_offset) { | 1791 | if (next_unlinked_offset < reg_buf_offset) |
| 1879 | continue; | 1792 | continue; |
| 1880 | } | ||
| 1881 | 1793 | ||
| 1882 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1794 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
| 1883 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1795 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
| @@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer( | |||
| 1913 | * given buffer. The bitmap in the buf log format structure indicates | 1825 | * given buffer. The bitmap in the buf log format structure indicates |
| 1914 | * where to place the logged data. | 1826 | * where to place the logged data. |
| 1915 | */ | 1827 | */ |
| 1916 | /*ARGSUSED*/ | ||
| 1917 | STATIC void | 1828 | STATIC void |
| 1918 | xlog_recover_do_reg_buffer( | 1829 | xlog_recover_do_reg_buffer( |
| 1919 | struct xfs_mount *mp, | 1830 | struct xfs_mount *mp, |
| 1920 | xlog_recover_item_t *item, | 1831 | xlog_recover_item_t *item, |
| 1921 | xfs_buf_t *bp, | 1832 | struct xfs_buf *bp, |
| 1922 | xfs_buf_log_format_t *buf_f) | 1833 | xfs_buf_log_format_t *buf_f) |
| 1923 | { | 1834 | { |
| 1924 | int i; | 1835 | int i; |
| 1925 | int bit; | 1836 | int bit; |
| 1926 | int nbits; | 1837 | int nbits; |
| 1927 | unsigned int *data_map = NULL; | ||
| 1928 | unsigned int map_size = 0; | ||
| 1929 | int error; | 1838 | int error; |
| 1930 | 1839 | ||
| 1931 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); | 1840 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
| 1932 | 1841 | ||
| 1933 | switch (buf_f->blf_type) { | ||
| 1934 | case XFS_LI_BUF: | ||
| 1935 | data_map = buf_f->blf_data_map; | ||
| 1936 | map_size = buf_f->blf_map_size; | ||
| 1937 | break; | ||
| 1938 | } | ||
| 1939 | bit = 0; | 1842 | bit = 0; |
| 1940 | i = 1; /* 0 is the buf format structure */ | 1843 | i = 1; /* 0 is the buf format structure */ |
| 1941 | while (1) { | 1844 | while (1) { |
| 1942 | bit = xfs_next_bit(data_map, map_size, bit); | 1845 | bit = xfs_next_bit(buf_f->blf_data_map, |
| 1846 | buf_f->blf_map_size, bit); | ||
| 1943 | if (bit == -1) | 1847 | if (bit == -1) |
| 1944 | break; | 1848 | break; |
| 1945 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1849 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
| 1850 | buf_f->blf_map_size, bit); | ||
| 1946 | ASSERT(nbits > 0); | 1851 | ASSERT(nbits > 0); |
| 1947 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1852 | ASSERT(item->ri_buf[i].i_addr != NULL); |
| 1948 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1853 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
| @@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer( | |||
| 2176 | * for more details on the implementation of the table of cancel records. | 2081 | * for more details on the implementation of the table of cancel records. |
| 2177 | */ | 2082 | */ |
| 2178 | STATIC int | 2083 | STATIC int |
| 2179 | xlog_recover_do_buffer_trans( | 2084 | xlog_recover_buffer_pass2( |
| 2180 | xlog_t *log, | 2085 | xlog_t *log, |
| 2181 | xlog_recover_item_t *item, | 2086 | xlog_recover_item_t *item) |
| 2182 | int pass) | ||
| 2183 | { | 2087 | { |
| 2184 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2088 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
| 2185 | xfs_mount_t *mp; | 2089 | xfs_mount_t *mp = log->l_mp; |
| 2186 | xfs_buf_t *bp; | 2090 | xfs_buf_t *bp; |
| 2187 | int error; | 2091 | int error; |
| 2188 | int cancel; | ||
| 2189 | xfs_daddr_t blkno; | ||
| 2190 | int len; | ||
| 2191 | ushort flags; | ||
| 2192 | uint buf_flags; | 2092 | uint buf_flags; |
| 2193 | 2093 | ||
| 2194 | if (pass == XLOG_RECOVER_PASS1) { | 2094 | /* |
| 2195 | /* | 2095 | * In this pass we only want to recover all the buffers which have |
| 2196 | * In this pass we're only looking for buf items | 2096 | * not been cancelled and are not cancellation buffers themselves. |
| 2197 | * with the XFS_BLF_CANCEL bit set. | 2097 | */ |
| 2198 | */ | 2098 | if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
| 2199 | xlog_recover_do_buffer_pass1(log, buf_f); | 2099 | buf_f->blf_len, buf_f->blf_flags)) { |
| 2100 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
| 2200 | return 0; | 2101 | return 0; |
| 2201 | } else { | ||
| 2202 | /* | ||
| 2203 | * In this pass we want to recover all the buffers | ||
| 2204 | * which have not been cancelled and are not | ||
| 2205 | * cancellation buffers themselves. The routine | ||
| 2206 | * we call here will tell us whether or not to | ||
| 2207 | * continue with the replay of this buffer. | ||
| 2208 | */ | ||
| 2209 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); | ||
| 2210 | if (cancel) { | ||
| 2211 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
| 2212 | return 0; | ||
| 2213 | } | ||
| 2214 | } | 2102 | } |
| 2103 | |||
| 2215 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2104 | trace_xfs_log_recover_buf_recover(log, buf_f); |
| 2216 | switch (buf_f->blf_type) { | ||
| 2217 | case XFS_LI_BUF: | ||
| 2218 | blkno = buf_f->blf_blkno; | ||
| 2219 | len = buf_f->blf_len; | ||
| 2220 | flags = buf_f->blf_flags; | ||
| 2221 | break; | ||
| 2222 | default: | ||
| 2223 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | ||
| 2224 | "xfs_log_recover: unknown buffer type 0x%x, logdev %s", | ||
| 2225 | buf_f->blf_type, log->l_mp->m_logname ? | ||
| 2226 | log->l_mp->m_logname : "internal"); | ||
| 2227 | XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", | ||
| 2228 | XFS_ERRLEVEL_LOW, log->l_mp); | ||
| 2229 | return XFS_ERROR(EFSCORRUPTED); | ||
| 2230 | } | ||
| 2231 | 2105 | ||
| 2232 | mp = log->l_mp; | ||
| 2233 | buf_flags = XBF_LOCK; | 2106 | buf_flags = XBF_LOCK; |
| 2234 | if (!(flags & XFS_BLF_INODE_BUF)) | 2107 | if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) |
| 2235 | buf_flags |= XBF_MAPPED; | 2108 | buf_flags |= XBF_MAPPED; |
| 2236 | 2109 | ||
| 2237 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2110 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
| 2111 | buf_flags); | ||
| 2238 | if (XFS_BUF_ISERROR(bp)) { | 2112 | if (XFS_BUF_ISERROR(bp)) { |
| 2239 | xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, | 2113 | xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, |
| 2240 | bp, blkno); | 2114 | bp, buf_f->blf_blkno); |
| 2241 | error = XFS_BUF_GETERROR(bp); | 2115 | error = XFS_BUF_GETERROR(bp); |
| 2242 | xfs_buf_relse(bp); | 2116 | xfs_buf_relse(bp); |
| 2243 | return error; | 2117 | return error; |
| 2244 | } | 2118 | } |
| 2245 | 2119 | ||
| 2246 | error = 0; | 2120 | error = 0; |
| 2247 | if (flags & XFS_BLF_INODE_BUF) { | 2121 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
| 2248 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2122 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
| 2249 | } else if (flags & | 2123 | } else if (buf_f->blf_flags & |
| 2250 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 2124 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
| 2251 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2125 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
| 2252 | } else { | 2126 | } else { |
| @@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans( | |||
| 2286 | } | 2160 | } |
| 2287 | 2161 | ||
| 2288 | STATIC int | 2162 | STATIC int |
| 2289 | xlog_recover_do_inode_trans( | 2163 | xlog_recover_inode_pass2( |
| 2290 | xlog_t *log, | 2164 | xlog_t *log, |
| 2291 | xlog_recover_item_t *item, | 2165 | xlog_recover_item_t *item) |
| 2292 | int pass) | ||
| 2293 | { | 2166 | { |
| 2294 | xfs_inode_log_format_t *in_f; | 2167 | xfs_inode_log_format_t *in_f; |
| 2295 | xfs_mount_t *mp; | 2168 | xfs_mount_t *mp = log->l_mp; |
| 2296 | xfs_buf_t *bp; | 2169 | xfs_buf_t *bp; |
| 2297 | xfs_dinode_t *dip; | 2170 | xfs_dinode_t *dip; |
| 2298 | xfs_ino_t ino; | ||
| 2299 | int len; | 2171 | int len; |
| 2300 | xfs_caddr_t src; | 2172 | xfs_caddr_t src; |
| 2301 | xfs_caddr_t dest; | 2173 | xfs_caddr_t dest; |
| @@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans( | |||
| 2305 | xfs_icdinode_t *dicp; | 2177 | xfs_icdinode_t *dicp; |
| 2306 | int need_free = 0; | 2178 | int need_free = 0; |
| 2307 | 2179 | ||
| 2308 | if (pass == XLOG_RECOVER_PASS1) { | ||
| 2309 | return 0; | ||
| 2310 | } | ||
| 2311 | |||
| 2312 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2180 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
| 2313 | in_f = item->ri_buf[0].i_addr; | 2181 | in_f = item->ri_buf[0].i_addr; |
| 2314 | } else { | 2182 | } else { |
| @@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans( | |||
| 2318 | if (error) | 2186 | if (error) |
| 2319 | goto error; | 2187 | goto error; |
| 2320 | } | 2188 | } |
| 2321 | ino = in_f->ilf_ino; | ||
| 2322 | mp = log->l_mp; | ||
| 2323 | 2189 | ||
| 2324 | /* | 2190 | /* |
| 2325 | * Inode buffers can be freed, look out for it, | 2191 | * Inode buffers can be freed, look out for it, |
| @@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans( | |||
| 2354 | xfs_buf_relse(bp); | 2220 | xfs_buf_relse(bp); |
| 2355 | xfs_fs_cmn_err(CE_ALERT, mp, | 2221 | xfs_fs_cmn_err(CE_ALERT, mp, |
| 2356 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", | 2222 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", |
| 2357 | dip, bp, ino); | 2223 | dip, bp, in_f->ilf_ino); |
| 2358 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", | 2224 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
| 2359 | XFS_ERRLEVEL_LOW, mp); | 2225 | XFS_ERRLEVEL_LOW, mp); |
| 2360 | error = EFSCORRUPTED; | 2226 | error = EFSCORRUPTED; |
| 2361 | goto error; | 2227 | goto error; |
| @@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans( | |||
| 2365 | xfs_buf_relse(bp); | 2231 | xfs_buf_relse(bp); |
| 2366 | xfs_fs_cmn_err(CE_ALERT, mp, | 2232 | xfs_fs_cmn_err(CE_ALERT, mp, |
| 2367 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2233 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", |
| 2368 | item, ino); | 2234 | item, in_f->ilf_ino); |
| 2369 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", | 2235 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
| 2370 | XFS_ERRLEVEL_LOW, mp); | 2236 | XFS_ERRLEVEL_LOW, mp); |
| 2371 | error = EFSCORRUPTED; | 2237 | error = EFSCORRUPTED; |
| 2372 | goto error; | 2238 | goto error; |
| @@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans( | |||
| 2394 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { | 2260 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { |
| 2395 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2261 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
| 2396 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2262 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
| 2397 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", | 2263 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
| 2398 | XFS_ERRLEVEL_LOW, mp, dicp); | 2264 | XFS_ERRLEVEL_LOW, mp, dicp); |
| 2399 | xfs_buf_relse(bp); | 2265 | xfs_buf_relse(bp); |
| 2400 | xfs_fs_cmn_err(CE_ALERT, mp, | 2266 | xfs_fs_cmn_err(CE_ALERT, mp, |
| 2401 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2267 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
| 2402 | item, dip, bp, ino); | 2268 | item, dip, bp, in_f->ilf_ino); |
| 2403 | error = EFSCORRUPTED; | 2269 | error = EFSCORRUPTED; |
| 2404 | goto error; | 2270 | goto error; |
| 2405 | } | 2271 | } |
| @@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans( | |||
| 2407 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2273 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
| 2408 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && | 2274 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && |
| 2409 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2275 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
| 2410 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", | 2276 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
| 2411 | XFS_ERRLEVEL_LOW, mp, dicp); | 2277 | XFS_ERRLEVEL_LOW, mp, dicp); |
| 2412 | xfs_buf_relse(bp); | 2278 | xfs_buf_relse(bp); |
| 2413 | xfs_fs_cmn_err(CE_ALERT, mp, | 2279 | xfs_fs_cmn_err(CE_ALERT, mp, |
| 2414 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2280 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
| 2415 | item, dip, bp, ino); | 2281 | item, dip, bp, in_f->ilf_ino); |
| 2416 | error = EFSCORRUPTED; | 2282 | error = EFSCORRUPTED; |
| 2417 | goto error; | 2283 | goto error; |
| 2418 | } | 2284 | } |
| 2419 | } | 2285 | } |
| 2420 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2286 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
| 2421 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", | 2287 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
| 2422 | XFS_ERRLEVEL_LOW, mp, dicp); | 2288 | XFS_ERRLEVEL_LOW, mp, dicp); |
| 2423 | xfs_buf_relse(bp); | 2289 | xfs_buf_relse(bp); |
| 2424 | xfs_fs_cmn_err(CE_ALERT, mp, | 2290 | xfs_fs_cmn_err(CE_ALERT, mp, |
| 2425 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2291 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
| 2426 | item, dip, bp, ino, | 2292 | item, dip, bp, in_f->ilf_ino, |
| 2427 | dicp->di_nextents + dicp->di_anextents, | 2293 | dicp->di_nextents + dicp->di_anextents, |
| 2428 | dicp->di_nblocks); | 2294 | dicp->di_nblocks); |
| 2429 | error = EFSCORRUPTED; | 2295 | error = EFSCORRUPTED; |
| 2430 | goto error; | 2296 | goto error; |
| 2431 | } | 2297 | } |
| 2432 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2298 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
| 2433 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", | 2299 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
| 2434 | XFS_ERRLEVEL_LOW, mp, dicp); | 2300 | XFS_ERRLEVEL_LOW, mp, dicp); |
| 2435 | xfs_buf_relse(bp); | 2301 | xfs_buf_relse(bp); |
| 2436 | xfs_fs_cmn_err(CE_ALERT, mp, | 2302 | xfs_fs_cmn_err(CE_ALERT, mp, |
| 2437 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", | 2303 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", |
| 2438 | item, dip, bp, ino, dicp->di_forkoff); | 2304 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); |
| 2439 | error = EFSCORRUPTED; | 2305 | error = EFSCORRUPTED; |
| 2440 | goto error; | 2306 | goto error; |
| 2441 | } | 2307 | } |
| 2442 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2308 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { |
| 2443 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", | 2309 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
| 2444 | XFS_ERRLEVEL_LOW, mp, dicp); | 2310 | XFS_ERRLEVEL_LOW, mp, dicp); |
| 2445 | xfs_buf_relse(bp); | 2311 | xfs_buf_relse(bp); |
| 2446 | xfs_fs_cmn_err(CE_ALERT, mp, | 2312 | xfs_fs_cmn_err(CE_ALERT, mp, |
| @@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans( | |||
| 2532 | break; | 2398 | break; |
| 2533 | 2399 | ||
| 2534 | default: | 2400 | default: |
| 2535 | xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); | 2401 | xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag"); |
| 2536 | ASSERT(0); | 2402 | ASSERT(0); |
| 2537 | xfs_buf_relse(bp); | 2403 | xfs_buf_relse(bp); |
| 2538 | error = EIO; | 2404 | error = EIO; |
| @@ -2556,18 +2422,11 @@ error: | |||
| 2556 | * of that type. | 2422 | * of that type. |
| 2557 | */ | 2423 | */ |
| 2558 | STATIC int | 2424 | STATIC int |
| 2559 | xlog_recover_do_quotaoff_trans( | 2425 | xlog_recover_quotaoff_pass1( |
| 2560 | xlog_t *log, | 2426 | xlog_t *log, |
| 2561 | xlog_recover_item_t *item, | 2427 | xlog_recover_item_t *item) |
| 2562 | int pass) | ||
| 2563 | { | 2428 | { |
| 2564 | xfs_qoff_logformat_t *qoff_f; | 2429 | xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
| 2565 | |||
| 2566 | if (pass == XLOG_RECOVER_PASS2) { | ||
| 2567 | return (0); | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | qoff_f = item->ri_buf[0].i_addr; | ||
| 2571 | ASSERT(qoff_f); | 2430 | ASSERT(qoff_f); |
| 2572 | 2431 | ||
| 2573 | /* | 2432 | /* |
| @@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans( | |||
| 2588 | * Recover a dquot record | 2447 | * Recover a dquot record |
| 2589 | */ | 2448 | */ |
| 2590 | STATIC int | 2449 | STATIC int |
| 2591 | xlog_recover_do_dquot_trans( | 2450 | xlog_recover_dquot_pass2( |
| 2592 | xlog_t *log, | 2451 | xlog_t *log, |
| 2593 | xlog_recover_item_t *item, | 2452 | xlog_recover_item_t *item) |
| 2594 | int pass) | ||
| 2595 | { | 2453 | { |
| 2596 | xfs_mount_t *mp; | 2454 | xfs_mount_t *mp = log->l_mp; |
| 2597 | xfs_buf_t *bp; | 2455 | xfs_buf_t *bp; |
| 2598 | struct xfs_disk_dquot *ddq, *recddq; | 2456 | struct xfs_disk_dquot *ddq, *recddq; |
| 2599 | int error; | 2457 | int error; |
| 2600 | xfs_dq_logformat_t *dq_f; | 2458 | xfs_dq_logformat_t *dq_f; |
| 2601 | uint type; | 2459 | uint type; |
| 2602 | 2460 | ||
| 2603 | if (pass == XLOG_RECOVER_PASS1) { | ||
| 2604 | return 0; | ||
| 2605 | } | ||
| 2606 | mp = log->l_mp; | ||
| 2607 | 2461 | ||
| 2608 | /* | 2462 | /* |
| 2609 | * Filesystems are required to send in quota flags at mount time. | 2463 | * Filesystems are required to send in quota flags at mount time. |
| @@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans( | |||
| 2647 | if ((error = xfs_qm_dqcheck(recddq, | 2501 | if ((error = xfs_qm_dqcheck(recddq, |
| 2648 | dq_f->qlf_id, | 2502 | dq_f->qlf_id, |
| 2649 | 0, XFS_QMOPT_DOWARN, | 2503 | 0, XFS_QMOPT_DOWARN, |
| 2650 | "xlog_recover_do_dquot_trans (log copy)"))) { | 2504 | "xlog_recover_dquot_pass2 (log copy)"))) { |
| 2651 | return XFS_ERROR(EIO); | 2505 | return XFS_ERROR(EIO); |
| 2652 | } | 2506 | } |
| 2653 | ASSERT(dq_f->qlf_len == 1); | 2507 | ASSERT(dq_f->qlf_len == 1); |
| @@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans( | |||
| 2670 | * minimal initialization then. | 2524 | * minimal initialization then. |
| 2671 | */ | 2525 | */ |
| 2672 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2526 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
| 2673 | "xlog_recover_do_dquot_trans")) { | 2527 | "xlog_recover_dquot_pass2")) { |
| 2674 | xfs_buf_relse(bp); | 2528 | xfs_buf_relse(bp); |
| 2675 | return XFS_ERROR(EIO); | 2529 | return XFS_ERROR(EIO); |
| 2676 | } | 2530 | } |
| @@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans( | |||
| 2693 | * LSN. | 2547 | * LSN. |
| 2694 | */ | 2548 | */ |
| 2695 | STATIC int | 2549 | STATIC int |
| 2696 | xlog_recover_do_efi_trans( | 2550 | xlog_recover_efi_pass2( |
| 2697 | xlog_t *log, | 2551 | xlog_t *log, |
| 2698 | xlog_recover_item_t *item, | 2552 | xlog_recover_item_t *item, |
| 2699 | xfs_lsn_t lsn, | 2553 | xfs_lsn_t lsn) |
| 2700 | int pass) | ||
| 2701 | { | 2554 | { |
| 2702 | int error; | 2555 | int error; |
| 2703 | xfs_mount_t *mp; | 2556 | xfs_mount_t *mp = log->l_mp; |
| 2704 | xfs_efi_log_item_t *efip; | 2557 | xfs_efi_log_item_t *efip; |
| 2705 | xfs_efi_log_format_t *efi_formatp; | 2558 | xfs_efi_log_format_t *efi_formatp; |
| 2706 | 2559 | ||
| 2707 | if (pass == XLOG_RECOVER_PASS1) { | ||
| 2708 | return 0; | ||
| 2709 | } | ||
| 2710 | |||
| 2711 | efi_formatp = item->ri_buf[0].i_addr; | 2560 | efi_formatp = item->ri_buf[0].i_addr; |
| 2712 | 2561 | ||
| 2713 | mp = log->l_mp; | ||
| 2714 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2562 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
| 2715 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2563 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
| 2716 | &(efip->efi_format)))) { | 2564 | &(efip->efi_format)))) { |
| 2717 | xfs_efi_item_free(efip); | 2565 | xfs_efi_item_free(efip); |
| 2718 | return error; | 2566 | return error; |
| 2719 | } | 2567 | } |
| 2720 | efip->efi_next_extent = efi_formatp->efi_nextents; | 2568 | atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
| 2721 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
| 2722 | 2569 | ||
| 2723 | spin_lock(&log->l_ailp->xa_lock); | 2570 | spin_lock(&log->l_ailp->xa_lock); |
| 2724 | /* | 2571 | /* |
| 2725 | * xfs_trans_ail_update() drops the AIL lock. | 2572 | * xfs_trans_ail_update() drops the AIL lock. |
| 2726 | */ | 2573 | */ |
| 2727 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); | 2574 | xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
| 2728 | return 0; | 2575 | return 0; |
| 2729 | } | 2576 | } |
| 2730 | 2577 | ||
| @@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans( | |||
| 2737 | * efd format structure. If we find it, we remove the efi from the | 2584 | * efd format structure. If we find it, we remove the efi from the |
| 2738 | * AIL and free it. | 2585 | * AIL and free it. |
| 2739 | */ | 2586 | */ |
| 2740 | STATIC void | 2587 | STATIC int |
| 2741 | xlog_recover_do_efd_trans( | 2588 | xlog_recover_efd_pass2( |
| 2742 | xlog_t *log, | 2589 | xlog_t *log, |
| 2743 | xlog_recover_item_t *item, | 2590 | xlog_recover_item_t *item) |
| 2744 | int pass) | ||
| 2745 | { | 2591 | { |
| 2746 | xfs_efd_log_format_t *efd_formatp; | 2592 | xfs_efd_log_format_t *efd_formatp; |
| 2747 | xfs_efi_log_item_t *efip = NULL; | 2593 | xfs_efi_log_item_t *efip = NULL; |
| @@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans( | |||
| 2750 | struct xfs_ail_cursor cur; | 2596 | struct xfs_ail_cursor cur; |
| 2751 | struct xfs_ail *ailp = log->l_ailp; | 2597 | struct xfs_ail *ailp = log->l_ailp; |
| 2752 | 2598 | ||
| 2753 | if (pass == XLOG_RECOVER_PASS1) { | ||
| 2754 | return; | ||
| 2755 | } | ||
| 2756 | |||
| 2757 | efd_formatp = item->ri_buf[0].i_addr; | 2599 | efd_formatp = item->ri_buf[0].i_addr; |
| 2758 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + | 2600 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
| 2759 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || | 2601 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
| @@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans( | |||
| 2785 | } | 2627 | } |
| 2786 | xfs_trans_ail_cursor_done(ailp, &cur); | 2628 | xfs_trans_ail_cursor_done(ailp, &cur); |
| 2787 | spin_unlock(&ailp->xa_lock); | 2629 | spin_unlock(&ailp->xa_lock); |
| 2788 | } | ||
| 2789 | |||
| 2790 | /* | ||
| 2791 | * Perform the transaction | ||
| 2792 | * | ||
| 2793 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
| 2794 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
| 2795 | */ | ||
| 2796 | STATIC int | ||
| 2797 | xlog_recover_do_trans( | ||
| 2798 | xlog_t *log, | ||
| 2799 | xlog_recover_t *trans, | ||
| 2800 | int pass) | ||
| 2801 | { | ||
| 2802 | int error = 0; | ||
| 2803 | xlog_recover_item_t *item; | ||
| 2804 | |||
| 2805 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
| 2806 | if (error) | ||
| 2807 | return error; | ||
| 2808 | |||
| 2809 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
| 2810 | trace_xfs_log_recover_item_recover(log, trans, item, pass); | ||
| 2811 | switch (ITEM_TYPE(item)) { | ||
| 2812 | case XFS_LI_BUF: | ||
| 2813 | error = xlog_recover_do_buffer_trans(log, item, pass); | ||
| 2814 | break; | ||
| 2815 | case XFS_LI_INODE: | ||
| 2816 | error = xlog_recover_do_inode_trans(log, item, pass); | ||
| 2817 | break; | ||
| 2818 | case XFS_LI_EFI: | ||
| 2819 | error = xlog_recover_do_efi_trans(log, item, | ||
| 2820 | trans->r_lsn, pass); | ||
| 2821 | break; | ||
| 2822 | case XFS_LI_EFD: | ||
| 2823 | xlog_recover_do_efd_trans(log, item, pass); | ||
| 2824 | error = 0; | ||
| 2825 | break; | ||
| 2826 | case XFS_LI_DQUOT: | ||
| 2827 | error = xlog_recover_do_dquot_trans(log, item, pass); | ||
| 2828 | break; | ||
| 2829 | case XFS_LI_QUOTAOFF: | ||
| 2830 | error = xlog_recover_do_quotaoff_trans(log, item, | ||
| 2831 | pass); | ||
| 2832 | break; | ||
| 2833 | default: | ||
| 2834 | xlog_warn( | ||
| 2835 | "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); | ||
| 2836 | ASSERT(0); | ||
| 2837 | error = XFS_ERROR(EIO); | ||
| 2838 | break; | ||
| 2839 | } | ||
| 2840 | |||
| 2841 | if (error) | ||
| 2842 | return error; | ||
| 2843 | } | ||
| 2844 | 2630 | ||
| 2845 | return 0; | 2631 | return 0; |
| 2846 | } | 2632 | } |
| @@ -2852,7 +2638,7 @@ xlog_recover_do_trans( | |||
| 2852 | */ | 2638 | */ |
| 2853 | STATIC void | 2639 | STATIC void |
| 2854 | xlog_recover_free_trans( | 2640 | xlog_recover_free_trans( |
| 2855 | xlog_recover_t *trans) | 2641 | struct xlog_recover *trans) |
| 2856 | { | 2642 | { |
| 2857 | xlog_recover_item_t *item, *n; | 2643 | xlog_recover_item_t *item, *n; |
| 2858 | int i; | 2644 | int i; |
| @@ -2871,17 +2657,95 @@ xlog_recover_free_trans( | |||
| 2871 | } | 2657 | } |
| 2872 | 2658 | ||
| 2873 | STATIC int | 2659 | STATIC int |
| 2660 | xlog_recover_commit_pass1( | ||
| 2661 | struct log *log, | ||
| 2662 | struct xlog_recover *trans, | ||
| 2663 | xlog_recover_item_t *item) | ||
| 2664 | { | ||
| 2665 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); | ||
| 2666 | |||
| 2667 | switch (ITEM_TYPE(item)) { | ||
| 2668 | case XFS_LI_BUF: | ||
| 2669 | return xlog_recover_buffer_pass1(log, item); | ||
| 2670 | case XFS_LI_QUOTAOFF: | ||
| 2671 | return xlog_recover_quotaoff_pass1(log, item); | ||
| 2672 | case XFS_LI_INODE: | ||
| 2673 | case XFS_LI_EFI: | ||
| 2674 | case XFS_LI_EFD: | ||
| 2675 | case XFS_LI_DQUOT: | ||
| 2676 | /* nothing to do in pass 1 */ | ||
| 2677 | return 0; | ||
| 2678 | default: | ||
| 2679 | xlog_warn( | ||
| 2680 | "XFS: invalid item type (%d) xlog_recover_commit_pass1", | ||
| 2681 | ITEM_TYPE(item)); | ||
| 2682 | ASSERT(0); | ||
| 2683 | return XFS_ERROR(EIO); | ||
| 2684 | } | ||
| 2685 | } | ||
| 2686 | |||
| 2687 | STATIC int | ||
| 2688 | xlog_recover_commit_pass2( | ||
| 2689 | struct log *log, | ||
| 2690 | struct xlog_recover *trans, | ||
| 2691 | xlog_recover_item_t *item) | ||
| 2692 | { | ||
| 2693 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | ||
| 2694 | |||
| 2695 | switch (ITEM_TYPE(item)) { | ||
| 2696 | case XFS_LI_BUF: | ||
| 2697 | return xlog_recover_buffer_pass2(log, item); | ||
| 2698 | case XFS_LI_INODE: | ||
| 2699 | return xlog_recover_inode_pass2(log, item); | ||
| 2700 | case XFS_LI_EFI: | ||
| 2701 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | ||
| 2702 | case XFS_LI_EFD: | ||
| 2703 | return xlog_recover_efd_pass2(log, item); | ||
| 2704 | case XFS_LI_DQUOT: | ||
| 2705 | return xlog_recover_dquot_pass2(log, item); | ||
| 2706 | case XFS_LI_QUOTAOFF: | ||
| 2707 | /* nothing to do in pass2 */ | ||
| 2708 | return 0; | ||
| 2709 | default: | ||
| 2710 | xlog_warn( | ||
| 2711 | "XFS: invalid item type (%d) xlog_recover_commit_pass2", | ||
| 2712 | ITEM_TYPE(item)); | ||
| 2713 | ASSERT(0); | ||
| 2714 | return XFS_ERROR(EIO); | ||
| 2715 | } | ||
| 2716 | } | ||
| 2717 | |||
| 2718 | /* | ||
| 2719 | * Perform the transaction. | ||
| 2720 | * | ||
| 2721 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
| 2722 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
| 2723 | */ | ||
| 2724 | STATIC int | ||
| 2874 | xlog_recover_commit_trans( | 2725 | xlog_recover_commit_trans( |
| 2875 | xlog_t *log, | 2726 | struct log *log, |
| 2876 | xlog_recover_t *trans, | 2727 | struct xlog_recover *trans, |
| 2877 | int pass) | 2728 | int pass) |
| 2878 | { | 2729 | { |
| 2879 | int error; | 2730 | int error = 0; |
| 2731 | xlog_recover_item_t *item; | ||
| 2880 | 2732 | ||
| 2881 | hlist_del(&trans->r_list); | 2733 | hlist_del(&trans->r_list); |
| 2882 | if ((error = xlog_recover_do_trans(log, trans, pass))) | 2734 | |
| 2735 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
| 2736 | if (error) | ||
| 2883 | return error; | 2737 | return error; |
| 2884 | xlog_recover_free_trans(trans); /* no error */ | 2738 | |
| 2739 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
| 2740 | if (pass == XLOG_RECOVER_PASS1) | ||
| 2741 | error = xlog_recover_commit_pass1(log, trans, item); | ||
| 2742 | else | ||
| 2743 | error = xlog_recover_commit_pass2(log, trans, item); | ||
| 2744 | if (error) | ||
| 2745 | return error; | ||
| 2746 | } | ||
| 2747 | |||
| 2748 | xlog_recover_free_trans(trans); | ||
| 2885 | return 0; | 2749 | return 0; |
| 2886 | } | 2750 | } |
| 2887 | 2751 | ||
| @@ -3011,7 +2875,7 @@ xlog_recover_process_efi( | |||
| 3011 | xfs_extent_t *extp; | 2875 | xfs_extent_t *extp; |
| 3012 | xfs_fsblock_t startblock_fsb; | 2876 | xfs_fsblock_t startblock_fsb; |
| 3013 | 2877 | ||
| 3014 | ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); | 2878 | ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); |
| 3015 | 2879 | ||
| 3016 | /* | 2880 | /* |
| 3017 | * First check the validity of the extents described by the | 2881 | * First check the validity of the extents described by the |
| @@ -3050,7 +2914,7 @@ xlog_recover_process_efi( | |||
| 3050 | extp->ext_len); | 2914 | extp->ext_len); |
| 3051 | } | 2915 | } |
| 3052 | 2916 | ||
| 3053 | efip->efi_flags |= XFS_EFI_RECOVERED; | 2917 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); |
| 3054 | error = xfs_trans_commit(tp, 0); | 2918 | error = xfs_trans_commit(tp, 0); |
| 3055 | return error; | 2919 | return error; |
| 3056 | 2920 | ||
| @@ -3107,7 +2971,7 @@ xlog_recover_process_efis( | |||
| 3107 | * Skip EFIs that we've already processed. | 2971 | * Skip EFIs that we've already processed. |
| 3108 | */ | 2972 | */ |
| 3109 | efip = (xfs_efi_log_item_t *)lip; | 2973 | efip = (xfs_efi_log_item_t *)lip; |
| 3110 | if (efip->efi_flags & XFS_EFI_RECOVERED) { | 2974 | if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { |
| 3111 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 2975 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
| 3112 | continue; | 2976 | continue; |
| 3113 | } | 2977 | } |
| @@ -3724,7 +3588,7 @@ xlog_do_log_recovery( | |||
| 3724 | xfs_daddr_t head_blk, | 3588 | xfs_daddr_t head_blk, |
| 3725 | xfs_daddr_t tail_blk) | 3589 | xfs_daddr_t tail_blk) |
| 3726 | { | 3590 | { |
| 3727 | int error; | 3591 | int error, i; |
| 3728 | 3592 | ||
| 3729 | ASSERT(head_blk != tail_blk); | 3593 | ASSERT(head_blk != tail_blk); |
| 3730 | 3594 | ||
| @@ -3732,10 +3596,12 @@ xlog_do_log_recovery( | |||
| 3732 | * First do a pass to find all of the cancelled buf log items. | 3596 | * First do a pass to find all of the cancelled buf log items. |
| 3733 | * Store them in the buf_cancel_table for use in the second pass. | 3597 | * Store them in the buf_cancel_table for use in the second pass. |
| 3734 | */ | 3598 | */ |
| 3735 | log->l_buf_cancel_table = | 3599 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
| 3736 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3600 | sizeof(struct list_head), |
| 3737 | sizeof(xfs_buf_cancel_t*), | ||
| 3738 | KM_SLEEP); | 3601 | KM_SLEEP); |
| 3602 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | ||
| 3603 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); | ||
| 3604 | |||
| 3739 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3605 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
| 3740 | XLOG_RECOVER_PASS1); | 3606 | XLOG_RECOVER_PASS1); |
| 3741 | if (error != 0) { | 3607 | if (error != 0) { |
| @@ -3754,7 +3620,7 @@ xlog_do_log_recovery( | |||
| 3754 | int i; | 3620 | int i; |
| 3755 | 3621 | ||
| 3756 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3622 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
| 3757 | ASSERT(log->l_buf_cancel_table[i] == NULL); | 3623 | ASSERT(list_empty(&log->l_buf_cancel_table[i])); |
| 3758 | } | 3624 | } |
| 3759 | #endif /* DEBUG */ | 3625 | #endif /* DEBUG */ |
| 3760 | 3626 | ||
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 19e9dfa1c254..d447aef84bc3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -472,7 +472,7 @@ xfs_initialize_perag( | |||
| 472 | goto out_unwind; | 472 | goto out_unwind; |
| 473 | pag->pag_agno = index; | 473 | pag->pag_agno = index; |
| 474 | pag->pag_mount = mp; | 474 | pag->pag_mount = mp; |
| 475 | rwlock_init(&pag->pag_ici_lock); | 475 | spin_lock_init(&pag->pag_ici_lock); |
| 476 | mutex_init(&pag->pag_ici_reclaim_lock); | 476 | mutex_init(&pag->pag_ici_reclaim_lock); |
| 477 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 477 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
| 478 | spin_lock_init(&pag->pag_buf_lock); | 478 | spin_lock_init(&pag->pag_buf_lock); |
| @@ -975,6 +975,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp) | |||
| 975 | } | 975 | } |
| 976 | 976 | ||
| 977 | /* | 977 | /* |
| 978 | * precalculate the low space thresholds for dynamic speculative preallocation. | ||
| 979 | */ | ||
| 980 | void | ||
| 981 | xfs_set_low_space_thresholds( | ||
| 982 | struct xfs_mount *mp) | ||
| 983 | { | ||
| 984 | int i; | ||
| 985 | |||
| 986 | for (i = 0; i < XFS_LOWSP_MAX; i++) { | ||
| 987 | __uint64_t space = mp->m_sb.sb_dblocks; | ||
| 988 | |||
| 989 | do_div(space, 100); | ||
| 990 | mp->m_low_space[i] = space * (i + 1); | ||
| 991 | } | ||
| 992 | } | ||
| 993 | |||
| 994 | |||
| 995 | /* | ||
| 978 | * Set whether we're using inode alignment. | 996 | * Set whether we're using inode alignment. |
| 979 | */ | 997 | */ |
| 980 | STATIC void | 998 | STATIC void |
| @@ -1196,6 +1214,9 @@ xfs_mountfs( | |||
| 1196 | */ | 1214 | */ |
| 1197 | xfs_set_rw_sizes(mp); | 1215 | xfs_set_rw_sizes(mp); |
| 1198 | 1216 | ||
| 1217 | /* set the low space thresholds for dynamic preallocation */ | ||
| 1218 | xfs_set_low_space_thresholds(mp); | ||
| 1219 | |||
| 1199 | /* | 1220 | /* |
| 1200 | * Set the inode cluster size. | 1221 | * Set the inode cluster size. |
| 1201 | * This may still be overridden by the file system | 1222 | * This may still be overridden by the file system |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5861b4980740..a62e8971539d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | |||
| 103 | xfs_mod_incore_sb(mp, field, delta, rsvd) | 103 | xfs_mod_incore_sb(mp, field, delta, rsvd) |
| 104 | #endif | 104 | #endif |
| 105 | 105 | ||
| 106 | /* dynamic preallocation free space thresholds, 5% down to 1% */ | ||
| 107 | enum { | ||
| 108 | XFS_LOWSP_1_PCNT = 0, | ||
| 109 | XFS_LOWSP_2_PCNT, | ||
| 110 | XFS_LOWSP_3_PCNT, | ||
| 111 | XFS_LOWSP_4_PCNT, | ||
| 112 | XFS_LOWSP_5_PCNT, | ||
| 113 | XFS_LOWSP_MAX, | ||
| 114 | }; | ||
| 115 | |||
| 106 | typedef struct xfs_mount { | 116 | typedef struct xfs_mount { |
| 107 | struct super_block *m_super; | 117 | struct super_block *m_super; |
| 108 | xfs_tid_t m_tid; /* next unused tid for fs */ | 118 | xfs_tid_t m_tid; /* next unused tid for fs */ |
| @@ -202,6 +212,8 @@ typedef struct xfs_mount { | |||
| 202 | __int64_t m_update_flags; /* sb flags we need to update | 212 | __int64_t m_update_flags; /* sb flags we need to update |
| 203 | on the next remount,rw */ | 213 | on the next remount,rw */ |
| 204 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 214 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
| 215 | int64_t m_low_space[XFS_LOWSP_MAX]; | ||
| 216 | /* low free space thresholds */ | ||
| 205 | } xfs_mount_t; | 217 | } xfs_mount_t; |
| 206 | 218 | ||
| 207 | /* | 219 | /* |
| @@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); | |||
| 379 | 391 | ||
| 380 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 392 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
| 381 | 393 | ||
| 394 | extern void xfs_set_low_space_thresholds(struct xfs_mount *); | ||
| 395 | |||
| 382 | #endif /* __KERNEL__ */ | 396 | #endif /* __KERNEL__ */ |
| 383 | 397 | ||
| 384 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 398 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f6d956b7711e..f80a067a4658 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
| @@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs( | |||
| 1350 | * they could be immediately flushed and we'd have to race with the flusher | 1350 | * they could be immediately flushed and we'd have to race with the flusher |
| 1351 | * trying to pull the item from the AIL as we add it. | 1351 | * trying to pull the item from the AIL as we add it. |
| 1352 | */ | 1352 | */ |
| 1353 | void | 1353 | static void |
| 1354 | xfs_trans_item_committed( | 1354 | xfs_trans_item_committed( |
| 1355 | struct xfs_log_item *lip, | 1355 | struct xfs_log_item *lip, |
| 1356 | xfs_lsn_t commit_lsn, | 1356 | xfs_lsn_t commit_lsn, |
| @@ -1425,6 +1425,83 @@ xfs_trans_committed( | |||
| 1425 | xfs_trans_free(tp); | 1425 | xfs_trans_free(tp); |
| 1426 | } | 1426 | } |
| 1427 | 1427 | ||
| 1428 | static inline void | ||
| 1429 | xfs_log_item_batch_insert( | ||
| 1430 | struct xfs_ail *ailp, | ||
| 1431 | struct xfs_log_item **log_items, | ||
| 1432 | int nr_items, | ||
| 1433 | xfs_lsn_t commit_lsn) | ||
| 1434 | { | ||
| 1435 | int i; | ||
| 1436 | |||
| 1437 | spin_lock(&ailp->xa_lock); | ||
| 1438 | /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ | ||
| 1439 | xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); | ||
| 1440 | |||
| 1441 | for (i = 0; i < nr_items; i++) | ||
| 1442 | IOP_UNPIN(log_items[i], 0); | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | /* | ||
| 1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of | ||
| 1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to | ||
| 1448 | * minimise lock traffic. | ||
| 1449 | */ | ||
| 1450 | void | ||
| 1451 | xfs_trans_committed_bulk( | ||
| 1452 | struct xfs_ail *ailp, | ||
| 1453 | struct xfs_log_vec *log_vector, | ||
| 1454 | xfs_lsn_t commit_lsn, | ||
| 1455 | int aborted) | ||
| 1456 | { | ||
| 1457 | #define LOG_ITEM_BATCH_SIZE 32 | ||
| 1458 | struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; | ||
| 1459 | struct xfs_log_vec *lv; | ||
| 1460 | int i = 0; | ||
| 1461 | |||
| 1462 | /* unpin all the log items */ | ||
| 1463 | for (lv = log_vector; lv; lv = lv->lv_next ) { | ||
| 1464 | struct xfs_log_item *lip = lv->lv_item; | ||
| 1465 | xfs_lsn_t item_lsn; | ||
| 1466 | |||
| 1467 | if (aborted) | ||
| 1468 | lip->li_flags |= XFS_LI_ABORTED; | ||
| 1469 | item_lsn = IOP_COMMITTED(lip, commit_lsn); | ||
| 1470 | |||
| 1471 | /* item_lsn of -1 means the item was freed */ | ||
| 1472 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | ||
| 1473 | continue; | ||
| 1474 | |||
| 1475 | if (item_lsn != commit_lsn) { | ||
| 1476 | |||
| 1477 | /* | ||
| 1478 | * Not a bulk update option due to unusual item_lsn. | ||
| 1479 | * Push into AIL immediately, rechecking the lsn once | ||
| 1480 | * we have the ail lock. Then unpin the item. | ||
| 1481 | */ | ||
| 1482 | spin_lock(&ailp->xa_lock); | ||
| 1483 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) | ||
| 1484 | xfs_trans_ail_update(ailp, lip, item_lsn); | ||
| 1485 | else | ||
| 1486 | spin_unlock(&ailp->xa_lock); | ||
| 1487 | IOP_UNPIN(lip, 0); | ||
| 1488 | continue; | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | /* Item is a candidate for bulk AIL insert. */ | ||
| 1492 | log_items[i++] = lv->lv_item; | ||
| 1493 | if (i >= LOG_ITEM_BATCH_SIZE) { | ||
| 1494 | xfs_log_item_batch_insert(ailp, log_items, | ||
| 1495 | LOG_ITEM_BATCH_SIZE, commit_lsn); | ||
| 1496 | i = 0; | ||
| 1497 | } | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | /* make sure we insert the remainder! */ | ||
| 1501 | if (i) | ||
| 1502 | xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); | ||
| 1503 | } | ||
| 1504 | |||
| 1428 | /* | 1505 | /* |
| 1429 | * Called from the trans_commit code when we notice that | 1506 | * Called from the trans_commit code when we notice that |
| 1430 | * the filesystem is in the middle of a forced shutdown. | 1507 | * the filesystem is in the middle of a forced shutdown. |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 246286b77a86..c2042b736b81 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
| @@ -294,8 +294,8 @@ struct xfs_log_item_desc { | |||
| 294 | #define XFS_ALLOC_BTREE_REF 2 | 294 | #define XFS_ALLOC_BTREE_REF 2 |
| 295 | #define XFS_BMAP_BTREE_REF 2 | 295 | #define XFS_BMAP_BTREE_REF 2 |
| 296 | #define XFS_DIR_BTREE_REF 2 | 296 | #define XFS_DIR_BTREE_REF 2 |
| 297 | #define XFS_INO_REF 2 | ||
| 297 | #define XFS_ATTR_BTREE_REF 1 | 298 | #define XFS_ATTR_BTREE_REF 1 |
| 298 | #define XFS_INO_REF 1 | ||
| 299 | #define XFS_DQUOT_REF 1 | 299 | #define XFS_DQUOT_REF 1 |
| 300 | 300 | ||
| 301 | #ifdef __KERNEL__ | 301 | #ifdef __KERNEL__ |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index dc9069568ff7..c5bbbc45db91 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
| @@ -28,8 +28,8 @@ | |||
| 28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
| 29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
| 30 | 30 | ||
| 31 | STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); | 31 | STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); |
| 32 | STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); | 32 | STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); |
| 33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); | 33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); |
| 34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); | 34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); |
| 35 | 35 | ||
| @@ -449,129 +449,152 @@ xfs_trans_unlocked_item( | |||
| 449 | xfs_log_move_tail(ailp->xa_mount, 1); | 449 | xfs_log_move_tail(ailp->xa_mount, 1); |
| 450 | } /* xfs_trans_unlocked_item */ | 450 | } /* xfs_trans_unlocked_item */ |
| 451 | 451 | ||
| 452 | |||
| 453 | /* | 452 | /* |
| 454 | * Update the position of the item in the AIL with the new | 453 | * xfs_trans_ail_update - bulk AIL insertion operation. |
| 455 | * lsn. If it is not yet in the AIL, add it. Otherwise, move | 454 | * |
| 456 | * it to its new position by removing it and re-adding it. | 455 | * @xfs_trans_ail_update takes an array of log items that all need to be |
| 456 | * positioned at the same LSN in the AIL. If an item is not in the AIL, it will | ||
| 457 | * be added. Otherwise, it will be repositioned by removing it and re-adding | ||
| 458 | * it to the AIL. If we move the first item in the AIL, update the log tail to | ||
| 459 | * match the new minimum LSN in the AIL. | ||
| 457 | * | 460 | * |
| 458 | * Wakeup anyone with an lsn less than the item's lsn. If the item | 461 | * This function takes the AIL lock once to execute the update operations on |
| 459 | * we move in the AIL is the minimum one, update the tail lsn in the | 462 | * all the items in the array, and as such should not be called with the AIL |
| 460 | * log manager. | 463 | * lock held. As a result, once we have the AIL lock, we need to check each log |
| 464 | * item LSN to confirm it needs to be moved forward in the AIL. | ||
| 461 | * | 465 | * |
| 462 | * This function must be called with the AIL lock held. The lock | 466 | * To optimise the insert operation, we delete all the items from the AIL in |
| 463 | * is dropped before returning. | 467 | * the first pass, moving them into a temporary list, then splice the temporary |
| 468 | * list into the correct position in the AIL. This avoids needing to do an | ||
| 469 | * insert operation on every item. | ||
| 470 | * | ||
| 471 | * This function must be called with the AIL lock held. The lock is dropped | ||
| 472 | * before returning. | ||
| 464 | */ | 473 | */ |
| 465 | void | 474 | void |
| 466 | xfs_trans_ail_update( | 475 | xfs_trans_ail_update_bulk( |
| 467 | struct xfs_ail *ailp, | 476 | struct xfs_ail *ailp, |
| 468 | xfs_log_item_t *lip, | 477 | struct xfs_log_item **log_items, |
| 469 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | 478 | int nr_items, |
| 479 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
| 470 | { | 480 | { |
| 471 | xfs_log_item_t *dlip = NULL; | 481 | xfs_log_item_t *mlip; |
| 472 | xfs_log_item_t *mlip; /* ptr to minimum lip */ | ||
| 473 | xfs_lsn_t tail_lsn; | 482 | xfs_lsn_t tail_lsn; |
| 483 | int mlip_changed = 0; | ||
| 484 | int i; | ||
| 485 | LIST_HEAD(tmp); | ||
| 474 | 486 | ||
| 475 | mlip = xfs_ail_min(ailp); | 487 | mlip = xfs_ail_min(ailp); |
| 476 | 488 | ||
| 477 | if (lip->li_flags & XFS_LI_IN_AIL) { | 489 | for (i = 0; i < nr_items; i++) { |
| 478 | dlip = xfs_ail_delete(ailp, lip); | 490 | struct xfs_log_item *lip = log_items[i]; |
| 479 | ASSERT(dlip == lip); | 491 | if (lip->li_flags & XFS_LI_IN_AIL) { |
| 480 | xfs_trans_ail_cursor_clear(ailp, dlip); | 492 | /* check if we really need to move the item */ |
| 481 | } else { | 493 | if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) |
| 482 | lip->li_flags |= XFS_LI_IN_AIL; | 494 | continue; |
| 495 | |||
| 496 | xfs_ail_delete(ailp, lip); | ||
| 497 | if (mlip == lip) | ||
| 498 | mlip_changed = 1; | ||
| 499 | } else { | ||
| 500 | lip->li_flags |= XFS_LI_IN_AIL; | ||
| 501 | } | ||
| 502 | lip->li_lsn = lsn; | ||
| 503 | list_add(&lip->li_ail, &tmp); | ||
| 483 | } | 504 | } |
| 484 | 505 | ||
| 485 | lip->li_lsn = lsn; | 506 | xfs_ail_splice(ailp, &tmp, lsn); |
| 486 | xfs_ail_insert(ailp, lip); | ||
| 487 | 507 | ||
| 488 | if (mlip == dlip) { | 508 | if (!mlip_changed) { |
| 489 | mlip = xfs_ail_min(ailp); | ||
| 490 | /* | ||
| 491 | * It is not safe to access mlip after the AIL lock is | ||
| 492 | * dropped, so we must get a copy of li_lsn before we do | ||
| 493 | * so. This is especially important on 32-bit platforms | ||
| 494 | * where accessing and updating 64-bit values like li_lsn | ||
| 495 | * is not atomic. | ||
| 496 | */ | ||
| 497 | tail_lsn = mlip->li_lsn; | ||
| 498 | spin_unlock(&ailp->xa_lock); | ||
| 499 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
| 500 | } else { | ||
| 501 | spin_unlock(&ailp->xa_lock); | 509 | spin_unlock(&ailp->xa_lock); |
| 510 | return; | ||
| 502 | } | 511 | } |
| 503 | 512 | ||
| 504 | 513 | /* | |
| 505 | } /* xfs_trans_update_ail */ | 514 | * It is not safe to access mlip after the AIL lock is dropped, so we |
| 515 | * must get a copy of li_lsn before we do so. This is especially | ||
| 516 | * important on 32-bit platforms where accessing and updating 64-bit | ||
| 517 | * values like li_lsn is not atomic. | ||
| 518 | */ | ||
| 519 | mlip = xfs_ail_min(ailp); | ||
| 520 | tail_lsn = mlip->li_lsn; | ||
| 521 | spin_unlock(&ailp->xa_lock); | ||
| 522 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
| 523 | } | ||
| 506 | 524 | ||
| 507 | /* | 525 | /* |
| 508 | * Delete the given item from the AIL. It must already be in | 526 | * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL |
| 509 | * the AIL. | ||
| 510 | * | 527 | * |
| 511 | * Wakeup anyone with an lsn less than item's lsn. If the item | 528 | * @xfs_trans_ail_delete_bulk takes an array of log items that all need to |
| 512 | * we delete in the AIL is the minimum one, update the tail lsn in the | 529 | * removed from the AIL. The caller is already holding the AIL lock, and done |
| 513 | * log manager. | 530 | * all the checks necessary to ensure the items passed in via @log_items are |
| 531 | * ready for deletion. This includes checking that the items are in the AIL. | ||
| 514 | * | 532 | * |
| 515 | * Clear the IN_AIL flag from the item, reset its lsn to 0, and | 533 | * For each log item to be removed, unlink it from the AIL, clear the IN_AIL |
| 516 | * bump the AIL's generation count to indicate that the tree | 534 | * flag from the item and reset the item's lsn to 0. If we remove the first |
| 517 | * has changed. | 535 | * item in the AIL, update the log tail to match the new minimum LSN in the |
| 536 | * AIL. | ||
| 518 | * | 537 | * |
| 519 | * This function must be called with the AIL lock held. The lock | 538 | * This function will not drop the AIL lock until all items are removed from |
| 520 | * is dropped before returning. | 539 | * the AIL to minimise the amount of lock traffic on the AIL. This does not |
| 540 | * greatly increase the AIL hold time, but does significantly reduce the amount | ||
| 541 | * of traffic on the lock, especially during IO completion. | ||
| 542 | * | ||
| 543 | * This function must be called with the AIL lock held. The lock is dropped | ||
| 544 | * before returning. | ||
| 521 | */ | 545 | */ |
| 522 | void | 546 | void |
| 523 | xfs_trans_ail_delete( | 547 | xfs_trans_ail_delete_bulk( |
| 524 | struct xfs_ail *ailp, | 548 | struct xfs_ail *ailp, |
| 525 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | 549 | struct xfs_log_item **log_items, |
| 550 | int nr_items) __releases(ailp->xa_lock) | ||
| 526 | { | 551 | { |
| 527 | xfs_log_item_t *dlip; | ||
| 528 | xfs_log_item_t *mlip; | 552 | xfs_log_item_t *mlip; |
| 529 | xfs_lsn_t tail_lsn; | 553 | xfs_lsn_t tail_lsn; |
| 554 | int mlip_changed = 0; | ||
| 555 | int i; | ||
| 530 | 556 | ||
| 531 | if (lip->li_flags & XFS_LI_IN_AIL) { | 557 | mlip = xfs_ail_min(ailp); |
| 532 | mlip = xfs_ail_min(ailp); | ||
| 533 | dlip = xfs_ail_delete(ailp, lip); | ||
| 534 | ASSERT(dlip == lip); | ||
| 535 | xfs_trans_ail_cursor_clear(ailp, dlip); | ||
| 536 | |||
| 537 | 558 | ||
| 538 | lip->li_flags &= ~XFS_LI_IN_AIL; | 559 | for (i = 0; i < nr_items; i++) { |
| 539 | lip->li_lsn = 0; | 560 | struct xfs_log_item *lip = log_items[i]; |
| 561 | if (!(lip->li_flags & XFS_LI_IN_AIL)) { | ||
| 562 | struct xfs_mount *mp = ailp->xa_mount; | ||
| 540 | 563 | ||
| 541 | if (mlip == dlip) { | ||
| 542 | mlip = xfs_ail_min(ailp); | ||
| 543 | /* | ||
| 544 | * It is not safe to access mlip after the AIL lock | ||
| 545 | * is dropped, so we must get a copy of li_lsn | ||
| 546 | * before we do so. This is especially important | ||
| 547 | * on 32-bit platforms where accessing and updating | ||
| 548 | * 64-bit values like li_lsn is not atomic. | ||
| 549 | */ | ||
| 550 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
| 551 | spin_unlock(&ailp->xa_lock); | ||
| 552 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
| 553 | } else { | ||
| 554 | spin_unlock(&ailp->xa_lock); | 564 | spin_unlock(&ailp->xa_lock); |
| 565 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
| 566 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | ||
| 567 | "%s: attempting to delete a log item that is not in the AIL", | ||
| 568 | __func__); | ||
| 569 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 570 | } | ||
| 571 | return; | ||
| 555 | } | 572 | } |
| 573 | |||
| 574 | xfs_ail_delete(ailp, lip); | ||
| 575 | lip->li_flags &= ~XFS_LI_IN_AIL; | ||
| 576 | lip->li_lsn = 0; | ||
| 577 | if (mlip == lip) | ||
| 578 | mlip_changed = 1; | ||
| 556 | } | 579 | } |
| 557 | else { | ||
| 558 | /* | ||
| 559 | * If the file system is not being shutdown, we are in | ||
| 560 | * serious trouble if we get to this stage. | ||
| 561 | */ | ||
| 562 | struct xfs_mount *mp = ailp->xa_mount; | ||
| 563 | 580 | ||
| 581 | if (!mlip_changed) { | ||
| 564 | spin_unlock(&ailp->xa_lock); | 582 | spin_unlock(&ailp->xa_lock); |
| 565 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 583 | return; |
| 566 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | ||
| 567 | "%s: attempting to delete a log item that is not in the AIL", | ||
| 568 | __func__); | ||
| 569 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 570 | } | ||
| 571 | } | 584 | } |
| 572 | } | ||
| 573 | |||
| 574 | 585 | ||
| 586 | /* | ||
| 587 | * It is not safe to access mlip after the AIL lock is dropped, so we | ||
| 588 | * must get a copy of li_lsn before we do so. This is especially | ||
| 589 | * important on 32-bit platforms where accessing and updating 64-bit | ||
| 590 | * values like li_lsn is not atomic. It is possible we've emptied the | ||
| 591 | * AIL here, so if that is the case, pass an LSN of 0 to the tail move. | ||
| 592 | */ | ||
| 593 | mlip = xfs_ail_min(ailp); | ||
| 594 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
| 595 | spin_unlock(&ailp->xa_lock); | ||
| 596 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
| 597 | } | ||
| 575 | 598 | ||
| 576 | /* | 599 | /* |
| 577 | * The active item list (AIL) is a doubly linked list of log | 600 | * The active item list (AIL) is a doubly linked list of log |
| @@ -623,16 +646,13 @@ xfs_trans_ail_destroy( | |||
| 623 | } | 646 | } |
| 624 | 647 | ||
| 625 | /* | 648 | /* |
| 626 | * Insert the given log item into the AIL. | 649 | * splice the log item list into the AIL at the given LSN. |
| 627 | * We almost always insert at the end of the list, so on inserts | ||
| 628 | * we search from the end of the list to find where the | ||
| 629 | * new item belongs. | ||
| 630 | */ | 650 | */ |
| 631 | STATIC void | 651 | STATIC void |
| 632 | xfs_ail_insert( | 652 | xfs_ail_splice( |
| 633 | struct xfs_ail *ailp, | 653 | struct xfs_ail *ailp, |
| 634 | xfs_log_item_t *lip) | 654 | struct list_head *list, |
| 635 | /* ARGSUSED */ | 655 | xfs_lsn_t lsn) |
| 636 | { | 656 | { |
| 637 | xfs_log_item_t *next_lip; | 657 | xfs_log_item_t *next_lip; |
| 638 | 658 | ||
| @@ -640,39 +660,33 @@ xfs_ail_insert( | |||
| 640 | * If the list is empty, just insert the item. | 660 | * If the list is empty, just insert the item. |
| 641 | */ | 661 | */ |
| 642 | if (list_empty(&ailp->xa_ail)) { | 662 | if (list_empty(&ailp->xa_ail)) { |
| 643 | list_add(&lip->li_ail, &ailp->xa_ail); | 663 | list_splice(list, &ailp->xa_ail); |
| 644 | return; | 664 | return; |
| 645 | } | 665 | } |
| 646 | 666 | ||
| 647 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | 667 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { |
| 648 | if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) | 668 | if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) |
| 649 | break; | 669 | break; |
| 650 | } | 670 | } |
| 651 | 671 | ||
| 652 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | 672 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || |
| 653 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); | 673 | (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); |
| 654 | |||
| 655 | list_add(&lip->li_ail, &next_lip->li_ail); | ||
| 656 | 674 | ||
| 657 | xfs_ail_check(ailp, lip); | 675 | list_splice_init(list, &next_lip->li_ail); |
| 658 | return; | 676 | return; |
| 659 | } | 677 | } |
| 660 | 678 | ||
| 661 | /* | 679 | /* |
| 662 | * Delete the given item from the AIL. Return a pointer to the item. | 680 | * Delete the given item from the AIL. Return a pointer to the item. |
| 663 | */ | 681 | */ |
| 664 | /*ARGSUSED*/ | 682 | STATIC void |
| 665 | STATIC xfs_log_item_t * | ||
| 666 | xfs_ail_delete( | 683 | xfs_ail_delete( |
| 667 | struct xfs_ail *ailp, | 684 | struct xfs_ail *ailp, |
| 668 | xfs_log_item_t *lip) | 685 | xfs_log_item_t *lip) |
| 669 | /* ARGSUSED */ | ||
| 670 | { | 686 | { |
| 671 | xfs_ail_check(ailp, lip); | 687 | xfs_ail_check(ailp, lip); |
| 672 | |||
| 673 | list_del(&lip->li_ail); | 688 | list_del(&lip->li_ail); |
| 674 | 689 | xfs_trans_ail_cursor_clear(ailp, lip); | |
| 675 | return lip; | ||
| 676 | } | 690 | } |
| 677 | 691 | ||
| 678 | /* | 692 | /* |
| @@ -682,7 +696,6 @@ xfs_ail_delete( | |||
| 682 | STATIC xfs_log_item_t * | 696 | STATIC xfs_log_item_t * |
| 683 | xfs_ail_min( | 697 | xfs_ail_min( |
| 684 | struct xfs_ail *ailp) | 698 | struct xfs_ail *ailp) |
| 685 | /* ARGSUSED */ | ||
| 686 | { | 699 | { |
| 687 | if (list_empty(&ailp->xa_ail)) | 700 | if (list_empty(&ailp->xa_ail)) |
| 688 | return NULL; | 701 | return NULL; |
| @@ -699,7 +712,6 @@ STATIC xfs_log_item_t * | |||
| 699 | xfs_ail_next( | 712 | xfs_ail_next( |
| 700 | struct xfs_ail *ailp, | 713 | struct xfs_ail *ailp, |
| 701 | xfs_log_item_t *lip) | 714 | xfs_log_item_t *lip) |
| 702 | /* ARGSUSED */ | ||
| 703 | { | 715 | { |
| 704 | if (lip->li_ail.next == &ailp->xa_ail) | 716 | if (lip->li_ail.next == &ailp->xa_ail) |
| 705 | return NULL; | 717 | return NULL; |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index f783d5e9fa70..f7590f5badea 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
| @@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp, | |||
| 69 | tp->t_flags |= XFS_TRANS_DIRTY; | 69 | tp->t_flags |= XFS_TRANS_DIRTY; |
| 70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
| 71 | 71 | ||
| 72 | next_extent = efip->efi_next_extent; | 72 | /* |
| 73 | * atomic_inc_return gives us the value after the increment; | ||
| 74 | * we want to use it as an array index so we need to subtract 1 from | ||
| 75 | * it. | ||
| 76 | */ | ||
| 77 | next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; | ||
| 73 | ASSERT(next_extent < efip->efi_format.efi_nextents); | 78 | ASSERT(next_extent < efip->efi_format.efi_nextents); |
| 74 | extp = &(efip->efi_format.efi_extents[next_extent]); | 79 | extp = &(efip->efi_format.efi_extents[next_extent]); |
| 75 | extp->ext_start = start_block; | 80 | extp->ext_start = start_block; |
| 76 | extp->ext_len = ext_len; | 81 | extp->ext_len = ext_len; |
| 77 | efip->efi_next_extent++; | ||
| 78 | } | 82 | } |
| 79 | 83 | ||
| 80 | 84 | ||
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 62da86c90de5..35162c238fa3 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
| @@ -22,15 +22,17 @@ struct xfs_log_item; | |||
| 22 | struct xfs_log_item_desc; | 22 | struct xfs_log_item_desc; |
| 23 | struct xfs_mount; | 23 | struct xfs_mount; |
| 24 | struct xfs_trans; | 24 | struct xfs_trans; |
| 25 | struct xfs_ail; | ||
| 26 | struct xfs_log_vec; | ||
| 25 | 27 | ||
| 26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); | 28 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); |
| 27 | void xfs_trans_del_item(struct xfs_log_item *); | 29 | void xfs_trans_del_item(struct xfs_log_item *); |
| 28 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, | 30 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
| 29 | int flags); | 31 | int flags); |
| 30 | void xfs_trans_item_committed(struct xfs_log_item *lip, | ||
| 31 | xfs_lsn_t commit_lsn, int aborted); | ||
| 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |
| 33 | 33 | ||
| 34 | void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, | ||
| 35 | xfs_lsn_t commit_lsn, int aborted); | ||
| 34 | /* | 36 | /* |
| 35 | * AIL traversal cursor. | 37 | * AIL traversal cursor. |
| 36 | * | 38 | * |
| @@ -73,12 +75,29 @@ struct xfs_ail { | |||
| 73 | /* | 75 | /* |
| 74 | * From xfs_trans_ail.c | 76 | * From xfs_trans_ail.c |
| 75 | */ | 77 | */ |
| 76 | void xfs_trans_ail_update(struct xfs_ail *ailp, | 78 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
| 77 | struct xfs_log_item *lip, xfs_lsn_t lsn) | 79 | struct xfs_log_item **log_items, int nr_items, |
| 78 | __releases(ailp->xa_lock); | 80 | xfs_lsn_t lsn) __releases(ailp->xa_lock); |
| 79 | void xfs_trans_ail_delete(struct xfs_ail *ailp, | 81 | static inline void |
| 80 | struct xfs_log_item *lip) | 82 | xfs_trans_ail_update( |
| 81 | __releases(ailp->xa_lock); | 83 | struct xfs_ail *ailp, |
| 84 | struct xfs_log_item *lip, | ||
| 85 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
| 86 | { | ||
| 87 | xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); | ||
| 88 | } | ||
| 89 | |||
| 90 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, | ||
| 91 | struct xfs_log_item **log_items, int nr_items) | ||
| 92 | __releases(ailp->xa_lock); | ||
| 93 | static inline void | ||
| 94 | xfs_trans_ail_delete( | ||
| 95 | struct xfs_ail *ailp, | ||
| 96 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | ||
| 97 | { | ||
| 98 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | ||
| 99 | } | ||
| 100 | |||
| 82 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); | 101 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); |
| 83 | void xfs_trans_unlocked_item(struct xfs_ail *, | 102 | void xfs_trans_unlocked_item(struct xfs_ail *, |
| 84 | xfs_log_item_t *); | 103 | xfs_log_item_t *); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8e4a63c4151a..d8e6f8cd6f0c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
| @@ -964,29 +964,48 @@ xfs_release( | |||
| 964 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); | 964 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); |
| 965 | } | 965 | } |
| 966 | 966 | ||
| 967 | if (ip->i_d.di_nlink != 0) { | 967 | if (ip->i_d.di_nlink == 0) |
| 968 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 968 | return 0; |
| 969 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || | ||
| 970 | ip->i_delayed_blks > 0)) && | ||
| 971 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | ||
| 972 | (!(ip->i_d.di_flags & | ||
| 973 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { | ||
| 974 | 969 | ||
| 975 | /* | 970 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
| 976 | * If we can't get the iolock just skip truncating | 971 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || |
| 977 | * the blocks past EOF because we could deadlock | 972 | ip->i_delayed_blks > 0)) && |
| 978 | * with the mmap_sem otherwise. We'll get another | 973 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && |
| 979 | * chance to drop them once the last reference to | 974 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { |
| 980 | * the inode is dropped, so we'll never leak blocks | ||
| 981 | * permanently. | ||
| 982 | */ | ||
| 983 | error = xfs_free_eofblocks(mp, ip, | ||
| 984 | XFS_FREE_EOF_TRYLOCK); | ||
| 985 | if (error) | ||
| 986 | return error; | ||
| 987 | } | ||
| 988 | } | ||
| 989 | 975 | ||
| 976 | /* | ||
| 977 | * If we can't get the iolock just skip truncating the blocks | ||
| 978 | * past EOF because we could deadlock with the mmap_sem | ||
| 979 | * otherwise. We'll get another chance to drop them once the | ||
| 980 | * last reference to the inode is dropped, so we'll never leak | ||
| 981 | * blocks permanently. | ||
| 982 | * | ||
| 983 | * Further, check if the inode is being opened, written and | ||
| 984 | * closed frequently and we have delayed allocation blocks | ||
| 985 | * oustanding (e.g. streaming writes from the NFS server), | ||
| 986 | * truncating the blocks past EOF will cause fragmentation to | ||
| 987 | * occur. | ||
| 988 | * | ||
| 989 | * In this case don't do the truncation, either, but we have to | ||
| 990 | * be careful how we detect this case. Blocks beyond EOF show | ||
| 991 | * up as i_delayed_blks even when the inode is clean, so we | ||
| 992 | * need to truncate them away first before checking for a dirty | ||
| 993 | * release. Hence on the first dirty close we will still remove | ||
| 994 | * the speculative allocation, but after that we will leave it | ||
| 995 | * in place. | ||
| 996 | */ | ||
| 997 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
| 998 | return 0; | ||
| 999 | |||
| 1000 | error = xfs_free_eofblocks(mp, ip, | ||
| 1001 | XFS_FREE_EOF_TRYLOCK); | ||
| 1002 | if (error) | ||
| 1003 | return error; | ||
| 1004 | |||
| 1005 | /* delalloc blocks after truncation means it really is dirty */ | ||
| 1006 | if (ip->i_delayed_blks) | ||
| 1007 | xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); | ||
| 1008 | } | ||
| 990 | return 0; | 1009 | return 0; |
| 991 | } | 1010 | } |
| 992 | 1011 | ||
