diff options
Diffstat (limited to 'fs/xfs')
38 files changed, 1997 insertions, 1920 deletions
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h deleted file mode 100644 index 4dfc7c370819..000000000000 --- a/fs/xfs/linux-2.6/sv.h +++ /dev/null | |||
@@ -1,59 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_SV_H__ | ||
19 | #define __XFS_SUPPORT_SV_H__ | ||
20 | |||
21 | #include <linux/wait.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | |||
25 | /* | ||
26 | * Synchronisation variables. | ||
27 | * | ||
28 | * (Parameters "pri", "svf" and "rts" are not implemented) | ||
29 | */ | ||
30 | |||
31 | typedef struct sv_s { | ||
32 | wait_queue_head_t waiters; | ||
33 | } sv_t; | ||
34 | |||
35 | static inline void _sv_wait(sv_t *sv, spinlock_t *lock) | ||
36 | { | ||
37 | DECLARE_WAITQUEUE(wait, current); | ||
38 | |||
39 | add_wait_queue_exclusive(&sv->waiters, &wait); | ||
40 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
41 | spin_unlock(lock); | ||
42 | |||
43 | schedule(); | ||
44 | |||
45 | remove_wait_queue(&sv->waiters, &wait); | ||
46 | } | ||
47 | |||
48 | #define sv_init(sv,flag,name) \ | ||
49 | init_waitqueue_head(&(sv)->waiters) | ||
50 | #define sv_destroy(sv) \ | ||
51 | /*NOTHING*/ | ||
52 | #define sv_wait(sv, pri, lock, s) \ | ||
53 | _sv_wait(sv, lock) | ||
54 | #define sv_signal(sv) \ | ||
55 | wake_up(&(sv)->waiters) | ||
56 | #define sv_broadcast(sv) \ | ||
57 | wake_up_all(&(sv)->waiters) | ||
58 | |||
59 | #endif /* __XFS_SUPPORT_SV_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 691f61223ed6..ec7bbb5645b6 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -38,15 +38,6 @@ | |||
38 | #include <linux/pagevec.h> | 38 | #include <linux/pagevec.h> |
39 | #include <linux/writeback.h> | 39 | #include <linux/writeback.h> |
40 | 40 | ||
41 | /* | ||
42 | * Types of I/O for bmap clustering and I/O completion tracking. | ||
43 | */ | ||
44 | enum { | ||
45 | IO_READ, /* mapping for a read */ | ||
46 | IO_DELAY, /* mapping covers delalloc region */ | ||
47 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | ||
48 | IO_NEW /* just allocated */ | ||
49 | }; | ||
50 | 41 | ||
51 | /* | 42 | /* |
52 | * Prime number of hash buckets since address is used as the key. | 43 | * Prime number of hash buckets since address is used as the key. |
@@ -182,9 +173,6 @@ xfs_setfilesize( | |||
182 | xfs_inode_t *ip = XFS_I(ioend->io_inode); | 173 | xfs_inode_t *ip = XFS_I(ioend->io_inode); |
183 | xfs_fsize_t isize; | 174 | xfs_fsize_t isize; |
184 | 175 | ||
185 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
186 | ASSERT(ioend->io_type != IO_READ); | ||
187 | |||
188 | if (unlikely(ioend->io_error)) | 176 | if (unlikely(ioend->io_error)) |
189 | return 0; | 177 | return 0; |
190 | 178 | ||
@@ -244,10 +232,8 @@ xfs_end_io( | |||
244 | * We might have to update the on-disk file size after extending | 232 | * We might have to update the on-disk file size after extending |
245 | * writes. | 233 | * writes. |
246 | */ | 234 | */ |
247 | if (ioend->io_type != IO_READ) { | 235 | error = xfs_setfilesize(ioend); |
248 | error = xfs_setfilesize(ioend); | 236 | ASSERT(!error || error == EAGAIN); |
249 | ASSERT(!error || error == EAGAIN); | ||
250 | } | ||
251 | 237 | ||
252 | /* | 238 | /* |
253 | * If we didn't complete processing of the ioend, requeue it to the | 239 | * If we didn't complete processing of the ioend, requeue it to the |
@@ -318,14 +304,63 @@ STATIC int | |||
318 | xfs_map_blocks( | 304 | xfs_map_blocks( |
319 | struct inode *inode, | 305 | struct inode *inode, |
320 | loff_t offset, | 306 | loff_t offset, |
321 | ssize_t count, | ||
322 | struct xfs_bmbt_irec *imap, | 307 | struct xfs_bmbt_irec *imap, |
323 | int flags) | 308 | int type, |
309 | int nonblocking) | ||
324 | { | 310 | { |
325 | int nmaps = 1; | 311 | struct xfs_inode *ip = XFS_I(inode); |
326 | int new = 0; | 312 | struct xfs_mount *mp = ip->i_mount; |
313 | ssize_t count = 1 << inode->i_blkbits; | ||
314 | xfs_fileoff_t offset_fsb, end_fsb; | ||
315 | int error = 0; | ||
316 | int bmapi_flags = XFS_BMAPI_ENTIRE; | ||
317 | int nimaps = 1; | ||
318 | |||
319 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
320 | return -XFS_ERROR(EIO); | ||
321 | |||
322 | if (type == IO_UNWRITTEN) | ||
323 | bmapi_flags |= XFS_BMAPI_IGSTATE; | ||
324 | |||
325 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | ||
326 | if (nonblocking) | ||
327 | return -XFS_ERROR(EAGAIN); | ||
328 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
329 | } | ||
327 | 330 | ||
328 | return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); | 331 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
332 | (ip->i_df.if_flags & XFS_IFEXTENTS)); | ||
333 | ASSERT(offset <= mp->m_maxioffset); | ||
334 | |||
335 | if (offset + count > mp->m_maxioffset) | ||
336 | count = mp->m_maxioffset - offset; | ||
337 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
338 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
339 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, | ||
340 | bmapi_flags, NULL, 0, imap, &nimaps, NULL); | ||
341 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
342 | |||
343 | if (error) | ||
344 | return -XFS_ERROR(error); | ||
345 | |||
346 | if (type == IO_DELALLOC && | ||
347 | (!nimaps || isnullstartblock(imap->br_startblock))) { | ||
348 | error = xfs_iomap_write_allocate(ip, offset, count, imap); | ||
349 | if (!error) | ||
350 | trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); | ||
351 | return -XFS_ERROR(error); | ||
352 | } | ||
353 | |||
354 | #ifdef DEBUG | ||
355 | if (type == IO_UNWRITTEN) { | ||
356 | ASSERT(nimaps); | ||
357 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | ||
358 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | ||
359 | } | ||
360 | #endif | ||
361 | if (nimaps) | ||
362 | trace_xfs_map_blocks_found(ip, offset, count, type, imap); | ||
363 | return 0; | ||
329 | } | 364 | } |
330 | 365 | ||
331 | STATIC int | 366 | STATIC int |
@@ -380,26 +415,18 @@ xfs_submit_ioend_bio( | |||
380 | 415 | ||
381 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? | 416 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? |
382 | WRITE_SYNC_PLUG : WRITE, bio); | 417 | WRITE_SYNC_PLUG : WRITE, bio); |
383 | ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); | ||
384 | bio_put(bio); | ||
385 | } | 418 | } |
386 | 419 | ||
387 | STATIC struct bio * | 420 | STATIC struct bio * |
388 | xfs_alloc_ioend_bio( | 421 | xfs_alloc_ioend_bio( |
389 | struct buffer_head *bh) | 422 | struct buffer_head *bh) |
390 | { | 423 | { |
391 | struct bio *bio; | ||
392 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 424 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
393 | 425 | struct bio *bio = bio_alloc(GFP_NOIO, nvecs); | |
394 | do { | ||
395 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
396 | nvecs >>= 1; | ||
397 | } while (!bio); | ||
398 | 426 | ||
399 | ASSERT(bio->bi_private == NULL); | 427 | ASSERT(bio->bi_private == NULL); |
400 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 428 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
401 | bio->bi_bdev = bh->b_bdev; | 429 | bio->bi_bdev = bh->b_bdev; |
402 | bio_get(bio); | ||
403 | return bio; | 430 | return bio; |
404 | } | 431 | } |
405 | 432 | ||
@@ -470,9 +497,8 @@ xfs_submit_ioend( | |||
470 | /* Pass 1 - start writeback */ | 497 | /* Pass 1 - start writeback */ |
471 | do { | 498 | do { |
472 | next = ioend->io_list; | 499 | next = ioend->io_list; |
473 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | 500 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) |
474 | xfs_start_buffer_writeback(bh); | 501 | xfs_start_buffer_writeback(bh); |
475 | } | ||
476 | } while ((ioend = next) != NULL); | 502 | } while ((ioend = next) != NULL); |
477 | 503 | ||
478 | /* Pass 2 - submit I/O */ | 504 | /* Pass 2 - submit I/O */ |
@@ -600,117 +626,13 @@ xfs_map_at_offset( | |||
600 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 626 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); |
601 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 627 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
602 | 628 | ||
603 | lock_buffer(bh); | ||
604 | xfs_map_buffer(inode, bh, imap, offset); | 629 | xfs_map_buffer(inode, bh, imap, offset); |
605 | bh->b_bdev = xfs_find_bdev_for_inode(inode); | ||
606 | set_buffer_mapped(bh); | 630 | set_buffer_mapped(bh); |
607 | clear_buffer_delay(bh); | 631 | clear_buffer_delay(bh); |
608 | clear_buffer_unwritten(bh); | 632 | clear_buffer_unwritten(bh); |
609 | } | 633 | } |
610 | 634 | ||
611 | /* | 635 | /* |
612 | * Look for a page at index that is suitable for clustering. | ||
613 | */ | ||
614 | STATIC unsigned int | ||
615 | xfs_probe_page( | ||
616 | struct page *page, | ||
617 | unsigned int pg_offset) | ||
618 | { | ||
619 | struct buffer_head *bh, *head; | ||
620 | int ret = 0; | ||
621 | |||
622 | if (PageWriteback(page)) | ||
623 | return 0; | ||
624 | if (!PageDirty(page)) | ||
625 | return 0; | ||
626 | if (!page->mapping) | ||
627 | return 0; | ||
628 | if (!page_has_buffers(page)) | ||
629 | return 0; | ||
630 | |||
631 | bh = head = page_buffers(page); | ||
632 | do { | ||
633 | if (!buffer_uptodate(bh)) | ||
634 | break; | ||
635 | if (!buffer_mapped(bh)) | ||
636 | break; | ||
637 | ret += bh->b_size; | ||
638 | if (ret >= pg_offset) | ||
639 | break; | ||
640 | } while ((bh = bh->b_this_page) != head); | ||
641 | |||
642 | return ret; | ||
643 | } | ||
644 | |||
645 | STATIC size_t | ||
646 | xfs_probe_cluster( | ||
647 | struct inode *inode, | ||
648 | struct page *startpage, | ||
649 | struct buffer_head *bh, | ||
650 | struct buffer_head *head) | ||
651 | { | ||
652 | struct pagevec pvec; | ||
653 | pgoff_t tindex, tlast, tloff; | ||
654 | size_t total = 0; | ||
655 | int done = 0, i; | ||
656 | |||
657 | /* First sum forwards in this page */ | ||
658 | do { | ||
659 | if (!buffer_uptodate(bh) || !buffer_mapped(bh)) | ||
660 | return total; | ||
661 | total += bh->b_size; | ||
662 | } while ((bh = bh->b_this_page) != head); | ||
663 | |||
664 | /* if we reached the end of the page, sum forwards in following pages */ | ||
665 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
666 | tindex = startpage->index + 1; | ||
667 | |||
668 | /* Prune this back to avoid pathological behavior */ | ||
669 | tloff = min(tlast, startpage->index + 64); | ||
670 | |||
671 | pagevec_init(&pvec, 0); | ||
672 | while (!done && tindex <= tloff) { | ||
673 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); | ||
674 | |||
675 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) | ||
676 | break; | ||
677 | |||
678 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
679 | struct page *page = pvec.pages[i]; | ||
680 | size_t pg_offset, pg_len = 0; | ||
681 | |||
682 | if (tindex == tlast) { | ||
683 | pg_offset = | ||
684 | i_size_read(inode) & (PAGE_CACHE_SIZE - 1); | ||
685 | if (!pg_offset) { | ||
686 | done = 1; | ||
687 | break; | ||
688 | } | ||
689 | } else | ||
690 | pg_offset = PAGE_CACHE_SIZE; | ||
691 | |||
692 | if (page->index == tindex && trylock_page(page)) { | ||
693 | pg_len = xfs_probe_page(page, pg_offset); | ||
694 | unlock_page(page); | ||
695 | } | ||
696 | |||
697 | if (!pg_len) { | ||
698 | done = 1; | ||
699 | break; | ||
700 | } | ||
701 | |||
702 | total += pg_len; | ||
703 | tindex++; | ||
704 | } | ||
705 | |||
706 | pagevec_release(&pvec); | ||
707 | cond_resched(); | ||
708 | } | ||
709 | |||
710 | return total; | ||
711 | } | ||
712 | |||
713 | /* | ||
714 | * Test if a given page is suitable for writing as part of an unwritten | 636 | * Test if a given page is suitable for writing as part of an unwritten |
715 | * or delayed allocate extent. | 637 | * or delayed allocate extent. |
716 | */ | 638 | */ |
@@ -731,9 +653,9 @@ xfs_is_delayed_page( | |||
731 | if (buffer_unwritten(bh)) | 653 | if (buffer_unwritten(bh)) |
732 | acceptable = (type == IO_UNWRITTEN); | 654 | acceptable = (type == IO_UNWRITTEN); |
733 | else if (buffer_delay(bh)) | 655 | else if (buffer_delay(bh)) |
734 | acceptable = (type == IO_DELAY); | 656 | acceptable = (type == IO_DELALLOC); |
735 | else if (buffer_dirty(bh) && buffer_mapped(bh)) | 657 | else if (buffer_dirty(bh) && buffer_mapped(bh)) |
736 | acceptable = (type == IO_NEW); | 658 | acceptable = (type == IO_OVERWRITE); |
737 | else | 659 | else |
738 | break; | 660 | break; |
739 | } while ((bh = bh->b_this_page) != head); | 661 | } while ((bh = bh->b_this_page) != head); |
@@ -758,8 +680,7 @@ xfs_convert_page( | |||
758 | loff_t tindex, | 680 | loff_t tindex, |
759 | struct xfs_bmbt_irec *imap, | 681 | struct xfs_bmbt_irec *imap, |
760 | xfs_ioend_t **ioendp, | 682 | xfs_ioend_t **ioendp, |
761 | struct writeback_control *wbc, | 683 | struct writeback_control *wbc) |
762 | int all_bh) | ||
763 | { | 684 | { |
764 | struct buffer_head *bh, *head; | 685 | struct buffer_head *bh, *head; |
765 | xfs_off_t end_offset; | 686 | xfs_off_t end_offset; |
@@ -814,37 +735,30 @@ xfs_convert_page( | |||
814 | continue; | 735 | continue; |
815 | } | 736 | } |
816 | 737 | ||
817 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | 738 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
739 | buffer_mapped(bh)) { | ||
818 | if (buffer_unwritten(bh)) | 740 | if (buffer_unwritten(bh)) |
819 | type = IO_UNWRITTEN; | 741 | type = IO_UNWRITTEN; |
742 | else if (buffer_delay(bh)) | ||
743 | type = IO_DELALLOC; | ||
820 | else | 744 | else |
821 | type = IO_DELAY; | 745 | type = IO_OVERWRITE; |
822 | 746 | ||
823 | if (!xfs_imap_valid(inode, imap, offset)) { | 747 | if (!xfs_imap_valid(inode, imap, offset)) { |
824 | done = 1; | 748 | done = 1; |
825 | continue; | 749 | continue; |
826 | } | 750 | } |
827 | 751 | ||
828 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 752 | lock_buffer(bh); |
829 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 753 | if (type != IO_OVERWRITE) |
830 | 754 | xfs_map_at_offset(inode, bh, imap, offset); | |
831 | xfs_map_at_offset(inode, bh, imap, offset); | ||
832 | xfs_add_to_ioend(inode, bh, offset, type, | 755 | xfs_add_to_ioend(inode, bh, offset, type, |
833 | ioendp, done); | 756 | ioendp, done); |
834 | 757 | ||
835 | page_dirty--; | 758 | page_dirty--; |
836 | count++; | 759 | count++; |
837 | } else { | 760 | } else { |
838 | type = IO_NEW; | 761 | done = 1; |
839 | if (buffer_mapped(bh) && all_bh) { | ||
840 | lock_buffer(bh); | ||
841 | xfs_add_to_ioend(inode, bh, offset, | ||
842 | type, ioendp, done); | ||
843 | count++; | ||
844 | page_dirty--; | ||
845 | } else { | ||
846 | done = 1; | ||
847 | } | ||
848 | } | 762 | } |
849 | } while (offset += len, (bh = bh->b_this_page) != head); | 763 | } while (offset += len, (bh = bh->b_this_page) != head); |
850 | 764 | ||
@@ -876,7 +790,6 @@ xfs_cluster_write( | |||
876 | struct xfs_bmbt_irec *imap, | 790 | struct xfs_bmbt_irec *imap, |
877 | xfs_ioend_t **ioendp, | 791 | xfs_ioend_t **ioendp, |
878 | struct writeback_control *wbc, | 792 | struct writeback_control *wbc, |
879 | int all_bh, | ||
880 | pgoff_t tlast) | 793 | pgoff_t tlast) |
881 | { | 794 | { |
882 | struct pagevec pvec; | 795 | struct pagevec pvec; |
@@ -891,7 +804,7 @@ xfs_cluster_write( | |||
891 | 804 | ||
892 | for (i = 0; i < pagevec_count(&pvec); i++) { | 805 | for (i = 0; i < pagevec_count(&pvec); i++) { |
893 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | 806 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, |
894 | imap, ioendp, wbc, all_bh); | 807 | imap, ioendp, wbc); |
895 | if (done) | 808 | if (done) |
896 | break; | 809 | break; |
897 | } | 810 | } |
@@ -935,7 +848,7 @@ xfs_aops_discard_page( | |||
935 | struct buffer_head *bh, *head; | 848 | struct buffer_head *bh, *head; |
936 | loff_t offset = page_offset(page); | 849 | loff_t offset = page_offset(page); |
937 | 850 | ||
938 | if (!xfs_is_delayed_page(page, IO_DELAY)) | 851 | if (!xfs_is_delayed_page(page, IO_DELALLOC)) |
939 | goto out_invalidate; | 852 | goto out_invalidate; |
940 | 853 | ||
941 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 854 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
@@ -1002,10 +915,10 @@ xfs_vm_writepage( | |||
1002 | unsigned int type; | 915 | unsigned int type; |
1003 | __uint64_t end_offset; | 916 | __uint64_t end_offset; |
1004 | pgoff_t end_index, last_index; | 917 | pgoff_t end_index, last_index; |
1005 | ssize_t size, len; | 918 | ssize_t len; |
1006 | int flags, err, imap_valid = 0, uptodate = 1; | 919 | int err, imap_valid = 0, uptodate = 1; |
1007 | int count = 0; | 920 | int count = 0; |
1008 | int all_bh = 0; | 921 | int nonblocking = 0; |
1009 | 922 | ||
1010 | trace_xfs_writepage(inode, page, 0); | 923 | trace_xfs_writepage(inode, page, 0); |
1011 | 924 | ||
@@ -1056,10 +969,14 @@ xfs_vm_writepage( | |||
1056 | 969 | ||
1057 | bh = head = page_buffers(page); | 970 | bh = head = page_buffers(page); |
1058 | offset = page_offset(page); | 971 | offset = page_offset(page); |
1059 | flags = BMAPI_READ; | 972 | type = IO_OVERWRITE; |
1060 | type = IO_NEW; | 973 | |
974 | if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) | ||
975 | nonblocking = 1; | ||
1061 | 976 | ||
1062 | do { | 977 | do { |
978 | int new_ioend = 0; | ||
979 | |||
1063 | if (offset >= end_offset) | 980 | if (offset >= end_offset) |
1064 | break; | 981 | break; |
1065 | if (!buffer_uptodate(bh)) | 982 | if (!buffer_uptodate(bh)) |
@@ -1076,90 +993,54 @@ xfs_vm_writepage( | |||
1076 | continue; | 993 | continue; |
1077 | } | 994 | } |
1078 | 995 | ||
1079 | if (imap_valid) | 996 | if (buffer_unwritten(bh)) { |
1080 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 997 | if (type != IO_UNWRITTEN) { |
1081 | |||
1082 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | ||
1083 | int new_ioend = 0; | ||
1084 | |||
1085 | /* | ||
1086 | * Make sure we don't use a read-only iomap | ||
1087 | */ | ||
1088 | if (flags == BMAPI_READ) | ||
1089 | imap_valid = 0; | ||
1090 | |||
1091 | if (buffer_unwritten(bh)) { | ||
1092 | type = IO_UNWRITTEN; | 998 | type = IO_UNWRITTEN; |
1093 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; | 999 | imap_valid = 0; |
1094 | } else if (buffer_delay(bh)) { | ||
1095 | type = IO_DELAY; | ||
1096 | flags = BMAPI_ALLOCATE; | ||
1097 | |||
1098 | if (wbc->sync_mode == WB_SYNC_NONE) | ||
1099 | flags |= BMAPI_TRYLOCK; | ||
1100 | } | ||
1101 | |||
1102 | if (!imap_valid) { | ||
1103 | /* | ||
1104 | * If we didn't have a valid mapping then we | ||
1105 | * need to ensure that we put the new mapping | ||
1106 | * in a new ioend structure. This needs to be | ||
1107 | * done to ensure that the ioends correctly | ||
1108 | * reflect the block mappings at io completion | ||
1109 | * for unwritten extent conversion. | ||
1110 | */ | ||
1111 | new_ioend = 1; | ||
1112 | err = xfs_map_blocks(inode, offset, len, | ||
1113 | &imap, flags); | ||
1114 | if (err) | ||
1115 | goto error; | ||
1116 | imap_valid = xfs_imap_valid(inode, &imap, | ||
1117 | offset); | ||
1118 | } | 1000 | } |
1119 | if (imap_valid) { | 1001 | } else if (buffer_delay(bh)) { |
1120 | xfs_map_at_offset(inode, bh, &imap, offset); | 1002 | if (type != IO_DELALLOC) { |
1121 | xfs_add_to_ioend(inode, bh, offset, type, | 1003 | type = IO_DELALLOC; |
1122 | &ioend, new_ioend); | 1004 | imap_valid = 0; |
1123 | count++; | ||
1124 | } | 1005 | } |
1125 | } else if (buffer_uptodate(bh)) { | 1006 | } else if (buffer_uptodate(bh)) { |
1126 | /* | 1007 | if (type != IO_OVERWRITE) { |
1127 | * we got here because the buffer is already mapped. | 1008 | type = IO_OVERWRITE; |
1128 | * That means it must already have extents allocated | 1009 | imap_valid = 0; |
1129 | * underneath it. Map the extent by reading it. | ||
1130 | */ | ||
1131 | if (!imap_valid || flags != BMAPI_READ) { | ||
1132 | flags = BMAPI_READ; | ||
1133 | size = xfs_probe_cluster(inode, page, bh, head); | ||
1134 | err = xfs_map_blocks(inode, offset, size, | ||
1135 | &imap, flags); | ||
1136 | if (err) | ||
1137 | goto error; | ||
1138 | imap_valid = xfs_imap_valid(inode, &imap, | ||
1139 | offset); | ||
1140 | } | 1010 | } |
1011 | } else { | ||
1012 | if (PageUptodate(page)) { | ||
1013 | ASSERT(buffer_mapped(bh)); | ||
1014 | imap_valid = 0; | ||
1015 | } | ||
1016 | continue; | ||
1017 | } | ||
1141 | 1018 | ||
1019 | if (imap_valid) | ||
1020 | imap_valid = xfs_imap_valid(inode, &imap, offset); | ||
1021 | if (!imap_valid) { | ||
1142 | /* | 1022 | /* |
1143 | * We set the type to IO_NEW in case we are doing a | 1023 | * If we didn't have a valid mapping then we need to |
1144 | * small write at EOF that is extending the file but | 1024 | * put the new mapping into a separate ioend structure. |
1145 | * without needing an allocation. We need to update the | 1025 | * This ensures non-contiguous extents always have |
1146 | * file size on I/O completion in this case so it is | 1026 | * separate ioends, which is particularly important |
1147 | * the same case as having just allocated a new extent | 1027 | * for unwritten extent conversion at I/O completion |
1148 | * that we are writing into for the first time. | 1028 | * time. |
1149 | */ | 1029 | */ |
1150 | type = IO_NEW; | 1030 | new_ioend = 1; |
1151 | if (trylock_buffer(bh)) { | 1031 | err = xfs_map_blocks(inode, offset, &imap, type, |
1152 | if (imap_valid) | 1032 | nonblocking); |
1153 | all_bh = 1; | 1033 | if (err) |
1154 | xfs_add_to_ioend(inode, bh, offset, type, | 1034 | goto error; |
1155 | &ioend, !imap_valid); | 1035 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
1156 | count++; | 1036 | } |
1157 | } else { | 1037 | if (imap_valid) { |
1158 | imap_valid = 0; | 1038 | lock_buffer(bh); |
1159 | } | 1039 | if (type != IO_OVERWRITE) |
1160 | } else if (PageUptodate(page)) { | 1040 | xfs_map_at_offset(inode, bh, &imap, offset); |
1161 | ASSERT(buffer_mapped(bh)); | 1041 | xfs_add_to_ioend(inode, bh, offset, type, &ioend, |
1162 | imap_valid = 0; | 1042 | new_ioend); |
1043 | count++; | ||
1163 | } | 1044 | } |
1164 | 1045 | ||
1165 | if (!iohead) | 1046 | if (!iohead) |
@@ -1188,7 +1069,7 @@ xfs_vm_writepage( | |||
1188 | end_index = last_index; | 1069 | end_index = last_index; |
1189 | 1070 | ||
1190 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, | 1071 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, |
1191 | wbc, all_bh, end_index); | 1072 | wbc, end_index); |
1192 | } | 1073 | } |
1193 | 1074 | ||
1194 | if (iohead) | 1075 | if (iohead) |
@@ -1257,13 +1138,19 @@ __xfs_get_blocks( | |||
1257 | int create, | 1138 | int create, |
1258 | int direct) | 1139 | int direct) |
1259 | { | 1140 | { |
1260 | int flags = create ? BMAPI_WRITE : BMAPI_READ; | 1141 | struct xfs_inode *ip = XFS_I(inode); |
1142 | struct xfs_mount *mp = ip->i_mount; | ||
1143 | xfs_fileoff_t offset_fsb, end_fsb; | ||
1144 | int error = 0; | ||
1145 | int lockmode = 0; | ||
1261 | struct xfs_bmbt_irec imap; | 1146 | struct xfs_bmbt_irec imap; |
1147 | int nimaps = 1; | ||
1262 | xfs_off_t offset; | 1148 | xfs_off_t offset; |
1263 | ssize_t size; | 1149 | ssize_t size; |
1264 | int nimap = 1; | ||
1265 | int new = 0; | 1150 | int new = 0; |
1266 | int error; | 1151 | |
1152 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1153 | return -XFS_ERROR(EIO); | ||
1267 | 1154 | ||
1268 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1155 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1269 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1156 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
@@ -1272,15 +1159,45 @@ __xfs_get_blocks( | |||
1272 | if (!create && direct && offset >= i_size_read(inode)) | 1159 | if (!create && direct && offset >= i_size_read(inode)) |
1273 | return 0; | 1160 | return 0; |
1274 | 1161 | ||
1275 | if (direct && create) | 1162 | if (create) { |
1276 | flags |= BMAPI_DIRECT; | 1163 | lockmode = XFS_ILOCK_EXCL; |
1164 | xfs_ilock(ip, lockmode); | ||
1165 | } else { | ||
1166 | lockmode = xfs_ilock_map_shared(ip); | ||
1167 | } | ||
1168 | |||
1169 | ASSERT(offset <= mp->m_maxioffset); | ||
1170 | if (offset + size > mp->m_maxioffset) | ||
1171 | size = mp->m_maxioffset - offset; | ||
1172 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); | ||
1173 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
1277 | 1174 | ||
1278 | error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, | 1175 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, |
1279 | &new); | 1176 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); |
1280 | if (error) | 1177 | if (error) |
1281 | return -error; | 1178 | goto out_unlock; |
1282 | if (nimap == 0) | 1179 | |
1283 | return 0; | 1180 | if (create && |
1181 | (!nimaps || | ||
1182 | (imap.br_startblock == HOLESTARTBLOCK || | ||
1183 | imap.br_startblock == DELAYSTARTBLOCK))) { | ||
1184 | if (direct) { | ||
1185 | error = xfs_iomap_write_direct(ip, offset, size, | ||
1186 | &imap, nimaps); | ||
1187 | } else { | ||
1188 | error = xfs_iomap_write_delay(ip, offset, size, &imap); | ||
1189 | } | ||
1190 | if (error) | ||
1191 | goto out_unlock; | ||
1192 | |||
1193 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); | ||
1194 | } else if (nimaps) { | ||
1195 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); | ||
1196 | } else { | ||
1197 | trace_xfs_get_blocks_notfound(ip, offset, size); | ||
1198 | goto out_unlock; | ||
1199 | } | ||
1200 | xfs_iunlock(ip, lockmode); | ||
1284 | 1201 | ||
1285 | if (imap.br_startblock != HOLESTARTBLOCK && | 1202 | if (imap.br_startblock != HOLESTARTBLOCK && |
1286 | imap.br_startblock != DELAYSTARTBLOCK) { | 1203 | imap.br_startblock != DELAYSTARTBLOCK) { |
@@ -1347,6 +1264,10 @@ __xfs_get_blocks( | |||
1347 | } | 1264 | } |
1348 | 1265 | ||
1349 | return 0; | 1266 | return 0; |
1267 | |||
1268 | out_unlock: | ||
1269 | xfs_iunlock(ip, lockmode); | ||
1270 | return -error; | ||
1350 | } | 1271 | } |
1351 | 1272 | ||
1352 | int | 1273 | int |
@@ -1434,7 +1355,7 @@ xfs_vm_direct_IO( | |||
1434 | ssize_t ret; | 1355 | ssize_t ret; |
1435 | 1356 | ||
1436 | if (rw & WRITE) { | 1357 | if (rw & WRITE) { |
1437 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); | 1358 | iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); |
1438 | 1359 | ||
1439 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1360 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1440 | offset, nr_segs, | 1361 | offset, nr_segs, |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index c5057fb6237a..71f721e1a71f 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h | |||
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue; | |||
23 | extern mempool_t *xfs_ioend_pool; | 23 | extern mempool_t *xfs_ioend_pool; |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Types of I/O for bmap clustering and I/O completion tracking. | ||
27 | */ | ||
28 | enum { | ||
29 | IO_DIRECT = 0, /* special case for direct I/O ioends */ | ||
30 | IO_DELALLOC, /* mapping covers delalloc region */ | ||
31 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | ||
32 | IO_OVERWRITE, /* mapping covers already allocated extent */ | ||
33 | }; | ||
34 | |||
35 | #define XFS_IO_TYPES \ | ||
36 | { 0, "" }, \ | ||
37 | { IO_DELALLOC, "delalloc" }, \ | ||
38 | { IO_UNWRITTEN, "unwritten" }, \ | ||
39 | { IO_OVERWRITE, "overwrite" } | ||
40 | |||
41 | /* | ||
26 | * xfs_ioend struct manages large extent writes for XFS. | 42 | * xfs_ioend struct manages large extent writes for XFS. |
27 | * It can manage several multi-page bio's at once. | 43 | * It can manage several multi-page bio's at once. |
28 | */ | 44 | */ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 4c5deb6e9e31..92f1f2acc6ab 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -44,12 +44,7 @@ | |||
44 | 44 | ||
45 | static kmem_zone_t *xfs_buf_zone; | 45 | static kmem_zone_t *xfs_buf_zone; |
46 | STATIC int xfsbufd(void *); | 46 | STATIC int xfsbufd(void *); |
47 | STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); | ||
48 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 47 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
49 | static struct shrinker xfs_buf_shake = { | ||
50 | .shrink = xfsbufd_wakeup, | ||
51 | .seeks = DEFAULT_SEEKS, | ||
52 | }; | ||
53 | 48 | ||
54 | static struct workqueue_struct *xfslogd_workqueue; | 49 | static struct workqueue_struct *xfslogd_workqueue; |
55 | struct workqueue_struct *xfsdatad_workqueue; | 50 | struct workqueue_struct *xfsdatad_workqueue; |
@@ -168,8 +163,79 @@ test_page_region( | |||
168 | } | 163 | } |
169 | 164 | ||
170 | /* | 165 | /* |
171 | * Internal xfs_buf_t object manipulation | 166 | * xfs_buf_lru_add - add a buffer to the LRU. |
167 | * | ||
168 | * The LRU takes a new reference to the buffer so that it will only be freed | ||
169 | * once the shrinker takes the buffer off the LRU. | ||
172 | */ | 170 | */ |
171 | STATIC void | ||
172 | xfs_buf_lru_add( | ||
173 | struct xfs_buf *bp) | ||
174 | { | ||
175 | struct xfs_buftarg *btp = bp->b_target; | ||
176 | |||
177 | spin_lock(&btp->bt_lru_lock); | ||
178 | if (list_empty(&bp->b_lru)) { | ||
179 | atomic_inc(&bp->b_hold); | ||
180 | list_add_tail(&bp->b_lru, &btp->bt_lru); | ||
181 | btp->bt_lru_nr++; | ||
182 | } | ||
183 | spin_unlock(&btp->bt_lru_lock); | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * xfs_buf_lru_del - remove a buffer from the LRU | ||
188 | * | ||
189 | * The unlocked check is safe here because it only occurs when there are not | ||
190 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | ||
191 | * to optimise the shrinker removing the buffer from the LRU and calling | ||
192 | * xfs_buf_free(). i.e. it removes an unneccessary round trip on the | ||
193 | * bt_lru_lock. | ||
194 | */ | ||
195 | STATIC void | ||
196 | xfs_buf_lru_del( | ||
197 | struct xfs_buf *bp) | ||
198 | { | ||
199 | struct xfs_buftarg *btp = bp->b_target; | ||
200 | |||
201 | if (list_empty(&bp->b_lru)) | ||
202 | return; | ||
203 | |||
204 | spin_lock(&btp->bt_lru_lock); | ||
205 | if (!list_empty(&bp->b_lru)) { | ||
206 | list_del_init(&bp->b_lru); | ||
207 | btp->bt_lru_nr--; | ||
208 | } | ||
209 | spin_unlock(&btp->bt_lru_lock); | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the | ||
214 | * b_lru_ref count so that the buffer is freed immediately when the buffer | ||
215 | * reference count falls to zero. If the buffer is already on the LRU, we need | ||
216 | * to remove the reference that LRU holds on the buffer. | ||
217 | * | ||
218 | * This prevents build-up of stale buffers on the LRU. | ||
219 | */ | ||
220 | void | ||
221 | xfs_buf_stale( | ||
222 | struct xfs_buf *bp) | ||
223 | { | ||
224 | bp->b_flags |= XBF_STALE; | ||
225 | atomic_set(&(bp)->b_lru_ref, 0); | ||
226 | if (!list_empty(&bp->b_lru)) { | ||
227 | struct xfs_buftarg *btp = bp->b_target; | ||
228 | |||
229 | spin_lock(&btp->bt_lru_lock); | ||
230 | if (!list_empty(&bp->b_lru)) { | ||
231 | list_del_init(&bp->b_lru); | ||
232 | btp->bt_lru_nr--; | ||
233 | atomic_dec(&bp->b_hold); | ||
234 | } | ||
235 | spin_unlock(&btp->bt_lru_lock); | ||
236 | } | ||
237 | ASSERT(atomic_read(&bp->b_hold) >= 1); | ||
238 | } | ||
173 | 239 | ||
174 | STATIC void | 240 | STATIC void |
175 | _xfs_buf_initialize( | 241 | _xfs_buf_initialize( |
@@ -186,7 +252,9 @@ _xfs_buf_initialize( | |||
186 | 252 | ||
187 | memset(bp, 0, sizeof(xfs_buf_t)); | 253 | memset(bp, 0, sizeof(xfs_buf_t)); |
188 | atomic_set(&bp->b_hold, 1); | 254 | atomic_set(&bp->b_hold, 1); |
255 | atomic_set(&bp->b_lru_ref, 1); | ||
189 | init_completion(&bp->b_iowait); | 256 | init_completion(&bp->b_iowait); |
257 | INIT_LIST_HEAD(&bp->b_lru); | ||
190 | INIT_LIST_HEAD(&bp->b_list); | 258 | INIT_LIST_HEAD(&bp->b_list); |
191 | RB_CLEAR_NODE(&bp->b_rbnode); | 259 | RB_CLEAR_NODE(&bp->b_rbnode); |
192 | sema_init(&bp->b_sema, 0); /* held, no waiters */ | 260 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
@@ -262,6 +330,8 @@ xfs_buf_free( | |||
262 | { | 330 | { |
263 | trace_xfs_buf_free(bp, _RET_IP_); | 331 | trace_xfs_buf_free(bp, _RET_IP_); |
264 | 332 | ||
333 | ASSERT(list_empty(&bp->b_lru)); | ||
334 | |||
265 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 335 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { |
266 | uint i; | 336 | uint i; |
267 | 337 | ||
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages( | |||
337 | __func__, gfp_mask); | 407 | __func__, gfp_mask); |
338 | 408 | ||
339 | XFS_STATS_INC(xb_page_retries); | 409 | XFS_STATS_INC(xb_page_retries); |
340 | xfsbufd_wakeup(NULL, 0, gfp_mask); | ||
341 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 410 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
342 | goto retry; | 411 | goto retry; |
343 | } | 412 | } |
@@ -828,6 +897,7 @@ xfs_buf_rele( | |||
828 | 897 | ||
829 | if (!pag) { | 898 | if (!pag) { |
830 | ASSERT(!bp->b_relse); | 899 | ASSERT(!bp->b_relse); |
900 | ASSERT(list_empty(&bp->b_lru)); | ||
831 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | 901 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); |
832 | if (atomic_dec_and_test(&bp->b_hold)) | 902 | if (atomic_dec_and_test(&bp->b_hold)) |
833 | xfs_buf_free(bp); | 903 | xfs_buf_free(bp); |
@@ -835,13 +905,19 @@ xfs_buf_rele( | |||
835 | } | 905 | } |
836 | 906 | ||
837 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | 907 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); |
908 | |||
838 | ASSERT(atomic_read(&bp->b_hold) > 0); | 909 | ASSERT(atomic_read(&bp->b_hold) > 0); |
839 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 910 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
840 | if (bp->b_relse) { | 911 | if (bp->b_relse) { |
841 | atomic_inc(&bp->b_hold); | 912 | atomic_inc(&bp->b_hold); |
842 | spin_unlock(&pag->pag_buf_lock); | 913 | spin_unlock(&pag->pag_buf_lock); |
843 | bp->b_relse(bp); | 914 | bp->b_relse(bp); |
915 | } else if (!(bp->b_flags & XBF_STALE) && | ||
916 | atomic_read(&bp->b_lru_ref)) { | ||
917 | xfs_buf_lru_add(bp); | ||
918 | spin_unlock(&pag->pag_buf_lock); | ||
844 | } else { | 919 | } else { |
920 | xfs_buf_lru_del(bp); | ||
845 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 921 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
846 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 922 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
847 | spin_unlock(&pag->pag_buf_lock); | 923 | spin_unlock(&pag->pag_buf_lock); |
@@ -1438,51 +1514,84 @@ xfs_buf_iomove( | |||
1438 | */ | 1514 | */ |
1439 | 1515 | ||
1440 | /* | 1516 | /* |
1441 | * Wait for any bufs with callbacks that have been submitted but | 1517 | * Wait for any bufs with callbacks that have been submitted but have not yet |
1442 | * have not yet returned... walk the hash list for the target. | 1518 | * returned. These buffers will have an elevated hold count, so wait on those |
1519 | * while freeing all the buffers only held by the LRU. | ||
1443 | */ | 1520 | */ |
1444 | void | 1521 | void |
1445 | xfs_wait_buftarg( | 1522 | xfs_wait_buftarg( |
1446 | struct xfs_buftarg *btp) | 1523 | struct xfs_buftarg *btp) |
1447 | { | 1524 | { |
1448 | struct xfs_perag *pag; | 1525 | struct xfs_buf *bp; |
1449 | uint i; | ||
1450 | 1526 | ||
1451 | for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { | 1527 | restart: |
1452 | pag = xfs_perag_get(btp->bt_mount, i); | 1528 | spin_lock(&btp->bt_lru_lock); |
1453 | spin_lock(&pag->pag_buf_lock); | 1529 | while (!list_empty(&btp->bt_lru)) { |
1454 | while (rb_first(&pag->pag_buf_tree)) { | 1530 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1455 | spin_unlock(&pag->pag_buf_lock); | 1531 | if (atomic_read(&bp->b_hold) > 1) { |
1532 | spin_unlock(&btp->bt_lru_lock); | ||
1456 | delay(100); | 1533 | delay(100); |
1457 | spin_lock(&pag->pag_buf_lock); | 1534 | goto restart; |
1458 | } | 1535 | } |
1459 | spin_unlock(&pag->pag_buf_lock); | 1536 | /* |
1460 | xfs_perag_put(pag); | 1537 | * clear the LRU reference count so the bufer doesn't get |
1538 | * ignored in xfs_buf_rele(). | ||
1539 | */ | ||
1540 | atomic_set(&bp->b_lru_ref, 0); | ||
1541 | spin_unlock(&btp->bt_lru_lock); | ||
1542 | xfs_buf_rele(bp); | ||
1543 | spin_lock(&btp->bt_lru_lock); | ||
1461 | } | 1544 | } |
1545 | spin_unlock(&btp->bt_lru_lock); | ||
1462 | } | 1546 | } |
1463 | 1547 | ||
1464 | /* | 1548 | int |
1465 | * buftarg list for delwrite queue processing | 1549 | xfs_buftarg_shrink( |
1466 | */ | 1550 | struct shrinker *shrink, |
1467 | static LIST_HEAD(xfs_buftarg_list); | 1551 | int nr_to_scan, |
1468 | static DEFINE_SPINLOCK(xfs_buftarg_lock); | 1552 | gfp_t mask) |
1469 | |||
1470 | STATIC void | ||
1471 | xfs_register_buftarg( | ||
1472 | xfs_buftarg_t *btp) | ||
1473 | { | 1553 | { |
1474 | spin_lock(&xfs_buftarg_lock); | 1554 | struct xfs_buftarg *btp = container_of(shrink, |
1475 | list_add(&btp->bt_list, &xfs_buftarg_list); | 1555 | struct xfs_buftarg, bt_shrinker); |
1476 | spin_unlock(&xfs_buftarg_lock); | 1556 | struct xfs_buf *bp; |
1477 | } | 1557 | LIST_HEAD(dispose); |
1478 | 1558 | ||
1479 | STATIC void | 1559 | if (!nr_to_scan) |
1480 | xfs_unregister_buftarg( | 1560 | return btp->bt_lru_nr; |
1481 | xfs_buftarg_t *btp) | 1561 | |
1482 | { | 1562 | spin_lock(&btp->bt_lru_lock); |
1483 | spin_lock(&xfs_buftarg_lock); | 1563 | while (!list_empty(&btp->bt_lru)) { |
1484 | list_del(&btp->bt_list); | 1564 | if (nr_to_scan-- <= 0) |
1485 | spin_unlock(&xfs_buftarg_lock); | 1565 | break; |
1566 | |||
1567 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | ||
1568 | |||
1569 | /* | ||
1570 | * Decrement the b_lru_ref count unless the value is already | ||
1571 | * zero. If the value is already zero, we need to reclaim the | ||
1572 | * buffer, otherwise it gets another trip through the LRU. | ||
1573 | */ | ||
1574 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
1575 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
1576 | continue; | ||
1577 | } | ||
1578 | |||
1579 | /* | ||
1580 | * remove the buffer from the LRU now to avoid needing another | ||
1581 | * lock round trip inside xfs_buf_rele(). | ||
1582 | */ | ||
1583 | list_move(&bp->b_lru, &dispose); | ||
1584 | btp->bt_lru_nr--; | ||
1585 | } | ||
1586 | spin_unlock(&btp->bt_lru_lock); | ||
1587 | |||
1588 | while (!list_empty(&dispose)) { | ||
1589 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | ||
1590 | list_del_init(&bp->b_lru); | ||
1591 | xfs_buf_rele(bp); | ||
1592 | } | ||
1593 | |||
1594 | return btp->bt_lru_nr; | ||
1486 | } | 1595 | } |
1487 | 1596 | ||
1488 | void | 1597 | void |
@@ -1490,17 +1599,14 @@ xfs_free_buftarg( | |||
1490 | struct xfs_mount *mp, | 1599 | struct xfs_mount *mp, |
1491 | struct xfs_buftarg *btp) | 1600 | struct xfs_buftarg *btp) |
1492 | { | 1601 | { |
1602 | unregister_shrinker(&btp->bt_shrinker); | ||
1603 | |||
1493 | xfs_flush_buftarg(btp, 1); | 1604 | xfs_flush_buftarg(btp, 1); |
1494 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1605 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1495 | xfs_blkdev_issue_flush(btp); | 1606 | xfs_blkdev_issue_flush(btp); |
1496 | iput(btp->bt_mapping->host); | 1607 | iput(btp->bt_mapping->host); |
1497 | 1608 | ||
1498 | /* Unregister the buftarg first so that we don't get a | ||
1499 | * wakeup finding a non-existent task | ||
1500 | */ | ||
1501 | xfs_unregister_buftarg(btp); | ||
1502 | kthread_stop(btp->bt_task); | 1609 | kthread_stop(btp->bt_task); |
1503 | |||
1504 | kmem_free(btp); | 1610 | kmem_free(btp); |
1505 | } | 1611 | } |
1506 | 1612 | ||
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue( | |||
1597 | xfs_buftarg_t *btp, | 1703 | xfs_buftarg_t *btp, |
1598 | const char *fsname) | 1704 | const char *fsname) |
1599 | { | 1705 | { |
1600 | int error = 0; | ||
1601 | |||
1602 | INIT_LIST_HEAD(&btp->bt_list); | ||
1603 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1706 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); |
1604 | spin_lock_init(&btp->bt_delwrite_lock); | 1707 | spin_lock_init(&btp->bt_delwrite_lock); |
1605 | btp->bt_flags = 0; | 1708 | btp->bt_flags = 0; |
1606 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | 1709 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); |
1607 | if (IS_ERR(btp->bt_task)) { | 1710 | if (IS_ERR(btp->bt_task)) |
1608 | error = PTR_ERR(btp->bt_task); | 1711 | return PTR_ERR(btp->bt_task); |
1609 | goto out_error; | 1712 | return 0; |
1610 | } | ||
1611 | xfs_register_buftarg(btp); | ||
1612 | out_error: | ||
1613 | return error; | ||
1614 | } | 1713 | } |
1615 | 1714 | ||
1616 | xfs_buftarg_t * | 1715 | xfs_buftarg_t * |
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg( | |||
1627 | btp->bt_mount = mp; | 1726 | btp->bt_mount = mp; |
1628 | btp->bt_dev = bdev->bd_dev; | 1727 | btp->bt_dev = bdev->bd_dev; |
1629 | btp->bt_bdev = bdev; | 1728 | btp->bt_bdev = bdev; |
1729 | INIT_LIST_HEAD(&btp->bt_lru); | ||
1730 | spin_lock_init(&btp->bt_lru_lock); | ||
1630 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1731 | if (xfs_setsize_buftarg_early(btp, bdev)) |
1631 | goto error; | 1732 | goto error; |
1632 | if (xfs_mapping_buftarg(btp, bdev)) | 1733 | if (xfs_mapping_buftarg(btp, bdev)) |
1633 | goto error; | 1734 | goto error; |
1634 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1735 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1635 | goto error; | 1736 | goto error; |
1737 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | ||
1738 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | ||
1739 | register_shrinker(&btp->bt_shrinker); | ||
1636 | return btp; | 1740 | return btp; |
1637 | 1741 | ||
1638 | error: | 1742 | error: |
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues( | |||
1737 | flush_workqueue(queue); | 1841 | flush_workqueue(queue); |
1738 | } | 1842 | } |
1739 | 1843 | ||
1740 | STATIC int | ||
1741 | xfsbufd_wakeup( | ||
1742 | struct shrinker *shrink, | ||
1743 | int priority, | ||
1744 | gfp_t mask) | ||
1745 | { | ||
1746 | xfs_buftarg_t *btp; | ||
1747 | |||
1748 | spin_lock(&xfs_buftarg_lock); | ||
1749 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { | ||
1750 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) | ||
1751 | continue; | ||
1752 | if (list_empty(&btp->bt_delwrite_queue)) | ||
1753 | continue; | ||
1754 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); | ||
1755 | wake_up_process(btp->bt_task); | ||
1756 | } | ||
1757 | spin_unlock(&xfs_buftarg_lock); | ||
1758 | return 0; | ||
1759 | } | ||
1760 | |||
1761 | /* | 1844 | /* |
1762 | * Move as many buffers as specified to the supplied list | 1845 | * Move as many buffers as specified to the supplied list |
1763 | * idicating if we skipped any buffers to prevent deadlocks. | 1846 | * idicating if we skipped any buffers to prevent deadlocks. |
@@ -1952,7 +2035,6 @@ xfs_buf_init(void) | |||
1952 | if (!xfsconvertd_workqueue) | 2035 | if (!xfsconvertd_workqueue) |
1953 | goto out_destroy_xfsdatad_workqueue; | 2036 | goto out_destroy_xfsdatad_workqueue; |
1954 | 2037 | ||
1955 | register_shrinker(&xfs_buf_shake); | ||
1956 | return 0; | 2038 | return 0; |
1957 | 2039 | ||
1958 | out_destroy_xfsdatad_workqueue: | 2040 | out_destroy_xfsdatad_workqueue: |
@@ -1968,7 +2050,6 @@ xfs_buf_init(void) | |||
1968 | void | 2050 | void |
1969 | xfs_buf_terminate(void) | 2051 | xfs_buf_terminate(void) |
1970 | { | 2052 | { |
1971 | unregister_shrinker(&xfs_buf_shake); | ||
1972 | destroy_workqueue(xfsconvertd_workqueue); | 2053 | destroy_workqueue(xfsconvertd_workqueue); |
1973 | destroy_workqueue(xfsdatad_workqueue); | 2054 | destroy_workqueue(xfsdatad_workqueue); |
1974 | destroy_workqueue(xfslogd_workqueue); | 2055 | destroy_workqueue(xfslogd_workqueue); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 383a3f37cf98..a76c2428faff 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -128,10 +128,15 @@ typedef struct xfs_buftarg { | |||
128 | 128 | ||
129 | /* per device delwri queue */ | 129 | /* per device delwri queue */ |
130 | struct task_struct *bt_task; | 130 | struct task_struct *bt_task; |
131 | struct list_head bt_list; | ||
132 | struct list_head bt_delwrite_queue; | 131 | struct list_head bt_delwrite_queue; |
133 | spinlock_t bt_delwrite_lock; | 132 | spinlock_t bt_delwrite_lock; |
134 | unsigned long bt_flags; | 133 | unsigned long bt_flags; |
134 | |||
135 | /* LRU control structures */ | ||
136 | struct shrinker bt_shrinker; | ||
137 | struct list_head bt_lru; | ||
138 | spinlock_t bt_lru_lock; | ||
139 | unsigned int bt_lru_nr; | ||
135 | } xfs_buftarg_t; | 140 | } xfs_buftarg_t; |
136 | 141 | ||
137 | /* | 142 | /* |
@@ -164,9 +169,11 @@ typedef struct xfs_buf { | |||
164 | xfs_off_t b_file_offset; /* offset in file */ | 169 | xfs_off_t b_file_offset; /* offset in file */ |
165 | size_t b_buffer_length;/* size of buffer in bytes */ | 170 | size_t b_buffer_length;/* size of buffer in bytes */ |
166 | atomic_t b_hold; /* reference count */ | 171 | atomic_t b_hold; /* reference count */ |
172 | atomic_t b_lru_ref; /* lru reclaim ref count */ | ||
167 | xfs_buf_flags_t b_flags; /* status flags */ | 173 | xfs_buf_flags_t b_flags; /* status flags */ |
168 | struct semaphore b_sema; /* semaphore for lockables */ | 174 | struct semaphore b_sema; /* semaphore for lockables */ |
169 | 175 | ||
176 | struct list_head b_lru; /* lru list */ | ||
170 | wait_queue_head_t b_waiters; /* unpin waiters */ | 177 | wait_queue_head_t b_waiters; /* unpin waiters */ |
171 | struct list_head b_list; | 178 | struct list_head b_list; |
172 | struct xfs_perag *b_pag; /* contains rbtree root */ | 179 | struct xfs_perag *b_pag; /* contains rbtree root */ |
@@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void); | |||
264 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ | 271 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
265 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) | 272 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
266 | 273 | ||
267 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) | 274 | void xfs_buf_stale(struct xfs_buf *bp); |
275 | #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); | ||
268 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 276 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
269 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 277 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
270 | #define XFS_BUF_SUPER_STALE(bp) do { \ | 278 | #define XFS_BUF_SUPER_STALE(bp) do { \ |
@@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void); | |||
328 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 336 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
329 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 337 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
330 | 338 | ||
331 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) | 339 | static inline void |
340 | xfs_buf_set_ref( | ||
341 | struct xfs_buf *bp, | ||
342 | int lru_ref) | ||
343 | { | ||
344 | atomic_set(&bp->b_lru_ref, lru_ref); | ||
345 | } | ||
346 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | ||
332 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 347 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
333 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) | ||
334 | 348 | ||
335 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | 349 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) |
336 | 350 | ||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3764d74790ec..fc0114da7fdd 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -70,8 +70,16 @@ xfs_fs_encode_fh( | |||
70 | else | 70 | else |
71 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
72 | 72 | ||
73 | /* filesystem may contain 64bit inode numbers */ | 73 | /* |
74 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | 74 | * If the the filesystem may contain 64bit inode numbers, we need |
75 | * to use larger file handles that can represent them. | ||
76 | * | ||
77 | * While we only allocate inodes that do not fit into 32 bits any | ||
78 | * large enough filesystem may contain them, thus the slightly | ||
79 | * confusing looking conditional below. | ||
80 | */ | ||
81 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || | ||
82 | (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) | ||
75 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | 83 | fileid_type |= XFS_FILEID_TYPE_64FLAG; |
76 | 84 | ||
77 | /* | 85 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 214ddd71ff79..096494997747 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -37,7 +37,6 @@ | |||
37 | 37 | ||
38 | #include <kmem.h> | 38 | #include <kmem.h> |
39 | #include <mrlock.h> | 39 | #include <mrlock.h> |
40 | #include <sv.h> | ||
41 | #include <time.h> | 40 | #include <time.h> |
42 | 41 | ||
43 | #include <support/debug.h> | 42 | #include <support/debug.h> |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 064f964d4f3c..c51faaa5e291 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -834,8 +834,11 @@ xfsaild_wakeup( | |||
834 | struct xfs_ail *ailp, | 834 | struct xfs_ail *ailp, |
835 | xfs_lsn_t threshold_lsn) | 835 | xfs_lsn_t threshold_lsn) |
836 | { | 836 | { |
837 | ailp->xa_target = threshold_lsn; | 837 | /* only ever move the target forwards */ |
838 | wake_up_process(ailp->xa_task); | 838 | if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { |
839 | ailp->xa_target = threshold_lsn; | ||
840 | wake_up_process(ailp->xa_task); | ||
841 | } | ||
839 | } | 842 | } |
840 | 843 | ||
841 | STATIC int | 844 | STATIC int |
@@ -847,8 +850,17 @@ xfsaild( | |||
847 | long tout = 0; /* milliseconds */ | 850 | long tout = 0; /* milliseconds */ |
848 | 851 | ||
849 | while (!kthread_should_stop()) { | 852 | while (!kthread_should_stop()) { |
850 | schedule_timeout_interruptible(tout ? | 853 | /* |
851 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | 854 | * for short sleeps indicating congestion, don't allow us to |
855 | * get woken early. Otherwise all we do is bang on the AIL lock | ||
856 | * without making progress. | ||
857 | */ | ||
858 | if (tout && tout <= 20) | ||
859 | __set_current_state(TASK_KILLABLE); | ||
860 | else | ||
861 | __set_current_state(TASK_INTERRUPTIBLE); | ||
862 | schedule_timeout(tout ? | ||
863 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
852 | 864 | ||
853 | /* swsusp */ | 865 | /* swsusp */ |
854 | try_to_freeze(); | 866 | try_to_freeze(); |
@@ -1118,6 +1130,8 @@ xfs_fs_evict_inode( | |||
1118 | */ | 1130 | */ |
1119 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | 1131 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); |
1120 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 1132 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
1133 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
1134 | &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); | ||
1121 | 1135 | ||
1122 | xfs_inactive(ip); | 1136 | xfs_inactive(ip); |
1123 | } | 1137 | } |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1c..a02480de9759 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab( | |||
53 | { | 53 | { |
54 | struct inode *inode = VFS_I(ip); | 54 | struct inode *inode = VFS_I(ip); |
55 | 55 | ||
56 | ASSERT(rcu_read_lock_held()); | ||
57 | |||
58 | /* | ||
59 | * check for stale RCU freed inode | ||
60 | * | ||
61 | * If the inode has been reallocated, it doesn't matter if it's not in | ||
62 | * the AG we are walking - we are walking for writeback, so if it | ||
63 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
64 | * it back anyway. If it has been reallocated and still being | ||
65 | * initialised, the XFS_INEW check below will catch it. | ||
66 | */ | ||
67 | spin_lock(&ip->i_flags_lock); | ||
68 | if (!ip->i_ino) | ||
69 | goto out_unlock_noent; | ||
70 | |||
71 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
72 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
73 | goto out_unlock_noent; | ||
74 | spin_unlock(&ip->i_flags_lock); | ||
75 | |||
56 | /* nothing to sync during shutdown */ | 76 | /* nothing to sync during shutdown */ |
57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 77 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
58 | return EFSCORRUPTED; | 78 | return EFSCORRUPTED; |
59 | 79 | ||
60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
62 | return ENOENT; | ||
63 | |||
64 | /* If we can't grab the inode, it must on it's way to reclaim. */ | 80 | /* If we can't grab the inode, it must on it's way to reclaim. */ |
65 | if (!igrab(inode)) | 81 | if (!igrab(inode)) |
66 | return ENOENT; | 82 | return ENOENT; |
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab( | |||
72 | 88 | ||
73 | /* inode is valid */ | 89 | /* inode is valid */ |
74 | return 0; | 90 | return 0; |
91 | |||
92 | out_unlock_noent: | ||
93 | spin_unlock(&ip->i_flags_lock); | ||
94 | return ENOENT; | ||
75 | } | 95 | } |
76 | 96 | ||
77 | STATIC int | 97 | STATIC int |
@@ -98,12 +118,12 @@ restart: | |||
98 | int error = 0; | 118 | int error = 0; |
99 | int i; | 119 | int i; |
100 | 120 | ||
101 | read_lock(&pag->pag_ici_lock); | 121 | rcu_read_lock(); |
102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 122 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
103 | (void **)batch, first_index, | 123 | (void **)batch, first_index, |
104 | XFS_LOOKUP_BATCH); | 124 | XFS_LOOKUP_BATCH); |
105 | if (!nr_found) { | 125 | if (!nr_found) { |
106 | read_unlock(&pag->pag_ici_lock); | 126 | rcu_read_unlock(); |
107 | break; | 127 | break; |
108 | } | 128 | } |
109 | 129 | ||
@@ -118,18 +138,26 @@ restart: | |||
118 | batch[i] = NULL; | 138 | batch[i] = NULL; |
119 | 139 | ||
120 | /* | 140 | /* |
121 | * Update the index for the next lookup. Catch overflows | 141 | * Update the index for the next lookup. Catch |
122 | * into the next AG range which can occur if we have inodes | 142 | * overflows into the next AG range which can occur if |
123 | * in the last block of the AG and we are currently | 143 | * we have inodes in the last block of the AG and we |
124 | * pointing to the last inode. | 144 | * are currently pointing to the last inode. |
145 | * | ||
146 | * Because we may see inodes that are from the wrong AG | ||
147 | * due to RCU freeing and reallocation, only update the | ||
148 | * index if it lies in this AG. It was a race that lead | ||
149 | * us to see this inode, so another lookup from the | ||
150 | * same index will not find it again. | ||
125 | */ | 151 | */ |
152 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
153 | continue; | ||
126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 154 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 155 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
128 | done = 1; | 156 | done = 1; |
129 | } | 157 | } |
130 | 158 | ||
131 | /* unlock now we've grabbed the inodes. */ | 159 | /* unlock now we've grabbed the inodes. */ |
132 | read_unlock(&pag->pag_ici_lock); | 160 | rcu_read_unlock(); |
133 | 161 | ||
134 | for (i = 0; i < nr_found; i++) { | 162 | for (i = 0; i < nr_found; i++) { |
135 | if (!batch[i]) | 163 | if (!batch[i]) |
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag( | |||
592 | struct xfs_perag *pag; | 620 | struct xfs_perag *pag; |
593 | 621 | ||
594 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 622 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
595 | write_lock(&pag->pag_ici_lock); | 623 | spin_lock(&pag->pag_ici_lock); |
596 | spin_lock(&ip->i_flags_lock); | 624 | spin_lock(&ip->i_flags_lock); |
597 | __xfs_inode_set_reclaim_tag(pag, ip); | 625 | __xfs_inode_set_reclaim_tag(pag, ip); |
598 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 626 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
599 | spin_unlock(&ip->i_flags_lock); | 627 | spin_unlock(&ip->i_flags_lock); |
600 | write_unlock(&pag->pag_ici_lock); | 628 | spin_unlock(&pag->pag_ici_lock); |
601 | xfs_perag_put(pag); | 629 | xfs_perag_put(pag); |
602 | } | 630 | } |
603 | 631 | ||
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab( | |||
639 | struct xfs_inode *ip, | 667 | struct xfs_inode *ip, |
640 | int flags) | 668 | int flags) |
641 | { | 669 | { |
670 | ASSERT(rcu_read_lock_held()); | ||
671 | |||
672 | /* quick check for stale RCU freed inode */ | ||
673 | if (!ip->i_ino) | ||
674 | return 1; | ||
642 | 675 | ||
643 | /* | 676 | /* |
644 | * do some unlocked checks first to avoid unnecceary lock traffic. | 677 | * do some unlocked checks first to avoid unnecessary lock traffic. |
645 | * The first is a flush lock check, the second is a already in reclaim | 678 | * The first is a flush lock check, the second is a already in reclaim |
646 | * check. Only do these checks if we are not going to block on locks. | 679 | * check. Only do these checks if we are not going to block on locks. |
647 | */ | 680 | */ |
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab( | |||
654 | * The radix tree lock here protects a thread in xfs_iget from racing | 687 | * The radix tree lock here protects a thread in xfs_iget from racing |
655 | * with us starting reclaim on the inode. Once we have the | 688 | * with us starting reclaim on the inode. Once we have the |
656 | * XFS_IRECLAIM flag set it will not touch us. | 689 | * XFS_IRECLAIM flag set it will not touch us. |
690 | * | ||
691 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
692 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
693 | * aren't candidates for reclaim at all, so we must check the | ||
694 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
657 | */ | 695 | */ |
658 | spin_lock(&ip->i_flags_lock); | 696 | spin_lock(&ip->i_flags_lock); |
659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 697 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || |
660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | 698 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { |
661 | /* ignore as it is already under reclaim */ | 699 | /* not a reclaim candidate. */ |
662 | spin_unlock(&ip->i_flags_lock); | 700 | spin_unlock(&ip->i_flags_lock); |
663 | return 1; | 701 | return 1; |
664 | } | 702 | } |
@@ -795,12 +833,12 @@ reclaim: | |||
795 | * added to the tree assert that it's been there before to catch | 833 | * added to the tree assert that it's been there before to catch |
796 | * problems with the inode life time early on. | 834 | * problems with the inode life time early on. |
797 | */ | 835 | */ |
798 | write_lock(&pag->pag_ici_lock); | 836 | spin_lock(&pag->pag_ici_lock); |
799 | if (!radix_tree_delete(&pag->pag_ici_root, | 837 | if (!radix_tree_delete(&pag->pag_ici_root, |
800 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 838 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
801 | ASSERT(0); | 839 | ASSERT(0); |
802 | __xfs_inode_clear_reclaim(pag, ip); | 840 | __xfs_inode_clear_reclaim(pag, ip); |
803 | write_unlock(&pag->pag_ici_lock); | 841 | spin_unlock(&pag->pag_ici_lock); |
804 | 842 | ||
805 | /* | 843 | /* |
806 | * Here we do an (almost) spurious inode lock in order to coordinate | 844 | * Here we do an (almost) spurious inode lock in order to coordinate |
@@ -864,14 +902,14 @@ restart: | |||
864 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | 902 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; |
865 | int i; | 903 | int i; |
866 | 904 | ||
867 | write_lock(&pag->pag_ici_lock); | 905 | rcu_read_lock(); |
868 | nr_found = radix_tree_gang_lookup_tag( | 906 | nr_found = radix_tree_gang_lookup_tag( |
869 | &pag->pag_ici_root, | 907 | &pag->pag_ici_root, |
870 | (void **)batch, first_index, | 908 | (void **)batch, first_index, |
871 | XFS_LOOKUP_BATCH, | 909 | XFS_LOOKUP_BATCH, |
872 | XFS_ICI_RECLAIM_TAG); | 910 | XFS_ICI_RECLAIM_TAG); |
873 | if (!nr_found) { | 911 | if (!nr_found) { |
874 | write_unlock(&pag->pag_ici_lock); | 912 | rcu_read_unlock(); |
875 | break; | 913 | break; |
876 | } | 914 | } |
877 | 915 | ||
@@ -891,14 +929,24 @@ restart: | |||
891 | * occur if we have inodes in the last block of | 929 | * occur if we have inodes in the last block of |
892 | * the AG and we are currently pointing to the | 930 | * the AG and we are currently pointing to the |
893 | * last inode. | 931 | * last inode. |
932 | * | ||
933 | * Because we may see inodes that are from the | ||
934 | * wrong AG due to RCU freeing and | ||
935 | * reallocation, only update the index if it | ||
936 | * lies in this AG. It was a race that lead us | ||
937 | * to see this inode, so another lookup from | ||
938 | * the same index will not find it again. | ||
894 | */ | 939 | */ |
940 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
941 | pag->pag_agno) | ||
942 | continue; | ||
895 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 943 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
896 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 944 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
897 | done = 1; | 945 | done = 1; |
898 | } | 946 | } |
899 | 947 | ||
900 | /* unlock now we've grabbed the inodes. */ | 948 | /* unlock now we've grabbed the inodes. */ |
901 | write_unlock(&pag->pag_ici_lock); | 949 | rcu_read_unlock(); |
902 | 950 | ||
903 | for (i = 0; i < nr_found; i++) { | 951 | for (i = 0; i < nr_found; i++) { |
904 | if (!batch[i]) | 952 | if (!batch[i]) |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index acef2e98c594..647af2a2e7aa 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
766 | __field(int, curr_res) | 766 | __field(int, curr_res) |
767 | __field(int, unit_res) | 767 | __field(int, unit_res) |
768 | __field(unsigned int, flags) | 768 | __field(unsigned int, flags) |
769 | __field(void *, reserve_headq) | 769 | __field(int, reserveq) |
770 | __field(void *, write_headq) | 770 | __field(int, writeq) |
771 | __field(int, grant_reserve_cycle) | 771 | __field(int, grant_reserve_cycle) |
772 | __field(int, grant_reserve_bytes) | 772 | __field(int, grant_reserve_bytes) |
773 | __field(int, grant_write_cycle) | 773 | __field(int, grant_write_cycle) |
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
784 | __entry->curr_res = tic->t_curr_res; | 784 | __entry->curr_res = tic->t_curr_res; |
785 | __entry->unit_res = tic->t_unit_res; | 785 | __entry->unit_res = tic->t_unit_res; |
786 | __entry->flags = tic->t_flags; | 786 | __entry->flags = tic->t_flags; |
787 | __entry->reserve_headq = log->l_reserve_headq; | 787 | __entry->reserveq = list_empty(&log->l_reserveq); |
788 | __entry->write_headq = log->l_write_headq; | 788 | __entry->writeq = list_empty(&log->l_writeq); |
789 | __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; | 789 | xlog_crack_grant_head(&log->l_grant_reserve_head, |
790 | __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; | 790 | &__entry->grant_reserve_cycle, |
791 | __entry->grant_write_cycle = log->l_grant_write_cycle; | 791 | &__entry->grant_reserve_bytes); |
792 | __entry->grant_write_bytes = log->l_grant_write_bytes; | 792 | xlog_crack_grant_head(&log->l_grant_write_head, |
793 | &__entry->grant_write_cycle, | ||
794 | &__entry->grant_write_bytes); | ||
793 | __entry->curr_cycle = log->l_curr_cycle; | 795 | __entry->curr_cycle = log->l_curr_cycle; |
794 | __entry->curr_block = log->l_curr_block; | 796 | __entry->curr_block = log->l_curr_block; |
795 | __entry->tail_lsn = log->l_tail_lsn; | 797 | __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); |
796 | ), | 798 | ), |
797 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " | 799 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " |
798 | "t_unit_res %u t_flags %s reserve_headq 0x%p " | 800 | "t_unit_res %u t_flags %s reserveq %s " |
799 | "write_headq 0x%p grant_reserve_cycle %d " | 801 | "writeq %s grant_reserve_cycle %d " |
800 | "grant_reserve_bytes %d grant_write_cycle %d " | 802 | "grant_reserve_bytes %d grant_write_cycle %d " |
801 | "grant_write_bytes %d curr_cycle %d curr_block %d " | 803 | "grant_write_bytes %d curr_cycle %d curr_block %d " |
802 | "tail_cycle %d tail_block %d", | 804 | "tail_cycle %d tail_block %d", |
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
807 | __entry->curr_res, | 809 | __entry->curr_res, |
808 | __entry->unit_res, | 810 | __entry->unit_res, |
809 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), | 811 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), |
810 | __entry->reserve_headq, | 812 | __entry->reserveq ? "empty" : "active", |
811 | __entry->write_headq, | 813 | __entry->writeq ? "empty" : "active", |
812 | __entry->grant_reserve_cycle, | 814 | __entry->grant_reserve_cycle, |
813 | __entry->grant_reserve_bytes, | 815 | __entry->grant_reserve_bytes, |
814 | __entry->grant_write_cycle, | 816 | __entry->grant_write_cycle, |
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); | |||
835 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); | 837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); |
836 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); | 838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); |
837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); | 839 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); | ||
838 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); | 841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); |
839 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); | 842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); | 843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); |
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); | |||
842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); | 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); |
843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); | 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); |
844 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); | 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); |
848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); | ||
845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); | 849 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); |
846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); | 850 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); |
847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); | 851 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); |
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage); | |||
935 | DEFINE_PAGE_EVENT(xfs_releasepage); | 939 | DEFINE_PAGE_EVENT(xfs_releasepage); |
936 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 940 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
937 | 941 | ||
938 | DECLARE_EVENT_CLASS(xfs_iomap_class, | 942 | DECLARE_EVENT_CLASS(xfs_imap_class, |
939 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, | 943 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, |
940 | int flags, struct xfs_bmbt_irec *irec), | 944 | int type, struct xfs_bmbt_irec *irec), |
941 | TP_ARGS(ip, offset, count, flags, irec), | 945 | TP_ARGS(ip, offset, count, type, irec), |
942 | TP_STRUCT__entry( | 946 | TP_STRUCT__entry( |
943 | __field(dev_t, dev) | 947 | __field(dev_t, dev) |
944 | __field(xfs_ino_t, ino) | 948 | __field(xfs_ino_t, ino) |
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
946 | __field(loff_t, new_size) | 950 | __field(loff_t, new_size) |
947 | __field(loff_t, offset) | 951 | __field(loff_t, offset) |
948 | __field(size_t, count) | 952 | __field(size_t, count) |
949 | __field(int, flags) | 953 | __field(int, type) |
950 | __field(xfs_fileoff_t, startoff) | 954 | __field(xfs_fileoff_t, startoff) |
951 | __field(xfs_fsblock_t, startblock) | 955 | __field(xfs_fsblock_t, startblock) |
952 | __field(xfs_filblks_t, blockcount) | 956 | __field(xfs_filblks_t, blockcount) |
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
958 | __entry->new_size = ip->i_new_size; | 962 | __entry->new_size = ip->i_new_size; |
959 | __entry->offset = offset; | 963 | __entry->offset = offset; |
960 | __entry->count = count; | 964 | __entry->count = count; |
961 | __entry->flags = flags; | 965 | __entry->type = type; |
962 | __entry->startoff = irec ? irec->br_startoff : 0; | 966 | __entry->startoff = irec ? irec->br_startoff : 0; |
963 | __entry->startblock = irec ? irec->br_startblock : 0; | 967 | __entry->startblock = irec ? irec->br_startblock : 0; |
964 | __entry->blockcount = irec ? irec->br_blockcount : 0; | 968 | __entry->blockcount = irec ? irec->br_blockcount : 0; |
965 | ), | 969 | ), |
966 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 970 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " |
967 | "offset 0x%llx count %zd flags %s " | 971 | "offset 0x%llx count %zd type %s " |
968 | "startoff 0x%llx startblock %lld blockcount 0x%llx", | 972 | "startoff 0x%llx startblock %lld blockcount 0x%llx", |
969 | MAJOR(__entry->dev), MINOR(__entry->dev), | 973 | MAJOR(__entry->dev), MINOR(__entry->dev), |
970 | __entry->ino, | 974 | __entry->ino, |
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
972 | __entry->new_size, | 976 | __entry->new_size, |
973 | __entry->offset, | 977 | __entry->offset, |
974 | __entry->count, | 978 | __entry->count, |
975 | __print_flags(__entry->flags, "|", BMAPI_FLAGS), | 979 | __print_symbolic(__entry->type, XFS_IO_TYPES), |
976 | __entry->startoff, | 980 | __entry->startoff, |
977 | (__int64_t)__entry->startblock, | 981 | (__int64_t)__entry->startblock, |
978 | __entry->blockcount) | 982 | __entry->blockcount) |
979 | ) | 983 | ) |
980 | 984 | ||
981 | #define DEFINE_IOMAP_EVENT(name) \ | 985 | #define DEFINE_IOMAP_EVENT(name) \ |
982 | DEFINE_EVENT(xfs_iomap_class, name, \ | 986 | DEFINE_EVENT(xfs_imap_class, name, \ |
983 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ | 987 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ |
984 | int flags, struct xfs_bmbt_irec *irec), \ | 988 | int type, struct xfs_bmbt_irec *irec), \ |
985 | TP_ARGS(ip, offset, count, flags, irec)) | 989 | TP_ARGS(ip, offset, count, type, irec)) |
986 | DEFINE_IOMAP_EVENT(xfs_iomap_enter); | 990 | DEFINE_IOMAP_EVENT(xfs_map_blocks_found); |
987 | DEFINE_IOMAP_EVENT(xfs_iomap_found); | 991 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
988 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); | 992 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
993 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | ||
989 | 994 | ||
990 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 995 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
991 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 996 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ | |||
1022 | TP_ARGS(ip, offset, count)) | 1027 | TP_ARGS(ip, offset, count)) |
1023 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); | 1028 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); |
1024 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); | 1029 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); |
1030 | DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); | ||
1025 | 1031 | ||
1026 | 1032 | ||
1027 | TRACE_EVENT(xfs_itruncate_start, | 1033 | TRACE_EVENT(xfs_itruncate_start, |
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \ | |||
1420 | TP_PROTO(struct xfs_alloc_arg *args), \ | 1426 | TP_PROTO(struct xfs_alloc_arg *args), \ |
1421 | TP_ARGS(args)) | 1427 | TP_ARGS(args)) |
1422 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); | 1428 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); |
1429 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); | ||
1423 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); | 1430 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); |
1424 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); | 1431 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); |
1425 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | 1432 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index faf8e1a83a12..d22aa3103106 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy( | |||
149 | ASSERT(list_empty(&dqp->q_freelist)); | 149 | ASSERT(list_empty(&dqp->q_freelist)); |
150 | 150 | ||
151 | mutex_destroy(&dqp->q_qlock); | 151 | mutex_destroy(&dqp->q_qlock); |
152 | sv_destroy(&dqp->q_pinwait); | ||
153 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); | 152 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); |
154 | 153 | ||
155 | atomic_dec(&xfs_Gqm->qm_totaldquots); | 154 | atomic_dec(&xfs_Gqm->qm_totaldquots); |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 63c7a1a6c022..58632cc17f2d 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -227,7 +227,7 @@ typedef struct xfs_perag { | |||
227 | 227 | ||
228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
229 | 229 | ||
230 | rwlock_t pag_ici_lock; /* incore inode lock */ | 230 | spinlock_t pag_ici_lock; /* incore inode cache lock */ |
231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | 231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ |
232 | int pag_ici_reclaimable; /* reclaimable inodes */ | 232 | int pag_ici_reclaimable; /* reclaimable inodes */ |
233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ | 233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 112abc439ca5..fa8723f5870a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact( | |||
577 | xfs_extlen_t rlen; /* length of returned extent */ | 577 | xfs_extlen_t rlen; /* length of returned extent */ |
578 | 578 | ||
579 | ASSERT(args->alignment == 1); | 579 | ASSERT(args->alignment == 1); |
580 | |||
580 | /* | 581 | /* |
581 | * Allocate/initialize a cursor for the by-number freespace btree. | 582 | * Allocate/initialize a cursor for the by-number freespace btree. |
582 | */ | 583 | */ |
583 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 584 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
584 | args->agno, XFS_BTNUM_BNO); | 585 | args->agno, XFS_BTNUM_BNO); |
586 | |||
585 | /* | 587 | /* |
586 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). | 588 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). |
587 | * Look for the closest free block <= bno, it must contain bno | 589 | * Look for the closest free block <= bno, it must contain bno |
588 | * if any free block does. | 590 | * if any free block does. |
589 | */ | 591 | */ |
590 | if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) | 592 | error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); |
593 | if (error) | ||
591 | goto error0; | 594 | goto error0; |
592 | if (!i) { | 595 | if (!i) |
593 | /* | 596 | goto not_found; |
594 | * Didn't find it, return null. | 597 | |
595 | */ | ||
596 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
597 | args->agbno = NULLAGBLOCK; | ||
598 | return 0; | ||
599 | } | ||
600 | /* | 598 | /* |
601 | * Grab the freespace record. | 599 | * Grab the freespace record. |
602 | */ | 600 | */ |
603 | if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) | 601 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); |
602 | if (error) | ||
604 | goto error0; | 603 | goto error0; |
605 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 604 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
606 | ASSERT(fbno <= args->agbno); | 605 | ASSERT(fbno <= args->agbno); |
607 | minend = args->agbno + args->minlen; | 606 | minend = args->agbno + args->minlen; |
608 | maxend = args->agbno + args->maxlen; | 607 | maxend = args->agbno + args->maxlen; |
609 | fend = fbno + flen; | 608 | fend = fbno + flen; |
609 | |||
610 | /* | 610 | /* |
611 | * Give up if the freespace isn't long enough for the minimum request. | 611 | * Give up if the freespace isn't long enough for the minimum request. |
612 | */ | 612 | */ |
613 | if (fend < minend) { | 613 | if (fend < minend) |
614 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 614 | goto not_found; |
615 | args->agbno = NULLAGBLOCK; | 615 | |
616 | return 0; | ||
617 | } | ||
618 | /* | 616 | /* |
619 | * End of extent will be smaller of the freespace end and the | 617 | * End of extent will be smaller of the freespace end and the |
620 | * maximal requested end. | 618 | * maximal requested end. |
621 | */ | 619 | * |
622 | end = XFS_AGBLOCK_MIN(fend, maxend); | ||
623 | /* | ||
624 | * Fix the length according to mod and prod if given. | 620 | * Fix the length according to mod and prod if given. |
625 | */ | 621 | */ |
622 | end = XFS_AGBLOCK_MIN(fend, maxend); | ||
626 | args->len = end - args->agbno; | 623 | args->len = end - args->agbno; |
627 | xfs_alloc_fix_len(args); | 624 | xfs_alloc_fix_len(args); |
628 | if (!xfs_alloc_fix_minleft(args)) { | 625 | if (!xfs_alloc_fix_minleft(args)) |
629 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 626 | goto not_found; |
630 | return 0; | 627 | |
631 | } | ||
632 | rlen = args->len; | 628 | rlen = args->len; |
633 | ASSERT(args->agbno + rlen <= fend); | 629 | ASSERT(args->agbno + rlen <= fend); |
634 | end = args->agbno + rlen; | 630 | end = args->agbno + rlen; |
631 | |||
635 | /* | 632 | /* |
636 | * We are allocating agbno for rlen [agbno .. end] | 633 | * We are allocating agbno for rlen [agbno .. end] |
637 | * Allocate/initialize a cursor for the by-size btree. | 634 | * Allocate/initialize a cursor for the by-size btree. |
@@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact( | |||
640 | args->agno, XFS_BTNUM_CNT); | 637 | args->agno, XFS_BTNUM_CNT); |
641 | ASSERT(args->agbno + args->len <= | 638 | ASSERT(args->agbno + args->len <= |
642 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 639 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
643 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, | 640 | error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, |
644 | args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { | 641 | args->len, XFSA_FIXUP_BNO_OK); |
642 | if (error) { | ||
645 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | 643 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); |
646 | goto error0; | 644 | goto error0; |
647 | } | 645 | } |
646 | |||
648 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 647 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); |
649 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 648 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
650 | 649 | ||
651 | trace_xfs_alloc_exact_done(args); | ||
652 | args->wasfromfl = 0; | 650 | args->wasfromfl = 0; |
651 | trace_xfs_alloc_exact_done(args); | ||
652 | return 0; | ||
653 | |||
654 | not_found: | ||
655 | /* Didn't find it, return null. */ | ||
656 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
657 | args->agbno = NULLAGBLOCK; | ||
658 | trace_xfs_alloc_exact_notfound(args); | ||
653 | return 0; | 659 | return 0; |
654 | 660 | ||
655 | error0: | 661 | error0: |
@@ -659,6 +665,95 @@ error0: | |||
659 | } | 665 | } |
660 | 666 | ||
661 | /* | 667 | /* |
668 | * Search the btree in a given direction via the search cursor and compare | ||
669 | * the records found against the good extent we've already found. | ||
670 | */ | ||
671 | STATIC int | ||
672 | xfs_alloc_find_best_extent( | ||
673 | struct xfs_alloc_arg *args, /* allocation argument structure */ | ||
674 | struct xfs_btree_cur **gcur, /* good cursor */ | ||
675 | struct xfs_btree_cur **scur, /* searching cursor */ | ||
676 | xfs_agblock_t gdiff, /* difference for search comparison */ | ||
677 | xfs_agblock_t *sbno, /* extent found by search */ | ||
678 | xfs_extlen_t *slen, | ||
679 | xfs_extlen_t *slena, /* aligned length */ | ||
680 | int dir) /* 0 = search right, 1 = search left */ | ||
681 | { | ||
682 | xfs_agblock_t bno; | ||
683 | xfs_agblock_t new; | ||
684 | xfs_agblock_t sdiff; | ||
685 | int error; | ||
686 | int i; | ||
687 | |||
688 | /* The good extent is perfect, no need to search. */ | ||
689 | if (!gdiff) | ||
690 | goto out_use_good; | ||
691 | |||
692 | /* | ||
693 | * Look until we find a better one, run out of space or run off the end. | ||
694 | */ | ||
695 | do { | ||
696 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); | ||
697 | if (error) | ||
698 | goto error0; | ||
699 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
700 | xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, | ||
701 | args->minlen, &bno, slena); | ||
702 | |||
703 | /* | ||
704 | * The good extent is closer than this one. | ||
705 | */ | ||
706 | if (!dir) { | ||
707 | if (bno >= args->agbno + gdiff) | ||
708 | goto out_use_good; | ||
709 | } else { | ||
710 | if (bno <= args->agbno - gdiff) | ||
711 | goto out_use_good; | ||
712 | } | ||
713 | |||
714 | /* | ||
715 | * Same distance, compare length and pick the best. | ||
716 | */ | ||
717 | if (*slena >= args->minlen) { | ||
718 | args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); | ||
719 | xfs_alloc_fix_len(args); | ||
720 | |||
721 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
722 | args->alignment, *sbno, | ||
723 | *slen, &new); | ||
724 | |||
725 | /* | ||
726 | * Choose closer size and invalidate other cursor. | ||
727 | */ | ||
728 | if (sdiff < gdiff) | ||
729 | goto out_use_search; | ||
730 | goto out_use_good; | ||
731 | } | ||
732 | |||
733 | if (!dir) | ||
734 | error = xfs_btree_increment(*scur, 0, &i); | ||
735 | else | ||
736 | error = xfs_btree_decrement(*scur, 0, &i); | ||
737 | if (error) | ||
738 | goto error0; | ||
739 | } while (i); | ||
740 | |||
741 | out_use_good: | ||
742 | xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); | ||
743 | *scur = NULL; | ||
744 | return 0; | ||
745 | |||
746 | out_use_search: | ||
747 | xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); | ||
748 | *gcur = NULL; | ||
749 | return 0; | ||
750 | |||
751 | error0: | ||
752 | /* caller invalidates cursors */ | ||
753 | return error; | ||
754 | } | ||
755 | |||
756 | /* | ||
662 | * Allocate a variable extent near bno in the allocation group agno. | 757 | * Allocate a variable extent near bno in the allocation group agno. |
663 | * Extent's length (returned in len) will be between minlen and maxlen, | 758 | * Extent's length (returned in len) will be between minlen and maxlen, |
664 | * and of the form k * prod + mod unless there's nothing that large. | 759 | * and of the form k * prod + mod unless there's nothing that large. |
@@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near( | |||
925 | } | 1020 | } |
926 | } | 1021 | } |
927 | } while (bno_cur_lt || bno_cur_gt); | 1022 | } while (bno_cur_lt || bno_cur_gt); |
1023 | |||
928 | /* | 1024 | /* |
929 | * Got both cursors still active, need to find better entry. | 1025 | * Got both cursors still active, need to find better entry. |
930 | */ | 1026 | */ |
931 | if (bno_cur_lt && bno_cur_gt) { | 1027 | if (bno_cur_lt && bno_cur_gt) { |
932 | /* | ||
933 | * Left side is long enough, look for a right side entry. | ||
934 | */ | ||
935 | if (ltlena >= args->minlen) { | 1028 | if (ltlena >= args->minlen) { |
936 | /* | 1029 | /* |
937 | * Fix up the length. | 1030 | * Left side is good, look for a right side entry. |
938 | */ | 1031 | */ |
939 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1032 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
940 | xfs_alloc_fix_len(args); | 1033 | xfs_alloc_fix_len(args); |
941 | rlen = args->len; | 1034 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
942 | ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, | ||
943 | args->alignment, ltbno, ltlen, <new); | 1035 | args->alignment, ltbno, ltlen, <new); |
1036 | |||
1037 | error = xfs_alloc_find_best_extent(args, | ||
1038 | &bno_cur_lt, &bno_cur_gt, | ||
1039 | ltdiff, >bno, >len, >lena, | ||
1040 | 0 /* search right */); | ||
1041 | } else { | ||
1042 | ASSERT(gtlena >= args->minlen); | ||
1043 | |||
944 | /* | 1044 | /* |
945 | * Not perfect. | 1045 | * Right side is good, look for a left side entry. |
946 | */ | ||
947 | if (ltdiff) { | ||
948 | /* | ||
949 | * Look until we find a better one, run out of | ||
950 | * space, or run off the end. | ||
951 | */ | ||
952 | while (bno_cur_lt && bno_cur_gt) { | ||
953 | if ((error = xfs_alloc_get_rec( | ||
954 | bno_cur_gt, >bno, | ||
955 | >len, &i))) | ||
956 | goto error0; | ||
957 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
958 | xfs_alloc_compute_aligned(gtbno, gtlen, | ||
959 | args->alignment, args->minlen, | ||
960 | >bnoa, >lena); | ||
961 | /* | ||
962 | * The left one is clearly better. | ||
963 | */ | ||
964 | if (gtbnoa >= args->agbno + ltdiff) { | ||
965 | xfs_btree_del_cursor( | ||
966 | bno_cur_gt, | ||
967 | XFS_BTREE_NOERROR); | ||
968 | bno_cur_gt = NULL; | ||
969 | break; | ||
970 | } | ||
971 | /* | ||
972 | * If we reach a big enough entry, | ||
973 | * compare the two and pick the best. | ||
974 | */ | ||
975 | if (gtlena >= args->minlen) { | ||
976 | args->len = | ||
977 | XFS_EXTLEN_MIN(gtlena, | ||
978 | args->maxlen); | ||
979 | xfs_alloc_fix_len(args); | ||
980 | rlen = args->len; | ||
981 | gtdiff = xfs_alloc_compute_diff( | ||
982 | args->agbno, rlen, | ||
983 | args->alignment, | ||
984 | gtbno, gtlen, >new); | ||
985 | /* | ||
986 | * Right side is better. | ||
987 | */ | ||
988 | if (gtdiff < ltdiff) { | ||
989 | xfs_btree_del_cursor( | ||
990 | bno_cur_lt, | ||
991 | XFS_BTREE_NOERROR); | ||
992 | bno_cur_lt = NULL; | ||
993 | } | ||
994 | /* | ||
995 | * Left side is better. | ||
996 | */ | ||
997 | else { | ||
998 | xfs_btree_del_cursor( | ||
999 | bno_cur_gt, | ||
1000 | XFS_BTREE_NOERROR); | ||
1001 | bno_cur_gt = NULL; | ||
1002 | } | ||
1003 | break; | ||
1004 | } | ||
1005 | /* | ||
1006 | * Fell off the right end. | ||
1007 | */ | ||
1008 | if ((error = xfs_btree_increment( | ||
1009 | bno_cur_gt, 0, &i))) | ||
1010 | goto error0; | ||
1011 | if (!i) { | ||
1012 | xfs_btree_del_cursor( | ||
1013 | bno_cur_gt, | ||
1014 | XFS_BTREE_NOERROR); | ||
1015 | bno_cur_gt = NULL; | ||
1016 | break; | ||
1017 | } | ||
1018 | } | ||
1019 | } | ||
1020 | /* | ||
1021 | * The left side is perfect, trash the right side. | ||
1022 | */ | ||
1023 | else { | ||
1024 | xfs_btree_del_cursor(bno_cur_gt, | ||
1025 | XFS_BTREE_NOERROR); | ||
1026 | bno_cur_gt = NULL; | ||
1027 | } | ||
1028 | } | ||
1029 | /* | ||
1030 | * It's the right side that was found first, look left. | ||
1031 | */ | ||
1032 | else { | ||
1033 | /* | ||
1034 | * Fix up the length. | ||
1035 | */ | 1046 | */ |
1036 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1047 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
1037 | xfs_alloc_fix_len(args); | 1048 | xfs_alloc_fix_len(args); |
1038 | rlen = args->len; | 1049 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1039 | gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, | ||
1040 | args->alignment, gtbno, gtlen, >new); | 1050 | args->alignment, gtbno, gtlen, >new); |
1041 | /* | 1051 | |
1042 | * Right side entry isn't perfect. | 1052 | error = xfs_alloc_find_best_extent(args, |
1043 | */ | 1053 | &bno_cur_gt, &bno_cur_lt, |
1044 | if (gtdiff) { | 1054 | gtdiff, <bno, <len, <lena, |
1045 | /* | 1055 | 1 /* search left */); |
1046 | * Look until we find a better one, run out of | ||
1047 | * space, or run off the end. | ||
1048 | */ | ||
1049 | while (bno_cur_lt && bno_cur_gt) { | ||
1050 | if ((error = xfs_alloc_get_rec( | ||
1051 | bno_cur_lt, <bno, | ||
1052 | <len, &i))) | ||
1053 | goto error0; | ||
1054 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1055 | xfs_alloc_compute_aligned(ltbno, ltlen, | ||
1056 | args->alignment, args->minlen, | ||
1057 | <bnoa, <lena); | ||
1058 | /* | ||
1059 | * The right one is clearly better. | ||
1060 | */ | ||
1061 | if (ltbnoa <= args->agbno - gtdiff) { | ||
1062 | xfs_btree_del_cursor( | ||
1063 | bno_cur_lt, | ||
1064 | XFS_BTREE_NOERROR); | ||
1065 | bno_cur_lt = NULL; | ||
1066 | break; | ||
1067 | } | ||
1068 | /* | ||
1069 | * If we reach a big enough entry, | ||
1070 | * compare the two and pick the best. | ||
1071 | */ | ||
1072 | if (ltlena >= args->minlen) { | ||
1073 | args->len = XFS_EXTLEN_MIN( | ||
1074 | ltlena, args->maxlen); | ||
1075 | xfs_alloc_fix_len(args); | ||
1076 | rlen = args->len; | ||
1077 | ltdiff = xfs_alloc_compute_diff( | ||
1078 | args->agbno, rlen, | ||
1079 | args->alignment, | ||
1080 | ltbno, ltlen, <new); | ||
1081 | /* | ||
1082 | * Left side is better. | ||
1083 | */ | ||
1084 | if (ltdiff < gtdiff) { | ||
1085 | xfs_btree_del_cursor( | ||
1086 | bno_cur_gt, | ||
1087 | XFS_BTREE_NOERROR); | ||
1088 | bno_cur_gt = NULL; | ||
1089 | } | ||
1090 | /* | ||
1091 | * Right side is better. | ||
1092 | */ | ||
1093 | else { | ||
1094 | xfs_btree_del_cursor( | ||
1095 | bno_cur_lt, | ||
1096 | XFS_BTREE_NOERROR); | ||
1097 | bno_cur_lt = NULL; | ||
1098 | } | ||
1099 | break; | ||
1100 | } | ||
1101 | /* | ||
1102 | * Fell off the left end. | ||
1103 | */ | ||
1104 | if ((error = xfs_btree_decrement( | ||
1105 | bno_cur_lt, 0, &i))) | ||
1106 | goto error0; | ||
1107 | if (!i) { | ||
1108 | xfs_btree_del_cursor(bno_cur_lt, | ||
1109 | XFS_BTREE_NOERROR); | ||
1110 | bno_cur_lt = NULL; | ||
1111 | break; | ||
1112 | } | ||
1113 | } | ||
1114 | } | ||
1115 | /* | ||
1116 | * The right side is perfect, trash the left side. | ||
1117 | */ | ||
1118 | else { | ||
1119 | xfs_btree_del_cursor(bno_cur_lt, | ||
1120 | XFS_BTREE_NOERROR); | ||
1121 | bno_cur_lt = NULL; | ||
1122 | } | ||
1123 | } | 1056 | } |
1057 | |||
1058 | if (error) | ||
1059 | goto error0; | ||
1124 | } | 1060 | } |
1061 | |||
1125 | /* | 1062 | /* |
1126 | * If we couldn't get anything, give up. | 1063 | * If we couldn't get anything, give up. |
1127 | */ | 1064 | */ |
@@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near( | |||
1130 | args->agbno = NULLAGBLOCK; | 1067 | args->agbno = NULLAGBLOCK; |
1131 | return 0; | 1068 | return 0; |
1132 | } | 1069 | } |
1070 | |||
1133 | /* | 1071 | /* |
1134 | * At this point we have selected a freespace entry, either to the | 1072 | * At this point we have selected a freespace entry, either to the |
1135 | * left or to the right. If it's on the right, copy all the | 1073 | * left or to the right. If it's on the right, copy all the |
@@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near( | |||
1146 | j = 1; | 1084 | j = 1; |
1147 | } else | 1085 | } else |
1148 | j = 0; | 1086 | j = 0; |
1087 | |||
1149 | /* | 1088 | /* |
1150 | * Fix up the length and compute the useful address. | 1089 | * Fix up the length and compute the useful address. |
1151 | */ | 1090 | */ |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index a6cff8edcdb6..71e90dc2aeb1 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) | |||
637 | * It didn't all fit, so we have to sort everything on hashval. | 637 | * It didn't all fit, so we have to sort everything on hashval. |
638 | */ | 638 | */ |
639 | sbsize = sf->hdr.count * sizeof(*sbuf); | 639 | sbsize = sf->hdr.count * sizeof(*sbuf); |
640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); | 640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); |
641 | 641 | ||
642 | /* | 642 | /* |
643 | * Scan the attribute list for the rest of the entries, storing | 643 | * Scan the attribute list for the rest of the entries, storing |
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) | |||
2386 | args.dp = context->dp; | 2386 | args.dp = context->dp; |
2387 | args.whichfork = XFS_ATTR_FORK; | 2387 | args.whichfork = XFS_ATTR_FORK; |
2388 | args.valuelen = valuelen; | 2388 | args.valuelen = valuelen; |
2389 | args.value = kmem_alloc(valuelen, KM_SLEEP); | 2389 | args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); |
2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); | 2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); |
2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); | 2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); |
2392 | retval = xfs_attr_rmtval_get(&args); | 2392 | retval = xfs_attr_rmtval_get(&args); |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 04f9cca8da7e..2f9e97c128a0 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -634,9 +634,8 @@ xfs_btree_read_bufl( | |||
634 | return error; | 634 | return error; |
635 | } | 635 | } |
636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | 636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); |
637 | if (bp != NULL) { | 637 | if (bp) |
638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | 638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); |
639 | } | ||
640 | *bpp = bp; | 639 | *bpp = bp; |
641 | return 0; | 640 | return 0; |
642 | } | 641 | } |
@@ -944,13 +943,13 @@ xfs_btree_set_refs( | |||
944 | switch (cur->bc_btnum) { | 943 | switch (cur->bc_btnum) { |
945 | case XFS_BTNUM_BNO: | 944 | case XFS_BTNUM_BNO: |
946 | case XFS_BTNUM_CNT: | 945 | case XFS_BTNUM_CNT: |
947 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); | 946 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); |
948 | break; | 947 | break; |
949 | case XFS_BTNUM_INO: | 948 | case XFS_BTNUM_INO: |
950 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); | 949 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); |
951 | break; | 950 | break; |
952 | case XFS_BTNUM_BMAP: | 951 | case XFS_BTNUM_BMAP: |
953 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); | 952 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); |
954 | break; | 953 | break; |
955 | default: | 954 | default: |
956 | ASSERT(0); | 955 | ASSERT(0); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 2686d0d54c5b..ed2b65f3f8b9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -142,7 +142,7 @@ xfs_buf_item_log_check( | |||
142 | #endif | 142 | #endif |
143 | 143 | ||
144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | 144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); |
145 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); | 145 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * This returns the number of log iovecs needed to log the | 148 | * This returns the number of log iovecs needed to log the |
@@ -450,7 +450,7 @@ xfs_buf_item_unpin( | |||
450 | * xfs_trans_ail_delete() drops the AIL lock. | 450 | * xfs_trans_ail_delete() drops the AIL lock. |
451 | */ | 451 | */ |
452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { | 452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
453 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); | 453 | xfs_buf_do_callbacks(bp); |
454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
455 | XFS_BUF_CLR_IODONE_FUNC(bp); | 455 | XFS_BUF_CLR_IODONE_FUNC(bp); |
456 | } else { | 456 | } else { |
@@ -918,15 +918,26 @@ xfs_buf_attach_iodone( | |||
918 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); | 918 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); |
919 | } | 919 | } |
920 | 920 | ||
921 | /* | ||
922 | * We can have many callbacks on a buffer. Running the callbacks individually | ||
923 | * can cause a lot of contention on the AIL lock, so we allow for a single | ||
924 | * callback to be able to scan the remaining lip->li_bio_list for other items | ||
925 | * of the same type and callback to be processed in the first call. | ||
926 | * | ||
927 | * As a result, the loop walking the callback list below will also modify the | ||
928 | * list. it removes the first item from the list and then runs the callback. | ||
929 | * The loop then restarts from the new head of the list. This allows the | ||
930 | * callback to scan and modify the list attached to the buffer and we don't | ||
931 | * have to care about maintaining a next item pointer. | ||
932 | */ | ||
921 | STATIC void | 933 | STATIC void |
922 | xfs_buf_do_callbacks( | 934 | xfs_buf_do_callbacks( |
923 | xfs_buf_t *bp, | 935 | struct xfs_buf *bp) |
924 | xfs_log_item_t *lip) | ||
925 | { | 936 | { |
926 | xfs_log_item_t *nlip; | 937 | struct xfs_log_item *lip; |
927 | 938 | ||
928 | while (lip != NULL) { | 939 | while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { |
929 | nlip = lip->li_bio_list; | 940 | XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); |
930 | ASSERT(lip->li_cb != NULL); | 941 | ASSERT(lip->li_cb != NULL); |
931 | /* | 942 | /* |
932 | * Clear the next pointer so we don't have any | 943 | * Clear the next pointer so we don't have any |
@@ -936,7 +947,6 @@ xfs_buf_do_callbacks( | |||
936 | */ | 947 | */ |
937 | lip->li_bio_list = NULL; | 948 | lip->li_bio_list = NULL; |
938 | lip->li_cb(bp, lip); | 949 | lip->li_cb(bp, lip); |
939 | lip = nlip; | ||
940 | } | 950 | } |
941 | } | 951 | } |
942 | 952 | ||
@@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks( | |||
970 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 980 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); |
971 | XFS_BUF_SUPER_STALE(bp); | 981 | XFS_BUF_SUPER_STALE(bp); |
972 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | 982 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
973 | xfs_buf_do_callbacks(bp, lip); | 983 | xfs_buf_do_callbacks(bp); |
974 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 984 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
975 | XFS_BUF_CLR_IODONE_FUNC(bp); | 985 | XFS_BUF_CLR_IODONE_FUNC(bp); |
976 | xfs_buf_ioend(bp, 0); | 986 | xfs_buf_ioend(bp, 0); |
@@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks( | |||
1029 | return; | 1039 | return; |
1030 | } | 1040 | } |
1031 | 1041 | ||
1032 | xfs_buf_do_callbacks(bp, lip); | 1042 | xfs_buf_do_callbacks(bp); |
1033 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1043 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1034 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1044 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1035 | xfs_buf_ioend(bp, 0); | 1045 | xfs_buf_ioend(bp, 0); |
@@ -1063,7 +1073,7 @@ xfs_buf_error_relse( | |||
1063 | * We have to unpin the pinned buffers so do the | 1073 | * We have to unpin the pinned buffers so do the |
1064 | * callbacks. | 1074 | * callbacks. |
1065 | */ | 1075 | */ |
1066 | xfs_buf_do_callbacks(bp, lip); | 1076 | xfs_buf_do_callbacks(bp); |
1067 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1077 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1068 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1078 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1069 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1079 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); |
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 0e2ed43f16c7..b6ecd2061e7c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item { | |||
105 | xfs_buf_log_format_t bli_format; /* in-log header */ | 105 | xfs_buf_log_format_t bli_format; /* in-log header */ |
106 | } xfs_buf_log_item_t; | 106 | } xfs_buf_log_item_t; |
107 | 107 | ||
108 | /* | ||
109 | * This structure is used during recovery to record the buf log | ||
110 | * items which have been canceled and should not be replayed. | ||
111 | */ | ||
112 | typedef struct xfs_buf_cancel { | ||
113 | xfs_daddr_t bc_blkno; | ||
114 | uint bc_len; | ||
115 | int bc_refcount; | ||
116 | struct xfs_buf_cancel *bc_next; | ||
117 | } xfs_buf_cancel_t; | ||
118 | |||
119 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); | 108 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); |
120 | void xfs_buf_item_relse(struct xfs_buf *); | 109 | void xfs_buf_item_relse(struct xfs_buf *); |
121 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); | 110 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index a55e687bf562..75f2ef60e579 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -48,6 +48,28 @@ xfs_efi_item_free( | |||
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * Freeing the efi requires that we remove it from the AIL if it has already | ||
52 | * been placed there. However, the EFI may not yet have been placed in the AIL | ||
53 | * when called by xfs_efi_release() from EFD processing due to the ordering of | ||
54 | * committed vs unpin operations in bulk insert operations. Hence the | ||
55 | * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees | ||
56 | * the EFI. | ||
57 | */ | ||
58 | STATIC void | ||
59 | __xfs_efi_release( | ||
60 | struct xfs_efi_log_item *efip) | ||
61 | { | ||
62 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | ||
63 | |||
64 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { | ||
65 | spin_lock(&ailp->xa_lock); | ||
66 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
67 | xfs_trans_ail_delete(ailp, &efip->efi_item); | ||
68 | xfs_efi_item_free(efip); | ||
69 | } | ||
70 | } | ||
71 | |||
72 | /* | ||
51 | * This returns the number of iovecs needed to log the given efi item. | 73 | * This returns the number of iovecs needed to log the given efi item. |
52 | * We only need 1 iovec for an efi item. It just logs the efi_log_format | 74 | * We only need 1 iovec for an efi item. It just logs the efi_log_format |
53 | * structure. | 75 | * structure. |
@@ -74,7 +96,8 @@ xfs_efi_item_format( | |||
74 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 96 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
75 | uint size; | 97 | uint size; |
76 | 98 | ||
77 | ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); | 99 | ASSERT(atomic_read(&efip->efi_next_extent) == |
100 | efip->efi_format.efi_nextents); | ||
78 | 101 | ||
79 | efip->efi_format.efi_type = XFS_LI_EFI; | 102 | efip->efi_format.efi_type = XFS_LI_EFI; |
80 | 103 | ||
@@ -99,10 +122,12 @@ xfs_efi_item_pin( | |||
99 | } | 122 | } |
100 | 123 | ||
101 | /* | 124 | /* |
102 | * While EFIs cannot really be pinned, the unpin operation is the | 125 | * While EFIs cannot really be pinned, the unpin operation is the last place at |
103 | * last place at which the EFI is manipulated during a transaction. | 126 | * which the EFI is manipulated during a transaction. If we are being asked to |
104 | * Here we coordinate with xfs_efi_cancel() to determine who gets to | 127 | * remove the EFI it's because the transaction has been cancelled and by |
105 | * free the EFI. | 128 | * definition that means the EFI cannot be in the AIL so remove it from the |
129 | * transaction and free it. Otherwise coordinate with xfs_efi_release() (via | ||
130 | * XFS_EFI_COMMITTED) to determine who gets to free the EFI. | ||
106 | */ | 131 | */ |
107 | STATIC void | 132 | STATIC void |
108 | xfs_efi_item_unpin( | 133 | xfs_efi_item_unpin( |
@@ -110,20 +135,14 @@ xfs_efi_item_unpin( | |||
110 | int remove) | 135 | int remove) |
111 | { | 136 | { |
112 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 137 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
113 | struct xfs_ail *ailp = lip->li_ailp; | ||
114 | |||
115 | spin_lock(&ailp->xa_lock); | ||
116 | if (efip->efi_flags & XFS_EFI_CANCELED) { | ||
117 | if (remove) | ||
118 | xfs_trans_del_item(lip); | ||
119 | 138 | ||
120 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 139 | if (remove) { |
121 | xfs_trans_ail_delete(ailp, lip); | 140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); |
141 | xfs_trans_del_item(lip); | ||
122 | xfs_efi_item_free(efip); | 142 | xfs_efi_item_free(efip); |
123 | } else { | 143 | return; |
124 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
125 | spin_unlock(&ailp->xa_lock); | ||
126 | } | 144 | } |
145 | __xfs_efi_release(efip); | ||
127 | } | 146 | } |
128 | 147 | ||
129 | /* | 148 | /* |
@@ -152,16 +171,20 @@ xfs_efi_item_unlock( | |||
152 | } | 171 | } |
153 | 172 | ||
154 | /* | 173 | /* |
155 | * The EFI is logged only once and cannot be moved in the log, so | 174 | * The EFI is logged only once and cannot be moved in the log, so simply return |
156 | * simply return the lsn at which it's been logged. The canceled | 175 | * the lsn at which it's been logged. For bulk transaction committed |
157 | * flag is not paid any attention here. Checking for that is delayed | 176 | * processing, the EFI may be processed but not yet unpinned prior to the EFD |
158 | * until the EFI is unpinned. | 177 | * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected |
178 | * when processing the EFD. | ||
159 | */ | 179 | */ |
160 | STATIC xfs_lsn_t | 180 | STATIC xfs_lsn_t |
161 | xfs_efi_item_committed( | 181 | xfs_efi_item_committed( |
162 | struct xfs_log_item *lip, | 182 | struct xfs_log_item *lip, |
163 | xfs_lsn_t lsn) | 183 | xfs_lsn_t lsn) |
164 | { | 184 | { |
185 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | ||
186 | |||
187 | set_bit(XFS_EFI_COMMITTED, &efip->efi_flags); | ||
165 | return lsn; | 188 | return lsn; |
166 | } | 189 | } |
167 | 190 | ||
@@ -230,6 +253,7 @@ xfs_efi_init( | |||
230 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); | 253 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); |
231 | efip->efi_format.efi_nextents = nextents; | 254 | efip->efi_format.efi_nextents = nextents; |
232 | efip->efi_format.efi_id = (__psint_t)(void*)efip; | 255 | efip->efi_format.efi_id = (__psint_t)(void*)efip; |
256 | atomic_set(&efip->efi_next_extent, 0); | ||
233 | 257 | ||
234 | return efip; | 258 | return efip; |
235 | } | 259 | } |
@@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) | |||
289 | } | 313 | } |
290 | 314 | ||
291 | /* | 315 | /* |
292 | * This is called by the efd item code below to release references to | 316 | * This is called by the efd item code below to release references to the given |
293 | * the given efi item. Each efd calls this with the number of | 317 | * efi item. Each efd calls this with the number of extents that it has |
294 | * extents that it has logged, and when the sum of these reaches | 318 | * logged, and when the sum of these reaches the total number of extents logged |
295 | * the total number of extents logged by this efi item we can free | 319 | * by this efi item we can free the efi item. |
296 | * the efi item. | ||
297 | * | ||
298 | * Freeing the efi item requires that we remove it from the AIL. | ||
299 | * We'll use the AIL lock to protect our counters as well as | ||
300 | * the removal from the AIL. | ||
301 | */ | 320 | */ |
302 | void | 321 | void |
303 | xfs_efi_release(xfs_efi_log_item_t *efip, | 322 | xfs_efi_release(xfs_efi_log_item_t *efip, |
304 | uint nextents) | 323 | uint nextents) |
305 | { | 324 | { |
306 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | 325 | ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); |
307 | int extents_left; | 326 | if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) |
308 | 327 | __xfs_efi_release(efip); | |
309 | ASSERT(efip->efi_next_extent > 0); | ||
310 | ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); | ||
311 | |||
312 | spin_lock(&ailp->xa_lock); | ||
313 | ASSERT(efip->efi_next_extent >= nextents); | ||
314 | efip->efi_next_extent -= nextents; | ||
315 | extents_left = efip->efi_next_extent; | ||
316 | if (extents_left == 0) { | ||
317 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
318 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); | ||
319 | xfs_efi_item_free(efip); | ||
320 | } else { | ||
321 | spin_unlock(&ailp->xa_lock); | ||
322 | } | ||
323 | } | 328 | } |
324 | 329 | ||
325 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) | 330 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) |
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 0d22c56fdf64..375f68e42531 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h | |||
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 { | |||
111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 | 111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * Define EFI flags. | 114 | * Define EFI flag bits. Manipulated by set/clear/test_bit operators. |
115 | */ | 115 | */ |
116 | #define XFS_EFI_RECOVERED 0x1 | 116 | #define XFS_EFI_RECOVERED 1 |
117 | #define XFS_EFI_COMMITTED 0x2 | 117 | #define XFS_EFI_COMMITTED 2 |
118 | #define XFS_EFI_CANCELED 0x4 | ||
119 | 118 | ||
120 | /* | 119 | /* |
121 | * This is the "extent free intention" log item. It is used | 120 | * This is the "extent free intention" log item. It is used |
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 { | |||
125 | */ | 124 | */ |
126 | typedef struct xfs_efi_log_item { | 125 | typedef struct xfs_efi_log_item { |
127 | xfs_log_item_t efi_item; | 126 | xfs_log_item_t efi_item; |
128 | uint efi_flags; /* misc flags */ | 127 | atomic_t efi_next_extent; |
129 | uint efi_next_extent; | 128 | unsigned long efi_flags; /* misc flags */ |
130 | xfs_efi_log_format_t efi_format; | 129 | xfs_efi_log_format_t efi_format; |
131 | } xfs_efi_log_item_t; | 130 | } xfs_efi_log_item_t; |
132 | 131 | ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a7c116e814af..f56d30e8040c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -374,6 +374,7 @@ xfs_growfs_data_private( | |||
374 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; | 374 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; |
375 | } else | 375 | } else |
376 | mp->m_maxicount = 0; | 376 | mp->m_maxicount = 0; |
377 | xfs_set_low_space_thresholds(mp); | ||
377 | 378 | ||
378 | /* update secondary superblocks. */ | 379 | /* update secondary superblocks. */ |
379 | for (agno = 1; agno < nagcount; agno++) { | 380 | for (agno = 1; agno < nagcount; agno++) { |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d7de5a3f7867..cb9b6d1469f7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -43,6 +43,17 @@ | |||
43 | 43 | ||
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Define xfs inode iolock lockdep classes. We need to ensure that all active | ||
47 | * inodes are considered the same for lockdep purposes, including inodes that | ||
48 | * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to | ||
49 | * guarantee the locks are considered the same when there are multiple lock | ||
50 | * initialisation siteѕ. Also, define a reclaimable inode class so it is | ||
51 | * obvious in lockdep reports which class the report is against. | ||
52 | */ | ||
53 | static struct lock_class_key xfs_iolock_active; | ||
54 | struct lock_class_key xfs_iolock_reclaimable; | ||
55 | |||
56 | /* | ||
46 | * Allocate and initialise an xfs_inode. | 57 | * Allocate and initialise an xfs_inode. |
47 | */ | 58 | */ |
48 | STATIC struct xfs_inode * | 59 | STATIC struct xfs_inode * |
@@ -69,8 +80,11 @@ xfs_inode_alloc( | |||
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
70 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
71 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
83 | ASSERT(ip->i_ino == 0); | ||
72 | 84 | ||
73 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
87 | &xfs_iolock_active, "xfs_iolock_active"); | ||
74 | 88 | ||
75 | /* initialise the xfs inode */ | 89 | /* initialise the xfs inode */ |
76 | ip->i_ino = ino; | 90 | ip->i_ino = ino; |
@@ -85,9 +99,6 @@ xfs_inode_alloc( | |||
85 | ip->i_size = 0; | 99 | ip->i_size = 0; |
86 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
87 | 101 | ||
88 | /* prevent anyone from using this yet */ | ||
89 | VFS_I(ip)->i_state = I_NEW; | ||
90 | |||
91 | return ip; | 102 | return ip; |
92 | } | 103 | } |
93 | 104 | ||
@@ -145,7 +156,18 @@ xfs_inode_free( | |||
145 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 156 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
146 | ASSERT(completion_done(&ip->i_flush)); | 157 | ASSERT(completion_done(&ip->i_flush)); |
147 | 158 | ||
148 | call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); | 159 | /* |
160 | * Because we use RCU freeing we need to ensure the inode always | ||
161 | * appears to be reclaimed with an invalid inode number when in the | ||
162 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
163 | * races. | ||
164 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | ||
166 | ip->i_flags = XFS_IRECLAIM; | ||
167 | ip->i_ino = 0; | ||
168 | spin_unlock(&ip->i_flags_lock); | ||
169 | |||
170 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
149 | } | 171 | } |
150 | 172 | ||
151 | /* | 173 | /* |
@@ -155,14 +177,29 @@ static int | |||
155 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
156 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
157 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
180 | xfs_ino_t ino, | ||
158 | int flags, | 181 | int flags, |
159 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
160 | { | 183 | { |
161 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
162 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
163 | int error; | 186 | int error; |
164 | 187 | ||
188 | /* | ||
189 | * check for re-use of an inode within an RCU grace period due to the | ||
190 | * radix tree nodes not being updated yet. We monitor for this by | ||
191 | * setting the inode number to zero before freeing the inode structure. | ||
192 | * If the inode has been reallocated and set up, then the inode number | ||
193 | * will not match, so check for that, too. | ||
194 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
196 | if (ip->i_ino != ino) { | ||
197 | trace_xfs_iget_skip(ip); | ||
198 | XFS_STATS_INC(xs_ig_frecycle); | ||
199 | error = EAGAIN; | ||
200 | goto out_error; | ||
201 | } | ||
202 | |||
166 | 203 | ||
167 | /* | 204 | /* |
168 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
@@ -205,7 +242,7 @@ xfs_iget_cache_hit( | |||
205 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
206 | 243 | ||
207 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
208 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
209 | 246 | ||
210 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
211 | if (error) { | 248 | if (error) { |
@@ -213,7 +250,7 @@ xfs_iget_cache_hit( | |||
213 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
214 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
215 | */ | 252 | */ |
216 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
217 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
218 | 255 | ||
219 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~XFS_INEW; |
@@ -223,14 +260,20 @@ xfs_iget_cache_hit( | |||
223 | goto out_error; | 260 | goto out_error; |
224 | } | 261 | } |
225 | 262 | ||
226 | write_lock(&pag->pag_ici_lock); | 263 | spin_lock(&pag->pag_ici_lock); |
227 | spin_lock(&ip->i_flags_lock); | 264 | spin_lock(&ip->i_flags_lock); |
228 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); | 265 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); |
229 | ip->i_flags |= XFS_INEW; | 266 | ip->i_flags |= XFS_INEW; |
230 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 267 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
231 | inode->i_state = I_NEW; | 268 | inode->i_state = I_NEW; |
269 | |||
270 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
271 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
272 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
273 | &xfs_iolock_active, "xfs_iolock_active"); | ||
274 | |||
232 | spin_unlock(&ip->i_flags_lock); | 275 | spin_unlock(&ip->i_flags_lock); |
233 | write_unlock(&pag->pag_ici_lock); | 276 | spin_unlock(&pag->pag_ici_lock); |
234 | } else { | 277 | } else { |
235 | /* If the VFS inode is being torn down, pause and try again. */ | 278 | /* If the VFS inode is being torn down, pause and try again. */ |
236 | if (!igrab(inode)) { | 279 | if (!igrab(inode)) { |
@@ -241,7 +284,7 @@ xfs_iget_cache_hit( | |||
241 | 284 | ||
242 | /* We've got a live one. */ | 285 | /* We've got a live one. */ |
243 | spin_unlock(&ip->i_flags_lock); | 286 | spin_unlock(&ip->i_flags_lock); |
244 | read_unlock(&pag->pag_ici_lock); | 287 | rcu_read_unlock(); |
245 | trace_xfs_iget_hit(ip); | 288 | trace_xfs_iget_hit(ip); |
246 | } | 289 | } |
247 | 290 | ||
@@ -255,7 +298,7 @@ xfs_iget_cache_hit( | |||
255 | 298 | ||
256 | out_error: | 299 | out_error: |
257 | spin_unlock(&ip->i_flags_lock); | 300 | spin_unlock(&ip->i_flags_lock); |
258 | read_unlock(&pag->pag_ici_lock); | 301 | rcu_read_unlock(); |
259 | return error; | 302 | return error; |
260 | } | 303 | } |
261 | 304 | ||
@@ -308,7 +351,7 @@ xfs_iget_cache_miss( | |||
308 | BUG(); | 351 | BUG(); |
309 | } | 352 | } |
310 | 353 | ||
311 | write_lock(&pag->pag_ici_lock); | 354 | spin_lock(&pag->pag_ici_lock); |
312 | 355 | ||
313 | /* insert the new inode */ | 356 | /* insert the new inode */ |
314 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 357 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
@@ -323,14 +366,14 @@ xfs_iget_cache_miss( | |||
323 | ip->i_udquot = ip->i_gdquot = NULL; | 366 | ip->i_udquot = ip->i_gdquot = NULL; |
324 | xfs_iflags_set(ip, XFS_INEW); | 367 | xfs_iflags_set(ip, XFS_INEW); |
325 | 368 | ||
326 | write_unlock(&pag->pag_ici_lock); | 369 | spin_unlock(&pag->pag_ici_lock); |
327 | radix_tree_preload_end(); | 370 | radix_tree_preload_end(); |
328 | 371 | ||
329 | *ipp = ip; | 372 | *ipp = ip; |
330 | return 0; | 373 | return 0; |
331 | 374 | ||
332 | out_preload_end: | 375 | out_preload_end: |
333 | write_unlock(&pag->pag_ici_lock); | 376 | spin_unlock(&pag->pag_ici_lock); |
334 | radix_tree_preload_end(); | 377 | radix_tree_preload_end(); |
335 | if (lock_flags) | 378 | if (lock_flags) |
336 | xfs_iunlock(ip, lock_flags); | 379 | xfs_iunlock(ip, lock_flags); |
@@ -377,7 +420,7 @@ xfs_iget( | |||
377 | xfs_agino_t agino; | 420 | xfs_agino_t agino; |
378 | 421 | ||
379 | /* reject inode numbers outside existing AGs */ | 422 | /* reject inode numbers outside existing AGs */ |
380 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 423 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
381 | return EINVAL; | 424 | return EINVAL; |
382 | 425 | ||
383 | /* get the perag structure and ensure that it's inode capable */ | 426 | /* get the perag structure and ensure that it's inode capable */ |
@@ -386,15 +429,15 @@ xfs_iget( | |||
386 | 429 | ||
387 | again: | 430 | again: |
388 | error = 0; | 431 | error = 0; |
389 | read_lock(&pag->pag_ici_lock); | 432 | rcu_read_lock(); |
390 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 433 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
391 | 434 | ||
392 | if (ip) { | 435 | if (ip) { |
393 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 436 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
394 | if (error) | 437 | if (error) |
395 | goto out_error_or_again; | 438 | goto out_error_or_again; |
396 | } else { | 439 | } else { |
397 | read_unlock(&pag->pag_ici_lock); | 440 | rcu_read_unlock(); |
398 | XFS_STATS_INC(xs_ig_missed); | 441 | XFS_STATS_INC(xs_ig_missed); |
399 | 442 | ||
400 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 443 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 108c7a085f94..be7cf625421f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -887,7 +887,7 @@ xfs_iread( | |||
887 | * around for a while. This helps to keep recently accessed | 887 | * around for a while. This helps to keep recently accessed |
888 | * meta-data in-core longer. | 888 | * meta-data in-core longer. |
889 | */ | 889 | */ |
890 | XFS_BUF_SET_REF(bp, XFS_INO_REF); | 890 | xfs_buf_set_ref(bp, XFS_INO_REF); |
891 | 891 | ||
892 | /* | 892 | /* |
893 | * Use xfs_trans_brelse() to release the buffer containing the | 893 | * Use xfs_trans_brelse() to release the buffer containing the |
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster( | |||
2000 | */ | 2000 | */ |
2001 | for (i = 0; i < ninodes; i++) { | 2001 | for (i = 0; i < ninodes; i++) { |
2002 | retry: | 2002 | retry: |
2003 | read_lock(&pag->pag_ici_lock); | 2003 | rcu_read_lock(); |
2004 | ip = radix_tree_lookup(&pag->pag_ici_root, | 2004 | ip = radix_tree_lookup(&pag->pag_ici_root, |
2005 | XFS_INO_TO_AGINO(mp, (inum + i))); | 2005 | XFS_INO_TO_AGINO(mp, (inum + i))); |
2006 | 2006 | ||
2007 | /* Inode not in memory or stale, nothing to do */ | 2007 | /* Inode not in memory, nothing to do */ |
2008 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { | 2008 | if (!ip) { |
2009 | read_unlock(&pag->pag_ici_lock); | 2009 | rcu_read_unlock(); |
2010 | continue; | 2010 | continue; |
2011 | } | 2011 | } |
2012 | 2012 | ||
2013 | /* | 2013 | /* |
2014 | * because this is an RCU protected lookup, we could | ||
2015 | * find a recently freed or even reallocated inode | ||
2016 | * during the lookup. We need to check under the | ||
2017 | * i_flags_lock for a valid inode here. Skip it if it | ||
2018 | * is not valid, the wrong inode or stale. | ||
2019 | */ | ||
2020 | spin_lock(&ip->i_flags_lock); | ||
2021 | if (ip->i_ino != inum + i || | ||
2022 | __xfs_iflags_test(ip, XFS_ISTALE)) { | ||
2023 | spin_unlock(&ip->i_flags_lock); | ||
2024 | rcu_read_unlock(); | ||
2025 | continue; | ||
2026 | } | ||
2027 | spin_unlock(&ip->i_flags_lock); | ||
2028 | |||
2029 | /* | ||
2014 | * Don't try to lock/unlock the current inode, but we | 2030 | * Don't try to lock/unlock the current inode, but we |
2015 | * _cannot_ skip the other inodes that we did not find | 2031 | * _cannot_ skip the other inodes that we did not find |
2016 | * in the list attached to the buffer and are not | 2032 | * in the list attached to the buffer and are not |
@@ -2019,11 +2035,11 @@ retry: | |||
2019 | */ | 2035 | */ |
2020 | if (ip != free_ip && | 2036 | if (ip != free_ip && |
2021 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2037 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
2022 | read_unlock(&pag->pag_ici_lock); | 2038 | rcu_read_unlock(); |
2023 | delay(1); | 2039 | delay(1); |
2024 | goto retry; | 2040 | goto retry; |
2025 | } | 2041 | } |
2026 | read_unlock(&pag->pag_ici_lock); | 2042 | rcu_read_unlock(); |
2027 | 2043 | ||
2028 | xfs_iflock(ip); | 2044 | xfs_iflock(ip); |
2029 | xfs_iflags_set(ip, XFS_ISTALE); | 2045 | xfs_iflags_set(ip, XFS_ISTALE); |
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster( | |||
2629 | 2645 | ||
2630 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2646 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
2631 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2647 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
2632 | read_lock(&pag->pag_ici_lock); | 2648 | rcu_read_lock(); |
2633 | /* really need a gang lookup range call here */ | 2649 | /* really need a gang lookup range call here */ |
2634 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | 2650 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, |
2635 | first_index, inodes_per_cluster); | 2651 | first_index, inodes_per_cluster); |
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster( | |||
2640 | iq = ilist[i]; | 2656 | iq = ilist[i]; |
2641 | if (iq == ip) | 2657 | if (iq == ip) |
2642 | continue; | 2658 | continue; |
2643 | /* if the inode lies outside this cluster, we're done. */ | 2659 | |
2644 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | 2660 | /* |
2645 | break; | 2661 | * because this is an RCU protected lookup, we could find a |
2662 | * recently freed or even reallocated inode during the lookup. | ||
2663 | * We need to check under the i_flags_lock for a valid inode | ||
2664 | * here. Skip it if it is not valid or the wrong inode. | ||
2665 | */ | ||
2666 | spin_lock(&ip->i_flags_lock); | ||
2667 | if (!ip->i_ino || | ||
2668 | (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { | ||
2669 | spin_unlock(&ip->i_flags_lock); | ||
2670 | continue; | ||
2671 | } | ||
2672 | spin_unlock(&ip->i_flags_lock); | ||
2673 | |||
2646 | /* | 2674 | /* |
2647 | * Do an un-protected check to see if the inode is dirty and | 2675 | * Do an un-protected check to see if the inode is dirty and |
2648 | * is a candidate for flushing. These checks will be repeated | 2676 | * is a candidate for flushing. These checks will be repeated |
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster( | |||
2692 | } | 2720 | } |
2693 | 2721 | ||
2694 | out_free: | 2722 | out_free: |
2695 | read_unlock(&pag->pag_ici_lock); | 2723 | rcu_read_unlock(); |
2696 | kmem_free(ilist); | 2724 | kmem_free(ilist); |
2697 | out_put: | 2725 | out_put: |
2698 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
@@ -2704,7 +2732,7 @@ cluster_corrupt_out: | |||
2704 | * Corruption detected in the clustering loop. Invalidate the | 2732 | * Corruption detected in the clustering loop. Invalidate the |
2705 | * inode buffer and shut down the filesystem. | 2733 | * inode buffer and shut down the filesystem. |
2706 | */ | 2734 | */ |
2707 | read_unlock(&pag->pag_ici_lock); | 2735 | rcu_read_unlock(); |
2708 | /* | 2736 | /* |
2709 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2737 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
2710 | * brelse can handle it with no problems. If not, shut down the | 2738 | * brelse can handle it with no problems. If not, shut down the |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fb2ca2e4cdc9..5c95fa8ec11d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
376 | /* | 376 | /* |
377 | * In-core inode flags. | 377 | * In-core inode flags. |
378 | */ | 378 | */ |
379 | #define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ | 379 | #define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ |
380 | #define XFS_ISTALE 0x0002 /* inode has been staled */ | 380 | #define XFS_ISTALE 0x0002 /* inode has been staled */ |
381 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ | 381 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ |
382 | #define XFS_INEW 0x0008 /* inode has just been allocated */ | 382 | #define XFS_INEW 0x0008 /* inode has just been allocated */ |
383 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ | 383 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ |
384 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ | 384 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ |
385 | #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ | ||
385 | 386 | ||
386 | /* | 387 | /* |
387 | * Flags for inode locking. | 388 | * Flags for inode locking. |
@@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
438 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) | 439 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) |
439 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) | 440 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) |
440 | 441 | ||
442 | extern struct lock_class_key xfs_iolock_reclaimable; | ||
443 | |||
441 | /* | 444 | /* |
442 | * Flags for xfs_itruncate_start(). | 445 | * Flags for xfs_itruncate_start(). |
443 | */ | 446 | */ |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c8d30c453c3..fd4f398bd6f1 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -842,15 +842,64 @@ xfs_inode_item_destroy( | |||
842 | * flushed to disk. It is responsible for removing the inode item | 842 | * flushed to disk. It is responsible for removing the inode item |
843 | * from the AIL if it has not been re-logged, and unlocking the inode's | 843 | * from the AIL if it has not been re-logged, and unlocking the inode's |
844 | * flush lock. | 844 | * flush lock. |
845 | * | ||
846 | * To reduce AIL lock traffic as much as possible, we scan the buffer log item | ||
847 | * list for other inodes that will run this function. We remove them from the | ||
848 | * buffer list so we can process all the inode IO completions in one AIL lock | ||
849 | * traversal. | ||
845 | */ | 850 | */ |
846 | void | 851 | void |
847 | xfs_iflush_done( | 852 | xfs_iflush_done( |
848 | struct xfs_buf *bp, | 853 | struct xfs_buf *bp, |
849 | struct xfs_log_item *lip) | 854 | struct xfs_log_item *lip) |
850 | { | 855 | { |
851 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 856 | struct xfs_inode_log_item *iip; |
852 | xfs_inode_t *ip = iip->ili_inode; | 857 | struct xfs_log_item *blip; |
858 | struct xfs_log_item *next; | ||
859 | struct xfs_log_item *prev; | ||
853 | struct xfs_ail *ailp = lip->li_ailp; | 860 | struct xfs_ail *ailp = lip->li_ailp; |
861 | int need_ail = 0; | ||
862 | |||
863 | /* | ||
864 | * Scan the buffer IO completions for other inodes being completed and | ||
865 | * attach them to the current inode log item. | ||
866 | */ | ||
867 | blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
868 | prev = NULL; | ||
869 | while (blip != NULL) { | ||
870 | if (lip->li_cb != xfs_iflush_done) { | ||
871 | prev = blip; | ||
872 | blip = blip->li_bio_list; | ||
873 | continue; | ||
874 | } | ||
875 | |||
876 | /* remove from list */ | ||
877 | next = blip->li_bio_list; | ||
878 | if (!prev) { | ||
879 | XFS_BUF_SET_FSPRIVATE(bp, next); | ||
880 | } else { | ||
881 | prev->li_bio_list = next; | ||
882 | } | ||
883 | |||
884 | /* add to current list */ | ||
885 | blip->li_bio_list = lip->li_bio_list; | ||
886 | lip->li_bio_list = blip; | ||
887 | |||
888 | /* | ||
889 | * while we have the item, do the unlocked check for needing | ||
890 | * the AIL lock. | ||
891 | */ | ||
892 | iip = INODE_ITEM(blip); | ||
893 | if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) | ||
894 | need_ail++; | ||
895 | |||
896 | blip = next; | ||
897 | } | ||
898 | |||
899 | /* make sure we capture the state of the initial inode. */ | ||
900 | iip = INODE_ITEM(lip); | ||
901 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) | ||
902 | need_ail++; | ||
854 | 903 | ||
855 | /* | 904 | /* |
856 | * We only want to pull the item from the AIL if it is | 905 | * We only want to pull the item from the AIL if it is |
@@ -861,28 +910,37 @@ xfs_iflush_done( | |||
861 | * the lock since it's cheaper, and then we recheck while | 910 | * the lock since it's cheaper, and then we recheck while |
862 | * holding the lock before removing the inode from the AIL. | 911 | * holding the lock before removing the inode from the AIL. |
863 | */ | 912 | */ |
864 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { | 913 | if (need_ail) { |
914 | struct xfs_log_item *log_items[need_ail]; | ||
915 | int i = 0; | ||
865 | spin_lock(&ailp->xa_lock); | 916 | spin_lock(&ailp->xa_lock); |
866 | if (lip->li_lsn == iip->ili_flush_lsn) { | 917 | for (blip = lip; blip; blip = blip->li_bio_list) { |
867 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 918 | iip = INODE_ITEM(blip); |
868 | xfs_trans_ail_delete(ailp, lip); | 919 | if (iip->ili_logged && |
869 | } else { | 920 | blip->li_lsn == iip->ili_flush_lsn) { |
870 | spin_unlock(&ailp->xa_lock); | 921 | log_items[i++] = blip; |
922 | } | ||
923 | ASSERT(i <= need_ail); | ||
871 | } | 924 | } |
925 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ | ||
926 | xfs_trans_ail_delete_bulk(ailp, log_items, i); | ||
872 | } | 927 | } |
873 | 928 | ||
874 | iip->ili_logged = 0; | ||
875 | 929 | ||
876 | /* | 930 | /* |
877 | * Clear the ili_last_fields bits now that we know that the | 931 | * clean up and unlock the flush lock now we are done. We can clear the |
878 | * data corresponding to them is safely on disk. | 932 | * ili_last_fields bits now that we know that the data corresponding to |
933 | * them is safely on disk. | ||
879 | */ | 934 | */ |
880 | iip->ili_last_fields = 0; | 935 | for (blip = lip; blip; blip = next) { |
936 | next = blip->li_bio_list; | ||
937 | blip->li_bio_list = NULL; | ||
881 | 938 | ||
882 | /* | 939 | iip = INODE_ITEM(blip); |
883 | * Release the inode's flush lock since we're done with it. | 940 | iip->ili_logged = 0; |
884 | */ | 941 | iip->ili_last_fields = 0; |
885 | xfs_ifunlock(ip); | 942 | xfs_ifunlock(iip->ili_inode); |
943 | } | ||
886 | } | 944 | } |
887 | 945 | ||
888 | /* | 946 | /* |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 20576146369f..55582bd66659 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -47,127 +47,8 @@ | |||
47 | 47 | ||
48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ | 48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ |
49 | << mp->m_writeio_log) | 49 | << mp->m_writeio_log) |
50 | #define XFS_STRAT_WRITE_IMAPS 2 | ||
51 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP | 50 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP |
52 | 51 | ||
53 | STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, | ||
54 | int, struct xfs_bmbt_irec *, int *); | ||
55 | STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, | ||
56 | struct xfs_bmbt_irec *, int *); | ||
57 | STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
58 | struct xfs_bmbt_irec *, int *); | ||
59 | |||
60 | int | ||
61 | xfs_iomap( | ||
62 | struct xfs_inode *ip, | ||
63 | xfs_off_t offset, | ||
64 | ssize_t count, | ||
65 | int flags, | ||
66 | struct xfs_bmbt_irec *imap, | ||
67 | int *nimaps, | ||
68 | int *new) | ||
69 | { | ||
70 | struct xfs_mount *mp = ip->i_mount; | ||
71 | xfs_fileoff_t offset_fsb, end_fsb; | ||
72 | int error = 0; | ||
73 | int lockmode = 0; | ||
74 | int bmapi_flags = 0; | ||
75 | |||
76 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
77 | |||
78 | *new = 0; | ||
79 | |||
80 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
81 | return XFS_ERROR(EIO); | ||
82 | |||
83 | trace_xfs_iomap_enter(ip, offset, count, flags, NULL); | ||
84 | |||
85 | switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { | ||
86 | case BMAPI_READ: | ||
87 | lockmode = xfs_ilock_map_shared(ip); | ||
88 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
89 | break; | ||
90 | case BMAPI_WRITE: | ||
91 | lockmode = XFS_ILOCK_EXCL; | ||
92 | if (flags & BMAPI_IGNSTATE) | ||
93 | bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; | ||
94 | xfs_ilock(ip, lockmode); | ||
95 | break; | ||
96 | case BMAPI_ALLOCATE: | ||
97 | lockmode = XFS_ILOCK_SHARED; | ||
98 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
99 | |||
100 | /* Attempt non-blocking lock */ | ||
101 | if (flags & BMAPI_TRYLOCK) { | ||
102 | if (!xfs_ilock_nowait(ip, lockmode)) | ||
103 | return XFS_ERROR(EAGAIN); | ||
104 | } else { | ||
105 | xfs_ilock(ip, lockmode); | ||
106 | } | ||
107 | break; | ||
108 | default: | ||
109 | BUG(); | ||
110 | } | ||
111 | |||
112 | ASSERT(offset <= mp->m_maxioffset); | ||
113 | if ((xfs_fsize_t)offset + count > mp->m_maxioffset) | ||
114 | count = mp->m_maxioffset - offset; | ||
115 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
116 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
117 | |||
118 | error = xfs_bmapi(NULL, ip, offset_fsb, | ||
119 | (xfs_filblks_t)(end_fsb - offset_fsb), | ||
120 | bmapi_flags, NULL, 0, imap, | ||
121 | nimaps, NULL); | ||
122 | |||
123 | if (error) | ||
124 | goto out; | ||
125 | |||
126 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { | ||
127 | case BMAPI_WRITE: | ||
128 | /* If we found an extent, return it */ | ||
129 | if (*nimaps && | ||
130 | (imap->br_startblock != HOLESTARTBLOCK) && | ||
131 | (imap->br_startblock != DELAYSTARTBLOCK)) { | ||
132 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | if (flags & BMAPI_DIRECT) { | ||
137 | error = xfs_iomap_write_direct(ip, offset, count, flags, | ||
138 | imap, nimaps); | ||
139 | } else { | ||
140 | error = xfs_iomap_write_delay(ip, offset, count, flags, | ||
141 | imap, nimaps); | ||
142 | } | ||
143 | if (!error) { | ||
144 | trace_xfs_iomap_alloc(ip, offset, count, flags, imap); | ||
145 | } | ||
146 | *new = 1; | ||
147 | break; | ||
148 | case BMAPI_ALLOCATE: | ||
149 | /* If we found an extent, return it */ | ||
150 | xfs_iunlock(ip, lockmode); | ||
151 | lockmode = 0; | ||
152 | |||
153 | if (*nimaps && !isnullstartblock(imap->br_startblock)) { | ||
154 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
155 | break; | ||
156 | } | ||
157 | |||
158 | error = xfs_iomap_write_allocate(ip, offset, count, | ||
159 | imap, nimaps); | ||
160 | break; | ||
161 | } | ||
162 | |||
163 | ASSERT(*nimaps <= 1); | ||
164 | |||
165 | out: | ||
166 | if (lockmode) | ||
167 | xfs_iunlock(ip, lockmode); | ||
168 | return XFS_ERROR(error); | ||
169 | } | ||
170 | |||
171 | STATIC int | 52 | STATIC int |
172 | xfs_iomap_eof_align_last_fsb( | 53 | xfs_iomap_eof_align_last_fsb( |
173 | xfs_mount_t *mp, | 54 | xfs_mount_t *mp, |
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero( | |||
236 | return EFSCORRUPTED; | 117 | return EFSCORRUPTED; |
237 | } | 118 | } |
238 | 119 | ||
239 | STATIC int | 120 | int |
240 | xfs_iomap_write_direct( | 121 | xfs_iomap_write_direct( |
241 | xfs_inode_t *ip, | 122 | xfs_inode_t *ip, |
242 | xfs_off_t offset, | 123 | xfs_off_t offset, |
243 | size_t count, | 124 | size_t count, |
244 | int flags, | ||
245 | xfs_bmbt_irec_t *imap, | 125 | xfs_bmbt_irec_t *imap, |
246 | int *nmaps) | 126 | int nmaps) |
247 | { | 127 | { |
248 | xfs_mount_t *mp = ip->i_mount; | 128 | xfs_mount_t *mp = ip->i_mount; |
249 | xfs_fileoff_t offset_fsb; | 129 | xfs_fileoff_t offset_fsb; |
@@ -279,7 +159,7 @@ xfs_iomap_write_direct( | |||
279 | if (error) | 159 | if (error) |
280 | goto error_out; | 160 | goto error_out; |
281 | } else { | 161 | } else { |
282 | if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) | 162 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) |
283 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 163 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
284 | imap->br_blockcount + | 164 | imap->br_blockcount + |
285 | imap->br_startoff); | 165 | imap->br_startoff); |
@@ -331,7 +211,7 @@ xfs_iomap_write_direct( | |||
331 | xfs_trans_ijoin(tp, ip); | 211 | xfs_trans_ijoin(tp, ip); |
332 | 212 | ||
333 | bmapi_flag = XFS_BMAPI_WRITE; | 213 | bmapi_flag = XFS_BMAPI_WRITE; |
334 | if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) | 214 | if (offset < ip->i_size || extsz) |
335 | bmapi_flag |= XFS_BMAPI_PREALLOC; | 215 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
336 | 216 | ||
337 | /* | 217 | /* |
@@ -370,7 +250,6 @@ xfs_iomap_write_direct( | |||
370 | goto error_out; | 250 | goto error_out; |
371 | } | 251 | } |
372 | 252 | ||
373 | *nmaps = 1; | ||
374 | return 0; | 253 | return 0; |
375 | 254 | ||
376 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 255 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | |||
379 | 258 | ||
380 | error1: /* Just cancel transaction */ | 259 | error1: /* Just cancel transaction */ |
381 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 260 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
382 | *nmaps = 0; /* nothing set-up here */ | ||
383 | 261 | ||
384 | error_out: | 262 | error_out: |
385 | return XFS_ERROR(error); | 263 | return XFS_ERROR(error); |
@@ -389,6 +267,9 @@ error_out: | |||
389 | * If the caller is doing a write at the end of the file, then extend the | 267 | * If the caller is doing a write at the end of the file, then extend the |
390 | * allocation out to the file system's write iosize. We clean up any extra | 268 | * allocation out to the file system's write iosize. We clean up any extra |
391 | * space left over when the file is closed in xfs_inactive(). | 269 | * space left over when the file is closed in xfs_inactive(). |
270 | * | ||
271 | * If we find we already have delalloc preallocation beyond EOF, don't do more | ||
272 | * preallocation as it it not needed. | ||
392 | */ | 273 | */ |
393 | STATIC int | 274 | STATIC int |
394 | xfs_iomap_eof_want_preallocate( | 275 | xfs_iomap_eof_want_preallocate( |
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate( | |||
396 | xfs_inode_t *ip, | 277 | xfs_inode_t *ip, |
397 | xfs_off_t offset, | 278 | xfs_off_t offset, |
398 | size_t count, | 279 | size_t count, |
399 | int ioflag, | ||
400 | xfs_bmbt_irec_t *imap, | 280 | xfs_bmbt_irec_t *imap, |
401 | int nimaps, | 281 | int nimaps, |
402 | int *prealloc) | 282 | int *prealloc) |
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate( | |||
405 | xfs_filblks_t count_fsb; | 285 | xfs_filblks_t count_fsb; |
406 | xfs_fsblock_t firstblock; | 286 | xfs_fsblock_t firstblock; |
407 | int n, error, imaps; | 287 | int n, error, imaps; |
288 | int found_delalloc = 0; | ||
408 | 289 | ||
409 | *prealloc = 0; | 290 | *prealloc = 0; |
410 | if ((offset + count) <= ip->i_size) | 291 | if ((offset + count) <= ip->i_size) |
@@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate( | |||
429 | return 0; | 310 | return 0; |
430 | start_fsb += imap[n].br_blockcount; | 311 | start_fsb += imap[n].br_blockcount; |
431 | count_fsb -= imap[n].br_blockcount; | 312 | count_fsb -= imap[n].br_blockcount; |
313 | |||
314 | if (imap[n].br_startblock == DELAYSTARTBLOCK) | ||
315 | found_delalloc = 1; | ||
432 | } | 316 | } |
433 | } | 317 | } |
434 | *prealloc = 1; | 318 | if (!found_delalloc) |
319 | *prealloc = 1; | ||
435 | return 0; | 320 | return 0; |
436 | } | 321 | } |
437 | 322 | ||
438 | STATIC int | 323 | /* |
324 | * If we don't have a user specified preallocation size, dynamically increase | ||
325 | * the preallocation size as the size of the file grows. Cap the maximum size | ||
326 | * at a single extent or less if the filesystem is near full. The closer the | ||
327 | * filesystem is to full, the smaller the maximum prealocation. | ||
328 | */ | ||
329 | STATIC xfs_fsblock_t | ||
330 | xfs_iomap_prealloc_size( | ||
331 | struct xfs_mount *mp, | ||
332 | struct xfs_inode *ip) | ||
333 | { | ||
334 | xfs_fsblock_t alloc_blocks = 0; | ||
335 | |||
336 | if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { | ||
337 | int shift = 0; | ||
338 | int64_t freesp; | ||
339 | |||
340 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); | ||
341 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, | ||
342 | rounddown_pow_of_two(alloc_blocks)); | ||
343 | |||
344 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | ||
345 | freesp = mp->m_sb.sb_fdblocks; | ||
346 | if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { | ||
347 | shift = 2; | ||
348 | if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) | ||
349 | shift++; | ||
350 | if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) | ||
351 | shift++; | ||
352 | if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) | ||
353 | shift++; | ||
354 | if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) | ||
355 | shift++; | ||
356 | } | ||
357 | if (shift) | ||
358 | alloc_blocks >>= shift; | ||
359 | } | ||
360 | |||
361 | if (alloc_blocks < mp->m_writeio_blocks) | ||
362 | alloc_blocks = mp->m_writeio_blocks; | ||
363 | |||
364 | return alloc_blocks; | ||
365 | } | ||
366 | |||
367 | int | ||
439 | xfs_iomap_write_delay( | 368 | xfs_iomap_write_delay( |
440 | xfs_inode_t *ip, | 369 | xfs_inode_t *ip, |
441 | xfs_off_t offset, | 370 | xfs_off_t offset, |
442 | size_t count, | 371 | size_t count, |
443 | int ioflag, | 372 | xfs_bmbt_irec_t *ret_imap) |
444 | xfs_bmbt_irec_t *ret_imap, | ||
445 | int *nmaps) | ||
446 | { | 373 | { |
447 | xfs_mount_t *mp = ip->i_mount; | 374 | xfs_mount_t *mp = ip->i_mount; |
448 | xfs_fileoff_t offset_fsb; | 375 | xfs_fileoff_t offset_fsb; |
@@ -469,16 +396,19 @@ xfs_iomap_write_delay( | |||
469 | extsz = xfs_get_extsz_hint(ip); | 396 | extsz = xfs_get_extsz_hint(ip); |
470 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 397 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
471 | 398 | ||
399 | |||
472 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, | 400 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, |
473 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); | 401 | imap, XFS_WRITE_IMAPS, &prealloc); |
474 | if (error) | 402 | if (error) |
475 | return error; | 403 | return error; |
476 | 404 | ||
477 | retry: | 405 | retry: |
478 | if (prealloc) { | 406 | if (prealloc) { |
407 | xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); | ||
408 | |||
479 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); | 409 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); |
480 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); | 410 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); |
481 | last_fsb = ioalign + mp->m_writeio_blocks; | 411 | last_fsb = ioalign + alloc_blocks; |
482 | } else { | 412 | } else { |
483 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); | 413 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
484 | } | 414 | } |
@@ -496,22 +426,31 @@ retry: | |||
496 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | | 426 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | |
497 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, | 427 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, |
498 | &nimaps, NULL); | 428 | &nimaps, NULL); |
499 | if (error && (error != ENOSPC)) | 429 | switch (error) { |
430 | case 0: | ||
431 | case ENOSPC: | ||
432 | case EDQUOT: | ||
433 | break; | ||
434 | default: | ||
500 | return XFS_ERROR(error); | 435 | return XFS_ERROR(error); |
436 | } | ||
501 | 437 | ||
502 | /* | 438 | /* |
503 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, | 439 | * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For |
504 | * then we must have run out of space - flush all other inodes with | 440 | * ENOSPC, * flush all other inodes with delalloc blocks to free up |
505 | * delalloc blocks and retry without EOF preallocation. | 441 | * some of the excess reserved metadata space. For both cases, retry |
442 | * without EOF preallocation. | ||
506 | */ | 443 | */ |
507 | if (nimaps == 0) { | 444 | if (nimaps == 0) { |
508 | trace_xfs_delalloc_enospc(ip, offset, count); | 445 | trace_xfs_delalloc_enospc(ip, offset, count); |
509 | if (flushed) | 446 | if (flushed) |
510 | return XFS_ERROR(ENOSPC); | 447 | return XFS_ERROR(error ? error : ENOSPC); |
511 | 448 | ||
512 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 449 | if (error == ENOSPC) { |
513 | xfs_flush_inodes(ip); | 450 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
514 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 451 | xfs_flush_inodes(ip); |
452 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
453 | } | ||
515 | 454 | ||
516 | flushed = 1; | 455 | flushed = 1; |
517 | error = 0; | 456 | error = 0; |
@@ -523,8 +462,6 @@ retry: | |||
523 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); | 462 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); |
524 | 463 | ||
525 | *ret_imap = imap[0]; | 464 | *ret_imap = imap[0]; |
526 | *nmaps = 1; | ||
527 | |||
528 | return 0; | 465 | return 0; |
529 | } | 466 | } |
530 | 467 | ||
@@ -538,13 +475,12 @@ retry: | |||
538 | * We no longer bother to look at the incoming map - all we have to | 475 | * We no longer bother to look at the incoming map - all we have to |
539 | * guarantee is that whatever we allocate fills the required range. | 476 | * guarantee is that whatever we allocate fills the required range. |
540 | */ | 477 | */ |
541 | STATIC int | 478 | int |
542 | xfs_iomap_write_allocate( | 479 | xfs_iomap_write_allocate( |
543 | xfs_inode_t *ip, | 480 | xfs_inode_t *ip, |
544 | xfs_off_t offset, | 481 | xfs_off_t offset, |
545 | size_t count, | 482 | size_t count, |
546 | xfs_bmbt_irec_t *imap, | 483 | xfs_bmbt_irec_t *imap) |
547 | int *retmap) | ||
548 | { | 484 | { |
549 | xfs_mount_t *mp = ip->i_mount; | 485 | xfs_mount_t *mp = ip->i_mount; |
550 | xfs_fileoff_t offset_fsb, last_block; | 486 | xfs_fileoff_t offset_fsb, last_block; |
@@ -557,8 +493,6 @@ xfs_iomap_write_allocate( | |||
557 | int error = 0; | 493 | int error = 0; |
558 | int nres; | 494 | int nres; |
559 | 495 | ||
560 | *retmap = 0; | ||
561 | |||
562 | /* | 496 | /* |
563 | * Make sure that the dquots are there. | 497 | * Make sure that the dquots are there. |
564 | */ | 498 | */ |
@@ -680,7 +614,6 @@ xfs_iomap_write_allocate( | |||
680 | if ((offset_fsb >= imap->br_startoff) && | 614 | if ((offset_fsb >= imap->br_startoff) && |
681 | (offset_fsb < (imap->br_startoff + | 615 | (offset_fsb < (imap->br_startoff + |
682 | imap->br_blockcount))) { | 616 | imap->br_blockcount))) { |
683 | *retmap = 1; | ||
684 | XFS_STATS_INC(xs_xstrat_quick); | 617 | XFS_STATS_INC(xs_xstrat_quick); |
685 | return 0; | 618 | return 0; |
686 | } | 619 | } |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7748a430f50d..80615760959a 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -18,30 +18,15 @@ | |||
18 | #ifndef __XFS_IOMAP_H__ | 18 | #ifndef __XFS_IOMAP_H__ |
19 | #define __XFS_IOMAP_H__ | 19 | #define __XFS_IOMAP_H__ |
20 | 20 | ||
21 | /* base extent manipulation calls */ | ||
22 | #define BMAPI_READ (1 << 0) /* read extents */ | ||
23 | #define BMAPI_WRITE (1 << 1) /* create extents */ | ||
24 | #define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */ | ||
25 | |||
26 | /* modifiers */ | ||
27 | #define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */ | ||
28 | #define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */ | ||
29 | #define BMAPI_MMA (1 << 6) /* allocate for mmap write */ | ||
30 | #define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */ | ||
31 | |||
32 | #define BMAPI_FLAGS \ | ||
33 | { BMAPI_READ, "READ" }, \ | ||
34 | { BMAPI_WRITE, "WRITE" }, \ | ||
35 | { BMAPI_ALLOCATE, "ALLOCATE" }, \ | ||
36 | { BMAPI_IGNSTATE, "IGNSTATE" }, \ | ||
37 | { BMAPI_DIRECT, "DIRECT" }, \ | ||
38 | { BMAPI_TRYLOCK, "TRYLOCK" } | ||
39 | |||
40 | struct xfs_inode; | 21 | struct xfs_inode; |
41 | struct xfs_bmbt_irec; | 22 | struct xfs_bmbt_irec; |
42 | 23 | ||
43 | extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, | 24 | extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, |
44 | struct xfs_bmbt_irec *, int *, int *); | 25 | struct xfs_bmbt_irec *, int); |
26 | extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, | ||
27 | struct xfs_bmbt_irec *); | ||
28 | extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
29 | struct xfs_bmbt_irec *); | ||
45 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); | 30 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); |
46 | 31 | ||
47 | #endif /* __XFS_IOMAP_H__*/ | 32 | #endif /* __XFS_IOMAP_H__*/ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index cee4ab9f8a9e..0bf24b11d0c4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
47 | xfs_buftarg_t *log_target, | 47 | xfs_buftarg_t *log_target, |
48 | xfs_daddr_t blk_offset, | 48 | xfs_daddr_t blk_offset, |
49 | int num_bblks); | 49 | int num_bblks); |
50 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 50 | STATIC int xlog_space_left(struct log *log, atomic64_t *head); |
51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
52 | STATIC void xlog_dealloc_log(xlog_t *log); | 52 | STATIC void xlog_dealloc_log(xlog_t *log); |
53 | 53 | ||
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | |||
70 | /* local functions to manipulate grant head */ | 70 | /* local functions to manipulate grant head */ |
71 | STATIC int xlog_grant_log_space(xlog_t *log, | 71 | STATIC int xlog_grant_log_space(xlog_t *log, |
72 | xlog_ticket_t *xtic); | 72 | xlog_ticket_t *xtic); |
73 | STATIC void xlog_grant_push_ail(xfs_mount_t *mp, | 73 | STATIC void xlog_grant_push_ail(struct log *log, |
74 | int need_bytes); | 74 | int need_bytes); |
75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, | 75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, |
76 | xlog_ticket_t *ticket); | 76 | xlog_ticket_t *ticket); |
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
81 | 81 | ||
82 | #if defined(DEBUG) | 82 | #if defined(DEBUG) |
83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); | 83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
84 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 84 | STATIC void xlog_verify_grant_tail(struct log *log); |
85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, | 85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, |
86 | int count, boolean_t syncing); | 86 | int count, boolean_t syncing); |
87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | 87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, |
88 | xfs_lsn_t tail_lsn); | 88 | xfs_lsn_t tail_lsn); |
89 | #else | 89 | #else |
90 | #define xlog_verify_dest_ptr(a,b) | 90 | #define xlog_verify_dest_ptr(a,b) |
91 | #define xlog_verify_grant_head(a,b) | 91 | #define xlog_verify_grant_tail(a) |
92 | #define xlog_verify_iclog(a,b,c,d) | 92 | #define xlog_verify_iclog(a,b,c,d) |
93 | #define xlog_verify_tail_lsn(a,b,c) | 93 | #define xlog_verify_tail_lsn(a,b,c) |
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | STATIC int xlog_iclogs_empty(xlog_t *log); | 96 | STATIC int xlog_iclogs_empty(xlog_t *log); |
97 | 97 | ||
98 | |||
99 | static void | 98 | static void |
100 | xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | 99 | xlog_grant_sub_space( |
100 | struct log *log, | ||
101 | atomic64_t *head, | ||
102 | int bytes) | ||
101 | { | 103 | { |
102 | if (*qp) { | 104 | int64_t head_val = atomic64_read(head); |
103 | tic->t_next = (*qp); | 105 | int64_t new, old; |
104 | tic->t_prev = (*qp)->t_prev; | ||
105 | (*qp)->t_prev->t_next = tic; | ||
106 | (*qp)->t_prev = tic; | ||
107 | } else { | ||
108 | tic->t_prev = tic->t_next = tic; | ||
109 | *qp = tic; | ||
110 | } | ||
111 | 106 | ||
112 | tic->t_flags |= XLOG_TIC_IN_Q; | 107 | do { |
113 | } | 108 | int cycle, space; |
114 | 109 | ||
115 | static void | 110 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
116 | xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | ||
117 | { | ||
118 | if (tic == tic->t_next) { | ||
119 | *qp = NULL; | ||
120 | } else { | ||
121 | *qp = tic->t_next; | ||
122 | tic->t_next->t_prev = tic->t_prev; | ||
123 | tic->t_prev->t_next = tic->t_next; | ||
124 | } | ||
125 | 111 | ||
126 | tic->t_next = tic->t_prev = NULL; | 112 | space -= bytes; |
127 | tic->t_flags &= ~XLOG_TIC_IN_Q; | 113 | if (space < 0) { |
114 | space += log->l_logsize; | ||
115 | cycle--; | ||
116 | } | ||
117 | |||
118 | old = head_val; | ||
119 | new = xlog_assign_grant_head_val(cycle, space); | ||
120 | head_val = atomic64_cmpxchg(head, old, new); | ||
121 | } while (head_val != old); | ||
128 | } | 122 | } |
129 | 123 | ||
130 | static void | 124 | static void |
131 | xlog_grant_sub_space(struct log *log, int bytes) | 125 | xlog_grant_add_space( |
126 | struct log *log, | ||
127 | atomic64_t *head, | ||
128 | int bytes) | ||
132 | { | 129 | { |
133 | log->l_grant_write_bytes -= bytes; | 130 | int64_t head_val = atomic64_read(head); |
134 | if (log->l_grant_write_bytes < 0) { | 131 | int64_t new, old; |
135 | log->l_grant_write_bytes += log->l_logsize; | ||
136 | log->l_grant_write_cycle--; | ||
137 | } | ||
138 | |||
139 | log->l_grant_reserve_bytes -= bytes; | ||
140 | if ((log)->l_grant_reserve_bytes < 0) { | ||
141 | log->l_grant_reserve_bytes += log->l_logsize; | ||
142 | log->l_grant_reserve_cycle--; | ||
143 | } | ||
144 | 132 | ||
145 | } | 133 | do { |
134 | int tmp; | ||
135 | int cycle, space; | ||
146 | 136 | ||
147 | static void | 137 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
148 | xlog_grant_add_space_write(struct log *log, int bytes) | ||
149 | { | ||
150 | int tmp = log->l_logsize - log->l_grant_write_bytes; | ||
151 | if (tmp > bytes) | ||
152 | log->l_grant_write_bytes += bytes; | ||
153 | else { | ||
154 | log->l_grant_write_cycle++; | ||
155 | log->l_grant_write_bytes = bytes - tmp; | ||
156 | } | ||
157 | } | ||
158 | 138 | ||
159 | static void | 139 | tmp = log->l_logsize - space; |
160 | xlog_grant_add_space_reserve(struct log *log, int bytes) | 140 | if (tmp > bytes) |
161 | { | 141 | space += bytes; |
162 | int tmp = log->l_logsize - log->l_grant_reserve_bytes; | 142 | else { |
163 | if (tmp > bytes) | 143 | space = bytes - tmp; |
164 | log->l_grant_reserve_bytes += bytes; | 144 | cycle++; |
165 | else { | 145 | } |
166 | log->l_grant_reserve_cycle++; | ||
167 | log->l_grant_reserve_bytes = bytes - tmp; | ||
168 | } | ||
169 | } | ||
170 | 146 | ||
171 | static inline void | 147 | old = head_val; |
172 | xlog_grant_add_space(struct log *log, int bytes) | 148 | new = xlog_assign_grant_head_val(cycle, space); |
173 | { | 149 | head_val = atomic64_cmpxchg(head, old, new); |
174 | xlog_grant_add_space_write(log, bytes); | 150 | } while (head_val != old); |
175 | xlog_grant_add_space_reserve(log, bytes); | ||
176 | } | 151 | } |
177 | 152 | ||
178 | static void | 153 | static void |
@@ -355,7 +330,7 @@ xfs_log_reserve( | |||
355 | 330 | ||
356 | trace_xfs_log_reserve(log, internal_ticket); | 331 | trace_xfs_log_reserve(log, internal_ticket); |
357 | 332 | ||
358 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 333 | xlog_grant_push_ail(log, internal_ticket->t_unit_res); |
359 | retval = xlog_regrant_write_log_space(log, internal_ticket); | 334 | retval = xlog_regrant_write_log_space(log, internal_ticket); |
360 | } else { | 335 | } else { |
361 | /* may sleep if need to allocate more tickets */ | 336 | /* may sleep if need to allocate more tickets */ |
@@ -369,7 +344,7 @@ xfs_log_reserve( | |||
369 | 344 | ||
370 | trace_xfs_log_reserve(log, internal_ticket); | 345 | trace_xfs_log_reserve(log, internal_ticket); |
371 | 346 | ||
372 | xlog_grant_push_ail(mp, | 347 | xlog_grant_push_ail(log, |
373 | (internal_ticket->t_unit_res * | 348 | (internal_ticket->t_unit_res * |
374 | internal_ticket->t_cnt)); | 349 | internal_ticket->t_cnt)); |
375 | retval = xlog_grant_log_space(log, internal_ticket); | 350 | retval = xlog_grant_log_space(log, internal_ticket); |
@@ -584,8 +559,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
584 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 559 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
585 | iclog->ic_state == XLOG_STATE_DIRTY)) { | 560 | iclog->ic_state == XLOG_STATE_DIRTY)) { |
586 | if (!XLOG_FORCED_SHUTDOWN(log)) { | 561 | if (!XLOG_FORCED_SHUTDOWN(log)) { |
587 | sv_wait(&iclog->ic_force_wait, PMEM, | 562 | xlog_wait(&iclog->ic_force_wait, |
588 | &log->l_icloglock, s); | 563 | &log->l_icloglock); |
589 | } else { | 564 | } else { |
590 | spin_unlock(&log->l_icloglock); | 565 | spin_unlock(&log->l_icloglock); |
591 | } | 566 | } |
@@ -625,8 +600,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
625 | || iclog->ic_state == XLOG_STATE_DIRTY | 600 | || iclog->ic_state == XLOG_STATE_DIRTY |
626 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { | 601 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { |
627 | 602 | ||
628 | sv_wait(&iclog->ic_force_wait, PMEM, | 603 | xlog_wait(&iclog->ic_force_wait, |
629 | &log->l_icloglock, s); | 604 | &log->l_icloglock); |
630 | } else { | 605 | } else { |
631 | spin_unlock(&log->l_icloglock); | 606 | spin_unlock(&log->l_icloglock); |
632 | } | 607 | } |
@@ -703,55 +678,46 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
703 | { | 678 | { |
704 | xlog_ticket_t *tic; | 679 | xlog_ticket_t *tic; |
705 | xlog_t *log = mp->m_log; | 680 | xlog_t *log = mp->m_log; |
706 | int need_bytes, free_bytes, cycle, bytes; | 681 | int need_bytes, free_bytes; |
707 | 682 | ||
708 | if (XLOG_FORCED_SHUTDOWN(log)) | 683 | if (XLOG_FORCED_SHUTDOWN(log)) |
709 | return; | 684 | return; |
710 | 685 | ||
711 | if (tail_lsn == 0) { | 686 | if (tail_lsn == 0) |
712 | /* needed since sync_lsn is 64 bits */ | 687 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
713 | spin_lock(&log->l_icloglock); | ||
714 | tail_lsn = log->l_last_sync_lsn; | ||
715 | spin_unlock(&log->l_icloglock); | ||
716 | } | ||
717 | |||
718 | spin_lock(&log->l_grant_lock); | ||
719 | 688 | ||
720 | /* Also an invalid lsn. 1 implies that we aren't passing in a valid | 689 | /* tail_lsn == 1 implies that we weren't passed a valid value. */ |
721 | * tail_lsn. | 690 | if (tail_lsn != 1) |
722 | */ | 691 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
723 | if (tail_lsn != 1) { | ||
724 | log->l_tail_lsn = tail_lsn; | ||
725 | } | ||
726 | 692 | ||
727 | if ((tic = log->l_write_headq)) { | 693 | if (!list_empty_careful(&log->l_writeq)) { |
728 | #ifdef DEBUG | 694 | #ifdef DEBUG |
729 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 695 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
730 | panic("Recovery problem"); | 696 | panic("Recovery problem"); |
731 | #endif | 697 | #endif |
732 | cycle = log->l_grant_write_cycle; | 698 | spin_lock(&log->l_grant_write_lock); |
733 | bytes = log->l_grant_write_bytes; | 699 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
734 | free_bytes = xlog_space_left(log, cycle, bytes); | 700 | list_for_each_entry(tic, &log->l_writeq, t_queue) { |
735 | do { | ||
736 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | 701 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); |
737 | 702 | ||
738 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) | 703 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) |
739 | break; | 704 | break; |
740 | tail_lsn = 0; | 705 | tail_lsn = 0; |
741 | free_bytes -= tic->t_unit_res; | 706 | free_bytes -= tic->t_unit_res; |
742 | sv_signal(&tic->t_wait); | 707 | trace_xfs_log_regrant_write_wake_up(log, tic); |
743 | tic = tic->t_next; | 708 | wake_up(&tic->t_wait); |
744 | } while (tic != log->l_write_headq); | 709 | } |
710 | spin_unlock(&log->l_grant_write_lock); | ||
745 | } | 711 | } |
746 | if ((tic = log->l_reserve_headq)) { | 712 | |
713 | if (!list_empty_careful(&log->l_reserveq)) { | ||
747 | #ifdef DEBUG | 714 | #ifdef DEBUG |
748 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 715 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
749 | panic("Recovery problem"); | 716 | panic("Recovery problem"); |
750 | #endif | 717 | #endif |
751 | cycle = log->l_grant_reserve_cycle; | 718 | spin_lock(&log->l_grant_reserve_lock); |
752 | bytes = log->l_grant_reserve_bytes; | 719 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
753 | free_bytes = xlog_space_left(log, cycle, bytes); | 720 | list_for_each_entry(tic, &log->l_reserveq, t_queue) { |
754 | do { | ||
755 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) | 721 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) |
756 | need_bytes = tic->t_unit_res*tic->t_cnt; | 722 | need_bytes = tic->t_unit_res*tic->t_cnt; |
757 | else | 723 | else |
@@ -760,12 +726,12 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
760 | break; | 726 | break; |
761 | tail_lsn = 0; | 727 | tail_lsn = 0; |
762 | free_bytes -= need_bytes; | 728 | free_bytes -= need_bytes; |
763 | sv_signal(&tic->t_wait); | 729 | trace_xfs_log_grant_wake_up(log, tic); |
764 | tic = tic->t_next; | 730 | wake_up(&tic->t_wait); |
765 | } while (tic != log->l_reserve_headq); | 731 | } |
732 | spin_unlock(&log->l_grant_reserve_lock); | ||
766 | } | 733 | } |
767 | spin_unlock(&log->l_grant_lock); | 734 | } |
768 | } /* xfs_log_move_tail */ | ||
769 | 735 | ||
770 | /* | 736 | /* |
771 | * Determine if we have a transaction that has gone to disk | 737 | * Determine if we have a transaction that has gone to disk |
@@ -831,23 +797,19 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
831 | * We may be holding the log iclog lock upon entering this routine. | 797 | * We may be holding the log iclog lock upon entering this routine. |
832 | */ | 798 | */ |
833 | xfs_lsn_t | 799 | xfs_lsn_t |
834 | xlog_assign_tail_lsn(xfs_mount_t *mp) | 800 | xlog_assign_tail_lsn( |
801 | struct xfs_mount *mp) | ||
835 | { | 802 | { |
836 | xfs_lsn_t tail_lsn; | 803 | xfs_lsn_t tail_lsn; |
837 | xlog_t *log = mp->m_log; | 804 | struct log *log = mp->m_log; |
838 | 805 | ||
839 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); | 806 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); |
840 | spin_lock(&log->l_grant_lock); | 807 | if (!tail_lsn) |
841 | if (tail_lsn != 0) { | 808 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
842 | log->l_tail_lsn = tail_lsn; | ||
843 | } else { | ||
844 | tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; | ||
845 | } | ||
846 | spin_unlock(&log->l_grant_lock); | ||
847 | 809 | ||
810 | atomic64_set(&log->l_tail_lsn, tail_lsn); | ||
848 | return tail_lsn; | 811 | return tail_lsn; |
849 | } /* xlog_assign_tail_lsn */ | 812 | } |
850 | |||
851 | 813 | ||
852 | /* | 814 | /* |
853 | * Return the space in the log between the tail and the head. The head | 815 | * Return the space in the log between the tail and the head. The head |
@@ -864,21 +826,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) | |||
864 | * result is that we return the size of the log as the amount of space left. | 826 | * result is that we return the size of the log as the amount of space left. |
865 | */ | 827 | */ |
866 | STATIC int | 828 | STATIC int |
867 | xlog_space_left(xlog_t *log, int cycle, int bytes) | 829 | xlog_space_left( |
868 | { | 830 | struct log *log, |
869 | int free_bytes; | 831 | atomic64_t *head) |
870 | int tail_bytes; | 832 | { |
871 | int tail_cycle; | 833 | int free_bytes; |
872 | 834 | int tail_bytes; | |
873 | tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); | 835 | int tail_cycle; |
874 | tail_cycle = CYCLE_LSN(log->l_tail_lsn); | 836 | int head_cycle; |
875 | if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { | 837 | int head_bytes; |
876 | free_bytes = log->l_logsize - (bytes - tail_bytes); | 838 | |
877 | } else if ((tail_cycle + 1) < cycle) { | 839 | xlog_crack_grant_head(head, &head_cycle, &head_bytes); |
840 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); | ||
841 | tail_bytes = BBTOB(tail_bytes); | ||
842 | if (tail_cycle == head_cycle && head_bytes >= tail_bytes) | ||
843 | free_bytes = log->l_logsize - (head_bytes - tail_bytes); | ||
844 | else if (tail_cycle + 1 < head_cycle) | ||
878 | return 0; | 845 | return 0; |
879 | } else if (tail_cycle < cycle) { | 846 | else if (tail_cycle < head_cycle) { |
880 | ASSERT(tail_cycle == (cycle - 1)); | 847 | ASSERT(tail_cycle == (head_cycle - 1)); |
881 | free_bytes = tail_bytes - bytes; | 848 | free_bytes = tail_bytes - head_bytes; |
882 | } else { | 849 | } else { |
883 | /* | 850 | /* |
884 | * The reservation head is behind the tail. | 851 | * The reservation head is behind the tail. |
@@ -889,12 +856,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes) | |||
889 | "xlog_space_left: head behind tail\n" | 856 | "xlog_space_left: head behind tail\n" |
890 | " tail_cycle = %d, tail_bytes = %d\n" | 857 | " tail_cycle = %d, tail_bytes = %d\n" |
891 | " GH cycle = %d, GH bytes = %d", | 858 | " GH cycle = %d, GH bytes = %d", |
892 | tail_cycle, tail_bytes, cycle, bytes); | 859 | tail_cycle, tail_bytes, head_cycle, head_bytes); |
893 | ASSERT(0); | 860 | ASSERT(0); |
894 | free_bytes = log->l_logsize; | 861 | free_bytes = log->l_logsize; |
895 | } | 862 | } |
896 | return free_bytes; | 863 | return free_bytes; |
897 | } /* xlog_space_left */ | 864 | } |
898 | 865 | ||
899 | 866 | ||
900 | /* | 867 | /* |
@@ -1047,12 +1014,16 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1047 | log->l_flags |= XLOG_ACTIVE_RECOVERY; | 1014 | log->l_flags |= XLOG_ACTIVE_RECOVERY; |
1048 | 1015 | ||
1049 | log->l_prev_block = -1; | 1016 | log->l_prev_block = -1; |
1050 | log->l_tail_lsn = xlog_assign_lsn(1, 0); | ||
1051 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ | 1017 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ |
1052 | log->l_last_sync_lsn = log->l_tail_lsn; | 1018 | xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); |
1019 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); | ||
1053 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ | 1020 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ |
1054 | log->l_grant_reserve_cycle = 1; | 1021 | xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); |
1055 | log->l_grant_write_cycle = 1; | 1022 | xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); |
1023 | INIT_LIST_HEAD(&log->l_reserveq); | ||
1024 | INIT_LIST_HEAD(&log->l_writeq); | ||
1025 | spin_lock_init(&log->l_grant_reserve_lock); | ||
1026 | spin_lock_init(&log->l_grant_write_lock); | ||
1056 | 1027 | ||
1057 | error = EFSCORRUPTED; | 1028 | error = EFSCORRUPTED; |
1058 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1029 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
@@ -1094,8 +1065,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1094 | log->l_xbuf = bp; | 1065 | log->l_xbuf = bp; |
1095 | 1066 | ||
1096 | spin_lock_init(&log->l_icloglock); | 1067 | spin_lock_init(&log->l_icloglock); |
1097 | spin_lock_init(&log->l_grant_lock); | 1068 | init_waitqueue_head(&log->l_flush_wait); |
1098 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | ||
1099 | 1069 | ||
1100 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1070 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1101 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1071 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
@@ -1151,8 +1121,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1151 | 1121 | ||
1152 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); | 1122 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); |
1153 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); | 1123 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); |
1154 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1124 | init_waitqueue_head(&iclog->ic_force_wait); |
1155 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1125 | init_waitqueue_head(&iclog->ic_write_wait); |
1156 | 1126 | ||
1157 | iclogp = &iclog->ic_next; | 1127 | iclogp = &iclog->ic_next; |
1158 | } | 1128 | } |
@@ -1167,15 +1137,11 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1167 | out_free_iclog: | 1137 | out_free_iclog: |
1168 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { | 1138 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { |
1169 | prev_iclog = iclog->ic_next; | 1139 | prev_iclog = iclog->ic_next; |
1170 | if (iclog->ic_bp) { | 1140 | if (iclog->ic_bp) |
1171 | sv_destroy(&iclog->ic_force_wait); | ||
1172 | sv_destroy(&iclog->ic_write_wait); | ||
1173 | xfs_buf_free(iclog->ic_bp); | 1141 | xfs_buf_free(iclog->ic_bp); |
1174 | } | ||
1175 | kmem_free(iclog); | 1142 | kmem_free(iclog); |
1176 | } | 1143 | } |
1177 | spinlock_destroy(&log->l_icloglock); | 1144 | spinlock_destroy(&log->l_icloglock); |
1178 | spinlock_destroy(&log->l_grant_lock); | ||
1179 | xfs_buf_free(log->l_xbuf); | 1145 | xfs_buf_free(log->l_xbuf); |
1180 | out_free_log: | 1146 | out_free_log: |
1181 | kmem_free(log); | 1147 | kmem_free(log); |
@@ -1223,61 +1189,60 @@ xlog_commit_record( | |||
1223 | * water mark. In this manner, we would be creating a low water mark. | 1189 | * water mark. In this manner, we would be creating a low water mark. |
1224 | */ | 1190 | */ |
1225 | STATIC void | 1191 | STATIC void |
1226 | xlog_grant_push_ail(xfs_mount_t *mp, | 1192 | xlog_grant_push_ail( |
1227 | int need_bytes) | 1193 | struct log *log, |
1194 | int need_bytes) | ||
1228 | { | 1195 | { |
1229 | xlog_t *log = mp->m_log; /* pointer to the log */ | 1196 | xfs_lsn_t threshold_lsn = 0; |
1230 | xfs_lsn_t tail_lsn; /* lsn of the log tail */ | 1197 | xfs_lsn_t last_sync_lsn; |
1231 | xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ | 1198 | int free_blocks; |
1232 | int free_blocks; /* free blocks left to write to */ | 1199 | int free_bytes; |
1233 | int free_bytes; /* free bytes left to write to */ | 1200 | int threshold_block; |
1234 | int threshold_block; /* block in lsn we'd like to be at */ | 1201 | int threshold_cycle; |
1235 | int threshold_cycle; /* lsn cycle we'd like to be at */ | 1202 | int free_threshold; |
1236 | int free_threshold; | 1203 | |
1237 | 1204 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | |
1238 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | 1205 | |
1239 | 1206 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | |
1240 | spin_lock(&log->l_grant_lock); | 1207 | free_blocks = BTOBBT(free_bytes); |
1241 | free_bytes = xlog_space_left(log, | 1208 | |
1242 | log->l_grant_reserve_cycle, | 1209 | /* |
1243 | log->l_grant_reserve_bytes); | 1210 | * Set the threshold for the minimum number of free blocks in the |
1244 | tail_lsn = log->l_tail_lsn; | 1211 | * log to the maximum of what the caller needs, one quarter of the |
1245 | free_blocks = BTOBBT(free_bytes); | 1212 | * log, and 256 blocks. |
1246 | 1213 | */ | |
1247 | /* | 1214 | free_threshold = BTOBB(need_bytes); |
1248 | * Set the threshold for the minimum number of free blocks in the | 1215 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); |
1249 | * log to the maximum of what the caller needs, one quarter of the | 1216 | free_threshold = MAX(free_threshold, 256); |
1250 | * log, and 256 blocks. | 1217 | if (free_blocks >= free_threshold) |
1251 | */ | 1218 | return; |
1252 | free_threshold = BTOBB(need_bytes); | 1219 | |
1253 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); | 1220 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, |
1254 | free_threshold = MAX(free_threshold, 256); | 1221 | &threshold_block); |
1255 | if (free_blocks < free_threshold) { | 1222 | threshold_block += free_threshold; |
1256 | threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; | ||
1257 | threshold_cycle = CYCLE_LSN(tail_lsn); | ||
1258 | if (threshold_block >= log->l_logBBsize) { | 1223 | if (threshold_block >= log->l_logBBsize) { |
1259 | threshold_block -= log->l_logBBsize; | 1224 | threshold_block -= log->l_logBBsize; |
1260 | threshold_cycle += 1; | 1225 | threshold_cycle += 1; |
1261 | } | 1226 | } |
1262 | threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); | 1227 | threshold_lsn = xlog_assign_lsn(threshold_cycle, |
1228 | threshold_block); | ||
1229 | /* | ||
1230 | * Don't pass in an lsn greater than the lsn of the last | ||
1231 | * log record known to be on disk. Use a snapshot of the last sync lsn | ||
1232 | * so that it doesn't change between the compare and the set. | ||
1233 | */ | ||
1234 | last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); | ||
1235 | if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) | ||
1236 | threshold_lsn = last_sync_lsn; | ||
1263 | 1237 | ||
1264 | /* Don't pass in an lsn greater than the lsn of the last | 1238 | /* |
1265 | * log record known to be on disk. | 1239 | * Get the transaction layer to kick the dirty buffers out to |
1240 | * disk asynchronously. No point in trying to do this if | ||
1241 | * the filesystem is shutting down. | ||
1266 | */ | 1242 | */ |
1267 | if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) | 1243 | if (!XLOG_FORCED_SHUTDOWN(log)) |
1268 | threshold_lsn = log->l_last_sync_lsn; | 1244 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); |
1269 | } | 1245 | } |
1270 | spin_unlock(&log->l_grant_lock); | ||
1271 | |||
1272 | /* | ||
1273 | * Get the transaction layer to kick the dirty buffers out to | ||
1274 | * disk asynchronously. No point in trying to do this if | ||
1275 | * the filesystem is shutting down. | ||
1276 | */ | ||
1277 | if (threshold_lsn && | ||
1278 | !XLOG_FORCED_SHUTDOWN(log)) | ||
1279 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | ||
1280 | } /* xlog_grant_push_ail */ | ||
1281 | 1246 | ||
1282 | /* | 1247 | /* |
1283 | * The bdstrat callback function for log bufs. This gives us a central | 1248 | * The bdstrat callback function for log bufs. This gives us a central |
@@ -1372,9 +1337,8 @@ xlog_sync(xlog_t *log, | |||
1372 | roundoff < BBTOB(1))); | 1337 | roundoff < BBTOB(1))); |
1373 | 1338 | ||
1374 | /* move grant heads by roundoff in sync */ | 1339 | /* move grant heads by roundoff in sync */ |
1375 | spin_lock(&log->l_grant_lock); | 1340 | xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); |
1376 | xlog_grant_add_space(log, roundoff); | 1341 | xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); |
1377 | spin_unlock(&log->l_grant_lock); | ||
1378 | 1342 | ||
1379 | /* put cycle number in every block */ | 1343 | /* put cycle number in every block */ |
1380 | xlog_pack_data(log, iclog, roundoff); | 1344 | xlog_pack_data(log, iclog, roundoff); |
@@ -1489,15 +1453,12 @@ xlog_dealloc_log(xlog_t *log) | |||
1489 | 1453 | ||
1490 | iclog = log->l_iclog; | 1454 | iclog = log->l_iclog; |
1491 | for (i=0; i<log->l_iclog_bufs; i++) { | 1455 | for (i=0; i<log->l_iclog_bufs; i++) { |
1492 | sv_destroy(&iclog->ic_force_wait); | ||
1493 | sv_destroy(&iclog->ic_write_wait); | ||
1494 | xfs_buf_free(iclog->ic_bp); | 1456 | xfs_buf_free(iclog->ic_bp); |
1495 | next_iclog = iclog->ic_next; | 1457 | next_iclog = iclog->ic_next; |
1496 | kmem_free(iclog); | 1458 | kmem_free(iclog); |
1497 | iclog = next_iclog; | 1459 | iclog = next_iclog; |
1498 | } | 1460 | } |
1499 | spinlock_destroy(&log->l_icloglock); | 1461 | spinlock_destroy(&log->l_icloglock); |
1500 | spinlock_destroy(&log->l_grant_lock); | ||
1501 | 1462 | ||
1502 | xfs_buf_free(log->l_xbuf); | 1463 | xfs_buf_free(log->l_xbuf); |
1503 | log->l_mp->m_log = NULL; | 1464 | log->l_mp->m_log = NULL; |
@@ -2232,7 +2193,7 @@ xlog_state_do_callback( | |||
2232 | lowest_lsn = xlog_get_lowest_lsn(log); | 2193 | lowest_lsn = xlog_get_lowest_lsn(log); |
2233 | if (lowest_lsn && | 2194 | if (lowest_lsn && |
2234 | XFS_LSN_CMP(lowest_lsn, | 2195 | XFS_LSN_CMP(lowest_lsn, |
2235 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { | 2196 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { |
2236 | iclog = iclog->ic_next; | 2197 | iclog = iclog->ic_next; |
2237 | continue; /* Leave this iclog for | 2198 | continue; /* Leave this iclog for |
2238 | * another thread */ | 2199 | * another thread */ |
@@ -2240,23 +2201,21 @@ xlog_state_do_callback( | |||
2240 | 2201 | ||
2241 | iclog->ic_state = XLOG_STATE_CALLBACK; | 2202 | iclog->ic_state = XLOG_STATE_CALLBACK; |
2242 | 2203 | ||
2243 | spin_unlock(&log->l_icloglock); | ||
2244 | 2204 | ||
2245 | /* l_last_sync_lsn field protected by | 2205 | /* |
2246 | * l_grant_lock. Don't worry about iclog's lsn. | 2206 | * update the last_sync_lsn before we drop the |
2247 | * No one else can be here except us. | 2207 | * icloglock to ensure we are the only one that |
2208 | * can update it. | ||
2248 | */ | 2209 | */ |
2249 | spin_lock(&log->l_grant_lock); | 2210 | ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), |
2250 | ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, | 2211 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); |
2251 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); | 2212 | atomic64_set(&log->l_last_sync_lsn, |
2252 | log->l_last_sync_lsn = | 2213 | be64_to_cpu(iclog->ic_header.h_lsn)); |
2253 | be64_to_cpu(iclog->ic_header.h_lsn); | ||
2254 | spin_unlock(&log->l_grant_lock); | ||
2255 | 2214 | ||
2256 | } else { | 2215 | } else |
2257 | spin_unlock(&log->l_icloglock); | ||
2258 | ioerrors++; | 2216 | ioerrors++; |
2259 | } | 2217 | |
2218 | spin_unlock(&log->l_icloglock); | ||
2260 | 2219 | ||
2261 | /* | 2220 | /* |
2262 | * Keep processing entries in the callback list until | 2221 | * Keep processing entries in the callback list until |
@@ -2297,7 +2256,7 @@ xlog_state_do_callback( | |||
2297 | xlog_state_clean_log(log); | 2256 | xlog_state_clean_log(log); |
2298 | 2257 | ||
2299 | /* wake up threads waiting in xfs_log_force() */ | 2258 | /* wake up threads waiting in xfs_log_force() */ |
2300 | sv_broadcast(&iclog->ic_force_wait); | 2259 | wake_up_all(&iclog->ic_force_wait); |
2301 | 2260 | ||
2302 | iclog = iclog->ic_next; | 2261 | iclog = iclog->ic_next; |
2303 | } while (first_iclog != iclog); | 2262 | } while (first_iclog != iclog); |
@@ -2344,7 +2303,7 @@ xlog_state_do_callback( | |||
2344 | spin_unlock(&log->l_icloglock); | 2303 | spin_unlock(&log->l_icloglock); |
2345 | 2304 | ||
2346 | if (wake) | 2305 | if (wake) |
2347 | sv_broadcast(&log->l_flush_wait); | 2306 | wake_up_all(&log->l_flush_wait); |
2348 | } | 2307 | } |
2349 | 2308 | ||
2350 | 2309 | ||
@@ -2395,7 +2354,7 @@ xlog_state_done_syncing( | |||
2395 | * iclog buffer, we wake them all, one will get to do the | 2354 | * iclog buffer, we wake them all, one will get to do the |
2396 | * I/O, the others get to wait for the result. | 2355 | * I/O, the others get to wait for the result. |
2397 | */ | 2356 | */ |
2398 | sv_broadcast(&iclog->ic_write_wait); | 2357 | wake_up_all(&iclog->ic_write_wait); |
2399 | spin_unlock(&log->l_icloglock); | 2358 | spin_unlock(&log->l_icloglock); |
2400 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ | 2359 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ |
2401 | } /* xlog_state_done_syncing */ | 2360 | } /* xlog_state_done_syncing */ |
@@ -2444,7 +2403,7 @@ restart: | |||
2444 | XFS_STATS_INC(xs_log_noiclogs); | 2403 | XFS_STATS_INC(xs_log_noiclogs); |
2445 | 2404 | ||
2446 | /* Wait for log writes to have flushed */ | 2405 | /* Wait for log writes to have flushed */ |
2447 | sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); | 2406 | xlog_wait(&log->l_flush_wait, &log->l_icloglock); |
2448 | goto restart; | 2407 | goto restart; |
2449 | } | 2408 | } |
2450 | 2409 | ||
@@ -2527,6 +2486,18 @@ restart: | |||
2527 | * | 2486 | * |
2528 | * Once a ticket gets put onto the reserveq, it will only return after | 2487 | * Once a ticket gets put onto the reserveq, it will only return after |
2529 | * the needed reservation is satisfied. | 2488 | * the needed reservation is satisfied. |
2489 | * | ||
2490 | * This function is structured so that it has a lock free fast path. This is | ||
2491 | * necessary because every new transaction reservation will come through this | ||
2492 | * path. Hence any lock will be globally hot if we take it unconditionally on | ||
2493 | * every pass. | ||
2494 | * | ||
2495 | * As tickets are only ever moved on and off the reserveq under the | ||
2496 | * l_grant_reserve_lock, we only need to take that lock if we are going | ||
2497 | * to add the ticket to the queue and sleep. We can avoid taking the lock if the | ||
2498 | * ticket was never added to the reserveq because the t_queue list head will be | ||
2499 | * empty and we hold the only reference to it so it can safely be checked | ||
2500 | * unlocked. | ||
2530 | */ | 2501 | */ |
2531 | STATIC int | 2502 | STATIC int |
2532 | xlog_grant_log_space(xlog_t *log, | 2503 | xlog_grant_log_space(xlog_t *log, |
@@ -2534,24 +2505,27 @@ xlog_grant_log_space(xlog_t *log, | |||
2534 | { | 2505 | { |
2535 | int free_bytes; | 2506 | int free_bytes; |
2536 | int need_bytes; | 2507 | int need_bytes; |
2537 | #ifdef DEBUG | ||
2538 | xfs_lsn_t tail_lsn; | ||
2539 | #endif | ||
2540 | |||
2541 | 2508 | ||
2542 | #ifdef DEBUG | 2509 | #ifdef DEBUG |
2543 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 2510 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
2544 | panic("grant Recovery problem"); | 2511 | panic("grant Recovery problem"); |
2545 | #endif | 2512 | #endif |
2546 | 2513 | ||
2547 | /* Is there space or do we need to sleep? */ | ||
2548 | spin_lock(&log->l_grant_lock); | ||
2549 | |||
2550 | trace_xfs_log_grant_enter(log, tic); | 2514 | trace_xfs_log_grant_enter(log, tic); |
2551 | 2515 | ||
2516 | need_bytes = tic->t_unit_res; | ||
2517 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2518 | need_bytes *= tic->t_ocnt; | ||
2519 | |||
2552 | /* something is already sleeping; insert new transaction at end */ | 2520 | /* something is already sleeping; insert new transaction at end */ |
2553 | if (log->l_reserve_headq) { | 2521 | if (!list_empty_careful(&log->l_reserveq)) { |
2554 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2522 | spin_lock(&log->l_grant_reserve_lock); |
2523 | /* recheck the queue now we are locked */ | ||
2524 | if (list_empty(&log->l_reserveq)) { | ||
2525 | spin_unlock(&log->l_grant_reserve_lock); | ||
2526 | goto redo; | ||
2527 | } | ||
2528 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
2555 | 2529 | ||
2556 | trace_xfs_log_grant_sleep1(log, tic); | 2530 | trace_xfs_log_grant_sleep1(log, tic); |
2557 | 2531 | ||
@@ -2563,72 +2537,57 @@ xlog_grant_log_space(xlog_t *log, | |||
2563 | goto error_return; | 2537 | goto error_return; |
2564 | 2538 | ||
2565 | XFS_STATS_INC(xs_sleep_logspace); | 2539 | XFS_STATS_INC(xs_sleep_logspace); |
2566 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2540 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); |
2541 | |||
2567 | /* | 2542 | /* |
2568 | * If we got an error, and the filesystem is shutting down, | 2543 | * If we got an error, and the filesystem is shutting down, |
2569 | * we'll catch it down below. So just continue... | 2544 | * we'll catch it down below. So just continue... |
2570 | */ | 2545 | */ |
2571 | trace_xfs_log_grant_wake1(log, tic); | 2546 | trace_xfs_log_grant_wake1(log, tic); |
2572 | spin_lock(&log->l_grant_lock); | ||
2573 | } | 2547 | } |
2574 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2575 | need_bytes = tic->t_unit_res*tic->t_ocnt; | ||
2576 | else | ||
2577 | need_bytes = tic->t_unit_res; | ||
2578 | 2548 | ||
2579 | redo: | 2549 | redo: |
2580 | if (XLOG_FORCED_SHUTDOWN(log)) | 2550 | if (XLOG_FORCED_SHUTDOWN(log)) |
2581 | goto error_return; | 2551 | goto error_return_unlocked; |
2582 | 2552 | ||
2583 | free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, | 2553 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
2584 | log->l_grant_reserve_bytes); | ||
2585 | if (free_bytes < need_bytes) { | 2554 | if (free_bytes < need_bytes) { |
2586 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2555 | spin_lock(&log->l_grant_reserve_lock); |
2587 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2556 | if (list_empty(&tic->t_queue)) |
2557 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
2588 | 2558 | ||
2589 | trace_xfs_log_grant_sleep2(log, tic); | 2559 | trace_xfs_log_grant_sleep2(log, tic); |
2590 | 2560 | ||
2591 | spin_unlock(&log->l_grant_lock); | ||
2592 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2593 | spin_lock(&log->l_grant_lock); | ||
2594 | |||
2595 | XFS_STATS_INC(xs_sleep_logspace); | ||
2596 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
2597 | |||
2598 | spin_lock(&log->l_grant_lock); | ||
2599 | if (XLOG_FORCED_SHUTDOWN(log)) | 2561 | if (XLOG_FORCED_SHUTDOWN(log)) |
2600 | goto error_return; | 2562 | goto error_return; |
2601 | 2563 | ||
2602 | trace_xfs_log_grant_wake2(log, tic); | 2564 | xlog_grant_push_ail(log, need_bytes); |
2565 | |||
2566 | XFS_STATS_INC(xs_sleep_logspace); | ||
2567 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); | ||
2603 | 2568 | ||
2569 | trace_xfs_log_grant_wake2(log, tic); | ||
2604 | goto redo; | 2570 | goto redo; |
2605 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2571 | } |
2606 | xlog_del_ticketq(&log->l_reserve_headq, tic); | ||
2607 | 2572 | ||
2608 | /* we've got enough space */ | 2573 | if (!list_empty(&tic->t_queue)) { |
2609 | xlog_grant_add_space(log, need_bytes); | 2574 | spin_lock(&log->l_grant_reserve_lock); |
2610 | #ifdef DEBUG | 2575 | list_del_init(&tic->t_queue); |
2611 | tail_lsn = log->l_tail_lsn; | 2576 | spin_unlock(&log->l_grant_reserve_lock); |
2612 | /* | ||
2613 | * Check to make sure the grant write head didn't just over lap the | ||
2614 | * tail. If the cycles are the same, we can't be overlapping. | ||
2615 | * Otherwise, make sure that the cycles differ by exactly one and | ||
2616 | * check the byte count. | ||
2617 | */ | ||
2618 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
2619 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
2620 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
2621 | } | 2577 | } |
2622 | #endif | 2578 | |
2579 | /* we've got enough space */ | ||
2580 | xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); | ||
2581 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2623 | trace_xfs_log_grant_exit(log, tic); | 2582 | trace_xfs_log_grant_exit(log, tic); |
2624 | xlog_verify_grant_head(log, 1); | 2583 | xlog_verify_grant_tail(log); |
2625 | spin_unlock(&log->l_grant_lock); | ||
2626 | return 0; | 2584 | return 0; |
2627 | 2585 | ||
2628 | error_return: | 2586 | error_return_unlocked: |
2629 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2587 | spin_lock(&log->l_grant_reserve_lock); |
2630 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2588 | error_return: |
2631 | 2589 | list_del_init(&tic->t_queue); | |
2590 | spin_unlock(&log->l_grant_reserve_lock); | ||
2632 | trace_xfs_log_grant_error(log, tic); | 2591 | trace_xfs_log_grant_error(log, tic); |
2633 | 2592 | ||
2634 | /* | 2593 | /* |
@@ -2638,7 +2597,6 @@ redo: | |||
2638 | */ | 2597 | */ |
2639 | tic->t_curr_res = 0; | 2598 | tic->t_curr_res = 0; |
2640 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2599 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
2641 | spin_unlock(&log->l_grant_lock); | ||
2642 | return XFS_ERROR(EIO); | 2600 | return XFS_ERROR(EIO); |
2643 | } /* xlog_grant_log_space */ | 2601 | } /* xlog_grant_log_space */ |
2644 | 2602 | ||
@@ -2646,17 +2604,14 @@ redo: | |||
2646 | /* | 2604 | /* |
2647 | * Replenish the byte reservation required by moving the grant write head. | 2605 | * Replenish the byte reservation required by moving the grant write head. |
2648 | * | 2606 | * |
2649 | * | 2607 | * Similar to xlog_grant_log_space, the function is structured to have a lock |
2608 | * free fast path. | ||
2650 | */ | 2609 | */ |
2651 | STATIC int | 2610 | STATIC int |
2652 | xlog_regrant_write_log_space(xlog_t *log, | 2611 | xlog_regrant_write_log_space(xlog_t *log, |
2653 | xlog_ticket_t *tic) | 2612 | xlog_ticket_t *tic) |
2654 | { | 2613 | { |
2655 | int free_bytes, need_bytes; | 2614 | int free_bytes, need_bytes; |
2656 | xlog_ticket_t *ntic; | ||
2657 | #ifdef DEBUG | ||
2658 | xfs_lsn_t tail_lsn; | ||
2659 | #endif | ||
2660 | 2615 | ||
2661 | tic->t_curr_res = tic->t_unit_res; | 2616 | tic->t_curr_res = tic->t_unit_res; |
2662 | xlog_tic_reset_res(tic); | 2617 | xlog_tic_reset_res(tic); |
@@ -2669,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2669 | panic("regrant Recovery problem"); | 2624 | panic("regrant Recovery problem"); |
2670 | #endif | 2625 | #endif |
2671 | 2626 | ||
2672 | spin_lock(&log->l_grant_lock); | ||
2673 | |||
2674 | trace_xfs_log_regrant_write_enter(log, tic); | 2627 | trace_xfs_log_regrant_write_enter(log, tic); |
2675 | |||
2676 | if (XLOG_FORCED_SHUTDOWN(log)) | 2628 | if (XLOG_FORCED_SHUTDOWN(log)) |
2677 | goto error_return; | 2629 | goto error_return_unlocked; |
2678 | 2630 | ||
2679 | /* If there are other waiters on the queue then give them a | 2631 | /* If there are other waiters on the queue then give them a |
2680 | * chance at logspace before us. Wake up the first waiters, | 2632 | * chance at logspace before us. Wake up the first waiters, |
@@ -2683,92 +2635,76 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2683 | * this transaction. | 2635 | * this transaction. |
2684 | */ | 2636 | */ |
2685 | need_bytes = tic->t_unit_res; | 2637 | need_bytes = tic->t_unit_res; |
2686 | if ((ntic = log->l_write_headq)) { | 2638 | if (!list_empty_careful(&log->l_writeq)) { |
2687 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2639 | struct xlog_ticket *ntic; |
2688 | log->l_grant_write_bytes); | 2640 | |
2689 | do { | 2641 | spin_lock(&log->l_grant_write_lock); |
2642 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); | ||
2643 | list_for_each_entry(ntic, &log->l_writeq, t_queue) { | ||
2690 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); | 2644 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); |
2691 | 2645 | ||
2692 | if (free_bytes < ntic->t_unit_res) | 2646 | if (free_bytes < ntic->t_unit_res) |
2693 | break; | 2647 | break; |
2694 | free_bytes -= ntic->t_unit_res; | 2648 | free_bytes -= ntic->t_unit_res; |
2695 | sv_signal(&ntic->t_wait); | 2649 | wake_up(&ntic->t_wait); |
2696 | ntic = ntic->t_next; | 2650 | } |
2697 | } while (ntic != log->l_write_headq); | ||
2698 | |||
2699 | if (ntic != log->l_write_headq) { | ||
2700 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | ||
2701 | xlog_ins_ticketq(&log->l_write_headq, tic); | ||
2702 | 2651 | ||
2652 | if (ntic != list_first_entry(&log->l_writeq, | ||
2653 | struct xlog_ticket, t_queue)) { | ||
2654 | if (list_empty(&tic->t_queue)) | ||
2655 | list_add_tail(&tic->t_queue, &log->l_writeq); | ||
2703 | trace_xfs_log_regrant_write_sleep1(log, tic); | 2656 | trace_xfs_log_regrant_write_sleep1(log, tic); |
2704 | 2657 | ||
2705 | spin_unlock(&log->l_grant_lock); | 2658 | xlog_grant_push_ail(log, need_bytes); |
2706 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2707 | spin_lock(&log->l_grant_lock); | ||
2708 | 2659 | ||
2709 | XFS_STATS_INC(xs_sleep_logspace); | 2660 | XFS_STATS_INC(xs_sleep_logspace); |
2710 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, | 2661 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); |
2711 | &log->l_grant_lock, s); | ||
2712 | |||
2713 | /* If we're shutting down, this tic is already | ||
2714 | * off the queue */ | ||
2715 | spin_lock(&log->l_grant_lock); | ||
2716 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
2717 | goto error_return; | ||
2718 | |||
2719 | trace_xfs_log_regrant_write_wake1(log, tic); | 2662 | trace_xfs_log_regrant_write_wake1(log, tic); |
2720 | } | 2663 | } else |
2664 | spin_unlock(&log->l_grant_write_lock); | ||
2721 | } | 2665 | } |
2722 | 2666 | ||
2723 | redo: | 2667 | redo: |
2724 | if (XLOG_FORCED_SHUTDOWN(log)) | 2668 | if (XLOG_FORCED_SHUTDOWN(log)) |
2725 | goto error_return; | 2669 | goto error_return_unlocked; |
2726 | 2670 | ||
2727 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2671 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
2728 | log->l_grant_write_bytes); | ||
2729 | if (free_bytes < need_bytes) { | 2672 | if (free_bytes < need_bytes) { |
2730 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2673 | spin_lock(&log->l_grant_write_lock); |
2731 | xlog_ins_ticketq(&log->l_write_headq, tic); | 2674 | if (list_empty(&tic->t_queue)) |
2732 | spin_unlock(&log->l_grant_lock); | 2675 | list_add_tail(&tic->t_queue, &log->l_writeq); |
2733 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2734 | spin_lock(&log->l_grant_lock); | ||
2735 | |||
2736 | XFS_STATS_INC(xs_sleep_logspace); | ||
2737 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
2738 | |||
2739 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
2740 | 2676 | ||
2741 | /* If we're shutting down, this tic is already off the queue */ | ||
2742 | spin_lock(&log->l_grant_lock); | ||
2743 | if (XLOG_FORCED_SHUTDOWN(log)) | 2677 | if (XLOG_FORCED_SHUTDOWN(log)) |
2744 | goto error_return; | 2678 | goto error_return; |
2745 | 2679 | ||
2680 | xlog_grant_push_ail(log, need_bytes); | ||
2681 | |||
2682 | XFS_STATS_INC(xs_sleep_logspace); | ||
2683 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
2684 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); | ||
2685 | |||
2746 | trace_xfs_log_regrant_write_wake2(log, tic); | 2686 | trace_xfs_log_regrant_write_wake2(log, tic); |
2747 | goto redo; | 2687 | goto redo; |
2748 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2688 | } |
2749 | xlog_del_ticketq(&log->l_write_headq, tic); | ||
2750 | 2689 | ||
2751 | /* we've got enough space */ | 2690 | if (!list_empty(&tic->t_queue)) { |
2752 | xlog_grant_add_space_write(log, need_bytes); | 2691 | spin_lock(&log->l_grant_write_lock); |
2753 | #ifdef DEBUG | 2692 | list_del_init(&tic->t_queue); |
2754 | tail_lsn = log->l_tail_lsn; | 2693 | spin_unlock(&log->l_grant_write_lock); |
2755 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
2756 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
2757 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
2758 | } | 2694 | } |
2759 | #endif | ||
2760 | 2695 | ||
2696 | /* we've got enough space */ | ||
2697 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2761 | trace_xfs_log_regrant_write_exit(log, tic); | 2698 | trace_xfs_log_regrant_write_exit(log, tic); |
2762 | 2699 | xlog_verify_grant_tail(log); | |
2763 | xlog_verify_grant_head(log, 1); | ||
2764 | spin_unlock(&log->l_grant_lock); | ||
2765 | return 0; | 2700 | return 0; |
2766 | 2701 | ||
2767 | 2702 | ||
2703 | error_return_unlocked: | ||
2704 | spin_lock(&log->l_grant_write_lock); | ||
2768 | error_return: | 2705 | error_return: |
2769 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2706 | list_del_init(&tic->t_queue); |
2770 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2707 | spin_unlock(&log->l_grant_write_lock); |
2771 | |||
2772 | trace_xfs_log_regrant_write_error(log, tic); | 2708 | trace_xfs_log_regrant_write_error(log, tic); |
2773 | 2709 | ||
2774 | /* | 2710 | /* |
@@ -2778,7 +2714,6 @@ redo: | |||
2778 | */ | 2714 | */ |
2779 | tic->t_curr_res = 0; | 2715 | tic->t_curr_res = 0; |
2780 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2716 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
2781 | spin_unlock(&log->l_grant_lock); | ||
2782 | return XFS_ERROR(EIO); | 2717 | return XFS_ERROR(EIO); |
2783 | } /* xlog_regrant_write_log_space */ | 2718 | } /* xlog_regrant_write_log_space */ |
2784 | 2719 | ||
@@ -2799,27 +2734,24 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2799 | if (ticket->t_cnt > 0) | 2734 | if (ticket->t_cnt > 0) |
2800 | ticket->t_cnt--; | 2735 | ticket->t_cnt--; |
2801 | 2736 | ||
2802 | spin_lock(&log->l_grant_lock); | 2737 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, |
2803 | xlog_grant_sub_space(log, ticket->t_curr_res); | 2738 | ticket->t_curr_res); |
2739 | xlog_grant_sub_space(log, &log->l_grant_write_head, | ||
2740 | ticket->t_curr_res); | ||
2804 | ticket->t_curr_res = ticket->t_unit_res; | 2741 | ticket->t_curr_res = ticket->t_unit_res; |
2805 | xlog_tic_reset_res(ticket); | 2742 | xlog_tic_reset_res(ticket); |
2806 | 2743 | ||
2807 | trace_xfs_log_regrant_reserve_sub(log, ticket); | 2744 | trace_xfs_log_regrant_reserve_sub(log, ticket); |
2808 | 2745 | ||
2809 | xlog_verify_grant_head(log, 1); | ||
2810 | |||
2811 | /* just return if we still have some of the pre-reserved space */ | 2746 | /* just return if we still have some of the pre-reserved space */ |
2812 | if (ticket->t_cnt > 0) { | 2747 | if (ticket->t_cnt > 0) |
2813 | spin_unlock(&log->l_grant_lock); | ||
2814 | return; | 2748 | return; |
2815 | } | ||
2816 | 2749 | ||
2817 | xlog_grant_add_space_reserve(log, ticket->t_unit_res); | 2750 | xlog_grant_add_space(log, &log->l_grant_reserve_head, |
2751 | ticket->t_unit_res); | ||
2818 | 2752 | ||
2819 | trace_xfs_log_regrant_reserve_exit(log, ticket); | 2753 | trace_xfs_log_regrant_reserve_exit(log, ticket); |
2820 | 2754 | ||
2821 | xlog_verify_grant_head(log, 0); | ||
2822 | spin_unlock(&log->l_grant_lock); | ||
2823 | ticket->t_curr_res = ticket->t_unit_res; | 2755 | ticket->t_curr_res = ticket->t_unit_res; |
2824 | xlog_tic_reset_res(ticket); | 2756 | xlog_tic_reset_res(ticket); |
2825 | } /* xlog_regrant_reserve_log_space */ | 2757 | } /* xlog_regrant_reserve_log_space */ |
@@ -2843,28 +2775,29 @@ STATIC void | |||
2843 | xlog_ungrant_log_space(xlog_t *log, | 2775 | xlog_ungrant_log_space(xlog_t *log, |
2844 | xlog_ticket_t *ticket) | 2776 | xlog_ticket_t *ticket) |
2845 | { | 2777 | { |
2778 | int bytes; | ||
2779 | |||
2846 | if (ticket->t_cnt > 0) | 2780 | if (ticket->t_cnt > 0) |
2847 | ticket->t_cnt--; | 2781 | ticket->t_cnt--; |
2848 | 2782 | ||
2849 | spin_lock(&log->l_grant_lock); | ||
2850 | trace_xfs_log_ungrant_enter(log, ticket); | 2783 | trace_xfs_log_ungrant_enter(log, ticket); |
2851 | |||
2852 | xlog_grant_sub_space(log, ticket->t_curr_res); | ||
2853 | |||
2854 | trace_xfs_log_ungrant_sub(log, ticket); | 2784 | trace_xfs_log_ungrant_sub(log, ticket); |
2855 | 2785 | ||
2856 | /* If this is a permanent reservation ticket, we may be able to free | 2786 | /* |
2787 | * If this is a permanent reservation ticket, we may be able to free | ||
2857 | * up more space based on the remaining count. | 2788 | * up more space based on the remaining count. |
2858 | */ | 2789 | */ |
2790 | bytes = ticket->t_curr_res; | ||
2859 | if (ticket->t_cnt > 0) { | 2791 | if (ticket->t_cnt > 0) { |
2860 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); | 2792 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); |
2861 | xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); | 2793 | bytes += ticket->t_unit_res*ticket->t_cnt; |
2862 | } | 2794 | } |
2863 | 2795 | ||
2796 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); | ||
2797 | xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); | ||
2798 | |||
2864 | trace_xfs_log_ungrant_exit(log, ticket); | 2799 | trace_xfs_log_ungrant_exit(log, ticket); |
2865 | 2800 | ||
2866 | xlog_verify_grant_head(log, 1); | ||
2867 | spin_unlock(&log->l_grant_lock); | ||
2868 | xfs_log_move_tail(log->l_mp, 1); | 2801 | xfs_log_move_tail(log->l_mp, 1); |
2869 | } /* xlog_ungrant_log_space */ | 2802 | } /* xlog_ungrant_log_space */ |
2870 | 2803 | ||
@@ -2901,11 +2834,11 @@ xlog_state_release_iclog( | |||
2901 | 2834 | ||
2902 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { | 2835 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
2903 | /* update tail before writing to iclog */ | 2836 | /* update tail before writing to iclog */ |
2904 | xlog_assign_tail_lsn(log->l_mp); | 2837 | xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); |
2905 | sync++; | 2838 | sync++; |
2906 | iclog->ic_state = XLOG_STATE_SYNCING; | 2839 | iclog->ic_state = XLOG_STATE_SYNCING; |
2907 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); | 2840 | iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); |
2908 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); | 2841 | xlog_verify_tail_lsn(log, iclog, tail_lsn); |
2909 | /* cycle incremented when incrementing curr_block */ | 2842 | /* cycle incremented when incrementing curr_block */ |
2910 | } | 2843 | } |
2911 | spin_unlock(&log->l_icloglock); | 2844 | spin_unlock(&log->l_icloglock); |
@@ -3088,7 +3021,7 @@ maybe_sleep: | |||
3088 | return XFS_ERROR(EIO); | 3021 | return XFS_ERROR(EIO); |
3089 | } | 3022 | } |
3090 | XFS_STATS_INC(xs_log_force_sleep); | 3023 | XFS_STATS_INC(xs_log_force_sleep); |
3091 | sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); | 3024 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
3092 | /* | 3025 | /* |
3093 | * No need to grab the log lock here since we're | 3026 | * No need to grab the log lock here since we're |
3094 | * only deciding whether or not to return EIO | 3027 | * only deciding whether or not to return EIO |
@@ -3206,8 +3139,8 @@ try_again: | |||
3206 | 3139 | ||
3207 | XFS_STATS_INC(xs_log_force_sleep); | 3140 | XFS_STATS_INC(xs_log_force_sleep); |
3208 | 3141 | ||
3209 | sv_wait(&iclog->ic_prev->ic_write_wait, | 3142 | xlog_wait(&iclog->ic_prev->ic_write_wait, |
3210 | PSWP, &log->l_icloglock, s); | 3143 | &log->l_icloglock); |
3211 | if (log_flushed) | 3144 | if (log_flushed) |
3212 | *log_flushed = 1; | 3145 | *log_flushed = 1; |
3213 | already_slept = 1; | 3146 | already_slept = 1; |
@@ -3235,7 +3168,7 @@ try_again: | |||
3235 | return XFS_ERROR(EIO); | 3168 | return XFS_ERROR(EIO); |
3236 | } | 3169 | } |
3237 | XFS_STATS_INC(xs_log_force_sleep); | 3170 | XFS_STATS_INC(xs_log_force_sleep); |
3238 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | 3171 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
3239 | /* | 3172 | /* |
3240 | * No need to grab the log lock here since we're | 3173 | * No need to grab the log lock here since we're |
3241 | * only deciding whether or not to return EIO | 3174 | * only deciding whether or not to return EIO |
@@ -3310,10 +3243,8 @@ xfs_log_ticket_put( | |||
3310 | xlog_ticket_t *ticket) | 3243 | xlog_ticket_t *ticket) |
3311 | { | 3244 | { |
3312 | ASSERT(atomic_read(&ticket->t_ref) > 0); | 3245 | ASSERT(atomic_read(&ticket->t_ref) > 0); |
3313 | if (atomic_dec_and_test(&ticket->t_ref)) { | 3246 | if (atomic_dec_and_test(&ticket->t_ref)) |
3314 | sv_destroy(&ticket->t_wait); | ||
3315 | kmem_zone_free(xfs_log_ticket_zone, ticket); | 3247 | kmem_zone_free(xfs_log_ticket_zone, ticket); |
3316 | } | ||
3317 | } | 3248 | } |
3318 | 3249 | ||
3319 | xlog_ticket_t * | 3250 | xlog_ticket_t * |
@@ -3435,6 +3366,7 @@ xlog_ticket_alloc( | |||
3435 | } | 3366 | } |
3436 | 3367 | ||
3437 | atomic_set(&tic->t_ref, 1); | 3368 | atomic_set(&tic->t_ref, 1); |
3369 | INIT_LIST_HEAD(&tic->t_queue); | ||
3438 | tic->t_unit_res = unit_bytes; | 3370 | tic->t_unit_res = unit_bytes; |
3439 | tic->t_curr_res = unit_bytes; | 3371 | tic->t_curr_res = unit_bytes; |
3440 | tic->t_cnt = cnt; | 3372 | tic->t_cnt = cnt; |
@@ -3445,7 +3377,7 @@ xlog_ticket_alloc( | |||
3445 | tic->t_trans_type = 0; | 3377 | tic->t_trans_type = 0; |
3446 | if (xflags & XFS_LOG_PERM_RESERV) | 3378 | if (xflags & XFS_LOG_PERM_RESERV) |
3447 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3379 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3448 | sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); | 3380 | init_waitqueue_head(&tic->t_wait); |
3449 | 3381 | ||
3450 | xlog_tic_reset_res(tic); | 3382 | xlog_tic_reset_res(tic); |
3451 | 3383 | ||
@@ -3484,18 +3416,25 @@ xlog_verify_dest_ptr( | |||
3484 | } | 3416 | } |
3485 | 3417 | ||
3486 | STATIC void | 3418 | STATIC void |
3487 | xlog_verify_grant_head(xlog_t *log, int equals) | 3419 | xlog_verify_grant_tail( |
3420 | struct log *log) | ||
3488 | { | 3421 | { |
3489 | if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { | 3422 | int tail_cycle, tail_blocks; |
3490 | if (equals) | 3423 | int cycle, space; |
3491 | ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); | 3424 | |
3492 | else | 3425 | /* |
3493 | ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); | 3426 | * Check to make sure the grant write head didn't just over lap the |
3494 | } else { | 3427 | * tail. If the cycles are the same, we can't be overlapping. |
3495 | ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); | 3428 | * Otherwise, make sure that the cycles differ by exactly one and |
3496 | ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); | 3429 | * check the byte count. |
3497 | } | 3430 | */ |
3498 | } /* xlog_verify_grant_head */ | 3431 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); |
3432 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); | ||
3433 | if (tail_cycle != cycle) { | ||
3434 | ASSERT(cycle - 1 == tail_cycle); | ||
3435 | ASSERT(space <= BBTOB(tail_blocks)); | ||
3436 | } | ||
3437 | } | ||
3499 | 3438 | ||
3500 | /* check if it will fit */ | 3439 | /* check if it will fit */ |
3501 | STATIC void | 3440 | STATIC void |
@@ -3716,12 +3655,10 @@ xfs_log_force_umount( | |||
3716 | xlog_cil_force(log); | 3655 | xlog_cil_force(log); |
3717 | 3656 | ||
3718 | /* | 3657 | /* |
3719 | * We must hold both the GRANT lock and the LOG lock, | 3658 | * mark the filesystem and the as in a shutdown state and wake |
3720 | * before we mark the filesystem SHUTDOWN and wake | 3659 | * everybody up to tell them the bad news. |
3721 | * everybody up to tell the bad news. | ||
3722 | */ | 3660 | */ |
3723 | spin_lock(&log->l_icloglock); | 3661 | spin_lock(&log->l_icloglock); |
3724 | spin_lock(&log->l_grant_lock); | ||
3725 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; | 3662 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; |
3726 | if (mp->m_sb_bp) | 3663 | if (mp->m_sb_bp) |
3727 | XFS_BUF_DONE(mp->m_sb_bp); | 3664 | XFS_BUF_DONE(mp->m_sb_bp); |
@@ -3742,27 +3679,21 @@ xfs_log_force_umount( | |||
3742 | spin_unlock(&log->l_icloglock); | 3679 | spin_unlock(&log->l_icloglock); |
3743 | 3680 | ||
3744 | /* | 3681 | /* |
3745 | * We don't want anybody waiting for log reservations | 3682 | * We don't want anybody waiting for log reservations after this. That |
3746 | * after this. That means we have to wake up everybody | 3683 | * means we have to wake up everybody queued up on reserveq as well as |
3747 | * queued up on reserve_headq as well as write_headq. | 3684 | * writeq. In addition, we make sure in xlog_{re}grant_log_space that |
3748 | * In addition, we make sure in xlog_{re}grant_log_space | 3685 | * we don't enqueue anything once the SHUTDOWN flag is set, and this |
3749 | * that we don't enqueue anything once the SHUTDOWN flag | 3686 | * action is protected by the grant locks. |
3750 | * is set, and this action is protected by the GRANTLOCK. | ||
3751 | */ | 3687 | */ |
3752 | if ((tic = log->l_reserve_headq)) { | 3688 | spin_lock(&log->l_grant_reserve_lock); |
3753 | do { | 3689 | list_for_each_entry(tic, &log->l_reserveq, t_queue) |
3754 | sv_signal(&tic->t_wait); | 3690 | wake_up(&tic->t_wait); |
3755 | tic = tic->t_next; | 3691 | spin_unlock(&log->l_grant_reserve_lock); |
3756 | } while (tic != log->l_reserve_headq); | 3692 | |
3757 | } | 3693 | spin_lock(&log->l_grant_write_lock); |
3758 | 3694 | list_for_each_entry(tic, &log->l_writeq, t_queue) | |
3759 | if ((tic = log->l_write_headq)) { | 3695 | wake_up(&tic->t_wait); |
3760 | do { | 3696 | spin_unlock(&log->l_grant_write_lock); |
3761 | sv_signal(&tic->t_wait); | ||
3762 | tic = tic->t_next; | ||
3763 | } while (tic != log->l_write_headq); | ||
3764 | } | ||
3765 | spin_unlock(&log->l_grant_lock); | ||
3766 | 3697 | ||
3767 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3698 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
3768 | ASSERT(!logerror); | 3699 | ASSERT(!logerror); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 23d6ceb5e97b..9dc8125d04e5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -61,7 +61,7 @@ xlog_cil_init( | |||
61 | INIT_LIST_HEAD(&cil->xc_committing); | 61 | INIT_LIST_HEAD(&cil->xc_committing); |
62 | spin_lock_init(&cil->xc_cil_lock); | 62 | spin_lock_init(&cil->xc_cil_lock); |
63 | init_rwsem(&cil->xc_ctx_lock); | 63 | init_rwsem(&cil->xc_ctx_lock); |
64 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | 64 | init_waitqueue_head(&cil->xc_commit_wait); |
65 | 65 | ||
66 | INIT_LIST_HEAD(&ctx->committing); | 66 | INIT_LIST_HEAD(&ctx->committing); |
67 | INIT_LIST_HEAD(&ctx->busy_extents); | 67 | INIT_LIST_HEAD(&ctx->busy_extents); |
@@ -361,15 +361,10 @@ xlog_cil_committed( | |||
361 | int abort) | 361 | int abort) |
362 | { | 362 | { |
363 | struct xfs_cil_ctx *ctx = args; | 363 | struct xfs_cil_ctx *ctx = args; |
364 | struct xfs_log_vec *lv; | ||
365 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
366 | struct xfs_busy_extent *busyp, *n; | 364 | struct xfs_busy_extent *busyp, *n; |
367 | 365 | ||
368 | /* unpin all the log items */ | 366 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
369 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { | 367 | ctx->start_lsn, abort); |
370 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
371 | abortflag); | ||
372 | } | ||
373 | 368 | ||
374 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | 369 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) |
375 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | 370 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); |
@@ -568,7 +563,7 @@ restart: | |||
568 | * It is still being pushed! Wait for the push to | 563 | * It is still being pushed! Wait for the push to |
569 | * complete, then start again from the beginning. | 564 | * complete, then start again from the beginning. |
570 | */ | 565 | */ |
571 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 566 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
572 | goto restart; | 567 | goto restart; |
573 | } | 568 | } |
574 | } | 569 | } |
@@ -592,7 +587,7 @@ restart: | |||
592 | */ | 587 | */ |
593 | spin_lock(&cil->xc_cil_lock); | 588 | spin_lock(&cil->xc_cil_lock); |
594 | ctx->commit_lsn = commit_lsn; | 589 | ctx->commit_lsn = commit_lsn; |
595 | sv_broadcast(&cil->xc_commit_wait); | 590 | wake_up_all(&cil->xc_commit_wait); |
596 | spin_unlock(&cil->xc_cil_lock); | 591 | spin_unlock(&cil->xc_cil_lock); |
597 | 592 | ||
598 | /* release the hounds! */ | 593 | /* release the hounds! */ |
@@ -757,7 +752,7 @@ restart: | |||
757 | * It is still being pushed! Wait for the push to | 752 | * It is still being pushed! Wait for the push to |
758 | * complete, then start again from the beginning. | 753 | * complete, then start again from the beginning. |
759 | */ | 754 | */ |
760 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 755 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
761 | goto restart; | 756 | goto restart; |
762 | } | 757 | } |
763 | if (ctx->sequence != sequence) | 758 | if (ctx->sequence != sequence) |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index edcdfe01617f..d5f8be8f4bf6 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -21,7 +21,6 @@ | |||
21 | struct xfs_buf; | 21 | struct xfs_buf; |
22 | struct log; | 22 | struct log; |
23 | struct xlog_ticket; | 23 | struct xlog_ticket; |
24 | struct xfs_buf_cancel; | ||
25 | struct xfs_mount; | 24 | struct xfs_mount; |
26 | 25 | ||
27 | /* | 26 | /* |
@@ -54,7 +53,6 @@ struct xfs_mount; | |||
54 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ | 53 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ |
55 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) | 54 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) |
56 | 55 | ||
57 | |||
58 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) | 56 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) |
59 | { | 57 | { |
60 | return ((xfs_lsn_t)cycle << 32) | block; | 58 | return ((xfs_lsn_t)cycle << 32) | block; |
@@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i) | |||
133 | */ | 131 | */ |
134 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ | 132 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ |
135 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ | 133 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ |
136 | #define XLOG_TIC_IN_Q 0x4 | ||
137 | 134 | ||
138 | #define XLOG_TIC_FLAGS \ | 135 | #define XLOG_TIC_FLAGS \ |
139 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ | 136 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ |
140 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ | 137 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } |
141 | { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" } | ||
142 | 138 | ||
143 | #endif /* __KERNEL__ */ | 139 | #endif /* __KERNEL__ */ |
144 | 140 | ||
@@ -244,9 +240,8 @@ typedef struct xlog_res { | |||
244 | } xlog_res_t; | 240 | } xlog_res_t; |
245 | 241 | ||
246 | typedef struct xlog_ticket { | 242 | typedef struct xlog_ticket { |
247 | sv_t t_wait; /* ticket wait queue : 20 */ | 243 | wait_queue_head_t t_wait; /* ticket wait queue */ |
248 | struct xlog_ticket *t_next; /* :4|8 */ | 244 | struct list_head t_queue; /* reserve/write queue */ |
249 | struct xlog_ticket *t_prev; /* :4|8 */ | ||
250 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 245 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
251 | atomic_t t_ref; /* ticket reference count : 4 */ | 246 | atomic_t t_ref; /* ticket reference count : 4 */ |
252 | int t_curr_res; /* current reservation in bytes : 4 */ | 247 | int t_curr_res; /* current reservation in bytes : 4 */ |
@@ -353,8 +348,8 @@ typedef union xlog_in_core2 { | |||
353 | * and move everything else out to subsequent cachelines. | 348 | * and move everything else out to subsequent cachelines. |
354 | */ | 349 | */ |
355 | typedef struct xlog_in_core { | 350 | typedef struct xlog_in_core { |
356 | sv_t ic_force_wait; | 351 | wait_queue_head_t ic_force_wait; |
357 | sv_t ic_write_wait; | 352 | wait_queue_head_t ic_write_wait; |
358 | struct xlog_in_core *ic_next; | 353 | struct xlog_in_core *ic_next; |
359 | struct xlog_in_core *ic_prev; | 354 | struct xlog_in_core *ic_prev; |
360 | struct xfs_buf *ic_bp; | 355 | struct xfs_buf *ic_bp; |
@@ -421,7 +416,7 @@ struct xfs_cil { | |||
421 | struct xfs_cil_ctx *xc_ctx; | 416 | struct xfs_cil_ctx *xc_ctx; |
422 | struct rw_semaphore xc_ctx_lock; | 417 | struct rw_semaphore xc_ctx_lock; |
423 | struct list_head xc_committing; | 418 | struct list_head xc_committing; |
424 | sv_t xc_commit_wait; | 419 | wait_queue_head_t xc_commit_wait; |
425 | xfs_lsn_t xc_current_sequence; | 420 | xfs_lsn_t xc_current_sequence; |
426 | }; | 421 | }; |
427 | 422 | ||
@@ -491,7 +486,7 @@ typedef struct log { | |||
491 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 486 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
492 | uint l_flags; | 487 | uint l_flags; |
493 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 488 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ |
494 | struct xfs_buf_cancel **l_buf_cancel_table; | 489 | struct list_head *l_buf_cancel_table; |
495 | int l_iclog_hsize; /* size of iclog header */ | 490 | int l_iclog_hsize; /* size of iclog header */ |
496 | int l_iclog_heads; /* # of iclog header sectors */ | 491 | int l_iclog_heads; /* # of iclog header sectors */ |
497 | uint l_sectBBsize; /* sector size in BBs (2^n) */ | 492 | uint l_sectBBsize; /* sector size in BBs (2^n) */ |
@@ -503,29 +498,40 @@ typedef struct log { | |||
503 | int l_logBBsize; /* size of log in BB chunks */ | 498 | int l_logBBsize; /* size of log in BB chunks */ |
504 | 499 | ||
505 | /* The following block of fields are changed while holding icloglock */ | 500 | /* The following block of fields are changed while holding icloglock */ |
506 | sv_t l_flush_wait ____cacheline_aligned_in_smp; | 501 | wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; |
507 | /* waiting for iclog flush */ | 502 | /* waiting for iclog flush */ |
508 | int l_covered_state;/* state of "covering disk | 503 | int l_covered_state;/* state of "covering disk |
509 | * log entries" */ | 504 | * log entries" */ |
510 | xlog_in_core_t *l_iclog; /* head log queue */ | 505 | xlog_in_core_t *l_iclog; /* head log queue */ |
511 | spinlock_t l_icloglock; /* grab to change iclog state */ | 506 | spinlock_t l_icloglock; /* grab to change iclog state */ |
512 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | ||
513 | * buffers */ | ||
514 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ | ||
515 | int l_curr_cycle; /* Cycle number of log writes */ | 507 | int l_curr_cycle; /* Cycle number of log writes */ |
516 | int l_prev_cycle; /* Cycle number before last | 508 | int l_prev_cycle; /* Cycle number before last |
517 | * block increment */ | 509 | * block increment */ |
518 | int l_curr_block; /* current logical log block */ | 510 | int l_curr_block; /* current logical log block */ |
519 | int l_prev_block; /* previous logical log block */ | 511 | int l_prev_block; /* previous logical log block */ |
520 | 512 | ||
521 | /* The following block of fields are changed while holding grant_lock */ | 513 | /* |
522 | spinlock_t l_grant_lock ____cacheline_aligned_in_smp; | 514 | * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and |
523 | xlog_ticket_t *l_reserve_headq; | 515 | * read without needing to hold specific locks. To avoid operations |
524 | xlog_ticket_t *l_write_headq; | 516 | * contending with other hot objects, place each of them on a separate |
525 | int l_grant_reserve_cycle; | 517 | * cacheline. |
526 | int l_grant_reserve_bytes; | 518 | */ |
527 | int l_grant_write_cycle; | 519 | /* lsn of last LR on disk */ |
528 | int l_grant_write_bytes; | 520 | atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; |
521 | /* lsn of 1st LR with unflushed * buffers */ | ||
522 | atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; | ||
523 | |||
524 | /* | ||
525 | * ticket grant locks, queues and accounting have their own cachlines | ||
526 | * as these are quite hot and can be operated on concurrently. | ||
527 | */ | ||
528 | spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; | ||
529 | struct list_head l_reserveq; | ||
530 | atomic64_t l_grant_reserve_head; | ||
531 | |||
532 | spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; | ||
533 | struct list_head l_writeq; | ||
534 | atomic64_t l_grant_write_head; | ||
529 | 535 | ||
530 | /* The following field are used for debugging; need to hold icloglock */ | 536 | /* The following field are used for debugging; need to hold icloglock */ |
531 | #ifdef DEBUG | 537 | #ifdef DEBUG |
@@ -534,6 +540,9 @@ typedef struct log { | |||
534 | 540 | ||
535 | } xlog_t; | 541 | } xlog_t; |
536 | 542 | ||
543 | #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ | ||
544 | ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) | ||
545 | |||
537 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 546 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
538 | 547 | ||
539 | /* common routines */ | 548 | /* common routines */ |
@@ -562,6 +571,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | |||
562 | xlog_in_core_t **commit_iclog, uint flags); | 571 | xlog_in_core_t **commit_iclog, uint flags); |
563 | 572 | ||
564 | /* | 573 | /* |
574 | * When we crack an atomic LSN, we sample it first so that the value will not | ||
575 | * change while we are cracking it into the component values. This means we | ||
576 | * will always get consistent component values to work from. This should always | ||
577 | * be used to smaple and crack LSNs taht are stored and updated in atomic | ||
578 | * variables. | ||
579 | */ | ||
580 | static inline void | ||
581 | xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) | ||
582 | { | ||
583 | xfs_lsn_t val = atomic64_read(lsn); | ||
584 | |||
585 | *cycle = CYCLE_LSN(val); | ||
586 | *block = BLOCK_LSN(val); | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * Calculate and assign a value to an atomic LSN variable from component pieces. | ||
591 | */ | ||
592 | static inline void | ||
593 | xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) | ||
594 | { | ||
595 | atomic64_set(lsn, xlog_assign_lsn(cycle, block)); | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * When we crack the grant head, we sample it first so that the value will not | ||
600 | * change while we are cracking it into the component values. This means we | ||
601 | * will always get consistent component values to work from. | ||
602 | */ | ||
603 | static inline void | ||
604 | xlog_crack_grant_head_val(int64_t val, int *cycle, int *space) | ||
605 | { | ||
606 | *cycle = val >> 32; | ||
607 | *space = val & 0xffffffff; | ||
608 | } | ||
609 | |||
610 | static inline void | ||
611 | xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) | ||
612 | { | ||
613 | xlog_crack_grant_head_val(atomic64_read(head), cycle, space); | ||
614 | } | ||
615 | |||
616 | static inline int64_t | ||
617 | xlog_assign_grant_head_val(int cycle, int space) | ||
618 | { | ||
619 | return ((int64_t)cycle << 32) | space; | ||
620 | } | ||
621 | |||
622 | static inline void | ||
623 | xlog_assign_grant_head(atomic64_t *head, int cycle, int space) | ||
624 | { | ||
625 | atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); | ||
626 | } | ||
627 | |||
628 | /* | ||
565 | * Committed Item List interfaces | 629 | * Committed Item List interfaces |
566 | */ | 630 | */ |
567 | int xlog_cil_init(struct log *log); | 631 | int xlog_cil_init(struct log *log); |
@@ -585,6 +649,21 @@ xlog_cil_force(struct log *log) | |||
585 | */ | 649 | */ |
586 | #define XLOG_UNMOUNT_REC_TYPE (-1U) | 650 | #define XLOG_UNMOUNT_REC_TYPE (-1U) |
587 | 651 | ||
652 | /* | ||
653 | * Wrapper function for waiting on a wait queue serialised against wakeups | ||
654 | * by a spinlock. This matches the semantics of all the wait queues used in the | ||
655 | * log code. | ||
656 | */ | ||
657 | static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) | ||
658 | { | ||
659 | DECLARE_WAITQUEUE(wait, current); | ||
660 | |||
661 | add_wait_queue_exclusive(wq, &wait); | ||
662 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
663 | spin_unlock(lock); | ||
664 | schedule(); | ||
665 | remove_wait_queue(wq, &wait); | ||
666 | } | ||
588 | #endif /* __KERNEL__ */ | 667 | #endif /* __KERNEL__ */ |
589 | 668 | ||
590 | #endif /* __XFS_LOG_PRIV_H__ */ | 669 | #endif /* __XFS_LOG_PRIV_H__ */ |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 966d3f97458c..204d8e5fa7fa 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *); | |||
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * This structure is used during recovery to record the buf log items which | ||
57 | * have been canceled and should not be replayed. | ||
58 | */ | ||
59 | struct xfs_buf_cancel { | ||
60 | xfs_daddr_t bc_blkno; | ||
61 | uint bc_len; | ||
62 | int bc_refcount; | ||
63 | struct list_head bc_list; | ||
64 | }; | ||
65 | |||
66 | /* | ||
56 | * Sector aligned buffer routines for buffer create/read/write/access | 67 | * Sector aligned buffer routines for buffer create/read/write/access |
57 | */ | 68 | */ |
58 | 69 | ||
@@ -925,12 +936,12 @@ xlog_find_tail( | |||
925 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 936 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
926 | if (found == 2) | 937 | if (found == 2) |
927 | log->l_curr_cycle++; | 938 | log->l_curr_cycle++; |
928 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); | 939 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); |
929 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); | 940 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); |
930 | log->l_grant_reserve_cycle = log->l_curr_cycle; | 941 | xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, |
931 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | 942 | BBTOB(log->l_curr_block)); |
932 | log->l_grant_write_cycle = log->l_curr_cycle; | 943 | xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, |
933 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | 944 | BBTOB(log->l_curr_block)); |
934 | 945 | ||
935 | /* | 946 | /* |
936 | * Look for unmount record. If we find it, then we know there | 947 | * Look for unmount record. If we find it, then we know there |
@@ -960,7 +971,7 @@ xlog_find_tail( | |||
960 | } | 971 | } |
961 | after_umount_blk = (i + hblks + (int) | 972 | after_umount_blk = (i + hblks + (int) |
962 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 973 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
963 | tail_lsn = log->l_tail_lsn; | 974 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
964 | if (*head_blk == after_umount_blk && | 975 | if (*head_blk == after_umount_blk && |
965 | be32_to_cpu(rhead->h_num_logops) == 1) { | 976 | be32_to_cpu(rhead->h_num_logops) == 1) { |
966 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 977 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
@@ -975,12 +986,10 @@ xlog_find_tail( | |||
975 | * log records will point recovery to after the | 986 | * log records will point recovery to after the |
976 | * current unmount record. | 987 | * current unmount record. |
977 | */ | 988 | */ |
978 | log->l_tail_lsn = | 989 | xlog_assign_atomic_lsn(&log->l_tail_lsn, |
979 | xlog_assign_lsn(log->l_curr_cycle, | 990 | log->l_curr_cycle, after_umount_blk); |
980 | after_umount_blk); | 991 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, |
981 | log->l_last_sync_lsn = | 992 | log->l_curr_cycle, after_umount_blk); |
982 | xlog_assign_lsn(log->l_curr_cycle, | ||
983 | after_umount_blk); | ||
984 | *tail_blk = after_umount_blk; | 993 | *tail_blk = after_umount_blk; |
985 | 994 | ||
986 | /* | 995 | /* |
@@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans( | |||
1605 | * record in the table to tell us how many times we expect to see this | 1614 | * record in the table to tell us how many times we expect to see this |
1606 | * record during the second pass. | 1615 | * record during the second pass. |
1607 | */ | 1616 | */ |
1608 | STATIC void | 1617 | STATIC int |
1609 | xlog_recover_do_buffer_pass1( | 1618 | xlog_recover_buffer_pass1( |
1610 | xlog_t *log, | 1619 | struct log *log, |
1611 | xfs_buf_log_format_t *buf_f) | 1620 | xlog_recover_item_t *item) |
1612 | { | 1621 | { |
1613 | xfs_buf_cancel_t *bcp; | 1622 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1614 | xfs_buf_cancel_t *nextp; | 1623 | struct list_head *bucket; |
1615 | xfs_buf_cancel_t *prevp; | 1624 | struct xfs_buf_cancel *bcp; |
1616 | xfs_buf_cancel_t **bucket; | ||
1617 | xfs_daddr_t blkno = 0; | ||
1618 | uint len = 0; | ||
1619 | ushort flags = 0; | ||
1620 | |||
1621 | switch (buf_f->blf_type) { | ||
1622 | case XFS_LI_BUF: | ||
1623 | blkno = buf_f->blf_blkno; | ||
1624 | len = buf_f->blf_len; | ||
1625 | flags = buf_f->blf_flags; | ||
1626 | break; | ||
1627 | } | ||
1628 | 1625 | ||
1629 | /* | 1626 | /* |
1630 | * If this isn't a cancel buffer item, then just return. | 1627 | * If this isn't a cancel buffer item, then just return. |
1631 | */ | 1628 | */ |
1632 | if (!(flags & XFS_BLF_CANCEL)) { | 1629 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1633 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1630 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
1634 | return; | 1631 | return 0; |
1635 | } | ||
1636 | |||
1637 | /* | ||
1638 | * Insert an xfs_buf_cancel record into the hash table of | ||
1639 | * them. If there is already an identical record, bump | ||
1640 | * its reference count. | ||
1641 | */ | ||
1642 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
1643 | XLOG_BC_TABLE_SIZE]; | ||
1644 | /* | ||
1645 | * If the hash bucket is empty then just insert a new record into | ||
1646 | * the bucket. | ||
1647 | */ | ||
1648 | if (*bucket == NULL) { | ||
1649 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | ||
1650 | KM_SLEEP); | ||
1651 | bcp->bc_blkno = blkno; | ||
1652 | bcp->bc_len = len; | ||
1653 | bcp->bc_refcount = 1; | ||
1654 | bcp->bc_next = NULL; | ||
1655 | *bucket = bcp; | ||
1656 | return; | ||
1657 | } | 1632 | } |
1658 | 1633 | ||
1659 | /* | 1634 | /* |
1660 | * The hash bucket is not empty, so search for duplicates of our | 1635 | * Insert an xfs_buf_cancel record into the hash table of them. |
1661 | * record. If we find one them just bump its refcount. If not | 1636 | * If there is already an identical record, bump its reference count. |
1662 | * then add us at the end of the list. | ||
1663 | */ | 1637 | */ |
1664 | prevp = NULL; | 1638 | bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
1665 | nextp = *bucket; | 1639 | list_for_each_entry(bcp, bucket, bc_list) { |
1666 | while (nextp != NULL) { | 1640 | if (bcp->bc_blkno == buf_f->blf_blkno && |
1667 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { | 1641 | bcp->bc_len == buf_f->blf_len) { |
1668 | nextp->bc_refcount++; | 1642 | bcp->bc_refcount++; |
1669 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); | 1643 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
1670 | return; | 1644 | return 0; |
1671 | } | 1645 | } |
1672 | prevp = nextp; | 1646 | } |
1673 | nextp = nextp->bc_next; | 1647 | |
1674 | } | 1648 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
1675 | ASSERT(prevp != NULL); | 1649 | bcp->bc_blkno = buf_f->blf_blkno; |
1676 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | 1650 | bcp->bc_len = buf_f->blf_len; |
1677 | KM_SLEEP); | ||
1678 | bcp->bc_blkno = blkno; | ||
1679 | bcp->bc_len = len; | ||
1680 | bcp->bc_refcount = 1; | 1651 | bcp->bc_refcount = 1; |
1681 | bcp->bc_next = NULL; | 1652 | list_add_tail(&bcp->bc_list, bucket); |
1682 | prevp->bc_next = bcp; | 1653 | |
1683 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); | 1654 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
1655 | return 0; | ||
1684 | } | 1656 | } |
1685 | 1657 | ||
1686 | /* | 1658 | /* |
@@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1( | |||
1698 | */ | 1670 | */ |
1699 | STATIC int | 1671 | STATIC int |
1700 | xlog_check_buffer_cancelled( | 1672 | xlog_check_buffer_cancelled( |
1701 | xlog_t *log, | 1673 | struct log *log, |
1702 | xfs_daddr_t blkno, | 1674 | xfs_daddr_t blkno, |
1703 | uint len, | 1675 | uint len, |
1704 | ushort flags) | 1676 | ushort flags) |
1705 | { | 1677 | { |
1706 | xfs_buf_cancel_t *bcp; | 1678 | struct list_head *bucket; |
1707 | xfs_buf_cancel_t *prevp; | 1679 | struct xfs_buf_cancel *bcp; |
1708 | xfs_buf_cancel_t **bucket; | ||
1709 | 1680 | ||
1710 | if (log->l_buf_cancel_table == NULL) { | 1681 | if (log->l_buf_cancel_table == NULL) { |
1711 | /* | 1682 | /* |
@@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled( | |||
1716 | return 0; | 1687 | return 0; |
1717 | } | 1688 | } |
1718 | 1689 | ||
1719 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
1720 | XLOG_BC_TABLE_SIZE]; | ||
1721 | bcp = *bucket; | ||
1722 | if (bcp == NULL) { | ||
1723 | /* | ||
1724 | * There is no corresponding entry in the table built | ||
1725 | * in pass one, so this buffer has not been cancelled. | ||
1726 | */ | ||
1727 | ASSERT(!(flags & XFS_BLF_CANCEL)); | ||
1728 | return 0; | ||
1729 | } | ||
1730 | |||
1731 | /* | 1690 | /* |
1732 | * Search for an entry in the buffer cancel table that | 1691 | * Search for an entry in the cancel table that matches our buffer. |
1733 | * matches our buffer. | ||
1734 | */ | 1692 | */ |
1735 | prevp = NULL; | 1693 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
1736 | while (bcp != NULL) { | 1694 | list_for_each_entry(bcp, bucket, bc_list) { |
1737 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) { | 1695 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
1738 | /* | 1696 | goto found; |
1739 | * We've go a match, so return 1 so that the | ||
1740 | * recovery of this buffer is cancelled. | ||
1741 | * If this buffer is actually a buffer cancel | ||
1742 | * log item, then decrement the refcount on the | ||
1743 | * one in the table and remove it if this is the | ||
1744 | * last reference. | ||
1745 | */ | ||
1746 | if (flags & XFS_BLF_CANCEL) { | ||
1747 | bcp->bc_refcount--; | ||
1748 | if (bcp->bc_refcount == 0) { | ||
1749 | if (prevp == NULL) { | ||
1750 | *bucket = bcp->bc_next; | ||
1751 | } else { | ||
1752 | prevp->bc_next = bcp->bc_next; | ||
1753 | } | ||
1754 | kmem_free(bcp); | ||
1755 | } | ||
1756 | } | ||
1757 | return 1; | ||
1758 | } | ||
1759 | prevp = bcp; | ||
1760 | bcp = bcp->bc_next; | ||
1761 | } | 1697 | } |
1698 | |||
1762 | /* | 1699 | /* |
1763 | * We didn't find a corresponding entry in the table, so | 1700 | * We didn't find a corresponding entry in the table, so return 0 so |
1764 | * return 0 so that the buffer is NOT cancelled. | 1701 | * that the buffer is NOT cancelled. |
1765 | */ | 1702 | */ |
1766 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1703 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1767 | return 0; | 1704 | return 0; |
1768 | } | ||
1769 | 1705 | ||
1770 | STATIC int | 1706 | found: |
1771 | xlog_recover_do_buffer_pass2( | 1707 | /* |
1772 | xlog_t *log, | 1708 | * We've go a match, so return 1 so that the recovery of this buffer |
1773 | xfs_buf_log_format_t *buf_f) | 1709 | * is cancelled. If this buffer is actually a buffer cancel log |
1774 | { | 1710 | * item, then decrement the refcount on the one in the table and |
1775 | xfs_daddr_t blkno = 0; | 1711 | * remove it if this is the last reference. |
1776 | ushort flags = 0; | 1712 | */ |
1777 | uint len = 0; | 1713 | if (flags & XFS_BLF_CANCEL) { |
1778 | 1714 | if (--bcp->bc_refcount == 0) { | |
1779 | switch (buf_f->blf_type) { | 1715 | list_del(&bcp->bc_list); |
1780 | case XFS_LI_BUF: | 1716 | kmem_free(bcp); |
1781 | blkno = buf_f->blf_blkno; | 1717 | } |
1782 | flags = buf_f->blf_flags; | ||
1783 | len = buf_f->blf_len; | ||
1784 | break; | ||
1785 | } | 1718 | } |
1786 | 1719 | return 1; | |
1787 | return xlog_check_buffer_cancelled(log, blkno, len, flags); | ||
1788 | } | 1720 | } |
1789 | 1721 | ||
1790 | /* | 1722 | /* |
1791 | * Perform recovery for a buffer full of inodes. In these buffers, | 1723 | * Perform recovery for a buffer full of inodes. In these buffers, the only |
1792 | * the only data which should be recovered is that which corresponds | 1724 | * data which should be recovered is that which corresponds to the |
1793 | * to the di_next_unlinked pointers in the on disk inode structures. | 1725 | * di_next_unlinked pointers in the on disk inode structures. The rest of the |
1794 | * The rest of the data for the inodes is always logged through the | 1726 | * data for the inodes is always logged through the inodes themselves rather |
1795 | * inodes themselves rather than the inode buffer and is recovered | 1727 | * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
1796 | * in xlog_recover_do_inode_trans(). | ||
1797 | * | 1728 | * |
1798 | * The only time when buffers full of inodes are fully recovered is | 1729 | * The only time when buffers full of inodes are fully recovered is when the |
1799 | * when the buffer is full of newly allocated inodes. In this case | 1730 | * buffer is full of newly allocated inodes. In this case the buffer will |
1800 | * the buffer will not be marked as an inode buffer and so will be | 1731 | * not be marked as an inode buffer and so will be sent to |
1801 | * sent to xlog_recover_do_reg_buffer() below during recovery. | 1732 | * xlog_recover_do_reg_buffer() below during recovery. |
1802 | */ | 1733 | */ |
1803 | STATIC int | 1734 | STATIC int |
1804 | xlog_recover_do_inode_buffer( | 1735 | xlog_recover_do_inode_buffer( |
1805 | xfs_mount_t *mp, | 1736 | struct xfs_mount *mp, |
1806 | xlog_recover_item_t *item, | 1737 | xlog_recover_item_t *item, |
1807 | xfs_buf_t *bp, | 1738 | struct xfs_buf *bp, |
1808 | xfs_buf_log_format_t *buf_f) | 1739 | xfs_buf_log_format_t *buf_f) |
1809 | { | 1740 | { |
1810 | int i; | 1741 | int i; |
1811 | int item_index; | 1742 | int item_index = 0; |
1812 | int bit; | 1743 | int bit = 0; |
1813 | int nbits; | 1744 | int nbits = 0; |
1814 | int reg_buf_offset; | 1745 | int reg_buf_offset = 0; |
1815 | int reg_buf_bytes; | 1746 | int reg_buf_bytes = 0; |
1816 | int next_unlinked_offset; | 1747 | int next_unlinked_offset; |
1817 | int inodes_per_buf; | 1748 | int inodes_per_buf; |
1818 | xfs_agino_t *logged_nextp; | 1749 | xfs_agino_t *logged_nextp; |
1819 | xfs_agino_t *buffer_nextp; | 1750 | xfs_agino_t *buffer_nextp; |
1820 | unsigned int *data_map = NULL; | ||
1821 | unsigned int map_size = 0; | ||
1822 | 1751 | ||
1823 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1752 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
1824 | 1753 | ||
1825 | switch (buf_f->blf_type) { | ||
1826 | case XFS_LI_BUF: | ||
1827 | data_map = buf_f->blf_data_map; | ||
1828 | map_size = buf_f->blf_map_size; | ||
1829 | break; | ||
1830 | } | ||
1831 | /* | ||
1832 | * Set the variables corresponding to the current region to | ||
1833 | * 0 so that we'll initialize them on the first pass through | ||
1834 | * the loop. | ||
1835 | */ | ||
1836 | reg_buf_offset = 0; | ||
1837 | reg_buf_bytes = 0; | ||
1838 | bit = 0; | ||
1839 | nbits = 0; | ||
1840 | item_index = 0; | ||
1841 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1754 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; |
1842 | for (i = 0; i < inodes_per_buf; i++) { | 1755 | for (i = 0; i < inodes_per_buf; i++) { |
1843 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1756 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
@@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer( | |||
1852 | * the current di_next_unlinked field. | 1765 | * the current di_next_unlinked field. |
1853 | */ | 1766 | */ |
1854 | bit += nbits; | 1767 | bit += nbits; |
1855 | bit = xfs_next_bit(data_map, map_size, bit); | 1768 | bit = xfs_next_bit(buf_f->blf_data_map, |
1769 | buf_f->blf_map_size, bit); | ||
1856 | 1770 | ||
1857 | /* | 1771 | /* |
1858 | * If there are no more logged regions in the | 1772 | * If there are no more logged regions in the |
1859 | * buffer, then we're done. | 1773 | * buffer, then we're done. |
1860 | */ | 1774 | */ |
1861 | if (bit == -1) { | 1775 | if (bit == -1) |
1862 | return 0; | 1776 | return 0; |
1863 | } | ||
1864 | 1777 | ||
1865 | nbits = xfs_contig_bits(data_map, map_size, | 1778 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1866 | bit); | 1779 | buf_f->blf_map_size, bit); |
1867 | ASSERT(nbits > 0); | 1780 | ASSERT(nbits > 0); |
1868 | reg_buf_offset = bit << XFS_BLF_SHIFT; | 1781 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1869 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; | 1782 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
@@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer( | |||
1875 | * di_next_unlinked field, then move on to the next | 1788 | * di_next_unlinked field, then move on to the next |
1876 | * di_next_unlinked field. | 1789 | * di_next_unlinked field. |
1877 | */ | 1790 | */ |
1878 | if (next_unlinked_offset < reg_buf_offset) { | 1791 | if (next_unlinked_offset < reg_buf_offset) |
1879 | continue; | 1792 | continue; |
1880 | } | ||
1881 | 1793 | ||
1882 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1794 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1883 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1795 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
@@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer( | |||
1913 | * given buffer. The bitmap in the buf log format structure indicates | 1825 | * given buffer. The bitmap in the buf log format structure indicates |
1914 | * where to place the logged data. | 1826 | * where to place the logged data. |
1915 | */ | 1827 | */ |
1916 | /*ARGSUSED*/ | ||
1917 | STATIC void | 1828 | STATIC void |
1918 | xlog_recover_do_reg_buffer( | 1829 | xlog_recover_do_reg_buffer( |
1919 | struct xfs_mount *mp, | 1830 | struct xfs_mount *mp, |
1920 | xlog_recover_item_t *item, | 1831 | xlog_recover_item_t *item, |
1921 | xfs_buf_t *bp, | 1832 | struct xfs_buf *bp, |
1922 | xfs_buf_log_format_t *buf_f) | 1833 | xfs_buf_log_format_t *buf_f) |
1923 | { | 1834 | { |
1924 | int i; | 1835 | int i; |
1925 | int bit; | 1836 | int bit; |
1926 | int nbits; | 1837 | int nbits; |
1927 | unsigned int *data_map = NULL; | ||
1928 | unsigned int map_size = 0; | ||
1929 | int error; | 1838 | int error; |
1930 | 1839 | ||
1931 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); | 1840 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
1932 | 1841 | ||
1933 | switch (buf_f->blf_type) { | ||
1934 | case XFS_LI_BUF: | ||
1935 | data_map = buf_f->blf_data_map; | ||
1936 | map_size = buf_f->blf_map_size; | ||
1937 | break; | ||
1938 | } | ||
1939 | bit = 0; | 1842 | bit = 0; |
1940 | i = 1; /* 0 is the buf format structure */ | 1843 | i = 1; /* 0 is the buf format structure */ |
1941 | while (1) { | 1844 | while (1) { |
1942 | bit = xfs_next_bit(data_map, map_size, bit); | 1845 | bit = xfs_next_bit(buf_f->blf_data_map, |
1846 | buf_f->blf_map_size, bit); | ||
1943 | if (bit == -1) | 1847 | if (bit == -1) |
1944 | break; | 1848 | break; |
1945 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1849 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1850 | buf_f->blf_map_size, bit); | ||
1946 | ASSERT(nbits > 0); | 1851 | ASSERT(nbits > 0); |
1947 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1852 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1948 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1853 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
@@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer( | |||
2176 | * for more details on the implementation of the table of cancel records. | 2081 | * for more details on the implementation of the table of cancel records. |
2177 | */ | 2082 | */ |
2178 | STATIC int | 2083 | STATIC int |
2179 | xlog_recover_do_buffer_trans( | 2084 | xlog_recover_buffer_pass2( |
2180 | xlog_t *log, | 2085 | xlog_t *log, |
2181 | xlog_recover_item_t *item, | 2086 | xlog_recover_item_t *item) |
2182 | int pass) | ||
2183 | { | 2087 | { |
2184 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2088 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
2185 | xfs_mount_t *mp; | 2089 | xfs_mount_t *mp = log->l_mp; |
2186 | xfs_buf_t *bp; | 2090 | xfs_buf_t *bp; |
2187 | int error; | 2091 | int error; |
2188 | int cancel; | ||
2189 | xfs_daddr_t blkno; | ||
2190 | int len; | ||
2191 | ushort flags; | ||
2192 | uint buf_flags; | 2092 | uint buf_flags; |
2193 | 2093 | ||
2194 | if (pass == XLOG_RECOVER_PASS1) { | 2094 | /* |
2195 | /* | 2095 | * In this pass we only want to recover all the buffers which have |
2196 | * In this pass we're only looking for buf items | 2096 | * not been cancelled and are not cancellation buffers themselves. |
2197 | * with the XFS_BLF_CANCEL bit set. | 2097 | */ |
2198 | */ | 2098 | if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
2199 | xlog_recover_do_buffer_pass1(log, buf_f); | 2099 | buf_f->blf_len, buf_f->blf_flags)) { |
2100 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2200 | return 0; | 2101 | return 0; |
2201 | } else { | ||
2202 | /* | ||
2203 | * In this pass we want to recover all the buffers | ||
2204 | * which have not been cancelled and are not | ||
2205 | * cancellation buffers themselves. The routine | ||
2206 | * we call here will tell us whether or not to | ||
2207 | * continue with the replay of this buffer. | ||
2208 | */ | ||
2209 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); | ||
2210 | if (cancel) { | ||
2211 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2212 | return 0; | ||
2213 | } | ||
2214 | } | 2102 | } |
2103 | |||
2215 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2104 | trace_xfs_log_recover_buf_recover(log, buf_f); |
2216 | switch (buf_f->blf_type) { | ||
2217 | case XFS_LI_BUF: | ||
2218 | blkno = buf_f->blf_blkno; | ||
2219 | len = buf_f->blf_len; | ||
2220 | flags = buf_f->blf_flags; | ||
2221 | break; | ||
2222 | default: | ||
2223 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | ||
2224 | "xfs_log_recover: unknown buffer type 0x%x, logdev %s", | ||
2225 | buf_f->blf_type, log->l_mp->m_logname ? | ||
2226 | log->l_mp->m_logname : "internal"); | ||
2227 | XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", | ||
2228 | XFS_ERRLEVEL_LOW, log->l_mp); | ||
2229 | return XFS_ERROR(EFSCORRUPTED); | ||
2230 | } | ||
2231 | 2105 | ||
2232 | mp = log->l_mp; | ||
2233 | buf_flags = XBF_LOCK; | 2106 | buf_flags = XBF_LOCK; |
2234 | if (!(flags & XFS_BLF_INODE_BUF)) | 2107 | if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) |
2235 | buf_flags |= XBF_MAPPED; | 2108 | buf_flags |= XBF_MAPPED; |
2236 | 2109 | ||
2237 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2110 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2111 | buf_flags); | ||
2238 | if (XFS_BUF_ISERROR(bp)) { | 2112 | if (XFS_BUF_ISERROR(bp)) { |
2239 | xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, | 2113 | xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, |
2240 | bp, blkno); | 2114 | bp, buf_f->blf_blkno); |
2241 | error = XFS_BUF_GETERROR(bp); | 2115 | error = XFS_BUF_GETERROR(bp); |
2242 | xfs_buf_relse(bp); | 2116 | xfs_buf_relse(bp); |
2243 | return error; | 2117 | return error; |
2244 | } | 2118 | } |
2245 | 2119 | ||
2246 | error = 0; | 2120 | error = 0; |
2247 | if (flags & XFS_BLF_INODE_BUF) { | 2121 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
2248 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2122 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2249 | } else if (flags & | 2123 | } else if (buf_f->blf_flags & |
2250 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 2124 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2251 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2125 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2252 | } else { | 2126 | } else { |
@@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans( | |||
2286 | } | 2160 | } |
2287 | 2161 | ||
2288 | STATIC int | 2162 | STATIC int |
2289 | xlog_recover_do_inode_trans( | 2163 | xlog_recover_inode_pass2( |
2290 | xlog_t *log, | 2164 | xlog_t *log, |
2291 | xlog_recover_item_t *item, | 2165 | xlog_recover_item_t *item) |
2292 | int pass) | ||
2293 | { | 2166 | { |
2294 | xfs_inode_log_format_t *in_f; | 2167 | xfs_inode_log_format_t *in_f; |
2295 | xfs_mount_t *mp; | 2168 | xfs_mount_t *mp = log->l_mp; |
2296 | xfs_buf_t *bp; | 2169 | xfs_buf_t *bp; |
2297 | xfs_dinode_t *dip; | 2170 | xfs_dinode_t *dip; |
2298 | xfs_ino_t ino; | ||
2299 | int len; | 2171 | int len; |
2300 | xfs_caddr_t src; | 2172 | xfs_caddr_t src; |
2301 | xfs_caddr_t dest; | 2173 | xfs_caddr_t dest; |
@@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans( | |||
2305 | xfs_icdinode_t *dicp; | 2177 | xfs_icdinode_t *dicp; |
2306 | int need_free = 0; | 2178 | int need_free = 0; |
2307 | 2179 | ||
2308 | if (pass == XLOG_RECOVER_PASS1) { | ||
2309 | return 0; | ||
2310 | } | ||
2311 | |||
2312 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2180 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
2313 | in_f = item->ri_buf[0].i_addr; | 2181 | in_f = item->ri_buf[0].i_addr; |
2314 | } else { | 2182 | } else { |
@@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans( | |||
2318 | if (error) | 2186 | if (error) |
2319 | goto error; | 2187 | goto error; |
2320 | } | 2188 | } |
2321 | ino = in_f->ilf_ino; | ||
2322 | mp = log->l_mp; | ||
2323 | 2189 | ||
2324 | /* | 2190 | /* |
2325 | * Inode buffers can be freed, look out for it, | 2191 | * Inode buffers can be freed, look out for it, |
@@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans( | |||
2354 | xfs_buf_relse(bp); | 2220 | xfs_buf_relse(bp); |
2355 | xfs_fs_cmn_err(CE_ALERT, mp, | 2221 | xfs_fs_cmn_err(CE_ALERT, mp, |
2356 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", | 2222 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", |
2357 | dip, bp, ino); | 2223 | dip, bp, in_f->ilf_ino); |
2358 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", | 2224 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
2359 | XFS_ERRLEVEL_LOW, mp); | 2225 | XFS_ERRLEVEL_LOW, mp); |
2360 | error = EFSCORRUPTED; | 2226 | error = EFSCORRUPTED; |
2361 | goto error; | 2227 | goto error; |
@@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans( | |||
2365 | xfs_buf_relse(bp); | 2231 | xfs_buf_relse(bp); |
2366 | xfs_fs_cmn_err(CE_ALERT, mp, | 2232 | xfs_fs_cmn_err(CE_ALERT, mp, |
2367 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2233 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2368 | item, ino); | 2234 | item, in_f->ilf_ino); |
2369 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", | 2235 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
2370 | XFS_ERRLEVEL_LOW, mp); | 2236 | XFS_ERRLEVEL_LOW, mp); |
2371 | error = EFSCORRUPTED; | 2237 | error = EFSCORRUPTED; |
2372 | goto error; | 2238 | goto error; |
@@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans( | |||
2394 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { | 2260 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { |
2395 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2261 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2396 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2262 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2397 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", | 2263 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
2398 | XFS_ERRLEVEL_LOW, mp, dicp); | 2264 | XFS_ERRLEVEL_LOW, mp, dicp); |
2399 | xfs_buf_relse(bp); | 2265 | xfs_buf_relse(bp); |
2400 | xfs_fs_cmn_err(CE_ALERT, mp, | 2266 | xfs_fs_cmn_err(CE_ALERT, mp, |
2401 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2267 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2402 | item, dip, bp, ino); | 2268 | item, dip, bp, in_f->ilf_ino); |
2403 | error = EFSCORRUPTED; | 2269 | error = EFSCORRUPTED; |
2404 | goto error; | 2270 | goto error; |
2405 | } | 2271 | } |
@@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans( | |||
2407 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2273 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2408 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && | 2274 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && |
2409 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2275 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2410 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", | 2276 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
2411 | XFS_ERRLEVEL_LOW, mp, dicp); | 2277 | XFS_ERRLEVEL_LOW, mp, dicp); |
2412 | xfs_buf_relse(bp); | 2278 | xfs_buf_relse(bp); |
2413 | xfs_fs_cmn_err(CE_ALERT, mp, | 2279 | xfs_fs_cmn_err(CE_ALERT, mp, |
2414 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2280 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2415 | item, dip, bp, ino); | 2281 | item, dip, bp, in_f->ilf_ino); |
2416 | error = EFSCORRUPTED; | 2282 | error = EFSCORRUPTED; |
2417 | goto error; | 2283 | goto error; |
2418 | } | 2284 | } |
2419 | } | 2285 | } |
2420 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2286 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2421 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", | 2287 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
2422 | XFS_ERRLEVEL_LOW, mp, dicp); | 2288 | XFS_ERRLEVEL_LOW, mp, dicp); |
2423 | xfs_buf_relse(bp); | 2289 | xfs_buf_relse(bp); |
2424 | xfs_fs_cmn_err(CE_ALERT, mp, | 2290 | xfs_fs_cmn_err(CE_ALERT, mp, |
2425 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2291 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
2426 | item, dip, bp, ino, | 2292 | item, dip, bp, in_f->ilf_ino, |
2427 | dicp->di_nextents + dicp->di_anextents, | 2293 | dicp->di_nextents + dicp->di_anextents, |
2428 | dicp->di_nblocks); | 2294 | dicp->di_nblocks); |
2429 | error = EFSCORRUPTED; | 2295 | error = EFSCORRUPTED; |
2430 | goto error; | 2296 | goto error; |
2431 | } | 2297 | } |
2432 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2298 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2433 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", | 2299 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
2434 | XFS_ERRLEVEL_LOW, mp, dicp); | 2300 | XFS_ERRLEVEL_LOW, mp, dicp); |
2435 | xfs_buf_relse(bp); | 2301 | xfs_buf_relse(bp); |
2436 | xfs_fs_cmn_err(CE_ALERT, mp, | 2302 | xfs_fs_cmn_err(CE_ALERT, mp, |
2437 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", | 2303 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", |
2438 | item, dip, bp, ino, dicp->di_forkoff); | 2304 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); |
2439 | error = EFSCORRUPTED; | 2305 | error = EFSCORRUPTED; |
2440 | goto error; | 2306 | goto error; |
2441 | } | 2307 | } |
2442 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2308 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { |
2443 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", | 2309 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2444 | XFS_ERRLEVEL_LOW, mp, dicp); | 2310 | XFS_ERRLEVEL_LOW, mp, dicp); |
2445 | xfs_buf_relse(bp); | 2311 | xfs_buf_relse(bp); |
2446 | xfs_fs_cmn_err(CE_ALERT, mp, | 2312 | xfs_fs_cmn_err(CE_ALERT, mp, |
@@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans( | |||
2532 | break; | 2398 | break; |
2533 | 2399 | ||
2534 | default: | 2400 | default: |
2535 | xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); | 2401 | xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag"); |
2536 | ASSERT(0); | 2402 | ASSERT(0); |
2537 | xfs_buf_relse(bp); | 2403 | xfs_buf_relse(bp); |
2538 | error = EIO; | 2404 | error = EIO; |
@@ -2556,18 +2422,11 @@ error: | |||
2556 | * of that type. | 2422 | * of that type. |
2557 | */ | 2423 | */ |
2558 | STATIC int | 2424 | STATIC int |
2559 | xlog_recover_do_quotaoff_trans( | 2425 | xlog_recover_quotaoff_pass1( |
2560 | xlog_t *log, | 2426 | xlog_t *log, |
2561 | xlog_recover_item_t *item, | 2427 | xlog_recover_item_t *item) |
2562 | int pass) | ||
2563 | { | 2428 | { |
2564 | xfs_qoff_logformat_t *qoff_f; | 2429 | xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
2565 | |||
2566 | if (pass == XLOG_RECOVER_PASS2) { | ||
2567 | return (0); | ||
2568 | } | ||
2569 | |||
2570 | qoff_f = item->ri_buf[0].i_addr; | ||
2571 | ASSERT(qoff_f); | 2430 | ASSERT(qoff_f); |
2572 | 2431 | ||
2573 | /* | 2432 | /* |
@@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans( | |||
2588 | * Recover a dquot record | 2447 | * Recover a dquot record |
2589 | */ | 2448 | */ |
2590 | STATIC int | 2449 | STATIC int |
2591 | xlog_recover_do_dquot_trans( | 2450 | xlog_recover_dquot_pass2( |
2592 | xlog_t *log, | 2451 | xlog_t *log, |
2593 | xlog_recover_item_t *item, | 2452 | xlog_recover_item_t *item) |
2594 | int pass) | ||
2595 | { | 2453 | { |
2596 | xfs_mount_t *mp; | 2454 | xfs_mount_t *mp = log->l_mp; |
2597 | xfs_buf_t *bp; | 2455 | xfs_buf_t *bp; |
2598 | struct xfs_disk_dquot *ddq, *recddq; | 2456 | struct xfs_disk_dquot *ddq, *recddq; |
2599 | int error; | 2457 | int error; |
2600 | xfs_dq_logformat_t *dq_f; | 2458 | xfs_dq_logformat_t *dq_f; |
2601 | uint type; | 2459 | uint type; |
2602 | 2460 | ||
2603 | if (pass == XLOG_RECOVER_PASS1) { | ||
2604 | return 0; | ||
2605 | } | ||
2606 | mp = log->l_mp; | ||
2607 | 2461 | ||
2608 | /* | 2462 | /* |
2609 | * Filesystems are required to send in quota flags at mount time. | 2463 | * Filesystems are required to send in quota flags at mount time. |
@@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans( | |||
2647 | if ((error = xfs_qm_dqcheck(recddq, | 2501 | if ((error = xfs_qm_dqcheck(recddq, |
2648 | dq_f->qlf_id, | 2502 | dq_f->qlf_id, |
2649 | 0, XFS_QMOPT_DOWARN, | 2503 | 0, XFS_QMOPT_DOWARN, |
2650 | "xlog_recover_do_dquot_trans (log copy)"))) { | 2504 | "xlog_recover_dquot_pass2 (log copy)"))) { |
2651 | return XFS_ERROR(EIO); | 2505 | return XFS_ERROR(EIO); |
2652 | } | 2506 | } |
2653 | ASSERT(dq_f->qlf_len == 1); | 2507 | ASSERT(dq_f->qlf_len == 1); |
@@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans( | |||
2670 | * minimal initialization then. | 2524 | * minimal initialization then. |
2671 | */ | 2525 | */ |
2672 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2526 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2673 | "xlog_recover_do_dquot_trans")) { | 2527 | "xlog_recover_dquot_pass2")) { |
2674 | xfs_buf_relse(bp); | 2528 | xfs_buf_relse(bp); |
2675 | return XFS_ERROR(EIO); | 2529 | return XFS_ERROR(EIO); |
2676 | } | 2530 | } |
@@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans( | |||
2693 | * LSN. | 2547 | * LSN. |
2694 | */ | 2548 | */ |
2695 | STATIC int | 2549 | STATIC int |
2696 | xlog_recover_do_efi_trans( | 2550 | xlog_recover_efi_pass2( |
2697 | xlog_t *log, | 2551 | xlog_t *log, |
2698 | xlog_recover_item_t *item, | 2552 | xlog_recover_item_t *item, |
2699 | xfs_lsn_t lsn, | 2553 | xfs_lsn_t lsn) |
2700 | int pass) | ||
2701 | { | 2554 | { |
2702 | int error; | 2555 | int error; |
2703 | xfs_mount_t *mp; | 2556 | xfs_mount_t *mp = log->l_mp; |
2704 | xfs_efi_log_item_t *efip; | 2557 | xfs_efi_log_item_t *efip; |
2705 | xfs_efi_log_format_t *efi_formatp; | 2558 | xfs_efi_log_format_t *efi_formatp; |
2706 | 2559 | ||
2707 | if (pass == XLOG_RECOVER_PASS1) { | ||
2708 | return 0; | ||
2709 | } | ||
2710 | |||
2711 | efi_formatp = item->ri_buf[0].i_addr; | 2560 | efi_formatp = item->ri_buf[0].i_addr; |
2712 | 2561 | ||
2713 | mp = log->l_mp; | ||
2714 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2562 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
2715 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2563 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
2716 | &(efip->efi_format)))) { | 2564 | &(efip->efi_format)))) { |
2717 | xfs_efi_item_free(efip); | 2565 | xfs_efi_item_free(efip); |
2718 | return error; | 2566 | return error; |
2719 | } | 2567 | } |
2720 | efip->efi_next_extent = efi_formatp->efi_nextents; | 2568 | atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
2721 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
2722 | 2569 | ||
2723 | spin_lock(&log->l_ailp->xa_lock); | 2570 | spin_lock(&log->l_ailp->xa_lock); |
2724 | /* | 2571 | /* |
2725 | * xfs_trans_ail_update() drops the AIL lock. | 2572 | * xfs_trans_ail_update() drops the AIL lock. |
2726 | */ | 2573 | */ |
2727 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); | 2574 | xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
2728 | return 0; | 2575 | return 0; |
2729 | } | 2576 | } |
2730 | 2577 | ||
@@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans( | |||
2737 | * efd format structure. If we find it, we remove the efi from the | 2584 | * efd format structure. If we find it, we remove the efi from the |
2738 | * AIL and free it. | 2585 | * AIL and free it. |
2739 | */ | 2586 | */ |
2740 | STATIC void | 2587 | STATIC int |
2741 | xlog_recover_do_efd_trans( | 2588 | xlog_recover_efd_pass2( |
2742 | xlog_t *log, | 2589 | xlog_t *log, |
2743 | xlog_recover_item_t *item, | 2590 | xlog_recover_item_t *item) |
2744 | int pass) | ||
2745 | { | 2591 | { |
2746 | xfs_efd_log_format_t *efd_formatp; | 2592 | xfs_efd_log_format_t *efd_formatp; |
2747 | xfs_efi_log_item_t *efip = NULL; | 2593 | xfs_efi_log_item_t *efip = NULL; |
@@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans( | |||
2750 | struct xfs_ail_cursor cur; | 2596 | struct xfs_ail_cursor cur; |
2751 | struct xfs_ail *ailp = log->l_ailp; | 2597 | struct xfs_ail *ailp = log->l_ailp; |
2752 | 2598 | ||
2753 | if (pass == XLOG_RECOVER_PASS1) { | ||
2754 | return; | ||
2755 | } | ||
2756 | |||
2757 | efd_formatp = item->ri_buf[0].i_addr; | 2599 | efd_formatp = item->ri_buf[0].i_addr; |
2758 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + | 2600 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
2759 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || | 2601 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
@@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans( | |||
2785 | } | 2627 | } |
2786 | xfs_trans_ail_cursor_done(ailp, &cur); | 2628 | xfs_trans_ail_cursor_done(ailp, &cur); |
2787 | spin_unlock(&ailp->xa_lock); | 2629 | spin_unlock(&ailp->xa_lock); |
2788 | } | ||
2789 | |||
2790 | /* | ||
2791 | * Perform the transaction | ||
2792 | * | ||
2793 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
2794 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
2795 | */ | ||
2796 | STATIC int | ||
2797 | xlog_recover_do_trans( | ||
2798 | xlog_t *log, | ||
2799 | xlog_recover_t *trans, | ||
2800 | int pass) | ||
2801 | { | ||
2802 | int error = 0; | ||
2803 | xlog_recover_item_t *item; | ||
2804 | |||
2805 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
2806 | if (error) | ||
2807 | return error; | ||
2808 | |||
2809 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
2810 | trace_xfs_log_recover_item_recover(log, trans, item, pass); | ||
2811 | switch (ITEM_TYPE(item)) { | ||
2812 | case XFS_LI_BUF: | ||
2813 | error = xlog_recover_do_buffer_trans(log, item, pass); | ||
2814 | break; | ||
2815 | case XFS_LI_INODE: | ||
2816 | error = xlog_recover_do_inode_trans(log, item, pass); | ||
2817 | break; | ||
2818 | case XFS_LI_EFI: | ||
2819 | error = xlog_recover_do_efi_trans(log, item, | ||
2820 | trans->r_lsn, pass); | ||
2821 | break; | ||
2822 | case XFS_LI_EFD: | ||
2823 | xlog_recover_do_efd_trans(log, item, pass); | ||
2824 | error = 0; | ||
2825 | break; | ||
2826 | case XFS_LI_DQUOT: | ||
2827 | error = xlog_recover_do_dquot_trans(log, item, pass); | ||
2828 | break; | ||
2829 | case XFS_LI_QUOTAOFF: | ||
2830 | error = xlog_recover_do_quotaoff_trans(log, item, | ||
2831 | pass); | ||
2832 | break; | ||
2833 | default: | ||
2834 | xlog_warn( | ||
2835 | "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); | ||
2836 | ASSERT(0); | ||
2837 | error = XFS_ERROR(EIO); | ||
2838 | break; | ||
2839 | } | ||
2840 | |||
2841 | if (error) | ||
2842 | return error; | ||
2843 | } | ||
2844 | 2630 | ||
2845 | return 0; | 2631 | return 0; |
2846 | } | 2632 | } |
@@ -2852,7 +2638,7 @@ xlog_recover_do_trans( | |||
2852 | */ | 2638 | */ |
2853 | STATIC void | 2639 | STATIC void |
2854 | xlog_recover_free_trans( | 2640 | xlog_recover_free_trans( |
2855 | xlog_recover_t *trans) | 2641 | struct xlog_recover *trans) |
2856 | { | 2642 | { |
2857 | xlog_recover_item_t *item, *n; | 2643 | xlog_recover_item_t *item, *n; |
2858 | int i; | 2644 | int i; |
@@ -2871,17 +2657,95 @@ xlog_recover_free_trans( | |||
2871 | } | 2657 | } |
2872 | 2658 | ||
2873 | STATIC int | 2659 | STATIC int |
2660 | xlog_recover_commit_pass1( | ||
2661 | struct log *log, | ||
2662 | struct xlog_recover *trans, | ||
2663 | xlog_recover_item_t *item) | ||
2664 | { | ||
2665 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); | ||
2666 | |||
2667 | switch (ITEM_TYPE(item)) { | ||
2668 | case XFS_LI_BUF: | ||
2669 | return xlog_recover_buffer_pass1(log, item); | ||
2670 | case XFS_LI_QUOTAOFF: | ||
2671 | return xlog_recover_quotaoff_pass1(log, item); | ||
2672 | case XFS_LI_INODE: | ||
2673 | case XFS_LI_EFI: | ||
2674 | case XFS_LI_EFD: | ||
2675 | case XFS_LI_DQUOT: | ||
2676 | /* nothing to do in pass 1 */ | ||
2677 | return 0; | ||
2678 | default: | ||
2679 | xlog_warn( | ||
2680 | "XFS: invalid item type (%d) xlog_recover_commit_pass1", | ||
2681 | ITEM_TYPE(item)); | ||
2682 | ASSERT(0); | ||
2683 | return XFS_ERROR(EIO); | ||
2684 | } | ||
2685 | } | ||
2686 | |||
2687 | STATIC int | ||
2688 | xlog_recover_commit_pass2( | ||
2689 | struct log *log, | ||
2690 | struct xlog_recover *trans, | ||
2691 | xlog_recover_item_t *item) | ||
2692 | { | ||
2693 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | ||
2694 | |||
2695 | switch (ITEM_TYPE(item)) { | ||
2696 | case XFS_LI_BUF: | ||
2697 | return xlog_recover_buffer_pass2(log, item); | ||
2698 | case XFS_LI_INODE: | ||
2699 | return xlog_recover_inode_pass2(log, item); | ||
2700 | case XFS_LI_EFI: | ||
2701 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | ||
2702 | case XFS_LI_EFD: | ||
2703 | return xlog_recover_efd_pass2(log, item); | ||
2704 | case XFS_LI_DQUOT: | ||
2705 | return xlog_recover_dquot_pass2(log, item); | ||
2706 | case XFS_LI_QUOTAOFF: | ||
2707 | /* nothing to do in pass2 */ | ||
2708 | return 0; | ||
2709 | default: | ||
2710 | xlog_warn( | ||
2711 | "XFS: invalid item type (%d) xlog_recover_commit_pass2", | ||
2712 | ITEM_TYPE(item)); | ||
2713 | ASSERT(0); | ||
2714 | return XFS_ERROR(EIO); | ||
2715 | } | ||
2716 | } | ||
2717 | |||
2718 | /* | ||
2719 | * Perform the transaction. | ||
2720 | * | ||
2721 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
2722 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
2723 | */ | ||
2724 | STATIC int | ||
2874 | xlog_recover_commit_trans( | 2725 | xlog_recover_commit_trans( |
2875 | xlog_t *log, | 2726 | struct log *log, |
2876 | xlog_recover_t *trans, | 2727 | struct xlog_recover *trans, |
2877 | int pass) | 2728 | int pass) |
2878 | { | 2729 | { |
2879 | int error; | 2730 | int error = 0; |
2731 | xlog_recover_item_t *item; | ||
2880 | 2732 | ||
2881 | hlist_del(&trans->r_list); | 2733 | hlist_del(&trans->r_list); |
2882 | if ((error = xlog_recover_do_trans(log, trans, pass))) | 2734 | |
2735 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
2736 | if (error) | ||
2883 | return error; | 2737 | return error; |
2884 | xlog_recover_free_trans(trans); /* no error */ | 2738 | |
2739 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
2740 | if (pass == XLOG_RECOVER_PASS1) | ||
2741 | error = xlog_recover_commit_pass1(log, trans, item); | ||
2742 | else | ||
2743 | error = xlog_recover_commit_pass2(log, trans, item); | ||
2744 | if (error) | ||
2745 | return error; | ||
2746 | } | ||
2747 | |||
2748 | xlog_recover_free_trans(trans); | ||
2885 | return 0; | 2749 | return 0; |
2886 | } | 2750 | } |
2887 | 2751 | ||
@@ -3011,7 +2875,7 @@ xlog_recover_process_efi( | |||
3011 | xfs_extent_t *extp; | 2875 | xfs_extent_t *extp; |
3012 | xfs_fsblock_t startblock_fsb; | 2876 | xfs_fsblock_t startblock_fsb; |
3013 | 2877 | ||
3014 | ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); | 2878 | ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); |
3015 | 2879 | ||
3016 | /* | 2880 | /* |
3017 | * First check the validity of the extents described by the | 2881 | * First check the validity of the extents described by the |
@@ -3050,7 +2914,7 @@ xlog_recover_process_efi( | |||
3050 | extp->ext_len); | 2914 | extp->ext_len); |
3051 | } | 2915 | } |
3052 | 2916 | ||
3053 | efip->efi_flags |= XFS_EFI_RECOVERED; | 2917 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); |
3054 | error = xfs_trans_commit(tp, 0); | 2918 | error = xfs_trans_commit(tp, 0); |
3055 | return error; | 2919 | return error; |
3056 | 2920 | ||
@@ -3107,7 +2971,7 @@ xlog_recover_process_efis( | |||
3107 | * Skip EFIs that we've already processed. | 2971 | * Skip EFIs that we've already processed. |
3108 | */ | 2972 | */ |
3109 | efip = (xfs_efi_log_item_t *)lip; | 2973 | efip = (xfs_efi_log_item_t *)lip; |
3110 | if (efip->efi_flags & XFS_EFI_RECOVERED) { | 2974 | if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { |
3111 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 2975 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3112 | continue; | 2976 | continue; |
3113 | } | 2977 | } |
@@ -3724,7 +3588,7 @@ xlog_do_log_recovery( | |||
3724 | xfs_daddr_t head_blk, | 3588 | xfs_daddr_t head_blk, |
3725 | xfs_daddr_t tail_blk) | 3589 | xfs_daddr_t tail_blk) |
3726 | { | 3590 | { |
3727 | int error; | 3591 | int error, i; |
3728 | 3592 | ||
3729 | ASSERT(head_blk != tail_blk); | 3593 | ASSERT(head_blk != tail_blk); |
3730 | 3594 | ||
@@ -3732,10 +3596,12 @@ xlog_do_log_recovery( | |||
3732 | * First do a pass to find all of the cancelled buf log items. | 3596 | * First do a pass to find all of the cancelled buf log items. |
3733 | * Store them in the buf_cancel_table for use in the second pass. | 3597 | * Store them in the buf_cancel_table for use in the second pass. |
3734 | */ | 3598 | */ |
3735 | log->l_buf_cancel_table = | 3599 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
3736 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3600 | sizeof(struct list_head), |
3737 | sizeof(xfs_buf_cancel_t*), | ||
3738 | KM_SLEEP); | 3601 | KM_SLEEP); |
3602 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | ||
3603 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); | ||
3604 | |||
3739 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3605 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3740 | XLOG_RECOVER_PASS1); | 3606 | XLOG_RECOVER_PASS1); |
3741 | if (error != 0) { | 3607 | if (error != 0) { |
@@ -3754,7 +3620,7 @@ xlog_do_log_recovery( | |||
3754 | int i; | 3620 | int i; |
3755 | 3621 | ||
3756 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3622 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3757 | ASSERT(log->l_buf_cancel_table[i] == NULL); | 3623 | ASSERT(list_empty(&log->l_buf_cancel_table[i])); |
3758 | } | 3624 | } |
3759 | #endif /* DEBUG */ | 3625 | #endif /* DEBUG */ |
3760 | 3626 | ||
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 19e9dfa1c254..d447aef84bc3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -472,7 +472,7 @@ xfs_initialize_perag( | |||
472 | goto out_unwind; | 472 | goto out_unwind; |
473 | pag->pag_agno = index; | 473 | pag->pag_agno = index; |
474 | pag->pag_mount = mp; | 474 | pag->pag_mount = mp; |
475 | rwlock_init(&pag->pag_ici_lock); | 475 | spin_lock_init(&pag->pag_ici_lock); |
476 | mutex_init(&pag->pag_ici_reclaim_lock); | 476 | mutex_init(&pag->pag_ici_reclaim_lock); |
477 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 477 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
478 | spin_lock_init(&pag->pag_buf_lock); | 478 | spin_lock_init(&pag->pag_buf_lock); |
@@ -975,6 +975,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp) | |||
975 | } | 975 | } |
976 | 976 | ||
977 | /* | 977 | /* |
978 | * precalculate the low space thresholds for dynamic speculative preallocation. | ||
979 | */ | ||
980 | void | ||
981 | xfs_set_low_space_thresholds( | ||
982 | struct xfs_mount *mp) | ||
983 | { | ||
984 | int i; | ||
985 | |||
986 | for (i = 0; i < XFS_LOWSP_MAX; i++) { | ||
987 | __uint64_t space = mp->m_sb.sb_dblocks; | ||
988 | |||
989 | do_div(space, 100); | ||
990 | mp->m_low_space[i] = space * (i + 1); | ||
991 | } | ||
992 | } | ||
993 | |||
994 | |||
995 | /* | ||
978 | * Set whether we're using inode alignment. | 996 | * Set whether we're using inode alignment. |
979 | */ | 997 | */ |
980 | STATIC void | 998 | STATIC void |
@@ -1196,6 +1214,9 @@ xfs_mountfs( | |||
1196 | */ | 1214 | */ |
1197 | xfs_set_rw_sizes(mp); | 1215 | xfs_set_rw_sizes(mp); |
1198 | 1216 | ||
1217 | /* set the low space thresholds for dynamic preallocation */ | ||
1218 | xfs_set_low_space_thresholds(mp); | ||
1219 | |||
1199 | /* | 1220 | /* |
1200 | * Set the inode cluster size. | 1221 | * Set the inode cluster size. |
1201 | * This may still be overridden by the file system | 1222 | * This may still be overridden by the file system |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5861b4980740..a62e8971539d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | |||
103 | xfs_mod_incore_sb(mp, field, delta, rsvd) | 103 | xfs_mod_incore_sb(mp, field, delta, rsvd) |
104 | #endif | 104 | #endif |
105 | 105 | ||
106 | /* dynamic preallocation free space thresholds, 5% down to 1% */ | ||
107 | enum { | ||
108 | XFS_LOWSP_1_PCNT = 0, | ||
109 | XFS_LOWSP_2_PCNT, | ||
110 | XFS_LOWSP_3_PCNT, | ||
111 | XFS_LOWSP_4_PCNT, | ||
112 | XFS_LOWSP_5_PCNT, | ||
113 | XFS_LOWSP_MAX, | ||
114 | }; | ||
115 | |||
106 | typedef struct xfs_mount { | 116 | typedef struct xfs_mount { |
107 | struct super_block *m_super; | 117 | struct super_block *m_super; |
108 | xfs_tid_t m_tid; /* next unused tid for fs */ | 118 | xfs_tid_t m_tid; /* next unused tid for fs */ |
@@ -202,6 +212,8 @@ typedef struct xfs_mount { | |||
202 | __int64_t m_update_flags; /* sb flags we need to update | 212 | __int64_t m_update_flags; /* sb flags we need to update |
203 | on the next remount,rw */ | 213 | on the next remount,rw */ |
204 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 214 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
215 | int64_t m_low_space[XFS_LOWSP_MAX]; | ||
216 | /* low free space thresholds */ | ||
205 | } xfs_mount_t; | 217 | } xfs_mount_t; |
206 | 218 | ||
207 | /* | 219 | /* |
@@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); | |||
379 | 391 | ||
380 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 392 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
381 | 393 | ||
394 | extern void xfs_set_low_space_thresholds(struct xfs_mount *); | ||
395 | |||
382 | #endif /* __KERNEL__ */ | 396 | #endif /* __KERNEL__ */ |
383 | 397 | ||
384 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 398 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f6d956b7711e..f80a067a4658 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs( | |||
1350 | * they could be immediately flushed and we'd have to race with the flusher | 1350 | * they could be immediately flushed and we'd have to race with the flusher |
1351 | * trying to pull the item from the AIL as we add it. | 1351 | * trying to pull the item from the AIL as we add it. |
1352 | */ | 1352 | */ |
1353 | void | 1353 | static void |
1354 | xfs_trans_item_committed( | 1354 | xfs_trans_item_committed( |
1355 | struct xfs_log_item *lip, | 1355 | struct xfs_log_item *lip, |
1356 | xfs_lsn_t commit_lsn, | 1356 | xfs_lsn_t commit_lsn, |
@@ -1425,6 +1425,83 @@ xfs_trans_committed( | |||
1425 | xfs_trans_free(tp); | 1425 | xfs_trans_free(tp); |
1426 | } | 1426 | } |
1427 | 1427 | ||
1428 | static inline void | ||
1429 | xfs_log_item_batch_insert( | ||
1430 | struct xfs_ail *ailp, | ||
1431 | struct xfs_log_item **log_items, | ||
1432 | int nr_items, | ||
1433 | xfs_lsn_t commit_lsn) | ||
1434 | { | ||
1435 | int i; | ||
1436 | |||
1437 | spin_lock(&ailp->xa_lock); | ||
1438 | /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ | ||
1439 | xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); | ||
1440 | |||
1441 | for (i = 0; i < nr_items; i++) | ||
1442 | IOP_UNPIN(log_items[i], 0); | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of | ||
1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to | ||
1448 | * minimise lock traffic. | ||
1449 | */ | ||
1450 | void | ||
1451 | xfs_trans_committed_bulk( | ||
1452 | struct xfs_ail *ailp, | ||
1453 | struct xfs_log_vec *log_vector, | ||
1454 | xfs_lsn_t commit_lsn, | ||
1455 | int aborted) | ||
1456 | { | ||
1457 | #define LOG_ITEM_BATCH_SIZE 32 | ||
1458 | struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; | ||
1459 | struct xfs_log_vec *lv; | ||
1460 | int i = 0; | ||
1461 | |||
1462 | /* unpin all the log items */ | ||
1463 | for (lv = log_vector; lv; lv = lv->lv_next ) { | ||
1464 | struct xfs_log_item *lip = lv->lv_item; | ||
1465 | xfs_lsn_t item_lsn; | ||
1466 | |||
1467 | if (aborted) | ||
1468 | lip->li_flags |= XFS_LI_ABORTED; | ||
1469 | item_lsn = IOP_COMMITTED(lip, commit_lsn); | ||
1470 | |||
1471 | /* item_lsn of -1 means the item was freed */ | ||
1472 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | ||
1473 | continue; | ||
1474 | |||
1475 | if (item_lsn != commit_lsn) { | ||
1476 | |||
1477 | /* | ||
1478 | * Not a bulk update option due to unusual item_lsn. | ||
1479 | * Push into AIL immediately, rechecking the lsn once | ||
1480 | * we have the ail lock. Then unpin the item. | ||
1481 | */ | ||
1482 | spin_lock(&ailp->xa_lock); | ||
1483 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) | ||
1484 | xfs_trans_ail_update(ailp, lip, item_lsn); | ||
1485 | else | ||
1486 | spin_unlock(&ailp->xa_lock); | ||
1487 | IOP_UNPIN(lip, 0); | ||
1488 | continue; | ||
1489 | } | ||
1490 | |||
1491 | /* Item is a candidate for bulk AIL insert. */ | ||
1492 | log_items[i++] = lv->lv_item; | ||
1493 | if (i >= LOG_ITEM_BATCH_SIZE) { | ||
1494 | xfs_log_item_batch_insert(ailp, log_items, | ||
1495 | LOG_ITEM_BATCH_SIZE, commit_lsn); | ||
1496 | i = 0; | ||
1497 | } | ||
1498 | } | ||
1499 | |||
1500 | /* make sure we insert the remainder! */ | ||
1501 | if (i) | ||
1502 | xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); | ||
1503 | } | ||
1504 | |||
1428 | /* | 1505 | /* |
1429 | * Called from the trans_commit code when we notice that | 1506 | * Called from the trans_commit code when we notice that |
1430 | * the filesystem is in the middle of a forced shutdown. | 1507 | * the filesystem is in the middle of a forced shutdown. |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 246286b77a86..c2042b736b81 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -294,8 +294,8 @@ struct xfs_log_item_desc { | |||
294 | #define XFS_ALLOC_BTREE_REF 2 | 294 | #define XFS_ALLOC_BTREE_REF 2 |
295 | #define XFS_BMAP_BTREE_REF 2 | 295 | #define XFS_BMAP_BTREE_REF 2 |
296 | #define XFS_DIR_BTREE_REF 2 | 296 | #define XFS_DIR_BTREE_REF 2 |
297 | #define XFS_INO_REF 2 | ||
297 | #define XFS_ATTR_BTREE_REF 1 | 298 | #define XFS_ATTR_BTREE_REF 1 |
298 | #define XFS_INO_REF 1 | ||
299 | #define XFS_DQUOT_REF 1 | 299 | #define XFS_DQUOT_REF 1 |
300 | 300 | ||
301 | #ifdef __KERNEL__ | 301 | #ifdef __KERNEL__ |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index dc9069568ff7..c5bbbc45db91 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -28,8 +28,8 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | 30 | ||
31 | STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); | 31 | STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); |
32 | STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); | 32 | STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); |
33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); | 33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); |
34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); | 34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); |
35 | 35 | ||
@@ -449,129 +449,152 @@ xfs_trans_unlocked_item( | |||
449 | xfs_log_move_tail(ailp->xa_mount, 1); | 449 | xfs_log_move_tail(ailp->xa_mount, 1); |
450 | } /* xfs_trans_unlocked_item */ | 450 | } /* xfs_trans_unlocked_item */ |
451 | 451 | ||
452 | |||
453 | /* | 452 | /* |
454 | * Update the position of the item in the AIL with the new | 453 | * xfs_trans_ail_update - bulk AIL insertion operation. |
455 | * lsn. If it is not yet in the AIL, add it. Otherwise, move | 454 | * |
456 | * it to its new position by removing it and re-adding it. | 455 | * @xfs_trans_ail_update takes an array of log items that all need to be |
456 | * positioned at the same LSN in the AIL. If an item is not in the AIL, it will | ||
457 | * be added. Otherwise, it will be repositioned by removing it and re-adding | ||
458 | * it to the AIL. If we move the first item in the AIL, update the log tail to | ||
459 | * match the new minimum LSN in the AIL. | ||
457 | * | 460 | * |
458 | * Wakeup anyone with an lsn less than the item's lsn. If the item | 461 | * This function takes the AIL lock once to execute the update operations on |
459 | * we move in the AIL is the minimum one, update the tail lsn in the | 462 | * all the items in the array, and as such should not be called with the AIL |
460 | * log manager. | 463 | * lock held. As a result, once we have the AIL lock, we need to check each log |
464 | * item LSN to confirm it needs to be moved forward in the AIL. | ||
461 | * | 465 | * |
462 | * This function must be called with the AIL lock held. The lock | 466 | * To optimise the insert operation, we delete all the items from the AIL in |
463 | * is dropped before returning. | 467 | * the first pass, moving them into a temporary list, then splice the temporary |
468 | * list into the correct position in the AIL. This avoids needing to do an | ||
469 | * insert operation on every item. | ||
470 | * | ||
471 | * This function must be called with the AIL lock held. The lock is dropped | ||
472 | * before returning. | ||
464 | */ | 473 | */ |
465 | void | 474 | void |
466 | xfs_trans_ail_update( | 475 | xfs_trans_ail_update_bulk( |
467 | struct xfs_ail *ailp, | 476 | struct xfs_ail *ailp, |
468 | xfs_log_item_t *lip, | 477 | struct xfs_log_item **log_items, |
469 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | 478 | int nr_items, |
479 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
470 | { | 480 | { |
471 | xfs_log_item_t *dlip = NULL; | 481 | xfs_log_item_t *mlip; |
472 | xfs_log_item_t *mlip; /* ptr to minimum lip */ | ||
473 | xfs_lsn_t tail_lsn; | 482 | xfs_lsn_t tail_lsn; |
483 | int mlip_changed = 0; | ||
484 | int i; | ||
485 | LIST_HEAD(tmp); | ||
474 | 486 | ||
475 | mlip = xfs_ail_min(ailp); | 487 | mlip = xfs_ail_min(ailp); |
476 | 488 | ||
477 | if (lip->li_flags & XFS_LI_IN_AIL) { | 489 | for (i = 0; i < nr_items; i++) { |
478 | dlip = xfs_ail_delete(ailp, lip); | 490 | struct xfs_log_item *lip = log_items[i]; |
479 | ASSERT(dlip == lip); | 491 | if (lip->li_flags & XFS_LI_IN_AIL) { |
480 | xfs_trans_ail_cursor_clear(ailp, dlip); | 492 | /* check if we really need to move the item */ |
481 | } else { | 493 | if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) |
482 | lip->li_flags |= XFS_LI_IN_AIL; | 494 | continue; |
495 | |||
496 | xfs_ail_delete(ailp, lip); | ||
497 | if (mlip == lip) | ||
498 | mlip_changed = 1; | ||
499 | } else { | ||
500 | lip->li_flags |= XFS_LI_IN_AIL; | ||
501 | } | ||
502 | lip->li_lsn = lsn; | ||
503 | list_add(&lip->li_ail, &tmp); | ||
483 | } | 504 | } |
484 | 505 | ||
485 | lip->li_lsn = lsn; | 506 | xfs_ail_splice(ailp, &tmp, lsn); |
486 | xfs_ail_insert(ailp, lip); | ||
487 | 507 | ||
488 | if (mlip == dlip) { | 508 | if (!mlip_changed) { |
489 | mlip = xfs_ail_min(ailp); | ||
490 | /* | ||
491 | * It is not safe to access mlip after the AIL lock is | ||
492 | * dropped, so we must get a copy of li_lsn before we do | ||
493 | * so. This is especially important on 32-bit platforms | ||
494 | * where accessing and updating 64-bit values like li_lsn | ||
495 | * is not atomic. | ||
496 | */ | ||
497 | tail_lsn = mlip->li_lsn; | ||
498 | spin_unlock(&ailp->xa_lock); | ||
499 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
500 | } else { | ||
501 | spin_unlock(&ailp->xa_lock); | 509 | spin_unlock(&ailp->xa_lock); |
510 | return; | ||
502 | } | 511 | } |
503 | 512 | ||
504 | 513 | /* | |
505 | } /* xfs_trans_update_ail */ | 514 | * It is not safe to access mlip after the AIL lock is dropped, so we |
515 | * must get a copy of li_lsn before we do so. This is especially | ||
516 | * important on 32-bit platforms where accessing and updating 64-bit | ||
517 | * values like li_lsn is not atomic. | ||
518 | */ | ||
519 | mlip = xfs_ail_min(ailp); | ||
520 | tail_lsn = mlip->li_lsn; | ||
521 | spin_unlock(&ailp->xa_lock); | ||
522 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
523 | } | ||
506 | 524 | ||
507 | /* | 525 | /* |
508 | * Delete the given item from the AIL. It must already be in | 526 | * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL |
509 | * the AIL. | ||
510 | * | 527 | * |
511 | * Wakeup anyone with an lsn less than item's lsn. If the item | 528 | * @xfs_trans_ail_delete_bulk takes an array of log items that all need to |
512 | * we delete in the AIL is the minimum one, update the tail lsn in the | 529 | * removed from the AIL. The caller is already holding the AIL lock, and done |
513 | * log manager. | 530 | * all the checks necessary to ensure the items passed in via @log_items are |
531 | * ready for deletion. This includes checking that the items are in the AIL. | ||
514 | * | 532 | * |
515 | * Clear the IN_AIL flag from the item, reset its lsn to 0, and | 533 | * For each log item to be removed, unlink it from the AIL, clear the IN_AIL |
516 | * bump the AIL's generation count to indicate that the tree | 534 | * flag from the item and reset the item's lsn to 0. If we remove the first |
517 | * has changed. | 535 | * item in the AIL, update the log tail to match the new minimum LSN in the |
536 | * AIL. | ||
518 | * | 537 | * |
519 | * This function must be called with the AIL lock held. The lock | 538 | * This function will not drop the AIL lock until all items are removed from |
520 | * is dropped before returning. | 539 | * the AIL to minimise the amount of lock traffic on the AIL. This does not |
540 | * greatly increase the AIL hold time, but does significantly reduce the amount | ||
541 | * of traffic on the lock, especially during IO completion. | ||
542 | * | ||
543 | * This function must be called with the AIL lock held. The lock is dropped | ||
544 | * before returning. | ||
521 | */ | 545 | */ |
522 | void | 546 | void |
523 | xfs_trans_ail_delete( | 547 | xfs_trans_ail_delete_bulk( |
524 | struct xfs_ail *ailp, | 548 | struct xfs_ail *ailp, |
525 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | 549 | struct xfs_log_item **log_items, |
550 | int nr_items) __releases(ailp->xa_lock) | ||
526 | { | 551 | { |
527 | xfs_log_item_t *dlip; | ||
528 | xfs_log_item_t *mlip; | 552 | xfs_log_item_t *mlip; |
529 | xfs_lsn_t tail_lsn; | 553 | xfs_lsn_t tail_lsn; |
554 | int mlip_changed = 0; | ||
555 | int i; | ||
530 | 556 | ||
531 | if (lip->li_flags & XFS_LI_IN_AIL) { | 557 | mlip = xfs_ail_min(ailp); |
532 | mlip = xfs_ail_min(ailp); | ||
533 | dlip = xfs_ail_delete(ailp, lip); | ||
534 | ASSERT(dlip == lip); | ||
535 | xfs_trans_ail_cursor_clear(ailp, dlip); | ||
536 | |||
537 | 558 | ||
538 | lip->li_flags &= ~XFS_LI_IN_AIL; | 559 | for (i = 0; i < nr_items; i++) { |
539 | lip->li_lsn = 0; | 560 | struct xfs_log_item *lip = log_items[i]; |
561 | if (!(lip->li_flags & XFS_LI_IN_AIL)) { | ||
562 | struct xfs_mount *mp = ailp->xa_mount; | ||
540 | 563 | ||
541 | if (mlip == dlip) { | ||
542 | mlip = xfs_ail_min(ailp); | ||
543 | /* | ||
544 | * It is not safe to access mlip after the AIL lock | ||
545 | * is dropped, so we must get a copy of li_lsn | ||
546 | * before we do so. This is especially important | ||
547 | * on 32-bit platforms where accessing and updating | ||
548 | * 64-bit values like li_lsn is not atomic. | ||
549 | */ | ||
550 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
551 | spin_unlock(&ailp->xa_lock); | ||
552 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
553 | } else { | ||
554 | spin_unlock(&ailp->xa_lock); | 564 | spin_unlock(&ailp->xa_lock); |
565 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
566 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | ||
567 | "%s: attempting to delete a log item that is not in the AIL", | ||
568 | __func__); | ||
569 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
570 | } | ||
571 | return; | ||
555 | } | 572 | } |
573 | |||
574 | xfs_ail_delete(ailp, lip); | ||
575 | lip->li_flags &= ~XFS_LI_IN_AIL; | ||
576 | lip->li_lsn = 0; | ||
577 | if (mlip == lip) | ||
578 | mlip_changed = 1; | ||
556 | } | 579 | } |
557 | else { | ||
558 | /* | ||
559 | * If the file system is not being shutdown, we are in | ||
560 | * serious trouble if we get to this stage. | ||
561 | */ | ||
562 | struct xfs_mount *mp = ailp->xa_mount; | ||
563 | 580 | ||
581 | if (!mlip_changed) { | ||
564 | spin_unlock(&ailp->xa_lock); | 582 | spin_unlock(&ailp->xa_lock); |
565 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 583 | return; |
566 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | ||
567 | "%s: attempting to delete a log item that is not in the AIL", | ||
568 | __func__); | ||
569 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
570 | } | ||
571 | } | 584 | } |
572 | } | ||
573 | |||
574 | 585 | ||
586 | /* | ||
587 | * It is not safe to access mlip after the AIL lock is dropped, so we | ||
588 | * must get a copy of li_lsn before we do so. This is especially | ||
589 | * important on 32-bit platforms where accessing and updating 64-bit | ||
590 | * values like li_lsn is not atomic. It is possible we've emptied the | ||
591 | * AIL here, so if that is the case, pass an LSN of 0 to the tail move. | ||
592 | */ | ||
593 | mlip = xfs_ail_min(ailp); | ||
594 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
595 | spin_unlock(&ailp->xa_lock); | ||
596 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
597 | } | ||
575 | 598 | ||
576 | /* | 599 | /* |
577 | * The active item list (AIL) is a doubly linked list of log | 600 | * The active item list (AIL) is a doubly linked list of log |
@@ -623,16 +646,13 @@ xfs_trans_ail_destroy( | |||
623 | } | 646 | } |
624 | 647 | ||
625 | /* | 648 | /* |
626 | * Insert the given log item into the AIL. | 649 | * splice the log item list into the AIL at the given LSN. |
627 | * We almost always insert at the end of the list, so on inserts | ||
628 | * we search from the end of the list to find where the | ||
629 | * new item belongs. | ||
630 | */ | 650 | */ |
631 | STATIC void | 651 | STATIC void |
632 | xfs_ail_insert( | 652 | xfs_ail_splice( |
633 | struct xfs_ail *ailp, | 653 | struct xfs_ail *ailp, |
634 | xfs_log_item_t *lip) | 654 | struct list_head *list, |
635 | /* ARGSUSED */ | 655 | xfs_lsn_t lsn) |
636 | { | 656 | { |
637 | xfs_log_item_t *next_lip; | 657 | xfs_log_item_t *next_lip; |
638 | 658 | ||
@@ -640,39 +660,33 @@ xfs_ail_insert( | |||
640 | * If the list is empty, just insert the item. | 660 | * If the list is empty, just insert the item. |
641 | */ | 661 | */ |
642 | if (list_empty(&ailp->xa_ail)) { | 662 | if (list_empty(&ailp->xa_ail)) { |
643 | list_add(&lip->li_ail, &ailp->xa_ail); | 663 | list_splice(list, &ailp->xa_ail); |
644 | return; | 664 | return; |
645 | } | 665 | } |
646 | 666 | ||
647 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | 667 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { |
648 | if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) | 668 | if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) |
649 | break; | 669 | break; |
650 | } | 670 | } |
651 | 671 | ||
652 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | 672 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || |
653 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); | 673 | (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); |
654 | |||
655 | list_add(&lip->li_ail, &next_lip->li_ail); | ||
656 | 674 | ||
657 | xfs_ail_check(ailp, lip); | 675 | list_splice_init(list, &next_lip->li_ail); |
658 | return; | 676 | return; |
659 | } | 677 | } |
660 | 678 | ||
661 | /* | 679 | /* |
662 | * Delete the given item from the AIL. Return a pointer to the item. | 680 | * Delete the given item from the AIL. Return a pointer to the item. |
663 | */ | 681 | */ |
664 | /*ARGSUSED*/ | 682 | STATIC void |
665 | STATIC xfs_log_item_t * | ||
666 | xfs_ail_delete( | 683 | xfs_ail_delete( |
667 | struct xfs_ail *ailp, | 684 | struct xfs_ail *ailp, |
668 | xfs_log_item_t *lip) | 685 | xfs_log_item_t *lip) |
669 | /* ARGSUSED */ | ||
670 | { | 686 | { |
671 | xfs_ail_check(ailp, lip); | 687 | xfs_ail_check(ailp, lip); |
672 | |||
673 | list_del(&lip->li_ail); | 688 | list_del(&lip->li_ail); |
674 | 689 | xfs_trans_ail_cursor_clear(ailp, lip); | |
675 | return lip; | ||
676 | } | 690 | } |
677 | 691 | ||
678 | /* | 692 | /* |
@@ -682,7 +696,6 @@ xfs_ail_delete( | |||
682 | STATIC xfs_log_item_t * | 696 | STATIC xfs_log_item_t * |
683 | xfs_ail_min( | 697 | xfs_ail_min( |
684 | struct xfs_ail *ailp) | 698 | struct xfs_ail *ailp) |
685 | /* ARGSUSED */ | ||
686 | { | 699 | { |
687 | if (list_empty(&ailp->xa_ail)) | 700 | if (list_empty(&ailp->xa_ail)) |
688 | return NULL; | 701 | return NULL; |
@@ -699,7 +712,6 @@ STATIC xfs_log_item_t * | |||
699 | xfs_ail_next( | 712 | xfs_ail_next( |
700 | struct xfs_ail *ailp, | 713 | struct xfs_ail *ailp, |
701 | xfs_log_item_t *lip) | 714 | xfs_log_item_t *lip) |
702 | /* ARGSUSED */ | ||
703 | { | 715 | { |
704 | if (lip->li_ail.next == &ailp->xa_ail) | 716 | if (lip->li_ail.next == &ailp->xa_ail) |
705 | return NULL; | 717 | return NULL; |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index f783d5e9fa70..f7590f5badea 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp, | |||
69 | tp->t_flags |= XFS_TRANS_DIRTY; | 69 | tp->t_flags |= XFS_TRANS_DIRTY; |
70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
71 | 71 | ||
72 | next_extent = efip->efi_next_extent; | 72 | /* |
73 | * atomic_inc_return gives us the value after the increment; | ||
74 | * we want to use it as an array index so we need to subtract 1 from | ||
75 | * it. | ||
76 | */ | ||
77 | next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; | ||
73 | ASSERT(next_extent < efip->efi_format.efi_nextents); | 78 | ASSERT(next_extent < efip->efi_format.efi_nextents); |
74 | extp = &(efip->efi_format.efi_extents[next_extent]); | 79 | extp = &(efip->efi_format.efi_extents[next_extent]); |
75 | extp->ext_start = start_block; | 80 | extp->ext_start = start_block; |
76 | extp->ext_len = ext_len; | 81 | extp->ext_len = ext_len; |
77 | efip->efi_next_extent++; | ||
78 | } | 82 | } |
79 | 83 | ||
80 | 84 | ||
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 62da86c90de5..35162c238fa3 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -22,15 +22,17 @@ struct xfs_log_item; | |||
22 | struct xfs_log_item_desc; | 22 | struct xfs_log_item_desc; |
23 | struct xfs_mount; | 23 | struct xfs_mount; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_ail; | ||
26 | struct xfs_log_vec; | ||
25 | 27 | ||
26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); | 28 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); |
27 | void xfs_trans_del_item(struct xfs_log_item *); | 29 | void xfs_trans_del_item(struct xfs_log_item *); |
28 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, | 30 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
29 | int flags); | 31 | int flags); |
30 | void xfs_trans_item_committed(struct xfs_log_item *lip, | ||
31 | xfs_lsn_t commit_lsn, int aborted); | ||
32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |
33 | 33 | ||
34 | void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, | ||
35 | xfs_lsn_t commit_lsn, int aborted); | ||
34 | /* | 36 | /* |
35 | * AIL traversal cursor. | 37 | * AIL traversal cursor. |
36 | * | 38 | * |
@@ -73,12 +75,29 @@ struct xfs_ail { | |||
73 | /* | 75 | /* |
74 | * From xfs_trans_ail.c | 76 | * From xfs_trans_ail.c |
75 | */ | 77 | */ |
76 | void xfs_trans_ail_update(struct xfs_ail *ailp, | 78 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
77 | struct xfs_log_item *lip, xfs_lsn_t lsn) | 79 | struct xfs_log_item **log_items, int nr_items, |
78 | __releases(ailp->xa_lock); | 80 | xfs_lsn_t lsn) __releases(ailp->xa_lock); |
79 | void xfs_trans_ail_delete(struct xfs_ail *ailp, | 81 | static inline void |
80 | struct xfs_log_item *lip) | 82 | xfs_trans_ail_update( |
81 | __releases(ailp->xa_lock); | 83 | struct xfs_ail *ailp, |
84 | struct xfs_log_item *lip, | ||
85 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
86 | { | ||
87 | xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); | ||
88 | } | ||
89 | |||
90 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, | ||
91 | struct xfs_log_item **log_items, int nr_items) | ||
92 | __releases(ailp->xa_lock); | ||
93 | static inline void | ||
94 | xfs_trans_ail_delete( | ||
95 | struct xfs_ail *ailp, | ||
96 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | ||
97 | { | ||
98 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | ||
99 | } | ||
100 | |||
82 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); | 101 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); |
83 | void xfs_trans_unlocked_item(struct xfs_ail *, | 102 | void xfs_trans_unlocked_item(struct xfs_ail *, |
84 | xfs_log_item_t *); | 103 | xfs_log_item_t *); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8e4a63c4151a..d8e6f8cd6f0c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -964,29 +964,48 @@ xfs_release( | |||
964 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); | 964 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); |
965 | } | 965 | } |
966 | 966 | ||
967 | if (ip->i_d.di_nlink != 0) { | 967 | if (ip->i_d.di_nlink == 0) |
968 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 968 | return 0; |
969 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || | ||
970 | ip->i_delayed_blks > 0)) && | ||
971 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | ||
972 | (!(ip->i_d.di_flags & | ||
973 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { | ||
974 | 969 | ||
975 | /* | 970 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
976 | * If we can't get the iolock just skip truncating | 971 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || |
977 | * the blocks past EOF because we could deadlock | 972 | ip->i_delayed_blks > 0)) && |
978 | * with the mmap_sem otherwise. We'll get another | 973 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && |
979 | * chance to drop them once the last reference to | 974 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { |
980 | * the inode is dropped, so we'll never leak blocks | ||
981 | * permanently. | ||
982 | */ | ||
983 | error = xfs_free_eofblocks(mp, ip, | ||
984 | XFS_FREE_EOF_TRYLOCK); | ||
985 | if (error) | ||
986 | return error; | ||
987 | } | ||
988 | } | ||
989 | 975 | ||
976 | /* | ||
977 | * If we can't get the iolock just skip truncating the blocks | ||
978 | * past EOF because we could deadlock with the mmap_sem | ||
979 | * otherwise. We'll get another chance to drop them once the | ||
980 | * last reference to the inode is dropped, so we'll never leak | ||
981 | * blocks permanently. | ||
982 | * | ||
983 | * Further, check if the inode is being opened, written and | ||
984 | * closed frequently and we have delayed allocation blocks | ||
985 | * oustanding (e.g. streaming writes from the NFS server), | ||
986 | * truncating the blocks past EOF will cause fragmentation to | ||
987 | * occur. | ||
988 | * | ||
989 | * In this case don't do the truncation, either, but we have to | ||
990 | * be careful how we detect this case. Blocks beyond EOF show | ||
991 | * up as i_delayed_blks even when the inode is clean, so we | ||
992 | * need to truncate them away first before checking for a dirty | ||
993 | * release. Hence on the first dirty close we will still remove | ||
994 | * the speculative allocation, but after that we will leave it | ||
995 | * in place. | ||
996 | */ | ||
997 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
998 | return 0; | ||
999 | |||
1000 | error = xfs_free_eofblocks(mp, ip, | ||
1001 | XFS_FREE_EOF_TRYLOCK); | ||
1002 | if (error) | ||
1003 | return error; | ||
1004 | |||
1005 | /* delalloc blocks after truncation means it really is dirty */ | ||
1006 | if (ip->i_delayed_blks) | ||
1007 | xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); | ||
1008 | } | ||
990 | return 0; | 1009 | return 0; |
991 | } | 1010 | } |
992 | 1011 | ||