author      Alex Elder <aelder@sgi.com>   2011-01-10 22:35:55 -0500
committer   Alex Elder <aelder@sgi.com>   2011-01-10 22:35:55 -0500
commit      92f1c008ae79e32b83c0607d184b194f302bb3ee (patch)
tree        070980c581ca39a050a1b86a50fe4c52437cdba1 /fs/xfs/linux-2.6
parent      e54be894eae10eca9892e965cc9532f5d5a11767 (diff)
parent      d0eb2f38b250b7d6c993adf81b0e4ded0565497e (diff)
Merge branch 'master' into for-linus-merged
This merge pulls the XFS master branch into the latest Linus master. This
results in a merge conflict whose best fix is not obvious. I manually fixed
the conflict, in "fs/xfs/xfs_iget.c".

Dave Chinner had done work that resulted in RCU freeing of inodes separate
from what Nick Piggin had done, and their results differed slightly in
xfs_inode_free(). The fix updates Nick's call_rcu() with the use of VFS_I(),
while incorporating needed updates to some XFS inode fields implemented in
Dave's series. Dave's RCU callback function has also been removed.

Signed-off-by: Alex Elder <aelder@sgi.com>
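The conflict resolution itself lives in fs/xfs/xfs_iget.c, outside the diffstat below, but the pattern the message describes is worth spelling out. A minimal sketch, assuming the usual XFS helpers (VFS_I(), XFS_I(), kmem_zone_free()) and the i_rcu head that Nick's series added to struct inode; the callback name and teardown details are illustrative and may differ from the resolved code:

/*
 * Illustrative sketch of RCU-deferred inode freeing; not the resolved hunk.
 */
STATIC void
xfs_inode_free_callback(	/* name assumed for illustration */
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);

	kmem_zone_free(xfs_inode_zone, XFS_I(inode));
}

STATIC void
xfs_inode_free_sketch(		/* hypothetical wrapper, not in the tree */
	struct xfs_inode	*ip)
{
	/* ...XFS-private teardown elided... */

	/*
	 * Defer the final free until after an RCU grace period so that
	 * lock-free lookups still walking this inode never see freed
	 * memory.  VFS_I() converts the XFS inode to the VFS inode that
	 * carries the rcu_head.
	 */
	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}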
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--   fs/xfs/linux-2.6/sv.h            59
-rw-r--r--   fs/xfs/linux-2.6/xfs_aops.c     425
-rw-r--r--   fs/xfs/linux-2.6/xfs_aops.h      16
-rw-r--r--   fs/xfs/linux-2.6/xfs_buf.c      235
-rw-r--r--   fs/xfs/linux-2.6/xfs_buf.h       22
-rw-r--r--   fs/xfs/linux-2.6/xfs_export.c    12
-rw-r--r--   fs/xfs/linux-2.6/xfs_linux.h      1
-rw-r--r--   fs/xfs/linux-2.6/xfs_super.c     22
-rw-r--r--   fs/xfs/linux-2.6/xfs_sync.c      92
-rw-r--r--   fs/xfs/linux-2.6/xfs_trace.h     59
10 files changed, 496 insertions, 447 deletions
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c37081..00000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_SV_H__
19#define __XFS_SUPPORT_SV_H__
20
21#include <linux/wait.h>
22#include <linux/sched.h>
23#include <linux/spinlock.h>
24
25/*
26 * Synchronisation variables.
27 *
28 * (Parameters "pri", "svf" and "rts" are not implemented)
29 */
30
31typedef struct sv_s {
32 wait_queue_head_t waiters;
33} sv_t;
34
35static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
36{
37 DECLARE_WAITQUEUE(wait, current);
38
39 add_wait_queue_exclusive(&sv->waiters, &wait);
40 __set_current_state(TASK_UNINTERRUPTIBLE);
41 spin_unlock(lock);
42
43 schedule();
44
45 remove_wait_queue(&sv->waiters, &wait);
46}
47
48#define sv_init(sv,flag,name) \
49 init_waitqueue_head(&(sv)->waiters)
50#define sv_destroy(sv) \
51 /*NOTHING*/
52#define sv_wait(sv, pri, lock, s) \
53 _sv_wait(sv, lock)
54#define sv_signal(sv) \
55 wake_up(&(sv)->waiters)
56#define sv_broadcast(sv) \
57 wake_up_all(&(sv)->waiters)
58
59#endif /* __XFS_SUPPORT_SV_H__ */
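Callers of the removed wrapper paired an sv_t with a spinlock protecting the condition being waited on. A hypothetical caller, written only against the macros above (the names and the condition flag are invented for illustration):

static spinlock_t	foo_lock;	/* protects foo_done; hypothetical */
static int		foo_done;
static sv_t		foo_sv;

static void
foo_init(void)
{
	spin_lock_init(&foo_lock);
	sv_init(&foo_sv, 0, "foo");
}

static void
foo_wait_for_done(void)
{
	spin_lock(&foo_lock);
	while (!foo_done) {
		/* sv_wait() drops foo_lock and sleeps uninterruptibly */
		sv_wait(&foo_sv, 0, &foo_lock, 0);
		spin_lock(&foo_lock);
	}
	spin_unlock(&foo_lock);
}

static void
foo_mark_done(void)
{
	spin_lock(&foo_lock);
	foo_done = 1;
	spin_unlock(&foo_lock);
	sv_signal(&foo_sv);		/* wake one exclusive waiter */
}

With the header gone, the remaining in-tree users presumably move to plain wait queues or completions directly.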
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 691f61223ed..ec7bbb5645b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
38#include <linux/pagevec.h> 38#include <linux/pagevec.h>
39#include <linux/writeback.h> 39#include <linux/writeback.h>
40 40
41/*
42 * Types of I/O for bmap clustering and I/O completion tracking.
43 */
44enum {
45 IO_READ, /* mapping for a read */
46 IO_DELAY, /* mapping covers delalloc region */
47 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
48 IO_NEW /* just allocated */
49};
50 41
51/* 42/*
52 * Prime number of hash buckets since address is used as the key. 43 * Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
182 xfs_inode_t *ip = XFS_I(ioend->io_inode); 173 xfs_inode_t *ip = XFS_I(ioend->io_inode);
183 xfs_fsize_t isize; 174 xfs_fsize_t isize;
184 175
185 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
186 ASSERT(ioend->io_type != IO_READ);
187
188 if (unlikely(ioend->io_error)) 176 if (unlikely(ioend->io_error))
189 return 0; 177 return 0;
190 178
@@ -244,10 +232,8 @@ xfs_end_io(
244 * We might have to update the on-disk file size after extending 232 * We might have to update the on-disk file size after extending
245 * writes. 233 * writes.
246 */ 234 */
247 if (ioend->io_type != IO_READ) { 235 error = xfs_setfilesize(ioend);
248 error = xfs_setfilesize(ioend); 236 ASSERT(!error || error == EAGAIN);
249 ASSERT(!error || error == EAGAIN);
250 }
251 237
252 /* 238 /*
253 * If we didn't complete processing of the ioend, requeue it to the 239 * If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
318xfs_map_blocks( 304xfs_map_blocks(
319 struct inode *inode, 305 struct inode *inode,
320 loff_t offset, 306 loff_t offset,
321 ssize_t count,
322 struct xfs_bmbt_irec *imap, 307 struct xfs_bmbt_irec *imap,
323 int flags) 308 int type,
309 int nonblocking)
324{ 310{
325 int nmaps = 1; 311 struct xfs_inode *ip = XFS_I(inode);
326 int new = 0; 312 struct xfs_mount *mp = ip->i_mount;
313 ssize_t count = 1 << inode->i_blkbits;
314 xfs_fileoff_t offset_fsb, end_fsb;
315 int error = 0;
316 int bmapi_flags = XFS_BMAPI_ENTIRE;
317 int nimaps = 1;
318
319 if (XFS_FORCED_SHUTDOWN(mp))
320 return -XFS_ERROR(EIO);
321
322 if (type == IO_UNWRITTEN)
323 bmapi_flags |= XFS_BMAPI_IGSTATE;
324
325 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
326 if (nonblocking)
327 return -XFS_ERROR(EAGAIN);
328 xfs_ilock(ip, XFS_ILOCK_SHARED);
329 }
327 330
328 return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); 331 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
332 (ip->i_df.if_flags & XFS_IFEXTENTS));
333 ASSERT(offset <= mp->m_maxioffset);
334
335 if (offset + count > mp->m_maxioffset)
336 count = mp->m_maxioffset - offset;
337 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
338 offset_fsb = XFS_B_TO_FSBT(mp, offset);
339 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
340 bmapi_flags, NULL, 0, imap, &nimaps, NULL);
341 xfs_iunlock(ip, XFS_ILOCK_SHARED);
342
343 if (error)
344 return -XFS_ERROR(error);
345
346 if (type == IO_DELALLOC &&
347 (!nimaps || isnullstartblock(imap->br_startblock))) {
348 error = xfs_iomap_write_allocate(ip, offset, count, imap);
349 if (!error)
350 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
351 return -XFS_ERROR(error);
352 }
353
354#ifdef DEBUG
355 if (type == IO_UNWRITTEN) {
356 ASSERT(nimaps);
357 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
358 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
359 }
360#endif
361 if (nimaps)
362 trace_xfs_map_blocks_found(ip, offset, count, type, imap);
363 return 0;
329} 364}
330 365
331STATIC int 366STATIC int
@@ -380,26 +415,18 @@ xfs_submit_ioend_bio(
380 415
381 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
382 WRITE_SYNC_PLUG : WRITE, bio); 417 WRITE_SYNC_PLUG : WRITE, bio);
383 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
384 bio_put(bio);
385} 418}
386 419
387STATIC struct bio * 420STATIC struct bio *
388xfs_alloc_ioend_bio( 421xfs_alloc_ioend_bio(
389 struct buffer_head *bh) 422 struct buffer_head *bh)
390{ 423{
391 struct bio *bio;
392 int nvecs = bio_get_nr_vecs(bh->b_bdev); 424 int nvecs = bio_get_nr_vecs(bh->b_bdev);
393 425 struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
394 do {
395 bio = bio_alloc(GFP_NOIO, nvecs);
396 nvecs >>= 1;
397 } while (!bio);
398 426
399 ASSERT(bio->bi_private == NULL); 427 ASSERT(bio->bi_private == NULL);
400 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 428 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
401 bio->bi_bdev = bh->b_bdev; 429 bio->bi_bdev = bh->b_bdev;
402 bio_get(bio);
403 return bio; 430 return bio;
404} 431}
405 432
@@ -470,9 +497,8 @@ xfs_submit_ioend(
470 /* Pass 1 - start writeback */ 497 /* Pass 1 - start writeback */
471 do { 498 do {
472 next = ioend->io_list; 499 next = ioend->io_list;
473 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 500 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
474 xfs_start_buffer_writeback(bh); 501 xfs_start_buffer_writeback(bh);
475 }
476 } while ((ioend = next) != NULL); 502 } while ((ioend = next) != NULL);
477 503
478 /* Pass 2 - submit I/O */ 504 /* Pass 2 - submit I/O */
@@ -600,117 +626,13 @@ xfs_map_at_offset(
600 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 626 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
601 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 627 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
602 628
603 lock_buffer(bh);
604 xfs_map_buffer(inode, bh, imap, offset); 629 xfs_map_buffer(inode, bh, imap, offset);
605 bh->b_bdev = xfs_find_bdev_for_inode(inode);
606 set_buffer_mapped(bh); 630 set_buffer_mapped(bh);
607 clear_buffer_delay(bh); 631 clear_buffer_delay(bh);
608 clear_buffer_unwritten(bh); 632 clear_buffer_unwritten(bh);
609} 633}
610 634
611/* 635/*
612 * Look for a page at index that is suitable for clustering.
613 */
614STATIC unsigned int
615xfs_probe_page(
616 struct page *page,
617 unsigned int pg_offset)
618{
619 struct buffer_head *bh, *head;
620 int ret = 0;
621
622 if (PageWriteback(page))
623 return 0;
624 if (!PageDirty(page))
625 return 0;
626 if (!page->mapping)
627 return 0;
628 if (!page_has_buffers(page))
629 return 0;
630
631 bh = head = page_buffers(page);
632 do {
633 if (!buffer_uptodate(bh))
634 break;
635 if (!buffer_mapped(bh))
636 break;
637 ret += bh->b_size;
638 if (ret >= pg_offset)
639 break;
640 } while ((bh = bh->b_this_page) != head);
641
642 return ret;
643}
644
645STATIC size_t
646xfs_probe_cluster(
647 struct inode *inode,
648 struct page *startpage,
649 struct buffer_head *bh,
650 struct buffer_head *head)
651{
652 struct pagevec pvec;
653 pgoff_t tindex, tlast, tloff;
654 size_t total = 0;
655 int done = 0, i;
656
657 /* First sum forwards in this page */
658 do {
659 if (!buffer_uptodate(bh) || !buffer_mapped(bh))
660 return total;
661 total += bh->b_size;
662 } while ((bh = bh->b_this_page) != head);
663
664 /* if we reached the end of the page, sum forwards in following pages */
665 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
666 tindex = startpage->index + 1;
667
668 /* Prune this back to avoid pathological behavior */
669 tloff = min(tlast, startpage->index + 64);
670
671 pagevec_init(&pvec, 0);
672 while (!done && tindex <= tloff) {
673 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
674
675 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
676 break;
677
678 for (i = 0; i < pagevec_count(&pvec); i++) {
679 struct page *page = pvec.pages[i];
680 size_t pg_offset, pg_len = 0;
681
682 if (tindex == tlast) {
683 pg_offset =
684 i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
685 if (!pg_offset) {
686 done = 1;
687 break;
688 }
689 } else
690 pg_offset = PAGE_CACHE_SIZE;
691
692 if (page->index == tindex && trylock_page(page)) {
693 pg_len = xfs_probe_page(page, pg_offset);
694 unlock_page(page);
695 }
696
697 if (!pg_len) {
698 done = 1;
699 break;
700 }
701
702 total += pg_len;
703 tindex++;
704 }
705
706 pagevec_release(&pvec);
707 cond_resched();
708 }
709
710 return total;
711}
712
713/*
714 * Test if a given page is suitable for writing as part of an unwritten 636 * Test if a given page is suitable for writing as part of an unwritten
715 * or delayed allocate extent. 637 * or delayed allocate extent.
716 */ 638 */
@@ -731,9 +653,9 @@ xfs_is_delayed_page(
731 if (buffer_unwritten(bh)) 653 if (buffer_unwritten(bh))
732 acceptable = (type == IO_UNWRITTEN); 654 acceptable = (type == IO_UNWRITTEN);
733 else if (buffer_delay(bh)) 655 else if (buffer_delay(bh))
734 acceptable = (type == IO_DELAY); 656 acceptable = (type == IO_DELALLOC);
735 else if (buffer_dirty(bh) && buffer_mapped(bh)) 657 else if (buffer_dirty(bh) && buffer_mapped(bh))
736 acceptable = (type == IO_NEW); 658 acceptable = (type == IO_OVERWRITE);
737 else 659 else
738 break; 660 break;
739 } while ((bh = bh->b_this_page) != head); 661 } while ((bh = bh->b_this_page) != head);
@@ -758,8 +680,7 @@ xfs_convert_page(
758 loff_t tindex, 680 loff_t tindex,
759 struct xfs_bmbt_irec *imap, 681 struct xfs_bmbt_irec *imap,
760 xfs_ioend_t **ioendp, 682 xfs_ioend_t **ioendp,
761 struct writeback_control *wbc, 683 struct writeback_control *wbc)
762 int all_bh)
763{ 684{
764 struct buffer_head *bh, *head; 685 struct buffer_head *bh, *head;
765 xfs_off_t end_offset; 686 xfs_off_t end_offset;
@@ -814,37 +735,30 @@ xfs_convert_page(
814 continue; 735 continue;
815 } 736 }
816 737
817 if (buffer_unwritten(bh) || buffer_delay(bh)) { 738 if (buffer_unwritten(bh) || buffer_delay(bh) ||
739 buffer_mapped(bh)) {
818 if (buffer_unwritten(bh)) 740 if (buffer_unwritten(bh))
819 type = IO_UNWRITTEN; 741 type = IO_UNWRITTEN;
742 else if (buffer_delay(bh))
743 type = IO_DELALLOC;
820 else 744 else
821 type = IO_DELAY; 745 type = IO_OVERWRITE;
822 746
823 if (!xfs_imap_valid(inode, imap, offset)) { 747 if (!xfs_imap_valid(inode, imap, offset)) {
824 done = 1; 748 done = 1;
825 continue; 749 continue;
826 } 750 }
827 751
828 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 752 lock_buffer(bh);
829 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 753 if (type != IO_OVERWRITE)
830 754 xfs_map_at_offset(inode, bh, imap, offset);
831 xfs_map_at_offset(inode, bh, imap, offset);
832 xfs_add_to_ioend(inode, bh, offset, type, 755 xfs_add_to_ioend(inode, bh, offset, type,
833 ioendp, done); 756 ioendp, done);
834 757
835 page_dirty--; 758 page_dirty--;
836 count++; 759 count++;
837 } else { 760 } else {
838 type = IO_NEW; 761 done = 1;
839 if (buffer_mapped(bh) && all_bh) {
840 lock_buffer(bh);
841 xfs_add_to_ioend(inode, bh, offset,
842 type, ioendp, done);
843 count++;
844 page_dirty--;
845 } else {
846 done = 1;
847 }
848 } 762 }
849 } while (offset += len, (bh = bh->b_this_page) != head); 763 } while (offset += len, (bh = bh->b_this_page) != head);
850 764
@@ -876,7 +790,6 @@ xfs_cluster_write(
876 struct xfs_bmbt_irec *imap, 790 struct xfs_bmbt_irec *imap,
877 xfs_ioend_t **ioendp, 791 xfs_ioend_t **ioendp,
878 struct writeback_control *wbc, 792 struct writeback_control *wbc,
879 int all_bh,
880 pgoff_t tlast) 793 pgoff_t tlast)
881{ 794{
882 struct pagevec pvec; 795 struct pagevec pvec;
@@ -891,7 +804,7 @@ xfs_cluster_write(
891 804
892 for (i = 0; i < pagevec_count(&pvec); i++) { 805 for (i = 0; i < pagevec_count(&pvec); i++) {
893 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 806 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
894 imap, ioendp, wbc, all_bh); 807 imap, ioendp, wbc);
895 if (done) 808 if (done)
896 break; 809 break;
897 } 810 }
@@ -935,7 +848,7 @@ xfs_aops_discard_page(
935 struct buffer_head *bh, *head; 848 struct buffer_head *bh, *head;
936 loff_t offset = page_offset(page); 849 loff_t offset = page_offset(page);
937 850
938 if (!xfs_is_delayed_page(page, IO_DELAY)) 851 if (!xfs_is_delayed_page(page, IO_DELALLOC))
939 goto out_invalidate; 852 goto out_invalidate;
940 853
941 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 854 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1002,10 +915,10 @@ xfs_vm_writepage(
1002 unsigned int type; 915 unsigned int type;
1003 __uint64_t end_offset; 916 __uint64_t end_offset;
1004 pgoff_t end_index, last_index; 917 pgoff_t end_index, last_index;
1005 ssize_t size, len; 918 ssize_t len;
1006 int flags, err, imap_valid = 0, uptodate = 1; 919 int err, imap_valid = 0, uptodate = 1;
1007 int count = 0; 920 int count = 0;
1008 int all_bh = 0; 921 int nonblocking = 0;
1009 922
1010 trace_xfs_writepage(inode, page, 0); 923 trace_xfs_writepage(inode, page, 0);
1011 924
@@ -1056,10 +969,14 @@ xfs_vm_writepage(
1056 969
1057 bh = head = page_buffers(page); 970 bh = head = page_buffers(page);
1058 offset = page_offset(page); 971 offset = page_offset(page);
1059 flags = BMAPI_READ; 972 type = IO_OVERWRITE;
1060 type = IO_NEW; 973
974 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
975 nonblocking = 1;
1061 976
1062 do { 977 do {
978 int new_ioend = 0;
979
1063 if (offset >= end_offset) 980 if (offset >= end_offset)
1064 break; 981 break;
1065 if (!buffer_uptodate(bh)) 982 if (!buffer_uptodate(bh))
@@ -1076,90 +993,54 @@ xfs_vm_writepage(
1076 continue; 993 continue;
1077 } 994 }
1078 995
1079 if (imap_valid) 996 if (buffer_unwritten(bh)) {
1080 imap_valid = xfs_imap_valid(inode, &imap, offset); 997 if (type != IO_UNWRITTEN) {
1081
1082 if (buffer_unwritten(bh) || buffer_delay(bh)) {
1083 int new_ioend = 0;
1084
1085 /*
1086 * Make sure we don't use a read-only iomap
1087 */
1088 if (flags == BMAPI_READ)
1089 imap_valid = 0;
1090
1091 if (buffer_unwritten(bh)) {
1092 type = IO_UNWRITTEN; 998 type = IO_UNWRITTEN;
1093 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 999 imap_valid = 0;
1094 } else if (buffer_delay(bh)) {
1095 type = IO_DELAY;
1096 flags = BMAPI_ALLOCATE;
1097
1098 if (wbc->sync_mode == WB_SYNC_NONE)
1099 flags |= BMAPI_TRYLOCK;
1100 }
1101
1102 if (!imap_valid) {
1103 /*
1104 * If we didn't have a valid mapping then we
1105 * need to ensure that we put the new mapping
1106 * in a new ioend structure. This needs to be
1107 * done to ensure that the ioends correctly
1108 * reflect the block mappings at io completion
1109 * for unwritten extent conversion.
1110 */
1111 new_ioend = 1;
1112 err = xfs_map_blocks(inode, offset, len,
1113 &imap, flags);
1114 if (err)
1115 goto error;
1116 imap_valid = xfs_imap_valid(inode, &imap,
1117 offset);
1118 } 1000 }
1119 if (imap_valid) { 1001 } else if (buffer_delay(bh)) {
1120 xfs_map_at_offset(inode, bh, &imap, offset); 1002 if (type != IO_DELALLOC) {
1121 xfs_add_to_ioend(inode, bh, offset, type, 1003 type = IO_DELALLOC;
1122 &ioend, new_ioend); 1004 imap_valid = 0;
1123 count++;
1124 } 1005 }
1125 } else if (buffer_uptodate(bh)) { 1006 } else if (buffer_uptodate(bh)) {
1126 /* 1007 if (type != IO_OVERWRITE) {
1127 * we got here because the buffer is already mapped. 1008 type = IO_OVERWRITE;
1128 * That means it must already have extents allocated 1009 imap_valid = 0;
1129 * underneath it. Map the extent by reading it.
1130 */
1131 if (!imap_valid || flags != BMAPI_READ) {
1132 flags = BMAPI_READ;
1133 size = xfs_probe_cluster(inode, page, bh, head);
1134 err = xfs_map_blocks(inode, offset, size,
1135 &imap, flags);
1136 if (err)
1137 goto error;
1138 imap_valid = xfs_imap_valid(inode, &imap,
1139 offset);
1140 } 1010 }
1011 } else {
1012 if (PageUptodate(page)) {
1013 ASSERT(buffer_mapped(bh));
1014 imap_valid = 0;
1015 }
1016 continue;
1017 }
1141 1018
1019 if (imap_valid)
1020 imap_valid = xfs_imap_valid(inode, &imap, offset);
1021 if (!imap_valid) {
1142 /* 1022 /*
1143 * We set the type to IO_NEW in case we are doing a 1023 * If we didn't have a valid mapping then we need to
1144 * small write at EOF that is extending the file but 1024 * put the new mapping into a separate ioend structure.
1145 * without needing an allocation. We need to update the 1025 * This ensures non-contiguous extents always have
1146 * file size on I/O completion in this case so it is 1026 * separate ioends, which is particularly important
1147 * the same case as having just allocated a new extent 1027 * for unwritten extent conversion at I/O completion
1148 * that we are writing into for the first time. 1028 * time.
1149 */ 1029 */
1150 type = IO_NEW; 1030 new_ioend = 1;
1151 if (trylock_buffer(bh)) { 1031 err = xfs_map_blocks(inode, offset, &imap, type,
1152 if (imap_valid) 1032 nonblocking);
1153 all_bh = 1; 1033 if (err)
1154 xfs_add_to_ioend(inode, bh, offset, type, 1034 goto error;
1155 &ioend, !imap_valid); 1035 imap_valid = xfs_imap_valid(inode, &imap, offset);
1156 count++; 1036 }
1157 } else { 1037 if (imap_valid) {
1158 imap_valid = 0; 1038 lock_buffer(bh);
1159 } 1039 if (type != IO_OVERWRITE)
1160 } else if (PageUptodate(page)) { 1040 xfs_map_at_offset(inode, bh, &imap, offset);
1161 ASSERT(buffer_mapped(bh)); 1041 xfs_add_to_ioend(inode, bh, offset, type, &ioend,
1162 imap_valid = 0; 1042 new_ioend);
1043 count++;
1163 } 1044 }
1164 1045
1165 if (!iohead) 1046 if (!iohead)
@@ -1188,7 +1069,7 @@ xfs_vm_writepage(
1188 end_index = last_index; 1069 end_index = last_index;
1189 1070
1190 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1071 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1191 wbc, all_bh, end_index); 1072 wbc, end_index);
1192 } 1073 }
1193 1074
1194 if (iohead) 1075 if (iohead)
@@ -1257,13 +1138,19 @@ __xfs_get_blocks(
1257 int create, 1138 int create,
1258 int direct) 1139 int direct)
1259{ 1140{
1260 int flags = create ? BMAPI_WRITE : BMAPI_READ; 1141 struct xfs_inode *ip = XFS_I(inode);
1142 struct xfs_mount *mp = ip->i_mount;
1143 xfs_fileoff_t offset_fsb, end_fsb;
1144 int error = 0;
1145 int lockmode = 0;
1261 struct xfs_bmbt_irec imap; 1146 struct xfs_bmbt_irec imap;
1147 int nimaps = 1;
1262 xfs_off_t offset; 1148 xfs_off_t offset;
1263 ssize_t size; 1149 ssize_t size;
1264 int nimap = 1;
1265 int new = 0; 1150 int new = 0;
1266 int error; 1151
1152 if (XFS_FORCED_SHUTDOWN(mp))
1153 return -XFS_ERROR(EIO);
1267 1154
1268 offset = (xfs_off_t)iblock << inode->i_blkbits; 1155 offset = (xfs_off_t)iblock << inode->i_blkbits;
1269 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1156 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1272,15 +1159,45 @@ __xfs_get_blocks(
1272 if (!create && direct && offset >= i_size_read(inode)) 1159 if (!create && direct && offset >= i_size_read(inode))
1273 return 0; 1160 return 0;
1274 1161
1275 if (direct && create) 1162 if (create) {
1276 flags |= BMAPI_DIRECT; 1163 lockmode = XFS_ILOCK_EXCL;
1164 xfs_ilock(ip, lockmode);
1165 } else {
1166 lockmode = xfs_ilock_map_shared(ip);
1167 }
1168
1169 ASSERT(offset <= mp->m_maxioffset);
1170 if (offset + size > mp->m_maxioffset)
1171 size = mp->m_maxioffset - offset;
1172 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1173 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1277 1174
1278 error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, 1175 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
1279 &new); 1176 XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
1280 if (error) 1177 if (error)
1281 return -error; 1178 goto out_unlock;
1282 if (nimap == 0) 1179
1283 return 0; 1180 if (create &&
1181 (!nimaps ||
1182 (imap.br_startblock == HOLESTARTBLOCK ||
1183 imap.br_startblock == DELAYSTARTBLOCK))) {
1184 if (direct) {
1185 error = xfs_iomap_write_direct(ip, offset, size,
1186 &imap, nimaps);
1187 } else {
1188 error = xfs_iomap_write_delay(ip, offset, size, &imap);
1189 }
1190 if (error)
1191 goto out_unlock;
1192
1193 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1194 } else if (nimaps) {
1195 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1196 } else {
1197 trace_xfs_get_blocks_notfound(ip, offset, size);
1198 goto out_unlock;
1199 }
1200 xfs_iunlock(ip, lockmode);
1284 1201
1285 if (imap.br_startblock != HOLESTARTBLOCK && 1202 if (imap.br_startblock != HOLESTARTBLOCK &&
1286 imap.br_startblock != DELAYSTARTBLOCK) { 1203 imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1347,6 +1264,10 @@ __xfs_get_blocks(
1347 } 1264 }
1348 1265
1349 return 0; 1266 return 0;
1267
1268out_unlock:
1269 xfs_iunlock(ip, lockmode);
1270 return -error;
1350} 1271}
1351 1272
1352int 1273int
@@ -1434,7 +1355,7 @@ xfs_vm_direct_IO(
1434 ssize_t ret; 1355 ssize_t ret;
1435 1356
1436 if (rw & WRITE) { 1357 if (rw & WRITE) {
1437 iocb->private = xfs_alloc_ioend(inode, IO_NEW); 1358 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
1438 1359
1439 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1360 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1440 offset, nr_segs, 1361 offset, nr_segs,
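A recurring idea in the xfs_vm_writepage() rework above is that each buffer_head is classified into a single I/O type (IO_UNWRITTEN, IO_DELALLOC or IO_OVERWRITE) and the cached mapping is invalidated whenever the type changes, so each ioend only ever covers one kind of extent. The fragment below is a standalone model of just that classification step, with invented struct and field names; it compiles on its own and is not the kernel code:

#include <stdbool.h>
#include <stdio.h>

enum io_type { IO_OVERWRITE, IO_DELALLOC, IO_UNWRITTEN };

struct bh_model {			/* invented stand-in for buffer_head state */
	bool unwritten, delay, uptodate;
};

/*
 * Classify one buffer.  Whenever the wanted type differs from the current
 * one, the cached mapping is invalidated so the next mapping lookup is made
 * for the new type and its blocks go into a new ioend.
 */
static enum io_type
classify(const struct bh_model *bh, enum io_type cur, bool *imap_valid)
{
	enum io_type want;

	if (bh->unwritten)
		want = IO_UNWRITTEN;
	else if (bh->delay)
		want = IO_DELALLOC;
	else if (bh->uptodate)
		want = IO_OVERWRITE;
	else
		return cur;		/* the real code skips such buffers */

	if (want != cur)
		*imap_valid = false;	/* force a fresh mapping lookup */
	return want;
}

int main(void)
{
	struct bh_model page[3] = {
		{ .delay = true }, { .delay = true }, { .uptodate = true },
	};
	enum io_type type = IO_OVERWRITE;
	bool imap_valid = false;

	for (int i = 0; i < 3; i++) {
		type = classify(&page[i], type, &imap_valid);
		printf("bh %d -> type %d, imap_valid %d\n", i, type, (int)imap_valid);
		imap_valid = true;	/* pretend the mapping lookup succeeded */
	}
	return 0;
}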
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb6237..71f721e1a71 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
23extern mempool_t *xfs_ioend_pool; 23extern mempool_t *xfs_ioend_pool;
24 24
25/* 25/*
26 * Types of I/O for bmap clustering and I/O completion tracking.
27 */
28enum {
29 IO_DIRECT = 0, /* special case for direct I/O ioends */
30 IO_DELALLOC, /* mapping covers delalloc region */
31 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
32 IO_OVERWRITE, /* mapping covers already allocated extent */
33};
34
35#define XFS_IO_TYPES \
36 { 0, "" }, \
37 { IO_DELALLOC, "delalloc" }, \
38 { IO_UNWRITTEN, "unwritten" }, \
39 { IO_OVERWRITE, "overwrite" }
40
41/*
26 * xfs_ioend struct manages large extent writes for XFS. 42 * xfs_ioend struct manages large extent writes for XFS.
27 * It can manage several multi-page bio's at once. 43 * It can manage several multi-page bio's at once.
28 */ 44 */
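The XFS_IO_TYPES table added above is in the { value, string } form the kernel tracing macros expect (IO_DIRECT, value 0, deliberately maps to an empty string). It is presumably consumed via __print_symbolic() when trace events print the I/O type, roughly along these lines (illustrative fragment, not the in-tree event definition):

	TP_printk("dev %d:%d ino 0x%llx offset 0x%llx count 0x%zx type %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino, __entry->offset, __entry->count,
		  __print_symbolic(__entry->type, XFS_IO_TYPES))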
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6e9e3..92f1f2acc6a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
44 44
45static kmem_zone_t *xfs_buf_zone; 45static kmem_zone_t *xfs_buf_zone;
46STATIC int xfsbufd(void *); 46STATIC int xfsbufd(void *);
47STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
48STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 47STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
49static struct shrinker xfs_buf_shake = {
50 .shrink = xfsbufd_wakeup,
51 .seeks = DEFAULT_SEEKS,
52};
53 48
54static struct workqueue_struct *xfslogd_workqueue; 49static struct workqueue_struct *xfslogd_workqueue;
55struct workqueue_struct *xfsdatad_workqueue; 50struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
168} 163}
169 164
170/* 165/*
171 * Internal xfs_buf_t object manipulation 166 * xfs_buf_lru_add - add a buffer to the LRU.
167 *
168 * The LRU takes a new reference to the buffer so that it will only be freed
169 * once the shrinker takes the buffer off the LRU.
172 */ 170 */
171STATIC void
172xfs_buf_lru_add(
173 struct xfs_buf *bp)
174{
175 struct xfs_buftarg *btp = bp->b_target;
176
177 spin_lock(&btp->bt_lru_lock);
178 if (list_empty(&bp->b_lru)) {
179 atomic_inc(&bp->b_hold);
180 list_add_tail(&bp->b_lru, &btp->bt_lru);
181 btp->bt_lru_nr++;
182 }
183 spin_unlock(&btp->bt_lru_lock);
184}
185
186/*
187 * xfs_buf_lru_del - remove a buffer from the LRU
188 *
189 * The unlocked check is safe here because it only occurs when there are not
190 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
191 * to optimise the shrinker removing the buffer from the LRU and calling
192 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
193 * bt_lru_lock.
194 */
195STATIC void
196xfs_buf_lru_del(
197 struct xfs_buf *bp)
198{
199 struct xfs_buftarg *btp = bp->b_target;
200
201 if (list_empty(&bp->b_lru))
202 return;
203
204 spin_lock(&btp->bt_lru_lock);
205 if (!list_empty(&bp->b_lru)) {
206 list_del_init(&bp->b_lru);
207 btp->bt_lru_nr--;
208 }
209 spin_unlock(&btp->bt_lru_lock);
210}
211
212/*
213 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
214 * b_lru_ref count so that the buffer is freed immediately when the buffer
215 * reference count falls to zero. If the buffer is already on the LRU, we need
216 * to remove the reference that LRU holds on the buffer.
217 *
218 * This prevents build-up of stale buffers on the LRU.
219 */
220void
221xfs_buf_stale(
222 struct xfs_buf *bp)
223{
224 bp->b_flags |= XBF_STALE;
225 atomic_set(&(bp)->b_lru_ref, 0);
226 if (!list_empty(&bp->b_lru)) {
227 struct xfs_buftarg *btp = bp->b_target;
228
229 spin_lock(&btp->bt_lru_lock);
230 if (!list_empty(&bp->b_lru)) {
231 list_del_init(&bp->b_lru);
232 btp->bt_lru_nr--;
233 atomic_dec(&bp->b_hold);
234 }
235 spin_unlock(&btp->bt_lru_lock);
236 }
237 ASSERT(atomic_read(&bp->b_hold) >= 1);
238}
173 239
174STATIC void 240STATIC void
175_xfs_buf_initialize( 241_xfs_buf_initialize(
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
186 252
187 memset(bp, 0, sizeof(xfs_buf_t)); 253 memset(bp, 0, sizeof(xfs_buf_t));
188 atomic_set(&bp->b_hold, 1); 254 atomic_set(&bp->b_hold, 1);
255 atomic_set(&bp->b_lru_ref, 1);
189 init_completion(&bp->b_iowait); 256 init_completion(&bp->b_iowait);
257 INIT_LIST_HEAD(&bp->b_lru);
190 INIT_LIST_HEAD(&bp->b_list); 258 INIT_LIST_HEAD(&bp->b_list);
191 RB_CLEAR_NODE(&bp->b_rbnode); 259 RB_CLEAR_NODE(&bp->b_rbnode);
192 sema_init(&bp->b_sema, 0); /* held, no waiters */ 260 sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
262{ 330{
263 trace_xfs_buf_free(bp, _RET_IP_); 331 trace_xfs_buf_free(bp, _RET_IP_);
264 332
333 ASSERT(list_empty(&bp->b_lru));
334
265 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
266 uint i; 336 uint i;
267 337
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
337 __func__, gfp_mask); 407 __func__, gfp_mask);
338 408
339 XFS_STATS_INC(xb_page_retries); 409 XFS_STATS_INC(xb_page_retries);
340 xfsbufd_wakeup(NULL, 0, gfp_mask);
341 congestion_wait(BLK_RW_ASYNC, HZ/50); 410 congestion_wait(BLK_RW_ASYNC, HZ/50);
342 goto retry; 411 goto retry;
343 } 412 }
@@ -828,6 +897,7 @@ xfs_buf_rele(
828 897
829 if (!pag) { 898 if (!pag) {
830 ASSERT(!bp->b_relse); 899 ASSERT(!bp->b_relse);
900 ASSERT(list_empty(&bp->b_lru));
831 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); 901 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
832 if (atomic_dec_and_test(&bp->b_hold)) 902 if (atomic_dec_and_test(&bp->b_hold))
833 xfs_buf_free(bp); 903 xfs_buf_free(bp);
@@ -835,13 +905,19 @@ xfs_buf_rele(
835 } 905 }
836 906
837 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); 907 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
908
838 ASSERT(atomic_read(&bp->b_hold) > 0); 909 ASSERT(atomic_read(&bp->b_hold) > 0);
839 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { 910 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
840 if (bp->b_relse) { 911 if (bp->b_relse) {
841 atomic_inc(&bp->b_hold); 912 atomic_inc(&bp->b_hold);
842 spin_unlock(&pag->pag_buf_lock); 913 spin_unlock(&pag->pag_buf_lock);
843 bp->b_relse(bp); 914 bp->b_relse(bp);
915 } else if (!(bp->b_flags & XBF_STALE) &&
916 atomic_read(&bp->b_lru_ref)) {
917 xfs_buf_lru_add(bp);
918 spin_unlock(&pag->pag_buf_lock);
844 } else { 919 } else {
920 xfs_buf_lru_del(bp);
845 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); 921 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
846 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); 922 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
847 spin_unlock(&pag->pag_buf_lock); 923 spin_unlock(&pag->pag_buf_lock);
@@ -1438,51 +1514,84 @@ xfs_buf_iomove(
1438 */ 1514 */
1439 1515
1440/* 1516/*
1441 * Wait for any bufs with callbacks that have been submitted but 1517 * Wait for any bufs with callbacks that have been submitted but have not yet
1442 * have not yet returned... walk the hash list for the target. 1518 * returned. These buffers will have an elevated hold count, so wait on those
1519 * while freeing all the buffers only held by the LRU.
1443 */ 1520 */
1444void 1521void
1445xfs_wait_buftarg( 1522xfs_wait_buftarg(
1446 struct xfs_buftarg *btp) 1523 struct xfs_buftarg *btp)
1447{ 1524{
1448 struct xfs_perag *pag; 1525 struct xfs_buf *bp;
1449 uint i;
1450 1526
1451 for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { 1527restart:
1452 pag = xfs_perag_get(btp->bt_mount, i); 1528 spin_lock(&btp->bt_lru_lock);
1453 spin_lock(&pag->pag_buf_lock); 1529 while (!list_empty(&btp->bt_lru)) {
1454 while (rb_first(&pag->pag_buf_tree)) { 1530 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1455 spin_unlock(&pag->pag_buf_lock); 1531 if (atomic_read(&bp->b_hold) > 1) {
1532 spin_unlock(&btp->bt_lru_lock);
1456 delay(100); 1533 delay(100);
1457 spin_lock(&pag->pag_buf_lock); 1534 goto restart;
1458 } 1535 }
1459 spin_unlock(&pag->pag_buf_lock); 1536 /*
1460 xfs_perag_put(pag); 1537 * clear the LRU reference count so the bufer doesn't get
1538 * ignored in xfs_buf_rele().
1539 */
1540 atomic_set(&bp->b_lru_ref, 0);
1541 spin_unlock(&btp->bt_lru_lock);
1542 xfs_buf_rele(bp);
1543 spin_lock(&btp->bt_lru_lock);
1461 } 1544 }
1545 spin_unlock(&btp->bt_lru_lock);
1462} 1546}
1463 1547
1464/* 1548int
1465 * buftarg list for delwrite queue processing 1549xfs_buftarg_shrink(
1466 */ 1550 struct shrinker *shrink,
1467static LIST_HEAD(xfs_buftarg_list); 1551 int nr_to_scan,
1468static DEFINE_SPINLOCK(xfs_buftarg_lock); 1552 gfp_t mask)
1469
1470STATIC void
1471xfs_register_buftarg(
1472 xfs_buftarg_t *btp)
1473{ 1553{
1474 spin_lock(&xfs_buftarg_lock); 1554 struct xfs_buftarg *btp = container_of(shrink,
1475 list_add(&btp->bt_list, &xfs_buftarg_list); 1555 struct xfs_buftarg, bt_shrinker);
1476 spin_unlock(&xfs_buftarg_lock); 1556 struct xfs_buf *bp;
1477} 1557 LIST_HEAD(dispose);
1478 1558
1479STATIC void 1559 if (!nr_to_scan)
1480xfs_unregister_buftarg( 1560 return btp->bt_lru_nr;
1481 xfs_buftarg_t *btp) 1561
1482{ 1562 spin_lock(&btp->bt_lru_lock);
1483 spin_lock(&xfs_buftarg_lock); 1563 while (!list_empty(&btp->bt_lru)) {
1484 list_del(&btp->bt_list); 1564 if (nr_to_scan-- <= 0)
1485 spin_unlock(&xfs_buftarg_lock); 1565 break;
1566
1567 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1568
1569 /*
1570 * Decrement the b_lru_ref count unless the value is already
1571 * zero. If the value is already zero, we need to reclaim the
1572 * buffer, otherwise it gets another trip through the LRU.
1573 */
1574 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1575 list_move_tail(&bp->b_lru, &btp->bt_lru);
1576 continue;
1577 }
1578
1579 /*
1580 * remove the buffer from the LRU now to avoid needing another
1581 * lock round trip inside xfs_buf_rele().
1582 */
1583 list_move(&bp->b_lru, &dispose);
1584 btp->bt_lru_nr--;
1585 }
1586 spin_unlock(&btp->bt_lru_lock);
1587
1588 while (!list_empty(&dispose)) {
1589 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1590 list_del_init(&bp->b_lru);
1591 xfs_buf_rele(bp);
1592 }
1593
1594 return btp->bt_lru_nr;
1486} 1595}
1487 1596
1488void 1597void
@@ -1490,17 +1599,14 @@ xfs_free_buftarg(
1490 struct xfs_mount *mp, 1599 struct xfs_mount *mp,
1491 struct xfs_buftarg *btp) 1600 struct xfs_buftarg *btp)
1492{ 1601{
1602 unregister_shrinker(&btp->bt_shrinker);
1603
1493 xfs_flush_buftarg(btp, 1); 1604 xfs_flush_buftarg(btp, 1);
1494 if (mp->m_flags & XFS_MOUNT_BARRIER) 1605 if (mp->m_flags & XFS_MOUNT_BARRIER)
1495 xfs_blkdev_issue_flush(btp); 1606 xfs_blkdev_issue_flush(btp);
1496 iput(btp->bt_mapping->host); 1607 iput(btp->bt_mapping->host);
1497 1608
1498 /* Unregister the buftarg first so that we don't get a
1499 * wakeup finding a non-existent task
1500 */
1501 xfs_unregister_buftarg(btp);
1502 kthread_stop(btp->bt_task); 1609 kthread_stop(btp->bt_task);
1503
1504 kmem_free(btp); 1610 kmem_free(btp);
1505} 1611}
1506 1612
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
1597 xfs_buftarg_t *btp, 1703 xfs_buftarg_t *btp,
1598 const char *fsname) 1704 const char *fsname)
1599{ 1705{
1600 int error = 0;
1601
1602 INIT_LIST_HEAD(&btp->bt_list);
1603 INIT_LIST_HEAD(&btp->bt_delwrite_queue); 1706 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1604 spin_lock_init(&btp->bt_delwrite_lock); 1707 spin_lock_init(&btp->bt_delwrite_lock);
1605 btp->bt_flags = 0; 1708 btp->bt_flags = 0;
1606 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); 1709 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
1607 if (IS_ERR(btp->bt_task)) { 1710 if (IS_ERR(btp->bt_task))
1608 error = PTR_ERR(btp->bt_task); 1711 return PTR_ERR(btp->bt_task);
1609 goto out_error; 1712 return 0;
1610 }
1611 xfs_register_buftarg(btp);
1612out_error:
1613 return error;
1614} 1713}
1615 1714
1616xfs_buftarg_t * 1715xfs_buftarg_t *
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
1627 btp->bt_mount = mp; 1726 btp->bt_mount = mp;
1628 btp->bt_dev = bdev->bd_dev; 1727 btp->bt_dev = bdev->bd_dev;
1629 btp->bt_bdev = bdev; 1728 btp->bt_bdev = bdev;
1729 INIT_LIST_HEAD(&btp->bt_lru);
1730 spin_lock_init(&btp->bt_lru_lock);
1630 if (xfs_setsize_buftarg_early(btp, bdev)) 1731 if (xfs_setsize_buftarg_early(btp, bdev))
1631 goto error; 1732 goto error;
1632 if (xfs_mapping_buftarg(btp, bdev)) 1733 if (xfs_mapping_buftarg(btp, bdev))
1633 goto error; 1734 goto error;
1634 if (xfs_alloc_delwrite_queue(btp, fsname)) 1735 if (xfs_alloc_delwrite_queue(btp, fsname))
1635 goto error; 1736 goto error;
1737 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
1738 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1739 register_shrinker(&btp->bt_shrinker);
1636 return btp; 1740 return btp;
1637 1741
1638error: 1742error:
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
1737 flush_workqueue(queue); 1841 flush_workqueue(queue);
1738} 1842}
1739 1843
1740STATIC int
1741xfsbufd_wakeup(
1742 struct shrinker *shrink,
1743 int priority,
1744 gfp_t mask)
1745{
1746 xfs_buftarg_t *btp;
1747
1748 spin_lock(&xfs_buftarg_lock);
1749 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1750 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1751 continue;
1752 if (list_empty(&btp->bt_delwrite_queue))
1753 continue;
1754 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1755 wake_up_process(btp->bt_task);
1756 }
1757 spin_unlock(&xfs_buftarg_lock);
1758 return 0;
1759}
1760
1761/* 1844/*
1762 * Move as many buffers as specified to the supplied list 1845 * Move as many buffers as specified to the supplied list
1763 * indicating if we skipped any buffers to prevent deadlocks. 1846 * indicating if we skipped any buffers to prevent deadlocks.
@@ -1952,7 +2035,6 @@ xfs_buf_init(void)
1952 if (!xfsconvertd_workqueue) 2035 if (!xfsconvertd_workqueue)
1953 goto out_destroy_xfsdatad_workqueue; 2036 goto out_destroy_xfsdatad_workqueue;
1954 2037
1955 register_shrinker(&xfs_buf_shake);
1956 return 0; 2038 return 0;
1957 2039
1958 out_destroy_xfsdatad_workqueue: 2040 out_destroy_xfsdatad_workqueue:
@@ -1968,7 +2050,6 @@ xfs_buf_init(void)
1968void 2050void
1969xfs_buf_terminate(void) 2051xfs_buf_terminate(void)
1970{ 2052{
1971 unregister_shrinker(&xfs_buf_shake);
1972 destroy_workqueue(xfsconvertd_workqueue); 2053 destroy_workqueue(xfsconvertd_workqueue);
1973 destroy_workqueue(xfsdatad_workqueue); 2054 destroy_workqueue(xfsdatad_workqueue);
1974 destroy_workqueue(xfslogd_workqueue); 2055 destroy_workqueue(xfslogd_workqueue);
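The xfs_buf.c changes above replace the global xfsbufd-wakeup shrinker with a per-buftarg LRU whose buffers carry a b_lru_ref count: according to the comments in xfs_buftarg_shrink(), a scanned buffer with references left gets its count decremented and another trip around the LRU, and only a buffer whose count has already reached zero is disposed of. The self-contained sketch below models that aging policy in plain userspace C, with invented names and no kernel APIs:

#include <stdio.h>
#include <stdlib.h>

struct buf {				/* stand-in for struct xfs_buf */
	int		lru_ref;	/* models atomic_t b_lru_ref */
	int		id;
	struct buf	*next;
};

static struct buf *lru_head;		/* singly linked LRU, head = oldest */

static void
lru_add_tail(struct buf *bp)
{
	struct buf **pp = &lru_head;

	while (*pp)
		pp = &(*pp)->next;
	bp->next = NULL;
	*pp = bp;
}

/*
 * One shrinker pass over up to nr_to_scan buffers: a buffer with a positive
 * reference count has it decremented and is rotated to the tail for another
 * trip; a buffer already at zero is reclaimed.
 */
static void
shrink_scan(int nr_to_scan)
{
	while (nr_to_scan-- > 0 && lru_head) {
		struct buf *bp = lru_head;

		lru_head = bp->next;
		if (bp->lru_ref > 0) {
			bp->lru_ref--;
			lru_add_tail(bp);	/* keep it for now */
		} else {
			printf("reclaim buf %d\n", bp->id);
			free(bp);
		}
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct buf *bp = calloc(1, sizeof(*bp));

		bp->id = i;
		bp->lru_ref = (i == 0) ? 2 : 1;	/* buf 0 is "hotter" */
		lru_add_tail(bp);
	}
	for (int pass = 0; pass < 3; pass++) {
		printf("pass %d\n", pass);
		shrink_scan(4);
	}
	return 0;
}

Buffers that survive more scans are exactly those given a higher b_lru_ref via XFS_BUF_SET_VTYPE_REF()/xfs_buf_set_ref(), which the xfs_buf.h hunks later in this diff wire up.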
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 383a3f37cf9..a76c2428faf 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -128,10 +128,15 @@ typedef struct xfs_buftarg {
128 128
129 /* per device delwri queue */ 129 /* per device delwri queue */
130 struct task_struct *bt_task; 130 struct task_struct *bt_task;
131 struct list_head bt_list;
132 struct list_head bt_delwrite_queue; 131 struct list_head bt_delwrite_queue;
133 spinlock_t bt_delwrite_lock; 132 spinlock_t bt_delwrite_lock;
134 unsigned long bt_flags; 133 unsigned long bt_flags;
134
135 /* LRU control structures */
136 struct shrinker bt_shrinker;
137 struct list_head bt_lru;
138 spinlock_t bt_lru_lock;
139 unsigned int bt_lru_nr;
135} xfs_buftarg_t; 140} xfs_buftarg_t;
136 141
137/* 142/*
@@ -164,9 +169,11 @@ typedef struct xfs_buf {
164 xfs_off_t b_file_offset; /* offset in file */ 169 xfs_off_t b_file_offset; /* offset in file */
165 size_t b_buffer_length;/* size of buffer in bytes */ 170 size_t b_buffer_length;/* size of buffer in bytes */
166 atomic_t b_hold; /* reference count */ 171 atomic_t b_hold; /* reference count */
172 atomic_t b_lru_ref; /* lru reclaim ref count */
167 xfs_buf_flags_t b_flags; /* status flags */ 173 xfs_buf_flags_t b_flags; /* status flags */
168 struct semaphore b_sema; /* semaphore for lockables */ 174 struct semaphore b_sema; /* semaphore for lockables */
169 175
176 struct list_head b_lru; /* lru list */
170 wait_queue_head_t b_waiters; /* unpin waiters */ 177 wait_queue_head_t b_waiters; /* unpin waiters */
171 struct list_head b_list; 178 struct list_head b_list;
172 struct xfs_perag *b_pag; /* contains rbtree root */ 179 struct xfs_perag *b_pag; /* contains rbtree root */
@@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void);
264#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 271#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
265 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 272 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
266 273
267#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) 274void xfs_buf_stale(struct xfs_buf *bp);
275#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
268#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) 276#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
269#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) 277#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
270#define XFS_BUF_SUPER_STALE(bp) do { \ 278#define XFS_BUF_SUPER_STALE(bp) do { \
@@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void);
328#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) 336#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
329#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) 337#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
330 338
331#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) 339static inline void
340xfs_buf_set_ref(
341 struct xfs_buf *bp,
342 int lru_ref)
343{
344 atomic_set(&bp->b_lru_ref, lru_ref);
345}
346#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
332#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) 347#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
333#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
334 348
335#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) 349#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
336 350
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 3764d74790e..fc0114da7fd 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -70,8 +70,16 @@ xfs_fs_encode_fh(
70 else 70 else
71 fileid_type = FILEID_INO32_GEN_PARENT; 71 fileid_type = FILEID_INO32_GEN_PARENT;
72 72
73 /* filesystem may contain 64bit inode numbers */ 73 /*
74 if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) 74 * If the filesystem may contain 64bit inode numbers, we need
75 * to use larger file handles that can represent them.
76 *
77 * While we only allocate inodes that do not fit into 32 bits any
78 * large enough filesystem may contain them, thus the slightly
79 * confusing looking conditional below.
80 */
81 if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
82 (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
75 fileid_type |= XFS_FILEID_TYPE_64FLAG; 83 fileid_type |= XFS_FILEID_TYPE_64FLAG;
76 84
77 /* 85 /*
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 214ddd71ff7..09649499774 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -37,7 +37,6 @@
37 37
38#include <kmem.h> 38#include <kmem.h>
39#include <mrlock.h> 39#include <mrlock.h>
40#include <sv.h>
41#include <time.h> 40#include <time.h>
42 41
43#include <support/debug.h> 42#include <support/debug.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 064f964d4f3..c51faaa5e29 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -834,8 +834,11 @@ xfsaild_wakeup(
834 struct xfs_ail *ailp, 834 struct xfs_ail *ailp,
835 xfs_lsn_t threshold_lsn) 835 xfs_lsn_t threshold_lsn)
836{ 836{
837 ailp->xa_target = threshold_lsn; 837 /* only ever move the target forwards */
838 wake_up_process(ailp->xa_task); 838 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
839 ailp->xa_target = threshold_lsn;
840 wake_up_process(ailp->xa_task);
841 }
839} 842}
840 843
841STATIC int 844STATIC int
@@ -847,8 +850,17 @@ xfsaild(
847 long tout = 0; /* milliseconds */ 850 long tout = 0; /* milliseconds */
848 851
849 while (!kthread_should_stop()) { 852 while (!kthread_should_stop()) {
850 schedule_timeout_interruptible(tout ? 853 /*
851 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); 854 * for short sleeps indicating congestion, don't allow us to
855 * get woken early. Otherwise all we do is bang on the AIL lock
856 * without making progress.
857 */
858 if (tout && tout <= 20)
859 __set_current_state(TASK_KILLABLE);
860 else
861 __set_current_state(TASK_INTERRUPTIBLE);
862 schedule_timeout(tout ?
863 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
852 864
853 /* swsusp */ 865 /* swsusp */
854 try_to_freeze(); 866 try_to_freeze();
@@ -1118,6 +1130,8 @@ xfs_fs_evict_inode(
1118 */ 1130 */
1119 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 1131 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1120 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 1132 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1133 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
1134 &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
1121 1135
1122 xfs_inactive(ip); 1136 xfs_inactive(ip);
1123} 1137}
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index afb0d7cfad1..a02480de975 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
53{ 53{
54 struct inode *inode = VFS_I(ip); 54 struct inode *inode = VFS_I(ip);
55 55
56 ASSERT(rcu_read_lock_held());
57
58 /*
59 * check for stale RCU freed inode
60 *
61 * If the inode has been reallocated, it doesn't matter if it's not in
62 * the AG we are walking - we are walking for writeback, so if it
63 * passes all the "valid inode" checks and is dirty, then we'll write
64 * it back anyway. If it has been reallocated and still being
65 * initialised, the XFS_INEW check below will catch it.
66 */
67 spin_lock(&ip->i_flags_lock);
68 if (!ip->i_ino)
69 goto out_unlock_noent;
70
71 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
72 if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
73 goto out_unlock_noent;
74 spin_unlock(&ip->i_flags_lock);
75
56 /* nothing to sync during shutdown */ 76 /* nothing to sync during shutdown */
57 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 77 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
58 return EFSCORRUPTED; 78 return EFSCORRUPTED;
59 79
60 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
61 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
62 return ENOENT;
63
64 /* If we can't grab the inode, it must be on its way to reclaim. */ 80 /* If we can't grab the inode, it must be on its way to reclaim. */
65 if (!igrab(inode)) 81 if (!igrab(inode))
66 return ENOENT; 82 return ENOENT;
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
72 88
73 /* inode is valid */ 89 /* inode is valid */
74 return 0; 90 return 0;
91
92out_unlock_noent:
93 spin_unlock(&ip->i_flags_lock);
94 return ENOENT;
75} 95}
76 96
77STATIC int 97STATIC int
@@ -98,12 +118,12 @@ restart:
98 int error = 0; 118 int error = 0;
99 int i; 119 int i;
100 120
101 read_lock(&pag->pag_ici_lock); 121 rcu_read_lock();
102 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 122 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
103 (void **)batch, first_index, 123 (void **)batch, first_index,
104 XFS_LOOKUP_BATCH); 124 XFS_LOOKUP_BATCH);
105 if (!nr_found) { 125 if (!nr_found) {
106 read_unlock(&pag->pag_ici_lock); 126 rcu_read_unlock();
107 break; 127 break;
108 } 128 }
109 129
@@ -118,18 +138,26 @@ restart:
118 batch[i] = NULL; 138 batch[i] = NULL;
119 139
120 /* 140 /*
121 * Update the index for the next lookup. Catch overflows 141 * Update the index for the next lookup. Catch
122 * into the next AG range which can occur if we have inodes 142 * overflows into the next AG range which can occur if
123 * in the last block of the AG and we are currently 143 * we have inodes in the last block of the AG and we
124 * pointing to the last inode. 144 * are currently pointing to the last inode.
145 *
146 * Because we may see inodes that are from the wrong AG
147 * due to RCU freeing and reallocation, only update the
148 * index if it lies in this AG. It was a race that led
149 * us to see this inode, so another lookup from the
150 * same index will not find it again.
125 */ 151 */
152 if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
153 continue;
126 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 154 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
127 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 155 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
128 done = 1; 156 done = 1;
129 } 157 }
130 158
131 /* unlock now we've grabbed the inodes. */ 159 /* unlock now we've grabbed the inodes. */
132 read_unlock(&pag->pag_ici_lock); 160 rcu_read_unlock();
133 161
134 for (i = 0; i < nr_found; i++) { 162 for (i = 0; i < nr_found; i++) {
135 if (!batch[i]) 163 if (!batch[i])
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
592 struct xfs_perag *pag; 620 struct xfs_perag *pag;
593 621
594 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 622 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
595 write_lock(&pag->pag_ici_lock); 623 spin_lock(&pag->pag_ici_lock);
596 spin_lock(&ip->i_flags_lock); 624 spin_lock(&ip->i_flags_lock);
597 __xfs_inode_set_reclaim_tag(pag, ip); 625 __xfs_inode_set_reclaim_tag(pag, ip);
598 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 626 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
599 spin_unlock(&ip->i_flags_lock); 627 spin_unlock(&ip->i_flags_lock);
600 write_unlock(&pag->pag_ici_lock); 628 spin_unlock(&pag->pag_ici_lock);
601 xfs_perag_put(pag); 629 xfs_perag_put(pag);
602} 630}
603 631
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
639 struct xfs_inode *ip, 667 struct xfs_inode *ip,
640 int flags) 668 int flags)
641{ 669{
670 ASSERT(rcu_read_lock_held());
671
672 /* quick check for stale RCU freed inode */
673 if (!ip->i_ino)
674 return 1;
642 675
643 /* 676 /*
644 * do some unlocked checks first to avoid unnecceary lock traffic. 677 * do some unlocked checks first to avoid unnecessary lock traffic.
645 * The first is a flush lock check, the second is a already in reclaim 678 * The first is a flush lock check, the second is a already in reclaim
646 * check. Only do these checks if we are not going to block on locks. 679 * check. Only do these checks if we are not going to block on locks.
647 */ 680 */
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
654 * The radix tree lock here protects a thread in xfs_iget from racing 687 * The radix tree lock here protects a thread in xfs_iget from racing
655 * with us starting reclaim on the inode. Once we have the 688 * with us starting reclaim on the inode. Once we have the
656 * XFS_IRECLAIM flag set it will not touch us. 689 * XFS_IRECLAIM flag set it will not touch us.
690 *
691 * Due to RCU lookup, we may find inodes that have been freed and only
692 * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
693 * aren't candidates for reclaim at all, so we must check the
694 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
657 */ 695 */
658 spin_lock(&ip->i_flags_lock); 696 spin_lock(&ip->i_flags_lock);
659 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); 697 if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
660 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { 698 __xfs_iflags_test(ip, XFS_IRECLAIM)) {
661 /* ignore as it is already under reclaim */ 699 /* not a reclaim candidate. */
662 spin_unlock(&ip->i_flags_lock); 700 spin_unlock(&ip->i_flags_lock);
663 return 1; 701 return 1;
664 } 702 }
@@ -795,12 +833,12 @@ reclaim:
795 * added to the tree assert that it's been there before to catch 833 * added to the tree assert that it's been there before to catch
796 * problems with the inode life time early on. 834 * problems with the inode life time early on.
797 */ 835 */
798 write_lock(&pag->pag_ici_lock); 836 spin_lock(&pag->pag_ici_lock);
799 if (!radix_tree_delete(&pag->pag_ici_root, 837 if (!radix_tree_delete(&pag->pag_ici_root,
800 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 838 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
801 ASSERT(0); 839 ASSERT(0);
802 __xfs_inode_clear_reclaim(pag, ip); 840 __xfs_inode_clear_reclaim(pag, ip);
803 write_unlock(&pag->pag_ici_lock); 841 spin_unlock(&pag->pag_ici_lock);
804 842
805 /* 843 /*
806 * Here we do an (almost) spurious inode lock in order to coordinate 844 * Here we do an (almost) spurious inode lock in order to coordinate
@@ -864,14 +902,14 @@ restart:
864 struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 902 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
865 int i; 903 int i;
866 904
867 write_lock(&pag->pag_ici_lock); 905 rcu_read_lock();
868 nr_found = radix_tree_gang_lookup_tag( 906 nr_found = radix_tree_gang_lookup_tag(
869 &pag->pag_ici_root, 907 &pag->pag_ici_root,
870 (void **)batch, first_index, 908 (void **)batch, first_index,
871 XFS_LOOKUP_BATCH, 909 XFS_LOOKUP_BATCH,
872 XFS_ICI_RECLAIM_TAG); 910 XFS_ICI_RECLAIM_TAG);
873 if (!nr_found) { 911 if (!nr_found) {
874 write_unlock(&pag->pag_ici_lock); 912 rcu_read_unlock();
875 break; 913 break;
876 } 914 }
877 915
@@ -891,14 +929,24 @@ restart:
891 * occur if we have inodes in the last block of 929 * occur if we have inodes in the last block of
892 * the AG and we are currently pointing to the 930 * the AG and we are currently pointing to the
893 * last inode. 931 * last inode.
932 *
933 * Because we may see inodes that are from the
934 * wrong AG due to RCU freeing and
935 * reallocation, only update the index if it
936 * lies in this AG. It was a race that led us
937 * to see this inode, so another lookup from
938 * the same index will not find it again.
894 */ 939 */
940 if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
941 pag->pag_agno)
942 continue;
895 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 943 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
896 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 944 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
897 done = 1; 945 done = 1;
898 } 946 }
899 947
900 /* unlock now we've grabbed the inodes. */ 948 /* unlock now we've grabbed the inodes. */
901 write_unlock(&pag->pag_ici_lock); 949 rcu_read_unlock();
902 950
903 for (i = 0; i < nr_found; i++) { 951 for (i = 0; i < nr_found; i++) {
904 if (!batch[i]) 952 if (!batch[i])
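The common thread in the xfs_sync.c hunks above is that lockless RCU lookups may return inodes that have already been freed, or freed and reallocated, so every inode coming out of the radix tree must be re-validated under its i_flags_lock before it is used. Condensed into one hypothetical fragment (it folds together xfs_inode_ag_walk() and xfs_inode_ag_walk_grab() from the diff and is not itself a function in the tree):

	rcu_read_lock();
	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)batch,
					  first_index, XFS_LOOKUP_BATCH);
	for (i = 0; i < nr_found; i++) {
		struct xfs_inode *ip = batch[i];

		spin_lock(&ip->i_flags_lock);
		if (!ip->i_ino ||		/* zeroed i_ino marks an RCU-stale inode */
		    __xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE |
					  XFS_IRECLAIM)) {
			spin_unlock(&ip->i_flags_lock);
			batch[i] = NULL;	/* skip it */
			continue;
		}
		spin_unlock(&ip->i_flags_lock);

		if (!igrab(VFS_I(ip)))		/* already on its way to reclaim */
			batch[i] = NULL;
	}
	rcu_read_unlock();

The reclaim-side walk follows the same shape, except that it uses radix_tree_gang_lookup_tag() with XFS_ICI_RECLAIM_TAG and checks XFS_IRECLAIMABLE/XFS_IRECLAIM instead of taking a VFS reference.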
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index acef2e98c59..647af2a2e7a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
766 __field(int, curr_res) 766 __field(int, curr_res)
767 __field(int, unit_res) 767 __field(int, unit_res)
768 __field(unsigned int, flags) 768 __field(unsigned int, flags)
769 __field(void *, reserve_headq) 769 __field(int, reserveq)
770 __field(void *, write_headq) 770 __field(int, writeq)
771 __field(int, grant_reserve_cycle) 771 __field(int, grant_reserve_cycle)
772 __field(int, grant_reserve_bytes) 772 __field(int, grant_reserve_bytes)
773 __field(int, grant_write_cycle) 773 __field(int, grant_write_cycle)
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
 		__entry->curr_res = tic->t_curr_res;
 		__entry->unit_res = tic->t_unit_res;
 		__entry->flags = tic->t_flags;
-		__entry->reserve_headq = log->l_reserve_headq;
-		__entry->write_headq = log->l_write_headq;
-		__entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
-		__entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
-		__entry->grant_write_cycle = log->l_grant_write_cycle;
-		__entry->grant_write_bytes = log->l_grant_write_bytes;
+		__entry->reserveq = list_empty(&log->l_reserveq);
+		__entry->writeq = list_empty(&log->l_writeq);
+		xlog_crack_grant_head(&log->l_grant_reserve_head,
+				&__entry->grant_reserve_cycle,
+				&__entry->grant_reserve_bytes);
+		xlog_crack_grant_head(&log->l_grant_write_head,
+				&__entry->grant_write_cycle,
+				&__entry->grant_write_bytes);
 		__entry->curr_cycle = log->l_curr_cycle;
 		__entry->curr_block = log->l_curr_block;
-		__entry->tail_lsn = log->l_tail_lsn;
+		__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
 	),
 	TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
-		  "t_unit_res %u t_flags %s reserve_headq 0x%p "
-		  "write_headq 0x%p grant_reserve_cycle %d "
+		  "t_unit_res %u t_flags %s reserveq %s "
+		  "writeq %s grant_reserve_cycle %d "
 		  "grant_reserve_bytes %d grant_write_cycle %d "
 		  "grant_write_bytes %d curr_cycle %d curr_block %d "
 		  "tail_cycle %d tail_block %d",
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
 		  __entry->curr_res,
 		  __entry->unit_res,
 		  __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
-		  __entry->reserve_headq,
-		  __entry->write_headq,
+		  __entry->reserveq ? "empty" : "active",
+		  __entry->writeq ? "empty" : "active",
 		  __entry->grant_reserve_cycle,
 		  __entry->grant_reserve_bytes,
 		  __entry->grant_write_cycle,
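The TP_fast_assign() changes above track the log-grant rework that arrives with this merge: the raw waiter pointers become list-based queues (so list_empty() is enough to report whether anyone is waiting), both grant cycle and byte count are now read together via xlog_crack_grant_head(), and l_tail_lsn is read with atomic64_read(). A minimal userspace sketch of the pack/crack idea follows, assuming the cycle occupies the high 32 bits and the byte count the low 32 bits of one 64-bit head; the real helpers live in fs/xfs/xfs_log_priv.h, and the names below are local stand-ins:

	#include <stdint.h>
	#include <stdio.h>

	/* pack cycle (high word) and byte count (low word) into one value */
	static uint64_t pack_grant_head(int cycle, int bytes)
	{
		return ((uint64_t)cycle << 32) | (uint32_t)bytes;
	}

	/* split the combined head back into cycle and byte count */
	static void crack_grant_head(uint64_t head, int *cycle, int *bytes)
	{
		*cycle = head >> 32;
		*bytes = head & 0xffffffff;
	}

	int main(void)
	{
		int cycle, bytes;
		uint64_t head = pack_grant_head(7, 4096);

		crack_grant_head(head, &cycle, &bytes);
		printf("cycle %d bytes %d\n", cycle, bytes);	/* cycle 7 bytes 4096 */
		return 0;
	}

Because both halves live in one 64-bit value, a single atomic read yields a consistent snapshot of cycle and bytes without taking the grant lock, which is what lets the tracepoint read the heads safely.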
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage);
 DEFINE_PAGE_EVENT(xfs_releasepage);
 DEFINE_PAGE_EVENT(xfs_invalidatepage);
 
-DECLARE_EVENT_CLASS(xfs_iomap_class,
+DECLARE_EVENT_CLASS(xfs_imap_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-		 int flags, struct xfs_bmbt_irec *irec),
-	TP_ARGS(ip, offset, count, flags, irec),
+		 int type, struct xfs_bmbt_irec *irec),
+	TP_ARGS(ip, offset, count, type, irec),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
 		__field(loff_t, new_size)
 		__field(loff_t, offset)
 		__field(size_t, count)
-		__field(int, flags)
+		__field(int, type)
 		__field(xfs_fileoff_t, startoff)
 		__field(xfs_fsblock_t, startblock)
 		__field(xfs_filblks_t, blockcount)
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
 		__entry->new_size = ip->i_new_size;
 		__entry->offset = offset;
 		__entry->count = count;
-		__entry->flags = flags;
+		__entry->type = type;
 		__entry->startoff = irec ? irec->br_startoff : 0;
 		__entry->startblock = irec ? irec->br_startblock : 0;
 		__entry->blockcount = irec ? irec->br_blockcount : 0;
 	),
 	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-		  "offset 0x%llx count %zd flags %s "
+		  "offset 0x%llx count %zd type %s "
 		  "startoff 0x%llx startblock %lld blockcount 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
 		  __entry->new_size,
 		  __entry->offset,
 		  __entry->count,
-		  __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+		  __print_symbolic(__entry->type, XFS_IO_TYPES),
 		  __entry->startoff,
 		  (__int64_t)__entry->startblock,
 		  __entry->blockcount)
 )
 
 #define DEFINE_IOMAP_EVENT(name) \
-DEFINE_EVENT(xfs_iomap_class, name, \
+DEFINE_EVENT(xfs_imap_class, name, \
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
-		 int flags, struct xfs_bmbt_irec *irec), \
-	TP_ARGS(ip, offset, count, flags, irec))
-DEFINE_IOMAP_EVENT(xfs_iomap_enter);
-DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+		 int type, struct xfs_bmbt_irec *irec), \
+	TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
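The rename from xfs_iomap_class to xfs_imap_class above comes with a semantic change: the event no longer records a BMAPI flags bitmask (decoded with __print_flags(), which joins every set bit's name) but a single I/O type value (decoded with __print_symbolic(), which maps one value from the XFS_IO_TYPES table to exactly one name). A small userspace analogue of that shift follows; the value/name table is a placeholder for illustration, not the real XFS_IO_TYPES entries:

	#include <stdio.h>

	/* placeholder table: one name per I/O type value */
	static const char *const io_type_names[] = {
		[0] = "direct",
		[1] = "delalloc",
		[2] = "unwritten",
		[3] = "overwrite",
	};

	/* map a single type value to a single name, like __print_symbolic() */
	static const char *io_type_name(unsigned int type)
	{
		if (type >= sizeof(io_type_names) / sizeof(io_type_names[0]))
			return "unknown";
		return io_type_names[type];
	}

	int main(void)
	{
		/* a symbolic type prints as exactly one name, not an OR-ed list */
		printf("type %s\n", io_type_name(2));	/* "type unwritten" */
		return 0;
	}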
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
 	TP_ARGS(ip, offset, count))
 DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
 DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
 
 
 TRACE_EVENT(xfs_itruncate_start,
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \
 	TP_PROTO(struct xfs_alloc_arg *args), \
 	TP_ARGS(args))
 DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
 DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_first);