aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c649
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c303
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h179
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c35
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c49
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c77
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c215
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c521
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h133
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h29
-rw-r--r--fs/xfs/quota/xfs_dquot.c276
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c301
-rw-r--r--fs/xfs/quota/xfs_qm.c228
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c12
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c10
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c133
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c35
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_ag.h9
-rw-r--r--fs/xfs/xfs_alloc.c19
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_alloc_btree.c38
-rw-r--r--fs/xfs/xfs_attr.c128
-rw-r--r--fs/xfs/xfs_attr_leaf.c5
-rw-r--r--fs/xfs/xfs_bmap.c383
-rw-r--r--fs/xfs/xfs_bmap.h44
-rw-r--r--fs/xfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/xfs_btree.c61
-rw-r--r--fs/xfs/xfs_btree.h14
-rw-r--r--fs/xfs/xfs_buf_item.c235
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_da_btree.c22
-rw-r--r--fs/xfs/xfs_dfrag.c16
-rw-r--r--fs/xfs/xfs_dinode.h5
-rw-r--r--fs/xfs/xfs_dir2.c11
-rw-r--r--fs/xfs/xfs_dir2_block.c8
-rw-r--r--fs/xfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c6
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_dmapi.h170
-rw-r--r--fs/xfs/xfs_dmops.c55
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c278
-rw-r--r--fs/xfs/xfs_filestream.c84
-rw-r--r--fs/xfs/xfs_filestream.h82
-rw-r--r--fs/xfs/xfs_fs.h11
-rw-r--r--fs/xfs/xfs_fsops.c50
-rw-r--r--fs/xfs/xfs_fsops.h2
-rw-r--r--fs/xfs/xfs_ialloc.c22
-rw-r--r--fs/xfs/xfs_ialloc_btree.c37
-rw-r--r--fs/xfs/xfs_iget.c112
-rw-r--r--fs/xfs/xfs_inode.c126
-rw-r--r--fs/xfs/xfs_inode.h36
-rw-r--r--fs/xfs/xfs_inode_item.c282
-rw-r--r--fs/xfs/xfs_inode_item.h12
-rw-r--r--fs/xfs/xfs_iomap.c76
-rw-r--r--fs/xfs/xfs_iomap.h22
-rw-r--r--fs/xfs/xfs_itable.c11
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log.h11
-rw-r--r--fs/xfs/xfs_log_cil.c481
-rw-r--r--fs/xfs/xfs_log_priv.h50
-rw-r--r--fs/xfs/xfs_log_recover.c67
-rw-r--r--fs/xfs/xfs_mount.c311
-rw-r--r--fs/xfs/xfs_mount.h78
-rw-r--r--fs/xfs/xfs_refcache.h52
-rw-r--r--fs/xfs/xfs_rename.c77
-rw-r--r--fs/xfs/xfs_rtalloc.c38
-rw-r--r--fs/xfs/xfs_rw.c15
-rw-r--r--fs/xfs/xfs_sb.h10
-rw-r--r--fs/xfs/xfs_trans.c305
-rw-r--r--fs/xfs/xfs_trans.h120
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c77
-rw-r--r--fs/xfs/xfs_trans_extfree.c23
-rw-r--r--fs/xfs/xfs_trans_inode.c98
-rw-r--r--fs/xfs/xfs_trans_item.c441
-rw-r--r--fs/xfs/xfs_trans_priv.h17
-rw-r--r--fs/xfs/xfs_types.h2
-rw-r--r--fs/xfs/xfs_utils.c96
-rw-r--r--fs/xfs/xfs_utils.h4
-rw-r--r--fs/xfs/xfs_vnodeops.c409
-rw-r--r--fs/xfs/xfs_vnodeops.h6
102 files changed, 3152 insertions, 5656 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f09..6100ec0fa1d4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
22config XFS_QUOTA 22config XFS_QUOTA
23 bool "XFS Quota support" 23 bool "XFS Quota support"
24 depends on XFS_FS 24 depends on XFS_FS
25 select QUOTACTL
25 help 26 help
26 If you say Y here, you will be able to set limits for disk usage on 27 If you say Y here, you will be able to set limits for disk usage on
27 a per user and/or a per group basis under XFS. XFS considers quota 28 a per user and/or a per group basis under XFS. XFS considers quota
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c8fb13f83b3f..0dce969d6cad 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -87,11 +87,9 @@ xfs-y += xfs_alloc.o \
87 xfs_trans_buf.o \ 87 xfs_trans_buf.o \
88 xfs_trans_extfree.o \ 88 xfs_trans_extfree.o \
89 xfs_trans_inode.o \ 89 xfs_trans_inode.o \
90 xfs_trans_item.o \
91 xfs_utils.o \ 90 xfs_utils.o \
92 xfs_vnodeops.o \ 91 xfs_vnodeops.o \
93 xfs_rw.o \ 92 xfs_rw.o
94 xfs_dmops.o
95 93
96xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o 94xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
97 95
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 9f769b5b38fc..b2771862fd3d 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask)
225 struct posix_acl *acl; 225 struct posix_acl *acl;
226 int error = -EAGAIN; 226 int error = -EAGAIN;
227 227
228 xfs_itrace_entry(ip); 228 trace_xfs_check_acl(ip);
229 229
230 /* 230 /*
231 * If there is no attribute fork no ACL exists on this inode and 231 * If there is no attribute fork no ACL exists on this inode and
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdcb..c9af48fffcd7 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,19 +21,12 @@
21#include "xfs_inum.h" 21#include "xfs_inum.h"
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_dir2.h"
25#include "xfs_trans.h" 24#include "xfs_trans.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 25#include "xfs_mount.h"
28#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
29#include "xfs_alloc_btree.h"
30#include "xfs_ialloc_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 27#include "xfs_dinode.h"
34#include "xfs_inode.h" 28#include "xfs_inode.h"
35#include "xfs_alloc.h" 29#include "xfs_alloc.h"
36#include "xfs_btree.h"
37#include "xfs_error.h" 30#include "xfs_error.h"
38#include "xfs_rw.h" 31#include "xfs_rw.h"
39#include "xfs_iomap.h" 32#include "xfs_iomap.h"
@@ -92,18 +85,15 @@ void
92xfs_count_page_state( 85xfs_count_page_state(
93 struct page *page, 86 struct page *page,
94 int *delalloc, 87 int *delalloc,
95 int *unmapped,
96 int *unwritten) 88 int *unwritten)
97{ 89{
98 struct buffer_head *bh, *head; 90 struct buffer_head *bh, *head;
99 91
100 *delalloc = *unmapped = *unwritten = 0; 92 *delalloc = *unwritten = 0;
101 93
102 bh = head = page_buffers(page); 94 bh = head = page_buffers(page);
103 do { 95 do {
104 if (buffer_uptodate(bh) && !buffer_mapped(bh)) 96 if (buffer_unwritten(bh))
105 (*unmapped) = 1;
106 else if (buffer_unwritten(bh))
107 (*unwritten) = 1; 97 (*unwritten) = 1;
108 else if (buffer_delay(bh)) 98 else if (buffer_delay(bh))
109 (*delalloc) = 1; 99 (*delalloc) = 1;
@@ -212,23 +202,17 @@ xfs_setfilesize(
212} 202}
213 203
214/* 204/*
215 * Schedule IO completion handling on a xfsdatad if this was 205 * Schedule IO completion handling on the final put of an ioend.
216 * the final hold on this ioend. If we are asked to wait,
217 * flush the workqueue.
218 */ 206 */
219STATIC void 207STATIC void
220xfs_finish_ioend( 208xfs_finish_ioend(
221 xfs_ioend_t *ioend, 209 struct xfs_ioend *ioend)
222 int wait)
223{ 210{
224 if (atomic_dec_and_test(&ioend->io_remaining)) { 211 if (atomic_dec_and_test(&ioend->io_remaining)) {
225 struct workqueue_struct *wq; 212 if (ioend->io_type == IO_UNWRITTEN)
226 213 queue_work(xfsconvertd_workqueue, &ioend->io_work);
227 wq = (ioend->io_type == IO_UNWRITTEN) ? 214 else
228 xfsconvertd_workqueue : xfsdatad_workqueue; 215 queue_work(xfsdatad_workqueue, &ioend->io_work);
229 queue_work(wq, &ioend->io_work);
230 if (wait)
231 flush_workqueue(wq);
232 } 216 }
233} 217}
234 218
@@ -272,11 +256,25 @@ xfs_end_io(
272 */ 256 */
273 if (error == EAGAIN) { 257 if (error == EAGAIN) {
274 atomic_inc(&ioend->io_remaining); 258 atomic_inc(&ioend->io_remaining);
275 xfs_finish_ioend(ioend, 0); 259 xfs_finish_ioend(ioend);
276 /* ensure we don't spin on blocked ioends */ 260 /* ensure we don't spin on blocked ioends */
277 delay(1); 261 delay(1);
278 } else 262 } else {
263 if (ioend->io_iocb)
264 aio_complete(ioend->io_iocb, ioend->io_result, 0);
279 xfs_destroy_ioend(ioend); 265 xfs_destroy_ioend(ioend);
266 }
267}
268
269/*
270 * Call IO completion handling in caller context on the final put of an ioend.
271 */
272STATIC void
273xfs_finish_ioend_sync(
274 struct xfs_ioend *ioend)
275{
276 if (atomic_dec_and_test(&ioend->io_remaining))
277 xfs_end_io(&ioend->io_work);
280} 278}
281 279
282/* 280/*
@@ -309,6 +307,8 @@ xfs_alloc_ioend(
309 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); 307 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
310 ioend->io_offset = 0; 308 ioend->io_offset = 0;
311 ioend->io_size = 0; 309 ioend->io_size = 0;
310 ioend->io_iocb = NULL;
311 ioend->io_result = 0;
312 312
313 INIT_WORK(&ioend->io_work, xfs_end_io); 313 INIT_WORK(&ioend->io_work, xfs_end_io);
314 return ioend; 314 return ioend;
@@ -358,7 +358,7 @@ xfs_end_bio(
358 bio->bi_end_io = NULL; 358 bio->bi_end_io = NULL;
359 bio_put(bio); 359 bio_put(bio);
360 360
361 xfs_finish_ioend(ioend, 0); 361 xfs_finish_ioend(ioend);
362} 362}
363 363
364STATIC void 364STATIC void
@@ -500,7 +500,7 @@ xfs_submit_ioend(
500 } 500 }
501 if (bio) 501 if (bio)
502 xfs_submit_ioend_bio(wbc, ioend, bio); 502 xfs_submit_ioend_bio(wbc, ioend, bio);
503 xfs_finish_ioend(ioend, 0); 503 xfs_finish_ioend(ioend);
504 } while ((ioend = next) != NULL); 504 } while ((ioend = next) != NULL);
505} 505}
506 506
@@ -614,31 +614,30 @@ xfs_map_at_offset(
614STATIC unsigned int 614STATIC unsigned int
615xfs_probe_page( 615xfs_probe_page(
616 struct page *page, 616 struct page *page,
617 unsigned int pg_offset, 617 unsigned int pg_offset)
618 int mapped)
619{ 618{
619 struct buffer_head *bh, *head;
620 int ret = 0; 620 int ret = 0;
621 621
622 if (PageWriteback(page)) 622 if (PageWriteback(page))
623 return 0; 623 return 0;
624 if (!PageDirty(page))
625 return 0;
626 if (!page->mapping)
627 return 0;
628 if (!page_has_buffers(page))
629 return 0;
624 630
625 if (page->mapping && PageDirty(page)) { 631 bh = head = page_buffers(page);
626 if (page_has_buffers(page)) { 632 do {
627 struct buffer_head *bh, *head; 633 if (!buffer_uptodate(bh))
628 634 break;
629 bh = head = page_buffers(page); 635 if (!buffer_mapped(bh))
630 do { 636 break;
631 if (!buffer_uptodate(bh)) 637 ret += bh->b_size;
632 break; 638 if (ret >= pg_offset)
633 if (mapped != buffer_mapped(bh)) 639 break;
634 break; 640 } while ((bh = bh->b_this_page) != head);
635 ret += bh->b_size;
636 if (ret >= pg_offset)
637 break;
638 } while ((bh = bh->b_this_page) != head);
639 } else
640 ret = mapped ? 0 : PAGE_CACHE_SIZE;
641 }
642 641
643 return ret; 642 return ret;
644} 643}
@@ -648,8 +647,7 @@ xfs_probe_cluster(
648 struct inode *inode, 647 struct inode *inode,
649 struct page *startpage, 648 struct page *startpage,
650 struct buffer_head *bh, 649 struct buffer_head *bh,
651 struct buffer_head *head, 650 struct buffer_head *head)
652 int mapped)
653{ 651{
654 struct pagevec pvec; 652 struct pagevec pvec;
655 pgoff_t tindex, tlast, tloff; 653 pgoff_t tindex, tlast, tloff;
@@ -658,7 +656,7 @@ xfs_probe_cluster(
658 656
659 /* First sum forwards in this page */ 657 /* First sum forwards in this page */
660 do { 658 do {
661 if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) 659 if (!buffer_uptodate(bh) || !buffer_mapped(bh))
662 return total; 660 return total;
663 total += bh->b_size; 661 total += bh->b_size;
664 } while ((bh = bh->b_this_page) != head); 662 } while ((bh = bh->b_this_page) != head);
@@ -692,7 +690,7 @@ xfs_probe_cluster(
692 pg_offset = PAGE_CACHE_SIZE; 690 pg_offset = PAGE_CACHE_SIZE;
693 691
694 if (page->index == tindex && trylock_page(page)) { 692 if (page->index == tindex && trylock_page(page)) {
695 pg_len = xfs_probe_page(page, pg_offset, mapped); 693 pg_len = xfs_probe_page(page, pg_offset);
696 unlock_page(page); 694 unlock_page(page);
697 } 695 }
698 696
@@ -761,7 +759,6 @@ xfs_convert_page(
761 struct xfs_bmbt_irec *imap, 759 struct xfs_bmbt_irec *imap,
762 xfs_ioend_t **ioendp, 760 xfs_ioend_t **ioendp,
763 struct writeback_control *wbc, 761 struct writeback_control *wbc,
764 int startio,
765 int all_bh) 762 int all_bh)
766{ 763{
767 struct buffer_head *bh, *head; 764 struct buffer_head *bh, *head;
@@ -832,19 +829,14 @@ xfs_convert_page(
832 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 829 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
833 830
834 xfs_map_at_offset(inode, bh, imap, offset); 831 xfs_map_at_offset(inode, bh, imap, offset);
835 if (startio) { 832 xfs_add_to_ioend(inode, bh, offset, type,
836 xfs_add_to_ioend(inode, bh, offset, 833 ioendp, done);
837 type, ioendp, done); 834
838 } else {
839 set_buffer_dirty(bh);
840 unlock_buffer(bh);
841 mark_buffer_dirty(bh);
842 }
843 page_dirty--; 835 page_dirty--;
844 count++; 836 count++;
845 } else { 837 } else {
846 type = IO_NEW; 838 type = IO_NEW;
847 if (buffer_mapped(bh) && all_bh && startio) { 839 if (buffer_mapped(bh) && all_bh) {
848 lock_buffer(bh); 840 lock_buffer(bh);
849 xfs_add_to_ioend(inode, bh, offset, 841 xfs_add_to_ioend(inode, bh, offset,
850 type, ioendp, done); 842 type, ioendp, done);
@@ -859,14 +851,12 @@ xfs_convert_page(
859 if (uptodate && bh == head) 851 if (uptodate && bh == head)
860 SetPageUptodate(page); 852 SetPageUptodate(page);
861 853
862 if (startio) { 854 if (count) {
863 if (count) { 855 if (--wbc->nr_to_write <= 0 &&
864 wbc->nr_to_write--; 856 wbc->sync_mode == WB_SYNC_NONE)
865 if (wbc->nr_to_write <= 0) 857 done = 1;
866 done = 1;
867 }
868 xfs_start_page_writeback(page, !page_dirty, count);
869 } 858 }
859 xfs_start_page_writeback(page, !page_dirty, count);
870 860
871 return done; 861 return done;
872 fail_unlock_page: 862 fail_unlock_page:
@@ -886,7 +876,6 @@ xfs_cluster_write(
886 struct xfs_bmbt_irec *imap, 876 struct xfs_bmbt_irec *imap,
887 xfs_ioend_t **ioendp, 877 xfs_ioend_t **ioendp,
888 struct writeback_control *wbc, 878 struct writeback_control *wbc,
889 int startio,
890 int all_bh, 879 int all_bh,
891 pgoff_t tlast) 880 pgoff_t tlast)
892{ 881{
@@ -902,7 +891,7 @@ xfs_cluster_write(
902 891
903 for (i = 0; i < pagevec_count(&pvec); i++) { 892 for (i = 0; i < pagevec_count(&pvec); i++) {
904 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 893 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
905 imap, ioendp, wbc, startio, all_bh); 894 imap, ioendp, wbc, all_bh);
906 if (done) 895 if (done)
907 break; 896 break;
908 } 897 }
@@ -981,7 +970,7 @@ xfs_aops_discard_page(
981 */ 970 */
982 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 971 error = xfs_bmapi(NULL, ip, offset_fsb, 1,
983 XFS_BMAPI_ENTIRE, NULL, 0, &imap, 972 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
984 &nimaps, NULL, NULL); 973 &nimaps, NULL);
985 974
986 if (error) { 975 if (error) {
987 /* something screwed, just bail */ 976 /* something screwed, just bail */
@@ -1009,7 +998,7 @@ xfs_aops_discard_page(
1009 */ 998 */
1010 xfs_bmap_init(&flist, &firstblock); 999 xfs_bmap_init(&flist, &firstblock);
1011 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, 1000 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
1012 &flist, NULL, &done); 1001 &flist, &done);
1013 1002
1014 ASSERT(!flist.xbf_count && !flist.xbf_first); 1003 ASSERT(!flist.xbf_count && !flist.xbf_first);
1015 if (error) { 1004 if (error) {
@@ -1032,50 +1021,66 @@ out_invalidate:
1032} 1021}
1033 1022
1034/* 1023/*
1035 * Calling this without startio set means we are being asked to make a dirty 1024 * Write out a dirty page.
1036 * page ready for freeing it's buffers. When called with startio set then 1025 *
1037 * we are coming from writepage. 1026 * For delalloc space on the page we need to allocate space and flush it.
1027 * For unwritten space on the page we need to start the conversion to
1028 * regular allocated space.
1029 * For any other dirty buffer heads on the page we should flush them.
1038 * 1030 *
1039 * When called with startio set it is important that we write the WHOLE 1031 * If we detect that a transaction would be required to flush the page, we
1040 * page if possible. 1032 * have to check the process flags first, if we are already in a transaction
1041 * The bh->b_state's cannot know if any of the blocks or which block for 1033 * or disk I/O during allocations is off, we need to fail the writepage and
1042 * that matter are dirty due to mmap writes, and therefore bh uptodate is 1034 * redirty the page.
1043 * only valid if the page itself isn't completely uptodate. Some layers
1044 * may clear the page dirty flag prior to calling write page, under the
1045 * assumption the entire page will be written out; by not writing out the
1046 * whole page the page can be reused before all valid dirty data is
1047 * written out. Note: in the case of a page that has been dirty'd by
1048 * mapwrite and but partially setup by block_prepare_write the
1049 * bh->b_states's will not agree and only ones setup by BPW/BCW will have
1050 * valid state, thus the whole page must be written out thing.
1051 */ 1035 */
1052
1053STATIC int 1036STATIC int
1054xfs_page_state_convert( 1037xfs_vm_writepage(
1055 struct inode *inode, 1038 struct page *page,
1056 struct page *page, 1039 struct writeback_control *wbc)
1057 struct writeback_control *wbc,
1058 int startio,
1059 int unmapped) /* also implies page uptodate */
1060{ 1040{
1041 struct inode *inode = page->mapping->host;
1042 int delalloc, unwritten;
1061 struct buffer_head *bh, *head; 1043 struct buffer_head *bh, *head;
1062 struct xfs_bmbt_irec imap; 1044 struct xfs_bmbt_irec imap;
1063 xfs_ioend_t *ioend = NULL, *iohead = NULL; 1045 xfs_ioend_t *ioend = NULL, *iohead = NULL;
1064 loff_t offset; 1046 loff_t offset;
1065 unsigned long p_offset = 0;
1066 unsigned int type; 1047 unsigned int type;
1067 __uint64_t end_offset; 1048 __uint64_t end_offset;
1068 pgoff_t end_index, last_index; 1049 pgoff_t end_index, last_index;
1069 ssize_t size, len; 1050 ssize_t size, len;
1070 int flags, err, imap_valid = 0, uptodate = 1; 1051 int flags, err, imap_valid = 0, uptodate = 1;
1071 int page_dirty, count = 0; 1052 int count = 0;
1072 int trylock = 0; 1053 int all_bh = 0;
1073 int all_bh = unmapped;
1074 1054
1075 if (startio) { 1055 trace_xfs_writepage(inode, page, 0);
1076 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) 1056
1077 trylock |= BMAPI_TRYLOCK; 1057 ASSERT(page_has_buffers(page));
1078 } 1058
1059 /*
1060 * Refuse to write the page out if we are called from reclaim context.
1061 *
1062 * This avoids stack overflows when called from deeply used stacks in
1063 * random callers for direct reclaim or memcg reclaim. We explicitly
1064 * allow reclaim from kswapd as the stack usage there is relatively low.
1065 *
1066 * This should really be done by the core VM, but until that happens
1067 * filesystems like XFS, btrfs and ext4 have to take care of this
1068 * by themselves.
1069 */
1070 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
1071 goto redirty;
1072
1073 /*
1074 * We need a transaction if there are delalloc or unwritten buffers
1075 * on the page.
1076 *
1077 * If we need a transaction and the process flags say we are already
1078 * in a transaction, or no IO is allowed then mark the page dirty
1079 * again and leave the page as is.
1080 */
1081 xfs_count_page_state(page, &delalloc, &unwritten);
1082 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
1083 goto redirty;
1079 1084
1080 /* Is this page beyond the end of the file? */ 1085 /* Is this page beyond the end of the file? */
1081 offset = i_size_read(inode); 1086 offset = i_size_read(inode);
@@ -1084,50 +1089,33 @@ xfs_page_state_convert(
1084 if (page->index >= end_index) { 1089 if (page->index >= end_index) {
1085 if ((page->index >= end_index + 1) || 1090 if ((page->index >= end_index + 1) ||
1086 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { 1091 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
1087 if (startio) 1092 unlock_page(page);
1088 unlock_page(page);
1089 return 0; 1093 return 0;
1090 } 1094 }
1091 } 1095 }
1092 1096
1093 /*
1094 * page_dirty is initially a count of buffers on the page before
1095 * EOF and is decremented as we move each into a cleanable state.
1096 *
1097 * Derivation:
1098 *
1099 * End offset is the highest offset that this page should represent.
1100 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
1101 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
1102 * hence give us the correct page_dirty count. On any other page,
1103 * it will be zero and in that case we need page_dirty to be the
1104 * count of buffers on the page.
1105 */
1106 end_offset = min_t(unsigned long long, 1097 end_offset = min_t(unsigned long long,
1107 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); 1098 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
1099 offset);
1108 len = 1 << inode->i_blkbits; 1100 len = 1 << inode->i_blkbits;
1109 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
1110 PAGE_CACHE_SIZE);
1111 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
1112 page_dirty = p_offset / len;
1113 1101
1114 bh = head = page_buffers(page); 1102 bh = head = page_buffers(page);
1115 offset = page_offset(page); 1103 offset = page_offset(page);
1116 flags = BMAPI_READ; 1104 flags = BMAPI_READ;
1117 type = IO_NEW; 1105 type = IO_NEW;
1118 1106
1119 /* TODO: cleanup count and page_dirty */
1120
1121 do { 1107 do {
1122 if (offset >= end_offset) 1108 if (offset >= end_offset)
1123 break; 1109 break;
1124 if (!buffer_uptodate(bh)) 1110 if (!buffer_uptodate(bh))
1125 uptodate = 0; 1111 uptodate = 0;
1126 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { 1112
1127 /* 1113 /*
1128 * the iomap is actually still valid, but the ioend 1114 * A hole may still be marked uptodate because discard_buffer
1129 * isn't. shouldn't happen too often. 1115 * leaves the flag set.
1130 */ 1116 */
1117 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
1118 ASSERT(!buffer_dirty(bh));
1131 imap_valid = 0; 1119 imap_valid = 0;
1132 continue; 1120 continue;
1133 } 1121 }
@@ -1135,19 +1123,7 @@ xfs_page_state_convert(
1135 if (imap_valid) 1123 if (imap_valid)
1136 imap_valid = xfs_imap_valid(inode, &imap, offset); 1124 imap_valid = xfs_imap_valid(inode, &imap, offset);
1137 1125
1138 /* 1126 if (buffer_unwritten(bh) || buffer_delay(bh)) {
1139 * First case, map an unwritten extent and prepare for
1140 * extent state conversion transaction on completion.
1141 *
1142 * Second case, allocate space for a delalloc buffer.
1143 * We can return EAGAIN here in the release page case.
1144 *
1145 * Third case, an unmapped buffer was found, and we are
1146 * in a path where we need to write the whole page out.
1147 */
1148 if (buffer_unwritten(bh) || buffer_delay(bh) ||
1149 ((buffer_uptodate(bh) || PageUptodate(page)) &&
1150 !buffer_mapped(bh) && (unmapped || startio))) {
1151 int new_ioend = 0; 1127 int new_ioend = 0;
1152 1128
1153 /* 1129 /*
@@ -1161,15 +1137,15 @@ xfs_page_state_convert(
1161 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 1137 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
1162 } else if (buffer_delay(bh)) { 1138 } else if (buffer_delay(bh)) {
1163 type = IO_DELAY; 1139 type = IO_DELAY;
1164 flags = BMAPI_ALLOCATE | trylock; 1140 flags = BMAPI_ALLOCATE;
1165 } else { 1141
1166 type = IO_NEW; 1142 if (wbc->sync_mode == WB_SYNC_NONE)
1167 flags = BMAPI_WRITE | BMAPI_MMAP; 1143 flags |= BMAPI_TRYLOCK;
1168 } 1144 }
1169 1145
1170 if (!imap_valid) { 1146 if (!imap_valid) {
1171 /* 1147 /*
1172 * if we didn't have a valid mapping then we 1148 * If we didn't have a valid mapping then we
1173 * need to ensure that we put the new mapping 1149 * need to ensure that we put the new mapping
1174 * in a new ioend structure. This needs to be 1150 * in a new ioend structure. This needs to be
1175 * done to ensure that the ioends correctly 1151 * done to ensure that the ioends correctly
@@ -1177,14 +1153,7 @@ xfs_page_state_convert(
1177 * for unwritten extent conversion. 1153 * for unwritten extent conversion.
1178 */ 1154 */
1179 new_ioend = 1; 1155 new_ioend = 1;
1180 if (type == IO_NEW) { 1156 err = xfs_map_blocks(inode, offset, len,
1181 size = xfs_probe_cluster(inode,
1182 page, bh, head, 0);
1183 } else {
1184 size = len;
1185 }
1186
1187 err = xfs_map_blocks(inode, offset, size,
1188 &imap, flags); 1157 &imap, flags);
1189 if (err) 1158 if (err)
1190 goto error; 1159 goto error;
@@ -1193,19 +1162,11 @@ xfs_page_state_convert(
1193 } 1162 }
1194 if (imap_valid) { 1163 if (imap_valid) {
1195 xfs_map_at_offset(inode, bh, &imap, offset); 1164 xfs_map_at_offset(inode, bh, &imap, offset);
1196 if (startio) { 1165 xfs_add_to_ioend(inode, bh, offset, type,
1197 xfs_add_to_ioend(inode, bh, offset, 1166 &ioend, new_ioend);
1198 type, &ioend,
1199 new_ioend);
1200 } else {
1201 set_buffer_dirty(bh);
1202 unlock_buffer(bh);
1203 mark_buffer_dirty(bh);
1204 }
1205 page_dirty--;
1206 count++; 1167 count++;
1207 } 1168 }
1208 } else if (buffer_uptodate(bh) && startio) { 1169 } else if (buffer_uptodate(bh)) {
1209 /* 1170 /*
1210 * we got here because the buffer is already mapped. 1171 * we got here because the buffer is already mapped.
1211 * That means it must already have extents allocated 1172 * That means it must already have extents allocated
@@ -1213,8 +1174,7 @@ xfs_page_state_convert(
1213 */ 1174 */
1214 if (!imap_valid || flags != BMAPI_READ) { 1175 if (!imap_valid || flags != BMAPI_READ) {
1215 flags = BMAPI_READ; 1176 flags = BMAPI_READ;
1216 size = xfs_probe_cluster(inode, page, bh, 1177 size = xfs_probe_cluster(inode, page, bh, head);
1217 head, 1);
1218 err = xfs_map_blocks(inode, offset, size, 1178 err = xfs_map_blocks(inode, offset, size,
1219 &imap, flags); 1179 &imap, flags);
1220 if (err) 1180 if (err)
@@ -1233,18 +1193,16 @@ xfs_page_state_convert(
1233 */ 1193 */
1234 type = IO_NEW; 1194 type = IO_NEW;
1235 if (trylock_buffer(bh)) { 1195 if (trylock_buffer(bh)) {
1236 ASSERT(buffer_mapped(bh));
1237 if (imap_valid) 1196 if (imap_valid)
1238 all_bh = 1; 1197 all_bh = 1;
1239 xfs_add_to_ioend(inode, bh, offset, type, 1198 xfs_add_to_ioend(inode, bh, offset, type,
1240 &ioend, !imap_valid); 1199 &ioend, !imap_valid);
1241 page_dirty--;
1242 count++; 1200 count++;
1243 } else { 1201 } else {
1244 imap_valid = 0; 1202 imap_valid = 0;
1245 } 1203 }
1246 } else if ((buffer_uptodate(bh) || PageUptodate(page)) && 1204 } else if (PageUptodate(page)) {
1247 (unmapped || startio)) { 1205 ASSERT(buffer_mapped(bh));
1248 imap_valid = 0; 1206 imap_valid = 0;
1249 } 1207 }
1250 1208
@@ -1256,8 +1214,7 @@ xfs_page_state_convert(
1256 if (uptodate && bh == head) 1214 if (uptodate && bh == head)
1257 SetPageUptodate(page); 1215 SetPageUptodate(page);
1258 1216
1259 if (startio) 1217 xfs_start_page_writeback(page, 1, count);
1260 xfs_start_page_writeback(page, 1, count);
1261 1218
1262 if (ioend && imap_valid) { 1219 if (ioend && imap_valid) {
1263 xfs_off_t end_index; 1220 xfs_off_t end_index;
@@ -1275,131 +1232,30 @@ xfs_page_state_convert(
1275 end_index = last_index; 1232 end_index = last_index;
1276 1233
1277 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1234 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1278 wbc, startio, all_bh, end_index); 1235 wbc, all_bh, end_index);
1279 } 1236 }
1280 1237
1281 if (iohead) 1238 if (iohead)
1282 xfs_submit_ioend(wbc, iohead); 1239 xfs_submit_ioend(wbc, iohead);
1283 1240
1284 return page_dirty; 1241 return 0;
1285 1242
1286error: 1243error:
1287 if (iohead) 1244 if (iohead)
1288 xfs_cancel_ioend(iohead); 1245 xfs_cancel_ioend(iohead);
1289 1246
1290 /* 1247 if (err == -EAGAIN)
1291 * If it's delalloc and we have nowhere to put it, 1248 goto redirty;
1292 * throw it away, unless the lower layers told
1293 * us to try again.
1294 */
1295 if (err != -EAGAIN) {
1296 if (!unmapped)
1297 xfs_aops_discard_page(page);
1298 ClearPageUptodate(page);
1299 }
1300 return err;
1301}
1302 1249
1303/* 1250 xfs_aops_discard_page(page);
1304 * writepage: Called from one of two places: 1251 ClearPageUptodate(page);
1305 * 1252 unlock_page(page);
1306 * 1. we are flushing a delalloc buffer head. 1253 return err;
1307 *
1308 * 2. we are writing out a dirty page. Typically the page dirty
1309 * state is cleared before we get here. In this case is it
1310 * conceivable we have no buffer heads.
1311 *
1312 * For delalloc space on the page we need to allocate space and
1313 * flush it. For unmapped buffer heads on the page we should
1314 * allocate space if the page is uptodate. For any other dirty
1315 * buffer heads on the page we should flush them.
1316 *
1317 * If we detect that a transaction would be required to flush
1318 * the page, we have to check the process flags first, if we
1319 * are already in a transaction or disk I/O during allocations
1320 * is off, we need to fail the writepage and redirty the page.
1321 */
1322
1323STATIC int
1324xfs_vm_writepage(
1325 struct page *page,
1326 struct writeback_control *wbc)
1327{
1328 int error;
1329 int need_trans;
1330 int delalloc, unmapped, unwritten;
1331 struct inode *inode = page->mapping->host;
1332
1333 trace_xfs_writepage(inode, page, 0);
1334
1335 /*
1336 * Refuse to write the page out if we are called from reclaim context.
1337 *
1338 * This is primarily to avoid stack overflows when called from deep
1339 * used stacks in random callers for direct reclaim, but disabling
1340 * reclaim for kswap is a nice side-effect as kswapd causes rather
1341 * suboptimal I/O patters, too.
1342 *
1343 * This should really be done by the core VM, but until that happens
1344 * filesystems like XFS, btrfs and ext4 have to take care of this
1345 * by themselves.
1346 */
1347 if (current->flags & PF_MEMALLOC)
1348 goto out_fail;
1349
1350 /*
1351 * We need a transaction if:
1352 * 1. There are delalloc buffers on the page
1353 * 2. The page is uptodate and we have unmapped buffers
1354 * 3. The page is uptodate and we have no buffers
1355 * 4. There are unwritten buffers on the page
1356 */
1357
1358 if (!page_has_buffers(page)) {
1359 unmapped = 1;
1360 need_trans = 1;
1361 } else {
1362 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1363 if (!PageUptodate(page))
1364 unmapped = 0;
1365 need_trans = delalloc + unmapped + unwritten;
1366 }
1367
1368 /*
1369 * If we need a transaction and the process flags say
1370 * we are already in a transaction, or no IO is allowed
1371 * then mark the page dirty again and leave the page
1372 * as is.
1373 */
1374 if (current_test_flags(PF_FSTRANS) && need_trans)
1375 goto out_fail;
1376
1377 /*
1378 * Delay hooking up buffer heads until we have
1379 * made our go/no-go decision.
1380 */
1381 if (!page_has_buffers(page))
1382 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1383
1384 /*
1385 * Convert delayed allocate, unwritten or unmapped space
1386 * to real space and flush out to disk.
1387 */
1388 error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
1389 if (error == -EAGAIN)
1390 goto out_fail;
1391 if (unlikely(error < 0))
1392 goto out_unlock;
1393
1394 return 0;
1395 1254
1396out_fail: 1255redirty:
1397 redirty_page_for_writepage(wbc, page); 1256 redirty_page_for_writepage(wbc, page);
1398 unlock_page(page); 1257 unlock_page(page);
1399 return 0; 1258 return 0;
1400out_unlock:
1401 unlock_page(page);
1402 return error;
1403} 1259}
1404 1260
1405STATIC int 1261STATIC int
@@ -1413,65 +1269,27 @@ xfs_vm_writepages(
1413 1269
1414/* 1270/*
1415 * Called to move a page into cleanable state - and from there 1271 * Called to move a page into cleanable state - and from there
1416 * to be released. Possibly the page is already clean. We always 1272 * to be released. The page should already be clean. We always
1417 * have buffer heads in this call. 1273 * have buffer heads in this call.
1418 * 1274 *
1419 * Returns 0 if the page is ok to release, 1 otherwise. 1275 * Returns 1 if the page is ok to release, 0 otherwise.
1420 *
1421 * Possible scenarios are:
1422 *
1423 * 1. We are being called to release a page which has been written
1424 * to via regular I/O. buffer heads will be dirty and possibly
1425 * delalloc. If no delalloc buffer heads in this case then we
1426 * can just return zero.
1427 *
1428 * 2. We are called to release a page which has been written via
1429 * mmap, all we need to do is ensure there is no delalloc
1430 * state in the buffer heads, if not we can let the caller
1431 * free them and we should come back later via writepage.
1432 */ 1276 */
1433STATIC int 1277STATIC int
1434xfs_vm_releasepage( 1278xfs_vm_releasepage(
1435 struct page *page, 1279 struct page *page,
1436 gfp_t gfp_mask) 1280 gfp_t gfp_mask)
1437{ 1281{
1438 struct inode *inode = page->mapping->host; 1282 int delalloc, unwritten;
1439 int dirty, delalloc, unmapped, unwritten;
1440 struct writeback_control wbc = {
1441 .sync_mode = WB_SYNC_ALL,
1442 .nr_to_write = 1,
1443 };
1444 1283
1445 trace_xfs_releasepage(inode, page, 0); 1284 trace_xfs_releasepage(page->mapping->host, page, 0);
1446 1285
1447 if (!page_has_buffers(page)) 1286 xfs_count_page_state(page, &delalloc, &unwritten);
1448 return 0;
1449
1450 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1451 if (!delalloc && !unwritten)
1452 goto free_buffers;
1453 1287
1454 if (!(gfp_mask & __GFP_FS)) 1288 if (WARN_ON(delalloc))
1455 return 0; 1289 return 0;
1456 1290 if (WARN_ON(unwritten))
1457 /* If we are already inside a transaction or the thread cannot
1458 * do I/O, we cannot release this page.
1459 */
1460 if (current_test_flags(PF_FSTRANS))
1461 return 0; 1291 return 0;
1462 1292
1463 /*
1464 * Convert delalloc space to real space, do not flush the
1465 * data out to disk, that will be done by the caller.
1466 * Never need to allocate space here - we will always
1467 * come back to writepage in that case.
1468 */
1469 dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
1470 if (dirty == 0 && !unwritten)
1471 goto free_buffers;
1472 return 0;
1473
1474free_buffers:
1475 return try_to_free_buffers(page); 1293 return try_to_free_buffers(page);
1476} 1294}
1477 1295
@@ -1481,9 +1299,9 @@ __xfs_get_blocks(
1481 sector_t iblock, 1299 sector_t iblock,
1482 struct buffer_head *bh_result, 1300 struct buffer_head *bh_result,
1483 int create, 1301 int create,
1484 int direct, 1302 int direct)
1485 bmapi_flags_t flags)
1486{ 1303{
1304 int flags = create ? BMAPI_WRITE : BMAPI_READ;
1487 struct xfs_bmbt_irec imap; 1305 struct xfs_bmbt_irec imap;
1488 xfs_off_t offset; 1306 xfs_off_t offset;
1489 ssize_t size; 1307 ssize_t size;
@@ -1498,8 +1316,11 @@ __xfs_get_blocks(
1498 if (!create && direct && offset >= i_size_read(inode)) 1316 if (!create && direct && offset >= i_size_read(inode))
1499 return 0; 1317 return 0;
1500 1318
1501 error = xfs_iomap(XFS_I(inode), offset, size, 1319 if (direct && create)
1502 create ? flags : BMAPI_READ, &imap, &nimap, &new); 1320 flags |= BMAPI_DIRECT;
1321
1322 error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
1323 &new);
1503 if (error) 1324 if (error)
1504 return -error; 1325 return -error;
1505 if (nimap == 0) 1326 if (nimap == 0)
@@ -1579,8 +1400,7 @@ xfs_get_blocks(
1579 struct buffer_head *bh_result, 1400 struct buffer_head *bh_result,
1580 int create) 1401 int create)
1581{ 1402{
1582 return __xfs_get_blocks(inode, iblock, 1403 return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
1583 bh_result, create, 0, BMAPI_WRITE);
1584} 1404}
1585 1405
1586STATIC int 1406STATIC int
@@ -1590,61 +1410,59 @@ xfs_get_blocks_direct(
1590 struct buffer_head *bh_result, 1410 struct buffer_head *bh_result,
1591 int create) 1411 int create)
1592{ 1412{
1593 return __xfs_get_blocks(inode, iblock, 1413 return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
1594 bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1595} 1414}
1596 1415
1416/*
1417 * Complete a direct I/O write request.
1418 *
1419 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1420 * need to issue a transaction to convert the range from unwritten to written
1421 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1422 * to do this and we are done. But in case this was a successfull AIO
1423 * request this handler is called from interrupt context, from which we
1424 * can't start transactions. In that case offload the I/O completion to
1425 * the workqueues we also use for buffered I/O completion.
1426 */
1597STATIC void 1427STATIC void
1598xfs_end_io_direct( 1428xfs_end_io_direct_write(
1599 struct kiocb *iocb, 1429 struct kiocb *iocb,
1600 loff_t offset, 1430 loff_t offset,
1601 ssize_t size, 1431 ssize_t size,
1602 void *private) 1432 void *private,
1433 int ret,
1434 bool is_async)
1603{ 1435{
1604 xfs_ioend_t *ioend = iocb->private; 1436 struct xfs_ioend *ioend = iocb->private;
1605 1437
1606 /* 1438 /*
1607 * Non-NULL private data means we need to issue a transaction to 1439 * blockdev_direct_IO can return an error even after the I/O
1608 * convert a range from unwritten to written extents. This needs 1440 * completion handler was called. Thus we need to protect
1609 * to happen from process context but aio+dio I/O completion 1441 * against double-freeing.
1610 * happens from irq context so we need to defer it to a workqueue.
1611 * This is not necessary for synchronous direct I/O, but we do
1612 * it anyway to keep the code uniform and simpler.
1613 *
1614 * Well, if only it were that simple. Because synchronous direct I/O
1615 * requires extent conversion to occur *before* we return to userspace,
1616 * we have to wait for extent conversion to complete. Look at the
1617 * iocb that has been passed to us to determine if this is AIO or
1618 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
1619 * workqueue and wait for it to complete.
1620 *
1621 * The core direct I/O code might be changed to always call the
1622 * completion handler in the future, in which case all this can
1623 * go away.
1624 */ 1442 */
1443 iocb->private = NULL;
1444
1625 ioend->io_offset = offset; 1445 ioend->io_offset = offset;
1626 ioend->io_size = size; 1446 ioend->io_size = size;
1627 if (ioend->io_type == IO_READ) { 1447 if (private && size > 0)
1628 xfs_finish_ioend(ioend, 0); 1448 ioend->io_type = IO_UNWRITTEN;
1629 } else if (private && size > 0) { 1449
1630 xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); 1450 if (is_async) {
1631 } else {
1632 /* 1451 /*
1633 * A direct I/O write ioend starts it's life in unwritten 1452 * If we are converting an unwritten extent we need to delay
1634 * state in case they map an unwritten extent. This write 1453 * the AIO completion until after the unwrittent extent
1635 * didn't map an unwritten extent so switch it's completion 1454 * conversion has completed, otherwise do it ASAP.
1636 * handler.
1637 */ 1455 */
1638 ioend->io_type = IO_NEW; 1456 if (ioend->io_type == IO_UNWRITTEN) {
1639 xfs_finish_ioend(ioend, 0); 1457 ioend->io_iocb = iocb;
1458 ioend->io_result = ret;
1459 } else {
1460 aio_complete(iocb, ret, 0);
1461 }
1462 xfs_finish_ioend(ioend);
1463 } else {
1464 xfs_finish_ioend_sync(ioend);
1640 } 1465 }
1641
1642 /*
1643 * blockdev_direct_IO can return an error even after the I/O
1644 * completion handler was called. Thus we need to protect
1645 * against double-freeing.
1646 */
1647 iocb->private = NULL;
1648} 1466}
1649 1467
1650STATIC ssize_t 1468STATIC ssize_t
@@ -1655,26 +1473,45 @@ xfs_vm_direct_IO(
1655 loff_t offset, 1473 loff_t offset,
1656 unsigned long nr_segs) 1474 unsigned long nr_segs)
1657{ 1475{
1658 struct file *file = iocb->ki_filp; 1476 struct inode *inode = iocb->ki_filp->f_mapping->host;
1659 struct inode *inode = file->f_mapping->host; 1477 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1660 struct block_device *bdev; 1478 ssize_t ret;
1661 ssize_t ret;
1662
1663 bdev = xfs_find_bdev_for_inode(inode);
1664 1479
1665 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? 1480 if (rw & WRITE) {
1666 IO_UNWRITTEN : IO_READ); 1481 iocb->private = xfs_alloc_ioend(inode, IO_NEW);
1667 1482
1668 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, 1483 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1669 offset, nr_segs, 1484 offset, nr_segs,
1670 xfs_get_blocks_direct, 1485 xfs_get_blocks_direct,
1671 xfs_end_io_direct); 1486 xfs_end_io_direct_write, NULL, 0);
1487 if (ret != -EIOCBQUEUED && iocb->private)
1488 xfs_destroy_ioend(iocb->private);
1489 } else {
1490 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1491 offset, nr_segs,
1492 xfs_get_blocks_direct,
1493 NULL, NULL, 0);
1494 }
1672 1495
1673 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1674 xfs_destroy_ioend(iocb->private);
1675 return ret; 1496 return ret;
1676} 1497}
1677 1498
1499STATIC void
1500xfs_vm_write_failed(
1501 struct address_space *mapping,
1502 loff_t to)
1503{
1504 struct inode *inode = mapping->host;
1505
1506 if (to > inode->i_size) {
1507 struct iattr ia = {
1508 .ia_valid = ATTR_SIZE | ATTR_FORCE,
1509 .ia_size = inode->i_size,
1510 };
1511 xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
1512 }
1513}
1514
1678STATIC int 1515STATIC int
1679xfs_vm_write_begin( 1516xfs_vm_write_begin(
1680 struct file *file, 1517 struct file *file,
@@ -1685,9 +1522,31 @@ xfs_vm_write_begin(
1685 struct page **pagep, 1522 struct page **pagep,
1686 void **fsdata) 1523 void **fsdata)
1687{ 1524{
1688 *pagep = NULL; 1525 int ret;
1689 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1526
1690 xfs_get_blocks); 1527 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
1528 pagep, xfs_get_blocks);
1529 if (unlikely(ret))
1530 xfs_vm_write_failed(mapping, pos + len);
1531 return ret;
1532}
1533
1534STATIC int
1535xfs_vm_write_end(
1536 struct file *file,
1537 struct address_space *mapping,
1538 loff_t pos,
1539 unsigned len,
1540 unsigned copied,
1541 struct page *page,
1542 void *fsdata)
1543{
1544 int ret;
1545
1546 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
1547 if (unlikely(ret < len))
1548 xfs_vm_write_failed(mapping, pos + len);
1549 return ret;
1691} 1550}
1692 1551
1693STATIC sector_t 1552STATIC sector_t
@@ -1698,7 +1557,7 @@ xfs_vm_bmap(
1698 struct inode *inode = (struct inode *)mapping->host; 1557 struct inode *inode = (struct inode *)mapping->host;
1699 struct xfs_inode *ip = XFS_I(inode); 1558 struct xfs_inode *ip = XFS_I(inode);
1700 1559
1701 xfs_itrace_entry(XFS_I(inode)); 1560 trace_xfs_vm_bmap(XFS_I(inode));
1702 xfs_ilock(ip, XFS_IOLOCK_SHARED); 1561 xfs_ilock(ip, XFS_IOLOCK_SHARED);
1703 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); 1562 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
1704 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1563 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -1732,7 +1591,7 @@ const struct address_space_operations xfs_address_space_operations = {
1732 .releasepage = xfs_vm_releasepage, 1591 .releasepage = xfs_vm_releasepage,
1733 .invalidatepage = xfs_vm_invalidatepage, 1592 .invalidatepage = xfs_vm_invalidatepage,
1734 .write_begin = xfs_vm_write_begin, 1593 .write_begin = xfs_vm_write_begin,
1735 .write_end = generic_write_end, 1594 .write_end = xfs_vm_write_end,
1736 .bmap = xfs_vm_bmap, 1595 .bmap = xfs_vm_bmap,
1737 .direct_IO = xfs_vm_direct_IO, 1596 .direct_IO = xfs_vm_direct_IO,
1738 .migratepage = buffer_migrate_page, 1597 .migratepage = buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df8..c5057fb6237a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
37 size_t io_size; /* size of the extent */ 37 size_t io_size; /* size of the extent */
38 xfs_off_t io_offset; /* offset in the file */ 38 xfs_off_t io_offset; /* offset in the file */
39 struct work_struct io_work; /* xfsdatad work queue */ 39 struct work_struct io_work; /* xfsdatad work queue */
40 struct kiocb *io_iocb;
41 int io_result;
40} xfs_ioend_t; 42} xfs_ioend_t;
41 43
42extern const struct address_space_operations xfs_address_space_operations; 44extern const struct address_space_operations xfs_address_space_operations;
@@ -45,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45extern void xfs_ioend_init(void); 47extern void xfs_ioend_init(void);
46extern void xfs_ioend_wait(struct xfs_inode *); 48extern void xfs_ioend_wait(struct xfs_inode *);
47 49
48extern void xfs_count_page_state(struct page *, int *, int *, int *); 50extern void xfs_count_page_state(struct page *, int *, int *);
49 51
50#endif /* __XFS_AOPS_H__ */ 52#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 2ee3f7a60163..63fd2c07cb57 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,7 +39,6 @@
39#include "xfs_inum.h" 39#include "xfs_inum.h"
40#include "xfs_log.h" 40#include "xfs_log.h"
41#include "xfs_ag.h" 41#include "xfs_ag.h"
42#include "xfs_dmapi.h"
43#include "xfs_mount.h" 42#include "xfs_mount.h"
44#include "xfs_trace.h" 43#include "xfs_trace.h"
45 44
@@ -189,8 +188,8 @@ _xfs_buf_initialize(
189 atomic_set(&bp->b_hold, 1); 188 atomic_set(&bp->b_hold, 1);
190 init_completion(&bp->b_iowait); 189 init_completion(&bp->b_iowait);
191 INIT_LIST_HEAD(&bp->b_list); 190 INIT_LIST_HEAD(&bp->b_list);
192 INIT_LIST_HEAD(&bp->b_hash_list); 191 RB_CLEAR_NODE(&bp->b_rbnode);
193 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ 192 sema_init(&bp->b_sema, 0); /* held, no waiters */
194 XB_SET_OWNER(bp); 193 XB_SET_OWNER(bp);
195 bp->b_target = target; 194 bp->b_target = target;
196 bp->b_file_offset = range_base; 195 bp->b_file_offset = range_base;
@@ -263,8 +262,6 @@ xfs_buf_free(
263{ 262{
264 trace_xfs_buf_free(bp, _RET_IP_); 263 trace_xfs_buf_free(bp, _RET_IP_);
265 264
266 ASSERT(list_empty(&bp->b_hash_list));
267
268 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 265 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
269 uint i; 266 uint i;
270 267
@@ -423,8 +420,10 @@ _xfs_buf_find(
423{ 420{
424 xfs_off_t range_base; 421 xfs_off_t range_base;
425 size_t range_length; 422 size_t range_length;
426 xfs_bufhash_t *hash; 423 struct xfs_perag *pag;
427 xfs_buf_t *bp, *n; 424 struct rb_node **rbp;
425 struct rb_node *parent;
426 xfs_buf_t *bp;
428 427
429 range_base = (ioff << BBSHIFT); 428 range_base = (ioff << BBSHIFT);
430 range_length = (isize << BBSHIFT); 429 range_length = (isize << BBSHIFT);
@@ -433,20 +432,38 @@ _xfs_buf_find(
433 ASSERT(!(range_length < (1 << btp->bt_sshift))); 432 ASSERT(!(range_length < (1 << btp->bt_sshift)));
434 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); 433 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
435 434
436 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; 435 /* get tree root */
437 436 pag = xfs_perag_get(btp->bt_mount,
438 spin_lock(&hash->bh_lock); 437 xfs_daddr_to_agno(btp->bt_mount, ioff));
439 438
440 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { 439 /* walk tree */
441 ASSERT(btp == bp->b_target); 440 spin_lock(&pag->pag_buf_lock);
442 if (bp->b_file_offset == range_base && 441 rbp = &pag->pag_buf_tree.rb_node;
443 bp->b_buffer_length == range_length) { 442 parent = NULL;
443 bp = NULL;
444 while (*rbp) {
445 parent = *rbp;
446 bp = rb_entry(parent, struct xfs_buf, b_rbnode);
447
448 if (range_base < bp->b_file_offset)
449 rbp = &(*rbp)->rb_left;
450 else if (range_base > bp->b_file_offset)
451 rbp = &(*rbp)->rb_right;
452 else {
444 /* 453 /*
445 * If we look at something, bring it to the 454 * found a block offset match. If the range doesn't
446 * front of the list for next time. 455 * match, the only way this is allowed is if the buffer
456 * in the cache is stale and the transaction that made
457 * it stale has not yet committed. i.e. we are
458 * reallocating a busy extent. Skip this buffer and
459 * continue searching to the right for an exact match.
447 */ 460 */
461 if (bp->b_buffer_length != range_length) {
462 ASSERT(bp->b_flags & XBF_STALE);
463 rbp = &(*rbp)->rb_right;
464 continue;
465 }
448 atomic_inc(&bp->b_hold); 466 atomic_inc(&bp->b_hold);
449 list_move(&bp->b_hash_list, &hash->bh_list);
450 goto found; 467 goto found;
451 } 468 }
452 } 469 }
@@ -455,17 +472,21 @@ _xfs_buf_find(
455 if (new_bp) { 472 if (new_bp) {
456 _xfs_buf_initialize(new_bp, btp, range_base, 473 _xfs_buf_initialize(new_bp, btp, range_base,
457 range_length, flags); 474 range_length, flags);
458 new_bp->b_hash = hash; 475 rb_link_node(&new_bp->b_rbnode, parent, rbp);
459 list_add(&new_bp->b_hash_list, &hash->bh_list); 476 rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
477 /* the buffer keeps the perag reference until it is freed */
478 new_bp->b_pag = pag;
479 spin_unlock(&pag->pag_buf_lock);
460 } else { 480 } else {
461 XFS_STATS_INC(xb_miss_locked); 481 XFS_STATS_INC(xb_miss_locked);
482 spin_unlock(&pag->pag_buf_lock);
483 xfs_perag_put(pag);
462 } 484 }
463
464 spin_unlock(&hash->bh_lock);
465 return new_bp; 485 return new_bp;
466 486
467found: 487found:
468 spin_unlock(&hash->bh_lock); 488 spin_unlock(&pag->pag_buf_lock);
489 xfs_perag_put(pag);
469 490
470 /* Attempt to get the semaphore without sleeping, 491 /* Attempt to get the semaphore without sleeping,
471 * if this does not work then we need to drop the 492 * if this does not work then we need to drop the
@@ -579,9 +600,9 @@ _xfs_buf_read(
579 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 600 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
580 601
581 status = xfs_buf_iorequest(bp); 602 status = xfs_buf_iorequest(bp);
582 if (!status && !(flags & XBF_ASYNC)) 603 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
583 status = xfs_buf_iowait(bp); 604 return status;
584 return status; 605 return xfs_buf_iowait(bp);
585} 606}
586 607
587xfs_buf_t * 608xfs_buf_t *
@@ -631,8 +652,7 @@ void
631xfs_buf_readahead( 652xfs_buf_readahead(
632 xfs_buftarg_t *target, 653 xfs_buftarg_t *target,
633 xfs_off_t ioff, 654 xfs_off_t ioff,
634 size_t isize, 655 size_t isize)
635 xfs_buf_flags_t flags)
636{ 656{
637 struct backing_dev_info *bdi; 657 struct backing_dev_info *bdi;
638 658
@@ -640,8 +660,42 @@ xfs_buf_readahead(
640 if (bdi_read_congested(bdi)) 660 if (bdi_read_congested(bdi))
641 return; 661 return;
642 662
643 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); 663 xfs_buf_read(target, ioff, isize,
644 xfs_buf_read(target, ioff, isize, flags); 664 XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
665}
666
667/*
668 * Read an uncached buffer from disk. Allocates and returns a locked
669 * buffer containing the disk contents or nothing.
670 */
671struct xfs_buf *
672xfs_buf_read_uncached(
673 struct xfs_mount *mp,
674 struct xfs_buftarg *target,
675 xfs_daddr_t daddr,
676 size_t length,
677 int flags)
678{
679 xfs_buf_t *bp;
680 int error;
681
682 bp = xfs_buf_get_uncached(target, length, flags);
683 if (!bp)
684 return NULL;
685
686 /* set up the buffer for a read IO */
687 xfs_buf_lock(bp);
688 XFS_BUF_SET_ADDR(bp, daddr);
689 XFS_BUF_READ(bp);
690 XFS_BUF_BUSY(bp);
691
692 xfsbdstrat(mp, bp);
693 error = xfs_buf_iowait(bp);
694 if (error || bp->b_error) {
695 xfs_buf_relse(bp);
696 return NULL;
697 }
698 return bp;
645} 699}
646 700
647xfs_buf_t * 701xfs_buf_t *
@@ -713,9 +767,10 @@ xfs_buf_associate_memory(
713} 767}
714 768
715xfs_buf_t * 769xfs_buf_t *
716xfs_buf_get_noaddr( 770xfs_buf_get_uncached(
771 struct xfs_buftarg *target,
717 size_t len, 772 size_t len,
718 xfs_buftarg_t *target) 773 int flags)
719{ 774{
720 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; 775 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
721 int error, i; 776 int error, i;
@@ -731,7 +786,7 @@ xfs_buf_get_noaddr(
731 goto fail_free_buf; 786 goto fail_free_buf;
732 787
733 for (i = 0; i < page_count; i++) { 788 for (i = 0; i < page_count; i++) {
734 bp->b_pages[i] = alloc_page(GFP_KERNEL); 789 bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
735 if (!bp->b_pages[i]) 790 if (!bp->b_pages[i])
736 goto fail_free_mem; 791 goto fail_free_mem;
737 } 792 }
@@ -746,7 +801,7 @@ xfs_buf_get_noaddr(
746 801
747 xfs_buf_unlock(bp); 802 xfs_buf_unlock(bp);
748 803
749 trace_xfs_buf_get_noaddr(bp, _RET_IP_); 804 trace_xfs_buf_get_uncached(bp, _RET_IP_);
750 return bp; 805 return bp;
751 806
752 fail_free_mem: 807 fail_free_mem:
@@ -780,29 +835,30 @@ void
780xfs_buf_rele( 835xfs_buf_rele(
781 xfs_buf_t *bp) 836 xfs_buf_t *bp)
782{ 837{
783 xfs_bufhash_t *hash = bp->b_hash; 838 struct xfs_perag *pag = bp->b_pag;
784 839
785 trace_xfs_buf_rele(bp, _RET_IP_); 840 trace_xfs_buf_rele(bp, _RET_IP_);
786 841
787 if (unlikely(!hash)) { 842 if (!pag) {
788 ASSERT(!bp->b_relse); 843 ASSERT(!bp->b_relse);
844 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
789 if (atomic_dec_and_test(&bp->b_hold)) 845 if (atomic_dec_and_test(&bp->b_hold))
790 xfs_buf_free(bp); 846 xfs_buf_free(bp);
791 return; 847 return;
792 } 848 }
793 849
850 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
794 ASSERT(atomic_read(&bp->b_hold) > 0); 851 ASSERT(atomic_read(&bp->b_hold) > 0);
795 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { 852 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
796 if (bp->b_relse) { 853 if (bp->b_relse) {
797 atomic_inc(&bp->b_hold); 854 atomic_inc(&bp->b_hold);
798 spin_unlock(&hash->bh_lock); 855 spin_unlock(&pag->pag_buf_lock);
799 (*(bp->b_relse)) (bp); 856 bp->b_relse(bp);
800 } else if (bp->b_flags & XBF_FS_MANAGED) {
801 spin_unlock(&hash->bh_lock);
802 } else { 857 } else {
803 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); 858 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
804 list_del_init(&bp->b_hash_list); 859 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
805 spin_unlock(&hash->bh_lock); 860 spin_unlock(&pag->pag_buf_lock);
861 xfs_perag_put(pag);
806 xfs_buf_free(bp); 862 xfs_buf_free(bp);
807 } 863 }
808 } 864 }
@@ -865,7 +921,7 @@ xfs_buf_lock(
865 trace_xfs_buf_lock(bp, _RET_IP_); 921 trace_xfs_buf_lock(bp, _RET_IP_);
866 922
867 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 923 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
868 xfs_log_force(bp->b_mount, 0); 924 xfs_log_force(bp->b_target->bt_mount, 0);
869 if (atomic_read(&bp->b_io_remaining)) 925 if (atomic_read(&bp->b_io_remaining))
870 blk_run_address_space(bp->b_target->bt_mapping); 926 blk_run_address_space(bp->b_target->bt_mapping);
871 down(&bp->b_sema); 927 down(&bp->b_sema);
@@ -897,36 +953,6 @@ xfs_buf_unlock(
897 trace_xfs_buf_unlock(bp, _RET_IP_); 953 trace_xfs_buf_unlock(bp, _RET_IP_);
898} 954}
899 955
900
901/*
902 * Pinning Buffer Storage in Memory
903 * Ensure that no attempt to force a buffer to disk will succeed.
904 */
905void
906xfs_buf_pin(
907 xfs_buf_t *bp)
908{
909 trace_xfs_buf_pin(bp, _RET_IP_);
910 atomic_inc(&bp->b_pin_count);
911}
912
913void
914xfs_buf_unpin(
915 xfs_buf_t *bp)
916{
917 trace_xfs_buf_unpin(bp, _RET_IP_);
918
919 if (atomic_dec_and_test(&bp->b_pin_count))
920 wake_up_all(&bp->b_waiters);
921}
922
923int
924xfs_buf_ispin(
925 xfs_buf_t *bp)
926{
927 return atomic_read(&bp->b_pin_count);
928}
929
930STATIC void 956STATIC void
931xfs_buf_wait_unpin( 957xfs_buf_wait_unpin(
932 xfs_buf_t *bp) 958 xfs_buf_t *bp)
@@ -960,19 +986,7 @@ xfs_buf_iodone_work(
960 xfs_buf_t *bp = 986 xfs_buf_t *bp =
961 container_of(work, xfs_buf_t, b_iodone_work); 987 container_of(work, xfs_buf_t, b_iodone_work);
962 988
963 /* 989 if (bp->b_iodone)
964 * We can get an EOPNOTSUPP to ordered writes. Here we clear the
965 * ordered flag and reissue them. Because we can't tell the higher
966 * layers directly that they should not issue ordered I/O anymore, they
967 * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
968 */
969 if ((bp->b_error == EOPNOTSUPP) &&
970 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
971 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
972 bp->b_flags &= ~XBF_ORDERED;
973 bp->b_flags |= _XFS_BARRIER_FAILED;
974 xfs_buf_iorequest(bp);
975 } else if (bp->b_iodone)
976 (*(bp->b_iodone))(bp); 990 (*(bp->b_iodone))(bp);
977 else if (bp->b_flags & XBF_ASYNC) 991 else if (bp->b_flags & XBF_ASYNC)
978 xfs_buf_relse(bp); 992 xfs_buf_relse(bp);
@@ -1018,13 +1032,11 @@ xfs_bwrite(
1018{ 1032{
1019 int error; 1033 int error;
1020 1034
1021 bp->b_strat = xfs_bdstrat_cb;
1022 bp->b_mount = mp;
1023 bp->b_flags |= XBF_WRITE; 1035 bp->b_flags |= XBF_WRITE;
1024 bp->b_flags &= ~(XBF_ASYNC | XBF_READ); 1036 bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
1025 1037
1026 xfs_buf_delwri_dequeue(bp); 1038 xfs_buf_delwri_dequeue(bp);
1027 xfs_buf_iostrategy(bp); 1039 xfs_bdstrat_cb(bp);
1028 1040
1029 error = xfs_buf_iowait(bp); 1041 error = xfs_buf_iowait(bp);
1030 if (error) 1042 if (error)
@@ -1040,9 +1052,6 @@ xfs_bdwrite(
1040{ 1052{
1041 trace_xfs_buf_bdwrite(bp, _RET_IP_); 1053 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1042 1054
1043 bp->b_strat = xfs_bdstrat_cb;
1044 bp->b_mount = mp;
1045
1046 bp->b_flags &= ~XBF_READ; 1055 bp->b_flags &= ~XBF_READ;
1047 bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); 1056 bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
1048 1057
@@ -1051,7 +1060,7 @@ xfs_bdwrite(
1051 1060
1052/* 1061/*
1053 * Called when we want to stop a buffer from getting written or read. 1062 * Called when we want to stop a buffer from getting written or read.
1054 * We attach the EIO error, muck with its flags, and call biodone 1063 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
1055 * so that the proper iodone callbacks get called. 1064 * so that the proper iodone callbacks get called.
1056 */ 1065 */
1057STATIC int 1066STATIC int
@@ -1068,22 +1077,21 @@ xfs_bioerror(
1068 XFS_BUF_ERROR(bp, EIO); 1077 XFS_BUF_ERROR(bp, EIO);
1069 1078
1070 /* 1079 /*
1071 * We're calling biodone, so delete XBF_DONE flag. 1080 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
1072 */ 1081 */
1073 XFS_BUF_UNREAD(bp); 1082 XFS_BUF_UNREAD(bp);
1074 XFS_BUF_UNDELAYWRITE(bp); 1083 XFS_BUF_UNDELAYWRITE(bp);
1075 XFS_BUF_UNDONE(bp); 1084 XFS_BUF_UNDONE(bp);
1076 XFS_BUF_STALE(bp); 1085 XFS_BUF_STALE(bp);
1077 1086
1078 XFS_BUF_CLR_BDSTRAT_FUNC(bp); 1087 xfs_buf_ioend(bp, 0);
1079 xfs_biodone(bp);
1080 1088
1081 return EIO; 1089 return EIO;
1082} 1090}
1083 1091
1084/* 1092/*
1085 * Same as xfs_bioerror, except that we are releasing the buffer 1093 * Same as xfs_bioerror, except that we are releasing the buffer
1086 * here ourselves, and avoiding the biodone call. 1094 * here ourselves, and avoiding the xfs_buf_ioend call.
1087 * This is meant for userdata errors; metadata bufs come with 1095 * This is meant for userdata errors; metadata bufs come with
1088 * iodone functions attached, so that we can track down errors. 1096 * iodone functions attached, so that we can track down errors.
1089 */ 1097 */
@@ -1105,7 +1113,6 @@ xfs_bioerror_relse(
1105 XFS_BUF_DONE(bp); 1113 XFS_BUF_DONE(bp);
1106 XFS_BUF_STALE(bp); 1114 XFS_BUF_STALE(bp);
1107 XFS_BUF_CLR_IODONE_FUNC(bp); 1115 XFS_BUF_CLR_IODONE_FUNC(bp);
1108 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1109 if (!(fl & XBF_ASYNC)) { 1116 if (!(fl & XBF_ASYNC)) {
1110 /* 1117 /*
1111 * Mark b_error and B_ERROR _both_. 1118 * Mark b_error and B_ERROR _both_.
@@ -1133,7 +1140,7 @@ int
1133xfs_bdstrat_cb( 1140xfs_bdstrat_cb(
1134 struct xfs_buf *bp) 1141 struct xfs_buf *bp)
1135{ 1142{
1136 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 1143 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
1137 trace_xfs_bdstrat_shut(bp, _RET_IP_); 1144 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1138 /* 1145 /*
1139 * Metadata write that didn't get logged but 1146 * Metadata write that didn't get logged but
@@ -1235,7 +1242,7 @@ _xfs_buf_ioapply(
1235 1242
1236 if (bp->b_flags & XBF_ORDERED) { 1243 if (bp->b_flags & XBF_ORDERED) {
1237 ASSERT(!(bp->b_flags & XBF_READ)); 1244 ASSERT(!(bp->b_flags & XBF_READ));
1238 rw = WRITE_BARRIER; 1245 rw = WRITE_FLUSH_FUA;
1239 } else if (bp->b_flags & XBF_LOG_BUFFER) { 1246 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1240 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1247 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1241 bp->b_flags &= ~_XBF_RUN_QUEUES; 1248 bp->b_flags &= ~_XBF_RUN_QUEUES;
@@ -1311,8 +1318,19 @@ submit_io:
1311 if (size) 1318 if (size)
1312 goto next_chunk; 1319 goto next_chunk;
1313 } else { 1320 } else {
1314 bio_put(bio); 1321 /*
1322 * if we get here, no pages were added to the bio. However,
1323 * we can't just error out here - if the pages are locked then
1324 * we have to unlock them otherwise we can hang on a later
1325 * access to the page.
1326 */
1315 xfs_buf_ioerror(bp, EIO); 1327 xfs_buf_ioerror(bp, EIO);
1328 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1329 int i;
1330 for (i = 0; i < bp->b_page_count; i++)
1331 unlock_page(bp->b_pages[i]);
1332 }
1333 bio_put(bio);
1316 } 1334 }
1317} 1335}
1318 1336
@@ -1428,63 +1446,24 @@ xfs_buf_iomove(
1428 */ 1446 */
1429void 1447void
1430xfs_wait_buftarg( 1448xfs_wait_buftarg(
1431 xfs_buftarg_t *btp) 1449 struct xfs_buftarg *btp)
1432{
1433 xfs_buf_t *bp, *n;
1434 xfs_bufhash_t *hash;
1435 uint i;
1436
1437 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1438 hash = &btp->bt_hash[i];
1439again:
1440 spin_lock(&hash->bh_lock);
1441 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
1442 ASSERT(btp == bp->b_target);
1443 if (!(bp->b_flags & XBF_FS_MANAGED)) {
1444 spin_unlock(&hash->bh_lock);
1445 /*
1446 * Catch superblock reference count leaks
1447 * immediately
1448 */
1449 BUG_ON(bp->b_bn == 0);
1450 delay(100);
1451 goto again;
1452 }
1453 }
1454 spin_unlock(&hash->bh_lock);
1455 }
1456}
1457
1458/*
1459 * Allocate buffer hash table for a given target.
1460 * For devices containing metadata (i.e. not the log/realtime devices)
1461 * we need to allocate a much larger hash table.
1462 */
1463STATIC void
1464xfs_alloc_bufhash(
1465 xfs_buftarg_t *btp,
1466 int external)
1467{ 1450{
1468 unsigned int i; 1451 struct xfs_perag *pag;
1452 uint i;
1469 1453
1470 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1454 for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
1471 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; 1455 pag = xfs_perag_get(btp->bt_mount, i);
1472 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * 1456 spin_lock(&pag->pag_buf_lock);
1473 sizeof(xfs_bufhash_t)); 1457 while (rb_first(&pag->pag_buf_tree)) {
1474 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1458 spin_unlock(&pag->pag_buf_lock);
1475 spin_lock_init(&btp->bt_hash[i].bh_lock); 1459 delay(100);
1476 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); 1460 spin_lock(&pag->pag_buf_lock);
1461 }
1462 spin_unlock(&pag->pag_buf_lock);
1463 xfs_perag_put(pag);
1477 } 1464 }
1478} 1465}
1479 1466
1480STATIC void
1481xfs_free_bufhash(
1482 xfs_buftarg_t *btp)
1483{
1484 kmem_free_large(btp->bt_hash);
1485 btp->bt_hash = NULL;
1486}
1487
1488/* 1467/*
1489 * buftarg list for delwrite queue processing 1468 * buftarg list for delwrite queue processing
1490 */ 1469 */
@@ -1517,7 +1496,6 @@ xfs_free_buftarg(
1517 xfs_flush_buftarg(btp, 1); 1496 xfs_flush_buftarg(btp, 1);
1518 if (mp->m_flags & XFS_MOUNT_BARRIER) 1497 if (mp->m_flags & XFS_MOUNT_BARRIER)
1519 xfs_blkdev_issue_flush(btp); 1498 xfs_blkdev_issue_flush(btp);
1520 xfs_free_bufhash(btp);
1521 iput(btp->bt_mapping->host); 1499 iput(btp->bt_mapping->host);
1522 1500
1523 /* Unregister the buftarg first so that we don't get a 1501 /* Unregister the buftarg first so that we don't get a
@@ -1602,6 +1580,7 @@ xfs_mapping_buftarg(
1602 XFS_BUFTARG_NAME(btp)); 1580 XFS_BUFTARG_NAME(btp));
1603 return ENOMEM; 1581 return ENOMEM;
1604 } 1582 }
1583 inode->i_ino = get_next_ino();
1605 inode->i_mode = S_IFBLK; 1584 inode->i_mode = S_IFBLK;
1606 inode->i_bdev = bdev; 1585 inode->i_bdev = bdev;
1607 inode->i_rdev = bdev->bd_dev; 1586 inode->i_rdev = bdev->bd_dev;
@@ -1639,6 +1618,7 @@ out_error:
1639 1618
1640xfs_buftarg_t * 1619xfs_buftarg_t *
1641xfs_alloc_buftarg( 1620xfs_alloc_buftarg(
1621 struct xfs_mount *mp,
1642 struct block_device *bdev, 1622 struct block_device *bdev,
1643 int external, 1623 int external,
1644 const char *fsname) 1624 const char *fsname)
@@ -1647,6 +1627,7 @@ xfs_alloc_buftarg(
1647 1627
1648 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); 1628 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
1649 1629
1630 btp->bt_mount = mp;
1650 btp->bt_dev = bdev->bd_dev; 1631 btp->bt_dev = bdev->bd_dev;
1651 btp->bt_bdev = bdev; 1632 btp->bt_bdev = bdev;
1652 if (xfs_setsize_buftarg_early(btp, bdev)) 1633 if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1655,7 +1636,6 @@ xfs_alloc_buftarg(
1655 goto error; 1636 goto error;
1656 if (xfs_alloc_delwrite_queue(btp, fsname)) 1637 if (xfs_alloc_delwrite_queue(btp, fsname))
1657 goto error; 1638 goto error;
1658 xfs_alloc_bufhash(btp, external);
1659 return btp; 1639 return btp;
1660 1640
1661error: 1641error:
@@ -1804,7 +1784,7 @@ xfs_buf_delwri_split(
1804 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1784 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1805 ASSERT(bp->b_flags & XBF_DELWRI); 1785 ASSERT(bp->b_flags & XBF_DELWRI);
1806 1786
1807 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1787 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
1808 if (!force && 1788 if (!force &&
1809 time_before(jiffies, bp->b_queuetime + age)) { 1789 time_before(jiffies, bp->b_queuetime + age)) {
1810 xfs_buf_unlock(bp); 1790 xfs_buf_unlock(bp);
@@ -1889,7 +1869,7 @@ xfsbufd(
1889 struct xfs_buf *bp; 1869 struct xfs_buf *bp;
1890 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1870 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1891 list_del_init(&bp->b_list); 1871 list_del_init(&bp->b_list);
1892 xfs_buf_iostrategy(bp); 1872 xfs_bdstrat_cb(bp);
1893 count++; 1873 count++;
1894 } 1874 }
1895 if (count) 1875 if (count)
@@ -1936,7 +1916,7 @@ xfs_flush_buftarg(
1936 bp->b_flags &= ~XBF_ASYNC; 1916 bp->b_flags &= ~XBF_ASYNC;
1937 list_add(&bp->b_list, &wait_list); 1917 list_add(&bp->b_list, &wait_list);
1938 } 1918 }
1939 xfs_buf_iostrategy(bp); 1919 xfs_bdstrat_cb(bp);
1940 } 1920 }
1941 1921
1942 if (wait) { 1922 if (wait) {
@@ -1946,7 +1926,7 @@ xfs_flush_buftarg(
1946 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1926 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1947 1927
1948 list_del_init(&bp->b_list); 1928 list_del_init(&bp->b_list);
1949 xfs_iowait(bp); 1929 xfs_buf_iowait(bp);
1950 xfs_buf_relse(bp); 1930 xfs_buf_relse(bp);
1951 } 1931 }
1952 } 1932 }
@@ -1962,7 +1942,8 @@ xfs_buf_init(void)
1962 if (!xfs_buf_zone) 1942 if (!xfs_buf_zone)
1963 goto out; 1943 goto out;
1964 1944
1965 xfslogd_workqueue = create_workqueue("xfslogd"); 1945 xfslogd_workqueue = alloc_workqueue("xfslogd",
1946 WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
1966 if (!xfslogd_workqueue) 1947 if (!xfslogd_workqueue)
1967 goto out_free_buf_zone; 1948 goto out_free_buf_zone;
1968 1949
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 5fbecefa5dfd..383a3f37cf98 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -44,57 +44,48 @@ typedef enum {
44 XBRW_ZERO = 3, /* Zero target memory */ 44 XBRW_ZERO = 3, /* Zero target memory */
45} xfs_buf_rw_t; 45} xfs_buf_rw_t;
46 46
47typedef enum { 47#define XBF_READ (1 << 0) /* buffer intended for reading from device */
48 XBF_READ = (1 << 0), /* buffer intended for reading from device */ 48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
49 XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ 49#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
50 XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ 50#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
51 XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ 51#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
52 XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ 52#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
53 XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ 53#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
54 XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ 54#define XBF_ORDERED (1 << 11)/* use ordered writes */
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ 58/* flags used only as arguments to access routines */
59 59#define XBF_LOCK (1 << 14)/* lock requested */
60 /* flags used only as arguments to access routines */ 60#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
61 XBF_LOCK = (1 << 14), /* lock requested */ 61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
62 XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ 62
63 XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ 63/* flags used only internally */
64 64#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
65 /* flags used only internally */ 65#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
66 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 66#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
67 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ 67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
69 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
70 68
71 /* 69/*
72 * Special flag for supporting metadata blocks smaller than a FSB. 70 * Special flag for supporting metadata blocks smaller than a FSB.
73 * 71 *
74 * In this case we can have multiple xfs_buf_t on a single page and 72 * In this case we can have multiple xfs_buf_t on a single page and
75 * need to lock out concurrent xfs_buf_t readers as they only 73 * need to lock out concurrent xfs_buf_t readers as they only
76 * serialise access to the buffer. 74 * serialise access to the buffer.
77 * 75 *
78 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation 76 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
79 * between reads of the page. Hence we can have one thread read the 77 * between reads of the page. Hence we can have one thread read the
80 * page and modify it, but then race with another thread that thinks 78 * page and modify it, but then race with another thread that thinks
81 * the page is not up-to-date and hence reads it again. 79 * the page is not up-to-date and hence reads it again.
82 * 80 *
83 * The result is that the first modifcation to the page is lost. 81 * The result is that the first modifcation to the page is lost.
84 * This sort of AGF/AGI reading race can happen when unlinking inodes 82 * This sort of AGF/AGI reading race can happen when unlinking inodes
85 * that require truncation and results in the AGI unlinked list 83 * that require truncation and results in the AGI unlinked list
86 * modifications being lost. 84 * modifications being lost.
87 */ 85 */
88 _XBF_PAGE_LOCKED = (1 << 22), 86#define _XBF_PAGE_LOCKED (1 << 22)
89 87
90 /* 88typedef unsigned int xfs_buf_flags_t;
91 * If we try a barrier write, but it fails we have to communicate
92 * this to the upper layers. Unfortunately b_error gets overwritten
93 * when the buffer is re-issued so we have to add another flag to
94 * keep this information.
95 */
96 _XFS_BARRIER_FAILED = (1 << 23),
97} xfs_buf_flags_t;
98 89
99#define XFS_BUF_FLAGS \ 90#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \ 91 { XBF_READ, "READ" }, \
@@ -104,7 +95,6 @@ typedef enum {
104 { XBF_DONE, "DONE" }, \ 95 { XBF_DONE, "DONE" }, \
105 { XBF_DELWRI, "DELWRI" }, \ 96 { XBF_DELWRI, "DELWRI" }, \
106 { XBF_STALE, "STALE" }, \ 97 { XBF_STALE, "STALE" }, \
107 { XBF_FS_MANAGED, "FS_MANAGED" }, \
108 { XBF_ORDERED, "ORDERED" }, \ 98 { XBF_ORDERED, "ORDERED" }, \
109 { XBF_READ_AHEAD, "READ_AHEAD" }, \ 99 { XBF_READ_AHEAD, "READ_AHEAD" }, \
110 { XBF_LOCK, "LOCK" }, /* should never be set */\ 100 { XBF_LOCK, "LOCK" }, /* should never be set */\
@@ -114,8 +104,7 @@ typedef enum {
114 { _XBF_PAGES, "PAGES" }, \ 104 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ 105 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 106 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ 107 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119 108
120 109
121typedef enum { 110typedef enum {
@@ -132,15 +121,11 @@ typedef struct xfs_buftarg {
132 dev_t bt_dev; 121 dev_t bt_dev;
133 struct block_device *bt_bdev; 122 struct block_device *bt_bdev;
134 struct address_space *bt_mapping; 123 struct address_space *bt_mapping;
124 struct xfs_mount *bt_mount;
135 unsigned int bt_bsize; 125 unsigned int bt_bsize;
136 unsigned int bt_sshift; 126 unsigned int bt_sshift;
137 size_t bt_smask; 127 size_t bt_smask;
138 128
139 /* per device buffer hash table */
140 uint bt_hashmask;
141 uint bt_hashshift;
142 xfs_bufhash_t *bt_hash;
143
144 /* per device delwri queue */ 129 /* per device delwri queue */
145 struct task_struct *bt_task; 130 struct task_struct *bt_task;
146 struct list_head bt_list; 131 struct list_head bt_list;
@@ -168,35 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
168#define XB_PAGES 2 153#define XB_PAGES 2
169 154
170typedef struct xfs_buf { 155typedef struct xfs_buf {
156 /*
157 * first cacheline holds all the fields needed for an uncontended cache
158 * hit to be fully processed. The semaphore straddles the cacheline
159 * boundary, but the counter and lock sits on the first cacheline,
160 * which is the only bit that is touched if we hit the semaphore
161 * fast-path on locking.
162 */
163 struct rb_node b_rbnode; /* rbtree node */
164 xfs_off_t b_file_offset; /* offset in file */
165 size_t b_buffer_length;/* size of buffer in bytes */
166 atomic_t b_hold; /* reference count */
167 xfs_buf_flags_t b_flags; /* status flags */
171 struct semaphore b_sema; /* semaphore for lockables */ 168 struct semaphore b_sema; /* semaphore for lockables */
172 unsigned long b_queuetime; /* time buffer was queued */ 169
173 atomic_t b_pin_count; /* pin count */
174 wait_queue_head_t b_waiters; /* unpin waiters */ 170 wait_queue_head_t b_waiters; /* unpin waiters */
175 struct list_head b_list; 171 struct list_head b_list;
176 xfs_buf_flags_t b_flags; /* status flags */ 172 struct xfs_perag *b_pag; /* contains rbtree root */
177 struct list_head b_hash_list; /* hash table list */
178 xfs_bufhash_t *b_hash; /* hash table list start */
179 xfs_buftarg_t *b_target; /* buffer target (device) */ 173 xfs_buftarg_t *b_target; /* buffer target (device) */
180 atomic_t b_hold; /* reference count */
181 xfs_daddr_t b_bn; /* block number for I/O */ 174 xfs_daddr_t b_bn; /* block number for I/O */
182 xfs_off_t b_file_offset; /* offset in file */
183 size_t b_buffer_length;/* size of buffer in bytes */
184 size_t b_count_desired;/* desired transfer size */ 175 size_t b_count_desired;/* desired transfer size */
185 void *b_addr; /* virtual address of buffer */ 176 void *b_addr; /* virtual address of buffer */
186 struct work_struct b_iodone_work; 177 struct work_struct b_iodone_work;
187 atomic_t b_io_remaining; /* #outstanding I/O requests */
188 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 178 xfs_buf_iodone_t b_iodone; /* I/O completion function */
189 xfs_buf_relse_t b_relse; /* releasing function */ 179 xfs_buf_relse_t b_relse; /* releasing function */
190 xfs_buf_bdstrat_t b_strat; /* pre-write function */
191 struct completion b_iowait; /* queue for I/O waiters */ 180 struct completion b_iowait; /* queue for I/O waiters */
192 void *b_fspriv; 181 void *b_fspriv;
193 void *b_fspriv2; 182 void *b_fspriv2;
194 struct xfs_mount *b_mount;
195 unsigned short b_error; /* error code on I/O */
196 unsigned int b_page_count; /* size of page array */
197 unsigned int b_offset; /* page offset in first page */
198 struct page **b_pages; /* array of page pointers */ 183 struct page **b_pages; /* array of page pointers */
199 struct page *b_page_array[XB_PAGES]; /* inline pages */ 184 struct page *b_page_array[XB_PAGES]; /* inline pages */
185 unsigned long b_queuetime; /* time buffer was queued */
186 atomic_t b_pin_count; /* pin count */
187 atomic_t b_io_remaining; /* #outstanding I/O requests */
188 unsigned int b_page_count; /* size of page array */
189 unsigned int b_offset; /* page offset in first page */
190 unsigned short b_error; /* error code on I/O */
200#ifdef XFS_BUF_LOCK_TRACKING 191#ifdef XFS_BUF_LOCK_TRACKING
201 int b_last_holder; 192 int b_last_holder;
202#endif 193#endif
@@ -215,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
215 xfs_buf_flags_t); 206 xfs_buf_flags_t);
216 207
217extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 208extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
218extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); 209extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
219extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); 210extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
220extern void xfs_buf_hold(xfs_buf_t *); 211extern void xfs_buf_hold(xfs_buf_t *);
221extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, 212extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
222 xfs_buf_flags_t); 213struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
214 struct xfs_buftarg *target,
215 xfs_daddr_t daddr, size_t length, int flags);
223 216
224/* Releasing Buffers */ 217/* Releasing Buffers */
225extern void xfs_buf_free(xfs_buf_t *); 218extern void xfs_buf_free(xfs_buf_t *);
@@ -244,11 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *);
244extern int xfs_buf_iowait(xfs_buf_t *); 237extern int xfs_buf_iowait(xfs_buf_t *);
245extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 238extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
246 xfs_buf_rw_t); 239 xfs_buf_rw_t);
247 240#define xfs_buf_zero(bp, off, len) \
248static inline int xfs_buf_iostrategy(xfs_buf_t *bp) 241 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
249{
250 return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
251}
252 242
253static inline int xfs_buf_geterror(xfs_buf_t *bp) 243static inline int xfs_buf_geterror(xfs_buf_t *bp)
254{ 244{
@@ -258,11 +248,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
258/* Buffer Utility Routines */ 248/* Buffer Utility Routines */
259extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); 249extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
260 250
261/* Pinning Buffer Storage in Memory */
262extern void xfs_buf_pin(xfs_buf_t *);
263extern void xfs_buf_unpin(xfs_buf_t *);
264extern int xfs_buf_ispin(xfs_buf_t *);
265
266/* Delayed Write Buffer Routines */ 251/* Delayed Write Buffer Routines */
267extern void xfs_buf_delwri_dequeue(xfs_buf_t *); 252extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
268extern void xfs_buf_delwri_promote(xfs_buf_t *); 253extern void xfs_buf_delwri_promote(xfs_buf_t *);
@@ -288,8 +273,6 @@ extern void xfs_buf_terminate(void);
288 XFS_BUF_DONE(bp); \ 273 XFS_BUF_DONE(bp); \
289 } while (0) 274 } while (0)
290 275
291#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
292
293#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) 276#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
294#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) 277#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
295#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) 278#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
@@ -326,8 +309,6 @@ extern void xfs_buf_terminate(void);
326#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) 309#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
327#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) 310#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
328#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) 311#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
329#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
330#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
331 312
332#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) 313#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
333#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) 314#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
@@ -351,7 +332,7 @@ extern void xfs_buf_terminate(void);
351#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) 332#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
352#define XFS_BUF_SET_REF(bp, ref) do { } while (0) 333#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
353 334
354#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) 335#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
355 336
356#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) 337#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
357#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) 338#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
@@ -370,27 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
370 xfs_buf_rele(bp); 351 xfs_buf_rele(bp);
371} 352}
372 353
373#define xfs_bpin(bp) xfs_buf_pin(bp)
374#define xfs_bunpin(bp) xfs_buf_unpin(bp)
375#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
376
377#define xfs_biomove(bp, off, len, data, rw) \
378 xfs_buf_iomove((bp), (off), (len), (data), \
379 ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
380
381#define xfs_biozero(bp, off, len) \
382 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
383
384#define xfs_iowait(bp) xfs_buf_iowait(bp)
385
386#define xfs_baread(target, rablkno, ralen) \
387 xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
388
389
390/* 354/*
391 * Handling of buftargs. 355 * Handling of buftargs.
392 */ 356 */
393extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); 357extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
358 struct block_device *, int, const char *);
394extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); 359extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
395extern void xfs_wait_buftarg(xfs_buftarg_t *); 360extern void xfs_wait_buftarg(xfs_buftarg_t *);
396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 361extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
deleted file mode 100644
index 55bddf3b6091..000000000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_CRED_H__
19#define __XFS_CRED_H__
20
21#include <linux/capability.h>
22
23/*
24 * Credentials
25 */
26typedef const struct cred cred_t;
27
28#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
deleted file mode 100644
index a8b0b1685eed..000000000000
--- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DMAPI_PRIV_H__
19#define __XFS_DMAPI_PRIV_H__
20
21/*
22 * Based on IO_ISDIRECT, decide which i_ flag is set.
23 */
24#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
25 DM_FLAGS_IMUX : 0)
26#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
27
28#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index e7839ee49e43..3764d74790ec 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -23,13 +23,13 @@
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h" 25#include "xfs_dir2.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_export.h" 27#include "xfs_export.h"
29#include "xfs_vnodeops.h" 28#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 30#include "xfs_inode.h"
32#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_trace.h"
33 33
34/* 34/*
35 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
@@ -132,8 +132,7 @@ xfs_nfs_get_inode(
132 * fine and not an indication of a corrupted filesystem as clients can 132 * fine and not an indication of a corrupted filesystem as clients can
133 * send invalid file handles and we have to handle it gracefully.. 133 * send invalid file handles and we have to handle it gracefully..
134 */ 134 */
135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
136 XFS_ILOCK_SHARED, &ip);
137 if (error) { 136 if (error) {
138 /* 137 /*
139 * EINVAL means the inode cluster doesn't exist anymore. 138 * EINVAL means the inode cluster doesn't exist anymore.
@@ -148,11 +147,10 @@ xfs_nfs_get_inode(
148 } 147 }
149 148
150 if (ip->i_d.di_gen != generation) { 149 if (ip->i_d.di_gen != generation) {
151 xfs_iput_new(ip, XFS_ILOCK_SHARED); 150 IRELE(ip);
152 return ERR_PTR(-ENOENT); 151 return ERR_PTR(-ENOENT);
153 } 152 }
154 153
155 xfs_iunlock(ip, XFS_ILOCK_SHARED);
156 return VFS_I(ip); 154 return VFS_I(ip);
157} 155}
158 156
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 257a56b127cf..ba8ad422a165 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -22,23 +22,15 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h"
26#include "xfs_trans.h" 25#include "xfs_trans.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 26#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_alloc.h" 28#include "xfs_alloc.h"
33#include "xfs_btree.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h" 29#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
39#include "xfs_bmap.h" 32#include "xfs_bmap.h"
40#include "xfs_error.h" 33#include "xfs_error.h"
41#include "xfs_rw.h"
42#include "xfs_vnodeops.h" 34#include "xfs_vnodeops.h"
43#include "xfs_da_btree.h" 35#include "xfs_da_btree.h"
44#include "xfs_ioctl.h" 36#include "xfs_ioctl.h"
@@ -108,7 +100,7 @@ xfs_file_fsync(
108 int error = 0; 100 int error = 0;
109 int log_flushed = 0; 101 int log_flushed = 0;
110 102
111 xfs_itrace_entry(ip); 103 trace_xfs_file_fsync(ip);
112 104
113 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 105 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
114 return -XFS_ERROR(EIO); 106 return -XFS_ERROR(EIO);
@@ -166,8 +158,7 @@ xfs_file_fsync(
166 * transaction. So we play it safe and fire off the 158 * transaction. So we play it safe and fire off the
167 * transaction anyway. 159 * transaction anyway.
168 */ 160 */
169 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 161 xfs_trans_ijoin(tp, ip);
170 xfs_trans_ihold(tp, ip);
171 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 162 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
172 xfs_trans_set_sync(tp); 163 xfs_trans_set_sync(tp);
173 error = _xfs_trans_commit(tp, 0, &log_flushed); 164 error = _xfs_trans_commit(tp, 0, &log_flushed);
@@ -275,20 +266,6 @@ xfs_file_aio_read(
275 mutex_lock(&inode->i_mutex); 266 mutex_lock(&inode->i_mutex);
276 xfs_ilock(ip, XFS_IOLOCK_SHARED); 267 xfs_ilock(ip, XFS_IOLOCK_SHARED);
277 268
278 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
279 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
280 int iolock = XFS_IOLOCK_SHARED;
281
282 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
283 dmflags, &iolock);
284 if (ret) {
285 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
286 if (unlikely(ioflags & IO_ISDIRECT))
287 mutex_unlock(&inode->i_mutex);
288 return ret;
289 }
290 }
291
292 if (unlikely(ioflags & IO_ISDIRECT)) { 269 if (unlikely(ioflags & IO_ISDIRECT)) {
293 if (inode->i_mapping->nrpages) { 270 if (inode->i_mapping->nrpages) {
294 ret = -xfs_flushinval_pages(ip, 271 ret = -xfs_flushinval_pages(ip,
@@ -321,7 +298,6 @@ xfs_file_splice_read(
321 unsigned int flags) 298 unsigned int flags)
322{ 299{
323 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); 300 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
324 struct xfs_mount *mp = ip->i_mount;
325 int ioflags = 0; 301 int ioflags = 0;
326 ssize_t ret; 302 ssize_t ret;
327 303
@@ -335,18 +311,6 @@ xfs_file_splice_read(
335 311
336 xfs_ilock(ip, XFS_IOLOCK_SHARED); 312 xfs_ilock(ip, XFS_IOLOCK_SHARED);
337 313
338 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
339 int iolock = XFS_IOLOCK_SHARED;
340 int error;
341
342 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
343 FILP_DELAY_FLAG(infilp), &iolock);
344 if (error) {
345 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
346 return -error;
347 }
348 }
349
350 trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 314 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
351 315
352 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 316 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
@@ -367,7 +331,6 @@ xfs_file_splice_write(
367{ 331{
368 struct inode *inode = outfilp->f_mapping->host; 332 struct inode *inode = outfilp->f_mapping->host;
369 struct xfs_inode *ip = XFS_I(inode); 333 struct xfs_inode *ip = XFS_I(inode);
370 struct xfs_mount *mp = ip->i_mount;
371 xfs_fsize_t isize, new_size; 334 xfs_fsize_t isize, new_size;
372 int ioflags = 0; 335 int ioflags = 0;
373 ssize_t ret; 336 ssize_t ret;
@@ -382,18 +345,6 @@ xfs_file_splice_write(
382 345
383 xfs_ilock(ip, XFS_IOLOCK_EXCL); 346 xfs_ilock(ip, XFS_IOLOCK_EXCL);
384 347
385 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
386 int iolock = XFS_IOLOCK_EXCL;
387 int error;
388
389 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
390 FILP_DELAY_FLAG(outfilp), &iolock);
391 if (error) {
392 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
393 return -error;
394 }
395 }
396
397 new_size = *ppos + count; 348 new_size = *ppos + count;
398 349
399 xfs_ilock(ip, XFS_ILOCK_EXCL); 350 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -463,7 +414,7 @@ xfs_zero_last_block(
463 last_fsb = XFS_B_TO_FSBT(mp, isize); 414 last_fsb = XFS_B_TO_FSBT(mp, isize);
464 nimaps = 1; 415 nimaps = 1;
465 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, 416 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
466 &nimaps, NULL, NULL); 417 &nimaps, NULL);
467 if (error) { 418 if (error) {
468 return error; 419 return error;
469 } 420 }
@@ -558,7 +509,7 @@ xfs_zero_eof(
558 nimaps = 1; 509 nimaps = 1;
559 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 510 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
560 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 511 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
561 0, NULL, 0, &imap, &nimaps, NULL, NULL); 512 0, NULL, 0, &imap, &nimaps, NULL);
562 if (error) { 513 if (error) {
563 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 514 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
564 return error; 515 return error;
@@ -627,7 +578,6 @@ xfs_file_aio_write(
627 int ioflags = 0; 578 int ioflags = 0;
628 xfs_fsize_t isize, new_size; 579 xfs_fsize_t isize, new_size;
629 int iolock; 580 int iolock;
630 int eventsent = 0;
631 size_t ocount = 0, count; 581 size_t ocount = 0, count;
632 int need_i_mutex; 582 int need_i_mutex;
633 583
@@ -673,33 +623,6 @@ start:
673 goto out_unlock_mutex; 623 goto out_unlock_mutex;
674 } 624 }
675 625
676 if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
677 !(ioflags & IO_INVIS) && !eventsent)) {
678 int dmflags = FILP_DELAY_FLAG(file);
679
680 if (need_i_mutex)
681 dmflags |= DM_FLAGS_IMUX;
682
683 xfs_iunlock(ip, XFS_ILOCK_EXCL);
684 error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
685 pos, count, dmflags, &iolock);
686 if (error) {
687 goto out_unlock_internal;
688 }
689 xfs_ilock(ip, XFS_ILOCK_EXCL);
690 eventsent = 1;
691
692 /*
693 * The iolock was dropped and reacquired in XFS_SEND_DATA
694 * so we have to recheck the size when appending.
695 * We will only "goto start;" once, since having sent the
696 * event prevents another call to XFS_SEND_DATA, which is
697 * what allows the size to change in the first place.
698 */
699 if ((file->f_flags & O_APPEND) && pos != ip->i_size)
700 goto start;
701 }
702
703 if (ioflags & IO_ISDIRECT) { 626 if (ioflags & IO_ISDIRECT) {
704 xfs_buftarg_t *target = 627 xfs_buftarg_t *target =
705 XFS_IS_REALTIME_INODE(ip) ? 628 XFS_IS_REALTIME_INODE(ip) ?
@@ -830,22 +753,6 @@ write_retry:
830 xfs_iunlock(ip, XFS_ILOCK_EXCL); 753 xfs_iunlock(ip, XFS_ILOCK_EXCL);
831 } 754 }
832 755
833 if (ret == -ENOSPC &&
834 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
835 xfs_iunlock(ip, iolock);
836 if (need_i_mutex)
837 mutex_unlock(&inode->i_mutex);
838 error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
839 DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
840 0, 0, 0); /* Delay flag intentionally unused */
841 if (need_i_mutex)
842 mutex_lock(&inode->i_mutex);
843 xfs_ilock(ip, iolock);
844 if (error)
845 goto out_unlock_internal;
846 goto start;
847 }
848
849 error = -ret; 756 error = -ret;
850 if (ret <= 0) 757 if (ret <= 0)
851 goto out_unlock_internal; 758 goto out_unlock_internal;
@@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = {
1014 .open = xfs_file_open, 921 .open = xfs_file_open,
1015 .release = xfs_file_release, 922 .release = xfs_file_release,
1016 .fsync = xfs_file_fsync, 923 .fsync = xfs_file_fsync,
1017#ifdef HAVE_FOP_OPEN_EXEC
1018 .open_exec = xfs_file_open_exec,
1019#endif
1020}; 924};
1021 925
1022const struct file_operations xfs_dir_file_operations = { 926const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index b6918d76bc7b..ed88ed16811c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -21,10 +21,6 @@
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h" 22#include "xfs_trace.h"
23 23
24int fs_noerr(void) { return 0; }
25int fs_nosys(void) { return ENOSYS; }
26void fs_noval(void) { return; }
27
28/* 24/*
29 * note: all filemap functions return negative error codes. These 25 * note: all filemap functions return negative error codes. These
30 * need to be inverted before returning to the xfs core functions. 26 * need to be inverted before returning to the xfs core functions.
@@ -36,10 +32,9 @@ xfs_tosspages(
36 xfs_off_t last, 32 xfs_off_t last,
37 int fiopt) 33 int fiopt)
38{ 34{
39 struct address_space *mapping = VFS_I(ip)->i_mapping; 35 /* can't toss partial tail pages, so mask them out */
40 36 last &= ~(PAGE_SIZE - 1);
41 if (mapping->nrpages) 37 truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
42 truncate_inode_pages(mapping, first);
43} 38}
44 39
45int 40int
@@ -54,12 +49,11 @@ xfs_flushinval_pages(
54 49
55 trace_xfs_pagecache_inval(ip, first, last); 50 trace_xfs_pagecache_inval(ip, first, last);
56 51
57 if (mapping->nrpages) { 52 xfs_iflags_clear(ip, XFS_ITRUNCATED);
58 xfs_iflags_clear(ip, XFS_ITRUNCATED); 53 ret = filemap_write_and_wait_range(mapping, first,
59 ret = filemap_write_and_wait(mapping); 54 last == -1 ? LLONG_MAX : last);
60 if (!ret) 55 if (!ret)
61 truncate_inode_pages(mapping, first); 56 truncate_inode_pages_range(mapping, first, last);
62 }
63 return -ret; 57 return -ret;
64} 58}
65 59
@@ -75,10 +69,9 @@ xfs_flush_pages(
75 int ret = 0; 69 int ret = 0;
76 int ret2; 70 int ret2;
77 71
78 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 72 xfs_iflags_clear(ip, XFS_ITRUNCATED);
79 xfs_iflags_clear(ip, XFS_ITRUNCATED); 73 ret = -filemap_fdatawrite_range(mapping, first,
80 ret = -filemap_fdatawrite(mapping); 74 last == -1 ? LLONG_MAX : last);
81 }
82 if (flags & XBF_ASYNC) 75 if (flags & XBF_ASYNC)
83 return ret; 76 return ret;
84 ret2 = xfs_wait_on_pages(ip, first, last); 77 ret2 = xfs_wait_on_pages(ip, first, last);
@@ -95,7 +88,9 @@ xfs_wait_on_pages(
95{ 88{
96 struct address_space *mapping = VFS_I(ip)->i_mapping; 89 struct address_space *mapping = VFS_I(ip)->i_mapping;
97 90
98 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) 91 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
99 return -filemap_fdatawait(mapping); 92 return -filemap_fdatawait_range(mapping, first,
93 last == -1 ? ip->i_size - 1 : last);
94 }
100 return 0; 95 return 0;
101} 96}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
deleted file mode 100644
index 82bb19b2599e..000000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_FS_SUBR_H__
19#define __XFS_FS_SUBR_H__
20
21extern int fs_noerr(void);
22extern int fs_nosys(void);
23extern void fs_noval(void);
24
25#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 2ae8b1ccb02e..76e81cff70b9 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -16,7 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_cred.h"
20#include "xfs_sysctl.h" 19#include "xfs_sysctl.h"
21 20
22/* 21/*
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
deleted file mode 100644
index 69f71caf061c..000000000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_GLOBALS_H__
19#define __XFS_GLOBALS_H__
20
21extern uint64_t xfs_panic_mask; /* set to cause more panics */
22
23#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index e59a81062830..2ea238f6d38e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,24 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_ioctl.h" 31#include "xfs_ioctl.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
41#include "xfs_itable.h" 33#include "xfs_itable.h"
42#include "xfs_error.h" 34#include "xfs_error.h"
43#include "xfs_rw.h"
44#include "xfs_attr.h" 35#include "xfs_attr.h"
45#include "xfs_bmap.h" 36#include "xfs_bmap.h"
46#include "xfs_buf_item.h" 37#include "xfs_buf_item.h"
@@ -794,10 +785,12 @@ xfs_ioc_fsgetxattr(
794{ 785{
795 struct fsxattr fa; 786 struct fsxattr fa;
796 787
788 memset(&fa, 0, sizeof(struct fsxattr));
789
797 xfs_ilock(ip, XFS_ILOCK_SHARED); 790 xfs_ilock(ip, XFS_ILOCK_SHARED);
798 fa.fsx_xflags = xfs_ip2xflags(ip); 791 fa.fsx_xflags = xfs_ip2xflags(ip);
799 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; 792 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
800 fa.fsx_projid = ip->i_d.di_projid; 793 fa.fsx_projid = xfs_get_projid(ip);
801 794
802 if (attr) { 795 if (attr) {
803 if (ip->i_afp) { 796 if (ip->i_afp) {
@@ -908,7 +901,7 @@ xfs_ioctl_setattr(
908 struct xfs_dquot *olddquot = NULL; 901 struct xfs_dquot *olddquot = NULL;
909 int code; 902 int code;
910 903
911 xfs_itrace_entry(ip); 904 trace_xfs_ioctl_setattr(ip);
912 905
913 if (mp->m_flags & XFS_MOUNT_RDONLY) 906 if (mp->m_flags & XFS_MOUNT_RDONLY)
914 return XFS_ERROR(EROFS); 907 return XFS_ERROR(EROFS);
@@ -916,6 +909,13 @@ xfs_ioctl_setattr(
916 return XFS_ERROR(EIO); 909 return XFS_ERROR(EIO);
917 910
918 /* 911 /*
912 * Disallow 32bit project ids when projid32bit feature is not enabled.
913 */
914 if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
915 !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
916 return XFS_ERROR(EINVAL);
917
918 /*
919 * If disk quotas is on, we make sure that the dquots do exist on disk, 919 * If disk quotas is on, we make sure that the dquots do exist on disk,
920 * before we start any other transactions. Trying to do this later 920 * before we start any other transactions. Trying to do this later
921 * is messy. We don't care to take a readlock to look at the ids 921 * is messy. We don't care to take a readlock to look at the ids
@@ -961,7 +961,7 @@ xfs_ioctl_setattr(
961 if (mask & FSX_PROJID) { 961 if (mask & FSX_PROJID) {
962 if (XFS_IS_QUOTA_RUNNING(mp) && 962 if (XFS_IS_QUOTA_RUNNING(mp) &&
963 XFS_IS_PQUOTA_ON(mp) && 963 XFS_IS_PQUOTA_ON(mp) &&
964 ip->i_d.di_projid != fa->fsx_projid) { 964 xfs_get_projid(ip) != fa->fsx_projid) {
965 ASSERT(tp); 965 ASSERT(tp);
966 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, 966 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
967 capable(CAP_FOWNER) ? 967 capable(CAP_FOWNER) ?
@@ -1043,8 +1043,7 @@ xfs_ioctl_setattr(
1043 } 1043 }
1044 } 1044 }
1045 1045
1046 xfs_trans_ijoin(tp, ip, lock_flags); 1046 xfs_trans_ijoin(tp, ip);
1047 xfs_trans_ihold(tp, ip);
1048 1047
1049 /* 1048 /*
1050 * Change file ownership. Must be the owner or privileged. 1049 * Change file ownership. Must be the owner or privileged.
@@ -1064,12 +1063,12 @@ xfs_ioctl_setattr(
1064 * Change the ownerships and register quota modifications 1063 * Change the ownerships and register quota modifications
1065 * in the transaction. 1064 * in the transaction.
1066 */ 1065 */
1067 if (ip->i_d.di_projid != fa->fsx_projid) { 1066 if (xfs_get_projid(ip) != fa->fsx_projid) {
1068 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { 1067 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
1069 olddquot = xfs_qm_vop_chown(tp, ip, 1068 olddquot = xfs_qm_vop_chown(tp, ip,
1070 &ip->i_gdquot, gdqp); 1069 &ip->i_gdquot, gdqp);
1071 } 1070 }
1072 ip->i_d.di_projid = fa->fsx_projid; 1071 xfs_set_projid(ip, fa->fsx_projid);
1073 1072
1074 /* 1073 /*
1075 * We may have to rev the inode as well as 1074 * We may have to rev the inode as well as
@@ -1089,8 +1088,8 @@ xfs_ioctl_setattr(
1089 xfs_diflags_to_linux(ip); 1088 xfs_diflags_to_linux(ip);
1090 } 1089 }
1091 1090
1091 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1092 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1092 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1093 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1094 1093
1095 XFS_STATS_INC(xs_ig_attrchg); 1094 XFS_STATS_INC(xs_ig_attrchg);
1096 1095
@@ -1116,16 +1115,7 @@ xfs_ioctl_setattr(
1116 xfs_qm_dqrele(udqp); 1115 xfs_qm_dqrele(udqp);
1117 xfs_qm_dqrele(gdqp); 1116 xfs_qm_dqrele(gdqp);
1118 1117
1119 if (code) 1118 return code;
1120 return code;
1121
1122 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
1123 XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
1124 NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
1125 (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
1126 }
1127
1128 return 0;
1129 1119
1130 error_return: 1120 error_return:
1131 xfs_qm_dqrele(udqp); 1121 xfs_qm_dqrele(udqp);
@@ -1301,7 +1291,7 @@ xfs_file_ioctl(
1301 if (filp->f_mode & FMODE_NOCMTIME) 1291 if (filp->f_mode & FMODE_NOCMTIME)
1302 ioflags |= IO_INVIS; 1292 ioflags |= IO_INVIS;
1303 1293
1304 xfs_itrace_entry(ip); 1294 trace_xfs_file_ioctl(ip);
1305 1295
1306 switch (cmd) { 1296 switch (cmd) {
1307 case XFS_IOC_ALLOCSP: 1297 case XFS_IOC_ALLOCSP:
@@ -1311,7 +1301,8 @@ xfs_file_ioctl(
1311 case XFS_IOC_ALLOCSP64: 1301 case XFS_IOC_ALLOCSP64:
1312 case XFS_IOC_FREESP64: 1302 case XFS_IOC_FREESP64:
1313 case XFS_IOC_RESVSP64: 1303 case XFS_IOC_RESVSP64:
1314 case XFS_IOC_UNRESVSP64: { 1304 case XFS_IOC_UNRESVSP64:
1305 case XFS_IOC_ZERO_RANGE: {
1315 xfs_flock64_t bf; 1306 xfs_flock64_t bf;
1316 1307
1317 if (copy_from_user(&bf, arg, sizeof(bf))) 1308 if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 52ed49e6465c..b3486dfa5520 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -28,12 +28,8 @@
28#include "xfs_trans.h" 28#include "xfs_trans.h"
29#include "xfs_sb.h" 29#include "xfs_sb.h"
30#include "xfs_ag.h" 30#include "xfs_ag.h"
31#include "xfs_dir2.h"
32#include "xfs_dmapi.h"
33#include "xfs_mount.h" 31#include "xfs_mount.h"
34#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_vnode.h" 33#include "xfs_vnode.h"
38#include "xfs_dinode.h" 34#include "xfs_dinode.h"
39#include "xfs_inode.h" 35#include "xfs_inode.h"
@@ -168,7 +164,8 @@ xfs_ioctl32_bstat_copyin(
168 get_user(bstat->bs_extsize, &bstat32->bs_extsize) || 164 get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
169 get_user(bstat->bs_extents, &bstat32->bs_extents) || 165 get_user(bstat->bs_extents, &bstat32->bs_extents) ||
170 get_user(bstat->bs_gen, &bstat32->bs_gen) || 166 get_user(bstat->bs_gen, &bstat32->bs_gen) ||
171 get_user(bstat->bs_projid, &bstat32->bs_projid) || 167 get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
168 get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
172 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || 169 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
173 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || 170 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
174 get_user(bstat->bs_aextents, &bstat32->bs_aextents)) 171 get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -222,6 +219,7 @@ xfs_bulkstat_one_fmt_compat(
222 put_user(buffer->bs_extents, &p32->bs_extents) || 219 put_user(buffer->bs_extents, &p32->bs_extents) ||
223 put_user(buffer->bs_gen, &p32->bs_gen) || 220 put_user(buffer->bs_gen, &p32->bs_gen) ||
224 put_user(buffer->bs_projid, &p32->bs_projid) || 221 put_user(buffer->bs_projid, &p32->bs_projid) ||
222 put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) ||
225 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || 223 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
226 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 224 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
227 put_user(buffer->bs_aextents, &p32->bs_aextents)) 225 put_user(buffer->bs_aextents, &p32->bs_aextents))
@@ -544,7 +542,7 @@ xfs_file_compat_ioctl(
544 if (filp->f_mode & FMODE_NOCMTIME) 542 if (filp->f_mode & FMODE_NOCMTIME)
545 ioflags |= IO_INVIS; 543 ioflags |= IO_INVIS;
546 544
547 xfs_itrace_entry(ip); 545 trace_xfs_file_compat_ioctl(ip);
548 546
549 switch (cmd) { 547 switch (cmd) {
550 /* No size or alignment issues on any arch */ 548 /* No size or alignment issues on any arch */
@@ -578,6 +576,7 @@ xfs_file_compat_ioctl(
578 case XFS_IOC_FSGEOMETRY_V1: 576 case XFS_IOC_FSGEOMETRY_V1:
579 case XFS_IOC_FSGROWFSDATA: 577 case XFS_IOC_FSGROWFSDATA:
580 case XFS_IOC_FSGROWFSRT: 578 case XFS_IOC_FSGROWFSRT:
579 case XFS_IOC_ZERO_RANGE:
581 return xfs_file_ioctl(filp, cmd, p); 580 return xfs_file_ioctl(filp, cmd, p);
582#else 581#else
583 case XFS_IOC_ALLOCSP_32: 582 case XFS_IOC_ALLOCSP_32:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 1024c4f8ba0d..08b605792a99 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat {
65 __s32 bs_extsize; /* extent size */ 65 __s32 bs_extsize; /* extent size */
66 __s32 bs_extents; /* number of extents */ 66 __s32 bs_extents; /* number of extents */
67 __u32 bs_gen; /* generation count */ 67 __u32 bs_gen; /* generation count */
68 __u16 bs_projid; /* project id */ 68 __u16 bs_projid_lo; /* lower part of project id */
69 unsigned char bs_pad[14]; /* pad space, unused */ 69#define bs_projid bs_projid_lo /* (previously just bs_projid) */
70 __u16 bs_projid_hi; /* high part of project id */
71 unsigned char bs_pad[12]; /* pad space, unused */
70 __u32 bs_dmevmask; /* DMIG event mask */ 72 __u32 bs_dmevmask; /* DMIG event mask */
71 __u16 bs_dmstate; /* DMIG state info */ 73 __u16 bs_dmstate; /* DMIG state info */
72 __u16 bs_aextents; /* attribute number of extents */ 74 __u16 bs_aextents; /* attribute number of extents */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 44f0b2de153e..96107efc0c61 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -24,21 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h"
30#include "xfs_quota.h" 28#include "xfs_quota.h"
31#include "xfs_mount.h" 29#include "xfs_mount.h"
32#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 31#include "xfs_dinode.h"
38#include "xfs_inode.h" 32#include "xfs_inode.h"
39#include "xfs_bmap.h" 33#include "xfs_bmap.h"
40#include "xfs_btree.h"
41#include "xfs_ialloc.h"
42#include "xfs_rtalloc.h" 34#include "xfs_rtalloc.h"
43#include "xfs_error.h" 35#include "xfs_error.h"
44#include "xfs_itable.h" 36#include "xfs_itable.h"
@@ -88,7 +80,7 @@ xfs_mark_inode_dirty_sync(
88{ 80{
89 struct inode *inode = VFS_I(ip); 81 struct inode *inode = VFS_I(ip);
90 82
91 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 83 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
92 mark_inode_dirty_sync(inode); 84 mark_inode_dirty_sync(inode);
93} 85}
94 86
@@ -98,46 +90,11 @@ xfs_mark_inode_dirty(
98{ 90{
99 struct inode *inode = VFS_I(ip); 91 struct inode *inode = VFS_I(ip);
100 92
101 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 93 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
102 mark_inode_dirty(inode); 94 mark_inode_dirty(inode);
103} 95}
104 96
105/* 97/*
106 * Change the requested timestamp in the given inode.
107 * We don't lock across timestamp updates, and we don't log them but
108 * we do record the fact that there is dirty information in core.
109 */
110void
111xfs_ichgtime(
112 xfs_inode_t *ip,
113 int flags)
114{
115 struct inode *inode = VFS_I(ip);
116 timespec_t tv;
117 int sync_it = 0;
118
119 tv = current_fs_time(inode->i_sb);
120
121 if ((flags & XFS_ICHGTIME_MOD) &&
122 !timespec_equal(&inode->i_mtime, &tv)) {
123 inode->i_mtime = tv;
124 sync_it = 1;
125 }
126 if ((flags & XFS_ICHGTIME_CHG) &&
127 !timespec_equal(&inode->i_ctime, &tv)) {
128 inode->i_ctime = tv;
129 sync_it = 1;
130 }
131
132 /*
133 * Update complete - now make sure everyone knows that the inode
134 * is dirty.
135 */
136 if (sync_it)
137 xfs_mark_inode_dirty_sync(ip);
138}
139
140/*
141 * Hook in SELinux. This is not quite correct yet, what we really need 98 * Hook in SELinux. This is not quite correct yet, what we really need
142 * here (as we do for default ACLs) is a mechanism by which creation of 99 * here (as we do for default ACLs) is a mechanism by which creation of
143 * these attrs can be journalled at inode creation time (along with the 100 * these attrs can be journalled at inode creation time (along with the
@@ -232,7 +189,7 @@ xfs_vn_mknod(
232 } 189 }
233 190
234 xfs_dentry_to_name(&name, dentry); 191 xfs_dentry_to_name(&name, dentry);
235 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); 192 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
236 if (unlikely(error)) 193 if (unlikely(error))
237 goto out_free_acl; 194 goto out_free_acl;
238 195
@@ -360,7 +317,7 @@ xfs_vn_link(
360 if (unlikely(error)) 317 if (unlikely(error))
361 return -error; 318 return -error;
362 319
363 atomic_inc(&inode->i_count); 320 ihold(inode);
364 d_instantiate(dentry, inode); 321 d_instantiate(dentry, inode);
365 return 0; 322 return 0;
366} 323}
@@ -405,7 +362,7 @@ xfs_vn_symlink(
405 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); 362 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
406 xfs_dentry_to_name(&name, dentry); 363 xfs_dentry_to_name(&name, dentry);
407 364
408 error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); 365 error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
409 if (unlikely(error)) 366 if (unlikely(error))
410 goto out; 367 goto out;
411 368
@@ -496,7 +453,7 @@ xfs_vn_getattr(
496 struct xfs_inode *ip = XFS_I(inode); 453 struct xfs_inode *ip = XFS_I(inode);
497 struct xfs_mount *mp = ip->i_mount; 454 struct xfs_mount *mp = ip->i_mount;
498 455
499 xfs_itrace_entry(ip); 456 trace_xfs_getattr(ip);
500 457
501 if (XFS_FORCED_SHUTDOWN(mp)) 458 if (XFS_FORCED_SHUTDOWN(mp))
502 return XFS_ERROR(EIO); 459 return XFS_ERROR(EIO);
@@ -548,21 +505,6 @@ xfs_vn_setattr(
548 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); 505 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
549} 506}
550 507
551/*
552 * block_truncate_page can return an error, but we can't propagate it
553 * at all here. Leave a complaint + stack trace in the syslog because
554 * this could be bad. If it is bad, we need to propagate the error further.
555 */
556STATIC void
557xfs_vn_truncate(
558 struct inode *inode)
559{
560 int error;
561 error = block_truncate_page(inode->i_mapping, inode->i_size,
562 xfs_get_blocks);
563 WARN_ON(error);
564}
565
566STATIC long 508STATIC long
567xfs_vn_fallocate( 509xfs_vn_fallocate(
568 struct inode *inode, 510 struct inode *inode,
@@ -687,7 +629,7 @@ xfs_vn_fiemap(
687 fieinfo->fi_extents_max + 1; 629 fieinfo->fi_extents_max + 1;
688 bm.bmv_count = min_t(__s32, bm.bmv_count, 630 bm.bmv_count = min_t(__s32, bm.bmv_count,
689 (PAGE_SIZE * 16 / sizeof(struct getbmapx))); 631 (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
690 bm.bmv_iflags = BMV_IF_PREALLOC; 632 bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
691 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 633 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
692 bm.bmv_iflags |= BMV_IF_ATTRFORK; 634 bm.bmv_iflags |= BMV_IF_ATTRFORK;
693 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) 635 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
@@ -702,7 +644,6 @@ xfs_vn_fiemap(
702 644
703static const struct inode_operations xfs_inode_operations = { 645static const struct inode_operations xfs_inode_operations = {
704 .check_acl = xfs_check_acl, 646 .check_acl = xfs_check_acl,
705 .truncate = xfs_vn_truncate,
706 .getattr = xfs_vn_getattr, 647 .getattr = xfs_vn_getattr,
707 .setattr = xfs_vn_setattr, 648 .setattr = xfs_vn_setattr,
708 .setxattr = generic_setxattr, 649 .setxattr = generic_setxattr,
@@ -819,7 +760,9 @@ xfs_setup_inode(
819 760
820 inode->i_ino = ip->i_ino; 761 inode->i_ino = ip->i_ino;
821 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
822 inode_add_to_lists(ip->i_mount->m_super, inode); 763
764 inode_sb_list_add(inode);
765 insert_inode_hash(inode);
823 766
824 inode->i_mode = ip->i_d.di_mode; 767 inode->i_mode = ip->i_d.di_mode;
825 inode->i_nlink = ip->i_d.di_nlink; 768 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index facfb323a706..214ddd71ff79 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -71,6 +71,7 @@
71#include <linux/random.h> 71#include <linux/random.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/writeback.h> 73#include <linux/writeback.h>
74#include <linux/capability.h>
74 75
75#include <asm/page.h> 76#include <asm/page.h>
76#include <asm/div64.h> 77#include <asm/div64.h>
@@ -79,15 +80,12 @@
79#include <asm/byteorder.h> 80#include <asm/byteorder.h>
80#include <asm/unaligned.h> 81#include <asm/unaligned.h>
81 82
82#include <xfs_cred.h>
83#include <xfs_vnode.h> 83#include <xfs_vnode.h>
84#include <xfs_stats.h> 84#include <xfs_stats.h>
85#include <xfs_sysctl.h> 85#include <xfs_sysctl.h>
86#include <xfs_iops.h> 86#include <xfs_iops.h>
87#include <xfs_aops.h> 87#include <xfs_aops.h>
88#include <xfs_super.h> 88#include <xfs_super.h>
89#include <xfs_globals.h>
90#include <xfs_fs_subr.h>
91#include <xfs_buf.h> 89#include <xfs_buf.h>
92 90
93/* 91/*
@@ -145,7 +143,7 @@
145#define SYNCHRONIZE() barrier() 143#define SYNCHRONIZE() barrier()
146#define __return_address __builtin_return_address(0) 144#define __return_address __builtin_return_address(0)
147 145
148#define dfltprid 0 146#define XFS_PROJID_DEFAULT 0
149#define MAXPATHLEN 1024 147#define MAXPATHLEN 1024
150 148
151#define MIN(a,b) (min(a,b)) 149#define MIN(a,b) (min(a,b))
@@ -157,8 +155,6 @@
157 */ 155 */
158#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 156#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
159#define xfs_stack_trace() dump_stack() 157#define xfs_stack_trace() dump_stack()
160#define xfs_itruncate_data(ip, off) \
161 (-vmtruncate(VFS_I(ip), (off)))
162 158
163 159
164/* Move the kernel do_div definition off to one side */ 160/* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 067cafbfc635..29b9d642e93d 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -16,7 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_dmapi.h"
20#include "xfs_sb.h" 19#include "xfs_sb.h"
21#include "xfs_inum.h" 20#include "xfs_inum.h"
22#include "xfs_log.h" 21#include "xfs_log.h"
@@ -69,15 +68,15 @@ xfs_fs_set_xstate(
69 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) 68 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
70 return -ENOSYS; 69 return -ENOSYS;
71 70
72 if (uflags & XFS_QUOTA_UDQ_ACCT) 71 if (uflags & FS_QUOTA_UDQ_ACCT)
73 flags |= XFS_UQUOTA_ACCT; 72 flags |= XFS_UQUOTA_ACCT;
74 if (uflags & XFS_QUOTA_PDQ_ACCT) 73 if (uflags & FS_QUOTA_PDQ_ACCT)
75 flags |= XFS_PQUOTA_ACCT; 74 flags |= XFS_PQUOTA_ACCT;
76 if (uflags & XFS_QUOTA_GDQ_ACCT) 75 if (uflags & FS_QUOTA_GDQ_ACCT)
77 flags |= XFS_GQUOTA_ACCT; 76 flags |= XFS_GQUOTA_ACCT;
78 if (uflags & XFS_QUOTA_UDQ_ENFD) 77 if (uflags & FS_QUOTA_UDQ_ENFD)
79 flags |= XFS_UQUOTA_ENFD; 78 flags |= XFS_UQUOTA_ENFD;
80 if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) 79 if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
81 flags |= XFS_OQUOTA_ENFD; 80 flags |= XFS_OQUOTA_ENFD;
82 81
83 switch (op) { 82 switch (op) {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 80938c736c27..cf808782c065 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -25,14 +25,11 @@
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 27#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 28#include "xfs_quota.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 33#include "xfs_dinode.h"
37#include "xfs_inode.h" 34#include "xfs_inode.h"
38#include "xfs_btree.h" 35#include "xfs_btree.h"
@@ -43,12 +40,10 @@
43#include "xfs_error.h" 40#include "xfs_error.h"
44#include "xfs_itable.h" 41#include "xfs_itable.h"
45#include "xfs_fsops.h" 42#include "xfs_fsops.h"
46#include "xfs_rw.h"
47#include "xfs_attr.h" 43#include "xfs_attr.h"
48#include "xfs_buf_item.h" 44#include "xfs_buf_item.h"
49#include "xfs_utils.h" 45#include "xfs_utils.h"
50#include "xfs_vnodeops.h" 46#include "xfs_vnodeops.h"
51#include "xfs_version.h"
52#include "xfs_log_priv.h" 47#include "xfs_log_priv.h"
53#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
54#include "xfs_filestream.h" 49#include "xfs_filestream.h"
@@ -94,7 +89,6 @@ mempool_t *xfs_ioend_pool;
94#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 89#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
95 * unwritten extent conversion */ 90 * unwritten extent conversion */
96#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ 91#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
97#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
98#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 92#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
99#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 93#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
100#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ 94#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
@@ -116,9 +110,6 @@ mempool_t *xfs_ioend_pool;
116#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ 110#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
117#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ 111#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
118#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ 112#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
119#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
120#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
121#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
122#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ 113#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
123#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ 114#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
124 115
@@ -172,15 +163,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
172STATIC int 163STATIC int
173xfs_parseargs( 164xfs_parseargs(
174 struct xfs_mount *mp, 165 struct xfs_mount *mp,
175 char *options, 166 char *options)
176 char **mtpt)
177{ 167{
178 struct super_block *sb = mp->m_super; 168 struct super_block *sb = mp->m_super;
179 char *this_char, *value, *eov; 169 char *this_char, *value, *eov;
180 int dsunit = 0; 170 int dsunit = 0;
181 int dswidth = 0; 171 int dswidth = 0;
182 int iosize = 0; 172 int iosize = 0;
183 int dmapi_implies_ikeep = 1;
184 __uint8_t iosizelog = 0; 173 __uint8_t iosizelog = 0;
185 174
186 /* 175 /*
@@ -243,15 +232,10 @@ xfs_parseargs(
243 if (!mp->m_logname) 232 if (!mp->m_logname)
244 return ENOMEM; 233 return ENOMEM;
245 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 234 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
246 if (!value || !*value) { 235 cmn_err(CE_WARN,
247 cmn_err(CE_WARN, 236 "XFS: %s option not allowed on this system",
248 "XFS: %s option requires an argument", 237 this_char);
249 this_char); 238 return EINVAL;
250 return EINVAL;
251 }
252 *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
253 if (!*mtpt)
254 return ENOMEM;
255 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 239 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
256 if (!value || !*value) { 240 if (!value || !*value) {
257 cmn_err(CE_WARN, 241 cmn_err(CE_WARN,
@@ -288,8 +272,6 @@ xfs_parseargs(
288 mp->m_flags &= ~XFS_MOUNT_GRPID; 272 mp->m_flags &= ~XFS_MOUNT_GRPID;
289 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 273 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
290 mp->m_flags |= XFS_MOUNT_WSYNC; 274 mp->m_flags |= XFS_MOUNT_WSYNC;
291 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
292 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
293 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { 275 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
294 mp->m_flags |= XFS_MOUNT_NORECOVERY; 276 mp->m_flags |= XFS_MOUNT_NORECOVERY;
295 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { 277 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
@@ -329,7 +311,6 @@ xfs_parseargs(
329 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 311 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
330 mp->m_flags |= XFS_MOUNT_IKEEP; 312 mp->m_flags |= XFS_MOUNT_IKEEP;
331 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 313 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
332 dmapi_implies_ikeep = 0;
333 mp->m_flags &= ~XFS_MOUNT_IKEEP; 314 mp->m_flags &= ~XFS_MOUNT_IKEEP;
334 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { 315 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
335 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; 316 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
@@ -370,12 +351,6 @@ xfs_parseargs(
370 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { 351 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
371 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 352 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
372 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
373 } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
374 mp->m_flags |= XFS_MOUNT_DMAPI;
375 } else if (!strcmp(this_char, MNTOPT_XDSM)) {
376 mp->m_flags |= XFS_MOUNT_DMAPI;
377 } else if (!strcmp(this_char, MNTOPT_DMI)) {
378 mp->m_flags |= XFS_MOUNT_DMAPI;
379 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
380 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 mp->m_flags |= XFS_MOUNT_DELAYLOG;
381 cmn_err(CE_WARN, 356 cmn_err(CE_WARN,
@@ -387,9 +362,11 @@ xfs_parseargs(
387 cmn_err(CE_WARN, 362 cmn_err(CE_WARN,
388 "XFS: ihashsize no longer used, option is deprecated."); 363 "XFS: ihashsize no longer used, option is deprecated.");
389 } else if (!strcmp(this_char, "osyncisdsync")) { 364 } else if (!strcmp(this_char, "osyncisdsync")) {
390 /* no-op, this is now the default */
391 cmn_err(CE_WARN, 365 cmn_err(CE_WARN,
392 "XFS: osyncisdsync is now the default, option is deprecated."); 366 "XFS: osyncisdsync has no effect, option is deprecated.");
367 } else if (!strcmp(this_char, "osyncisosync")) {
368 cmn_err(CE_WARN,
369 "XFS: osyncisosync has no effect, option is deprecated.");
393 } else if (!strcmp(this_char, "irixsgid")) { 370 } else if (!strcmp(this_char, "irixsgid")) {
394 cmn_err(CE_WARN, 371 cmn_err(CE_WARN,
395 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); 372 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
@@ -430,12 +407,6 @@ xfs_parseargs(
430 return EINVAL; 407 return EINVAL;
431 } 408 }
432 409
433 if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
434 printk("XFS: %s option needs the mount point option as well\n",
435 MNTOPT_DMAPI);
436 return EINVAL;
437 }
438
439 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 410 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
440 cmn_err(CE_WARN, 411 cmn_err(CE_WARN,
441 "XFS: sunit and swidth must be specified together"); 412 "XFS: sunit and swidth must be specified together");
@@ -449,18 +420,6 @@ xfs_parseargs(
449 return EINVAL; 420 return EINVAL;
450 } 421 }
451 422
452 /*
453 * Applications using DMI filesystems often expect the
454 * inode generation number to be monotonically increasing.
455 * If we delete inode chunks we break this assumption, so
456 * keep unused inode chunks on disk for DMI filesystems
457 * until we come up with a better solution.
458 * Note that if "ikeep" or "noikeep" mount options are
459 * supplied, then they are honored.
460 */
461 if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
462 mp->m_flags |= XFS_MOUNT_IKEEP;
463
464done: 423done:
465 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { 424 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
466 /* 425 /*
@@ -539,10 +498,8 @@ xfs_showargs(
539 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, 498 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
540 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 499 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
541 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, 500 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
542 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
543 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, 501 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
544 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 502 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
545 { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
546 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 503 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
547 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, 504 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
548 { 0, NULL } 505 { 0, NULL }
@@ -619,7 +576,7 @@ xfs_max_file_offset(
619 576
620 /* Figure out maximum filesize, on Linux this can depend on 577 /* Figure out maximum filesize, on Linux this can depend on
621 * the filesystem blocksize (on 32 bit platforms). 578 * the filesystem blocksize (on 32 bit platforms).
622 * __block_prepare_write does this in an [unsigned] long... 579 * __block_write_begin does this in an [unsigned] long...
623 * page->index << (PAGE_CACHE_SHIFT - bbits) 580 * page->index << (PAGE_CACHE_SHIFT - bbits)
624 * So, for page sized blocks (4K on 32 bit platforms), 581 * So, for page sized blocks (4K on 32 bit platforms),
625 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is 582 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
@@ -687,7 +644,7 @@ xfs_barrier_test(
687 XFS_BUF_ORDERED(sbp); 644 XFS_BUF_ORDERED(sbp);
688 645
689 xfsbdstrat(mp, sbp); 646 xfsbdstrat(mp, sbp);
690 error = xfs_iowait(sbp); 647 error = xfs_buf_iowait(sbp);
691 648
692 /* 649 /*
693 * Clear all the flags we set and possible error state in the 650 * Clear all the flags we set and possible error state in the
@@ -735,8 +692,7 @@ void
735xfs_blkdev_issue_flush( 692xfs_blkdev_issue_flush(
736 xfs_buftarg_t *buftarg) 693 xfs_buftarg_t *buftarg)
737{ 694{
738 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, 695 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
739 BLKDEV_IFL_WAIT);
740} 696}
741 697
742STATIC void 698STATIC void
@@ -800,18 +756,20 @@ xfs_open_devices(
800 * Setup xfs_mount buffer target pointers 756 * Setup xfs_mount buffer target pointers
801 */ 757 */
802 error = ENOMEM; 758 error = ENOMEM;
803 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); 759 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
804 if (!mp->m_ddev_targp) 760 if (!mp->m_ddev_targp)
805 goto out_close_rtdev; 761 goto out_close_rtdev;
806 762
807 if (rtdev) { 763 if (rtdev) {
808 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); 764 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
765 mp->m_fsname);
809 if (!mp->m_rtdev_targp) 766 if (!mp->m_rtdev_targp)
810 goto out_free_ddev_targ; 767 goto out_free_ddev_targ;
811 } 768 }
812 769
813 if (logdev && logdev != ddev) { 770 if (logdev && logdev != ddev) {
814 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); 771 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
772 mp->m_fsname);
815 if (!mp->m_logdev_targp) 773 if (!mp->m_logdev_targp)
816 goto out_free_rtdev_targ; 774 goto out_free_rtdev_targ;
817 } else { 775 } else {
@@ -947,7 +905,7 @@ xfs_fs_destroy_inode(
947{ 905{
948 struct xfs_inode *ip = XFS_I(inode); 906 struct xfs_inode *ip = XFS_I(inode);
949 907
950 xfs_itrace_entry(ip); 908 trace_xfs_destroy_inode(ip);
951 909
952 XFS_STATS_INC(vn_reclaim); 910 XFS_STATS_INC(vn_reclaim);
953 911
@@ -1014,12 +972,7 @@ xfs_fs_inode_init_once(
1014 972
1015/* 973/*
1016 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 974 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
1017 * we catch unlogged VFS level updates to the inode. Care must be taken 975 * we catch unlogged VFS level updates to the inode.
1018 * here - the transaction code calls mark_inode_dirty_sync() to mark the
1019 * VFS inode dirty in a transaction and clears the i_update_core field;
1020 * it must clear the field after calling mark_inode_dirty_sync() to
1021 * correctly indicate that the dirty state has been propagated into the
1022 * inode log item.
1023 * 976 *
1024 * We need the barrier() to maintain correct ordering between unlogged 977 * We need the barrier() to maintain correct ordering between unlogged
1025 * updates and the transaction commit code that clears the i_update_core 978 * updates and the transaction commit code that clears the i_update_core
@@ -1063,10 +1016,8 @@ xfs_log_inode(
1063 * an inode in another recent transaction. So we play it safe and 1016 * an inode in another recent transaction. So we play it safe and
1064 * fire off the transaction anyway. 1017 * fire off the transaction anyway.
1065 */ 1018 */
1066 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1019 xfs_trans_ijoin(tp, ip);
1067 xfs_trans_ihold(tp, ip);
1068 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1020 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1069 xfs_trans_set_sync(tp);
1070 error = xfs_trans_commit(tp, 0); 1021 error = xfs_trans_commit(tp, 0);
1071 xfs_ilock_demote(ip, XFS_ILOCK_EXCL); 1022 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1072 1023
@@ -1082,27 +1033,18 @@ xfs_fs_write_inode(
1082 struct xfs_mount *mp = ip->i_mount; 1033 struct xfs_mount *mp = ip->i_mount;
1083 int error = EAGAIN; 1034 int error = EAGAIN;
1084 1035
1085 xfs_itrace_entry(ip); 1036 trace_xfs_write_inode(ip);
1086 1037
1087 if (XFS_FORCED_SHUTDOWN(mp)) 1038 if (XFS_FORCED_SHUTDOWN(mp))
1088 return XFS_ERROR(EIO); 1039 return XFS_ERROR(EIO);
1089 1040
1090 if (wbc->sync_mode == WB_SYNC_ALL) { 1041 if (wbc->sync_mode == WB_SYNC_ALL) {
1091 /* 1042 /*
1092 * Make sure the inode has hit stable storage. By using the 1043 * Make sure the inode has made it it into the log. Instead
1093 * log and the fsync transactions we reduce the IOs we have 1044 * of forcing it all the way to stable storage using a
1094 * to do here from two (log and inode) to just the log. 1045 * synchronous transaction we let the log force inside the
1095 * 1046 * ->sync_fs call do that for thus, which reduces the number
1096 * Note: We still need to do a delwri write of the inode after 1047 * of synchronous log foces dramatically.
1097 * this to flush it to the backing buffer so that bulkstat
1098 * works properly if this is the first time the inode has been
1099 * written. Because we hold the ilock atomically over the
1100 * transaction commit and the inode flush we are guaranteed
1101 * that the inode is not pinned when it returns. If the flush
1102 * lock is already held, then the inode has already been
1103 * flushed once and we don't need to flush it again. Hence
1104 * the code will only flush the inode if it isn't already
1105 * being flushed.
1106 */ 1048 */
1107 xfs_ioend_wait(ip); 1049 xfs_ioend_wait(ip);
1108 xfs_ilock(ip, XFS_ILOCK_SHARED); 1050 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1116,27 +1058,29 @@ xfs_fs_write_inode(
1116 * We make this non-blocking if the inode is contended, return 1058 * We make this non-blocking if the inode is contended, return
1117 * EAGAIN to indicate to the caller that they did not succeed. 1059 * EAGAIN to indicate to the caller that they did not succeed.
1118 * This prevents the flush path from blocking on inodes inside 1060 * This prevents the flush path from blocking on inodes inside
1119 * another operation right now, they get caught later by xfs_sync. 1061 * another operation right now, they get caught later by
1062 * xfs_sync.
1120 */ 1063 */
1121 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 1064 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1122 goto out; 1065 goto out;
1123 }
1124 1066
1125 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 1067 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1126 goto out_unlock; 1068 goto out_unlock;
1127 1069
1128 /* 1070 /*
1129 * Now we have the flush lock and the inode is not pinned, we can check 1071 * Now we have the flush lock and the inode is not pinned, we
1130 * if the inode is really clean as we know that there are no pending 1072 * can check if the inode is really clean as we know that
1131 * transaction completions, it is not waiting on the delayed write 1073 * there are no pending transaction completions, it is not
1132 * queue and there is no IO in progress. 1074 * waiting on the delayed write queue and there is no IO in
1133 */ 1075 * progress.
1134 if (xfs_inode_clean(ip)) { 1076 */
1135 xfs_ifunlock(ip); 1077 if (xfs_inode_clean(ip)) {
1136 error = 0; 1078 xfs_ifunlock(ip);
1137 goto out_unlock; 1079 error = 0;
1080 goto out_unlock;
1081 }
1082 error = xfs_iflush(ip, 0);
1138 } 1083 }
1139 error = xfs_iflush(ip, 0);
1140 1084
1141 out_unlock: 1085 out_unlock:
1142 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1086 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1151,12 +1095,15 @@ xfs_fs_write_inode(
1151} 1095}
1152 1096
1153STATIC void 1097STATIC void
1154xfs_fs_clear_inode( 1098xfs_fs_evict_inode(
1155 struct inode *inode) 1099 struct inode *inode)
1156{ 1100{
1157 xfs_inode_t *ip = XFS_I(inode); 1101 xfs_inode_t *ip = XFS_I(inode);
1158 1102
1159 xfs_itrace_entry(ip); 1103 trace_xfs_evict_inode(ip);
1104
1105 truncate_inode_pages(&inode->i_data, 0);
1106 end_writeback(inode);
1160 XFS_STATS_INC(vn_rele); 1107 XFS_STATS_INC(vn_rele);
1161 XFS_STATS_INC(vn_remove); 1108 XFS_STATS_INC(vn_remove);
1162 XFS_STATS_DEC(vn_active); 1109 XFS_STATS_DEC(vn_active);
@@ -1193,22 +1140,13 @@ xfs_fs_put_super(
1193{ 1140{
1194 struct xfs_mount *mp = XFS_M(sb); 1141 struct xfs_mount *mp = XFS_M(sb);
1195 1142
1143 /*
1144 * Unregister the memory shrinker before we tear down the mount
1145 * structure so we don't have memory reclaim racing with us here.
1146 */
1147 xfs_inode_shrinker_unregister(mp);
1196 xfs_syncd_stop(mp); 1148 xfs_syncd_stop(mp);
1197 1149
1198 if (!(sb->s_flags & MS_RDONLY)) {
1199 /*
1200 * XXX(hch): this should be SYNC_WAIT.
1201 *
1202 * Or more likely not needed at all because the VFS is already
1203 * calling ->sync_fs after shutting down all filestem
1204 * operations and just before calling ->put_super.
1205 */
1206 xfs_sync_data(mp, 0);
1207 xfs_sync_attr(mp, 0);
1208 }
1209
1210 XFS_SEND_PREUNMOUNT(mp);
1211
1212 /* 1150 /*
1213 * Blow away any referenced inode in the filestreams cache. 1151 * Blow away any referenced inode in the filestreams cache.
1214 * This can and will cause log traffic as inodes go inactive 1152 * This can and will cause log traffic as inodes go inactive
@@ -1218,14 +1156,10 @@ xfs_fs_put_super(
1218 1156
1219 XFS_bflush(mp->m_ddev_targp); 1157 XFS_bflush(mp->m_ddev_targp);
1220 1158
1221 XFS_SEND_UNMOUNT(mp);
1222
1223 xfs_unmountfs(mp); 1159 xfs_unmountfs(mp);
1224 xfs_freesb(mp); 1160 xfs_freesb(mp);
1225 xfs_inode_shrinker_unregister(mp);
1226 xfs_icsb_destroy_counters(mp); 1161 xfs_icsb_destroy_counters(mp);
1227 xfs_close_devices(mp); 1162 xfs_close_devices(mp);
1228 xfs_dmops_put(mp);
1229 xfs_free_fsname(mp); 1163 xfs_free_fsname(mp);
1230 kfree(mp); 1164 kfree(mp);
1231} 1165}
@@ -1287,6 +1221,7 @@ xfs_fs_statfs(
1287 struct xfs_inode *ip = XFS_I(dentry->d_inode); 1221 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1288 __uint64_t fakeinos, id; 1222 __uint64_t fakeinos, id;
1289 xfs_extlen_t lsize; 1223 xfs_extlen_t lsize;
1224 __int64_t ffree;
1290 1225
1291 statp->f_type = XFS_SB_MAGIC; 1226 statp->f_type = XFS_SB_MAGIC;
1292 statp->f_namelen = MAXNAMELEN - 1; 1227 statp->f_namelen = MAXNAMELEN - 1;
@@ -1310,7 +1245,11 @@ xfs_fs_statfs(
1310 statp->f_files = min_t(typeof(statp->f_files), 1245 statp->f_files = min_t(typeof(statp->f_files),
1311 statp->f_files, 1246 statp->f_files,
1312 mp->m_maxicount); 1247 mp->m_maxicount);
1313 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1248
1249 /* make sure statp->f_ffree does not underflow */
1250 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
1251 statp->f_ffree = max_t(__int64_t, ffree, 0);
1252
1314 spin_unlock(&mp->m_sb_lock); 1253 spin_unlock(&mp->m_sb_lock);
1315 1254
1316 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || 1255 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1463,7 +1402,7 @@ xfs_fs_freeze(
1463 1402
1464 xfs_save_resvblks(mp); 1403 xfs_save_resvblks(mp);
1465 xfs_quiesce_attr(mp); 1404 xfs_quiesce_attr(mp);
1466 return -xfs_fs_log_dummy(mp); 1405 return -xfs_fs_log_dummy(mp, SYNC_WAIT);
1467} 1406}
1468 1407
1469STATIC int 1408STATIC int
@@ -1543,7 +1482,6 @@ xfs_fs_fill_super(
1543 struct inode *root; 1482 struct inode *root;
1544 struct xfs_mount *mp = NULL; 1483 struct xfs_mount *mp = NULL;
1545 int flags = 0, error = ENOMEM; 1484 int flags = 0, error = ENOMEM;
1546 char *mtpt = NULL;
1547 1485
1548 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1486 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1549 if (!mp) 1487 if (!mp)
@@ -1559,7 +1497,7 @@ xfs_fs_fill_super(
1559 mp->m_super = sb; 1497 mp->m_super = sb;
1560 sb->s_fs_info = mp; 1498 sb->s_fs_info = mp;
1561 1499
1562 error = xfs_parseargs(mp, (char *)data, &mtpt); 1500 error = xfs_parseargs(mp, (char *)data);
1563 if (error) 1501 if (error)
1564 goto out_free_fsname; 1502 goto out_free_fsname;
1565 1503
@@ -1571,19 +1509,16 @@ xfs_fs_fill_super(
1571#endif 1509#endif
1572 sb->s_op = &xfs_super_operations; 1510 sb->s_op = &xfs_super_operations;
1573 1511
1574 error = xfs_dmops_get(mp);
1575 if (error)
1576 goto out_free_fsname;
1577
1578 if (silent) 1512 if (silent)
1579 flags |= XFS_MFSI_QUIET; 1513 flags |= XFS_MFSI_QUIET;
1580 1514
1581 error = xfs_open_devices(mp); 1515 error = xfs_open_devices(mp);
1582 if (error) 1516 if (error)
1583 goto out_put_dmops; 1517 goto out_free_fsname;
1584 1518
1585 if (xfs_icsb_init_counters(mp)) 1519 error = xfs_icsb_init_counters(mp);
1586 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 1520 if (error)
1521 goto out_close_devices;
1587 1522
1588 error = xfs_readsb(mp, flags); 1523 error = xfs_readsb(mp, flags);
1589 if (error) 1524 if (error)
@@ -1608,8 +1543,6 @@ xfs_fs_fill_super(
1608 if (error) 1543 if (error)
1609 goto out_filestream_unmount; 1544 goto out_filestream_unmount;
1610 1545
1611 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
1612
1613 sb->s_magic = XFS_SB_MAGIC; 1546 sb->s_magic = XFS_SB_MAGIC;
1614 sb->s_blocksize = mp->m_sb.sb_blocksize; 1547 sb->s_blocksize = mp->m_sb.sb_blocksize;
1615 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1548 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1638,7 +1571,6 @@ xfs_fs_fill_super(
1638 1571
1639 xfs_inode_shrinker_register(mp); 1572 xfs_inode_shrinker_register(mp);
1640 1573
1641 kfree(mtpt);
1642 return 0; 1574 return 0;
1643 1575
1644 out_filestream_unmount: 1576 out_filestream_unmount:
@@ -1647,12 +1579,10 @@ xfs_fs_fill_super(
1647 xfs_freesb(mp); 1579 xfs_freesb(mp);
1648 out_destroy_counters: 1580 out_destroy_counters:
1649 xfs_icsb_destroy_counters(mp); 1581 xfs_icsb_destroy_counters(mp);
1582 out_close_devices:
1650 xfs_close_devices(mp); 1583 xfs_close_devices(mp);
1651 out_put_dmops:
1652 xfs_dmops_put(mp);
1653 out_free_fsname: 1584 out_free_fsname:
1654 xfs_free_fsname(mp); 1585 xfs_free_fsname(mp);
1655 kfree(mtpt);
1656 kfree(mp); 1586 kfree(mp);
1657 out: 1587 out:
1658 return -error; 1588 return -error;
@@ -1696,7 +1626,7 @@ static const struct super_operations xfs_super_operations = {
1696 .destroy_inode = xfs_fs_destroy_inode, 1626 .destroy_inode = xfs_fs_destroy_inode,
1697 .dirty_inode = xfs_fs_dirty_inode, 1627 .dirty_inode = xfs_fs_dirty_inode,
1698 .write_inode = xfs_fs_write_inode, 1628 .write_inode = xfs_fs_write_inode,
1699 .clear_inode = xfs_fs_clear_inode, 1629 .evict_inode = xfs_fs_evict_inode,
1700 .put_super = xfs_fs_put_super, 1630 .put_super = xfs_fs_put_super,
1701 .sync_fs = xfs_fs_sync_fs, 1631 .sync_fs = xfs_fs_sync_fs,
1702 .freeze_fs = xfs_fs_freeze, 1632 .freeze_fs = xfs_fs_freeze,
@@ -1759,6 +1689,12 @@ xfs_init_zones(void)
1759 if (!xfs_trans_zone) 1689 if (!xfs_trans_zone)
1760 goto out_destroy_ifork_zone; 1690 goto out_destroy_ifork_zone;
1761 1691
1692 xfs_log_item_desc_zone =
1693 kmem_zone_init(sizeof(struct xfs_log_item_desc),
1694 "xfs_log_item_desc");
1695 if (!xfs_log_item_desc_zone)
1696 goto out_destroy_trans_zone;
1697
1762 /* 1698 /*
1763 * The size of the zone allocated buf log item is the maximum 1699 * The size of the zone allocated buf log item is the maximum
1764 * size possible under XFS. This wastes a little bit of memory, 1700 * size possible under XFS. This wastes a little bit of memory,
@@ -1768,7 +1704,7 @@ xfs_init_zones(void)
1768 (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / 1704 (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
1769 NBWORD) * sizeof(int))), "xfs_buf_item"); 1705 NBWORD) * sizeof(int))), "xfs_buf_item");
1770 if (!xfs_buf_item_zone) 1706 if (!xfs_buf_item_zone)
1771 goto out_destroy_trans_zone; 1707 goto out_destroy_log_item_desc_zone;
1772 1708
1773 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + 1709 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1774 ((XFS_EFD_MAX_FAST_EXTENTS - 1) * 1710 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
@@ -1805,6 +1741,8 @@ xfs_init_zones(void)
1805 kmem_zone_destroy(xfs_efd_zone); 1741 kmem_zone_destroy(xfs_efd_zone);
1806 out_destroy_buf_item_zone: 1742 out_destroy_buf_item_zone:
1807 kmem_zone_destroy(xfs_buf_item_zone); 1743 kmem_zone_destroy(xfs_buf_item_zone);
1744 out_destroy_log_item_desc_zone:
1745 kmem_zone_destroy(xfs_log_item_desc_zone);
1808 out_destroy_trans_zone: 1746 out_destroy_trans_zone:
1809 kmem_zone_destroy(xfs_trans_zone); 1747 kmem_zone_destroy(xfs_trans_zone);
1810 out_destroy_ifork_zone: 1748 out_destroy_ifork_zone:
@@ -1835,6 +1773,7 @@ xfs_destroy_zones(void)
1835 kmem_zone_destroy(xfs_efi_zone); 1773 kmem_zone_destroy(xfs_efi_zone);
1836 kmem_zone_destroy(xfs_efd_zone); 1774 kmem_zone_destroy(xfs_efd_zone);
1837 kmem_zone_destroy(xfs_buf_item_zone); 1775 kmem_zone_destroy(xfs_buf_item_zone);
1776 kmem_zone_destroy(xfs_log_item_desc_zone);
1838 kmem_zone_destroy(xfs_trans_zone); 1777 kmem_zone_destroy(xfs_trans_zone);
1839 kmem_zone_destroy(xfs_ifork_zone); 1778 kmem_zone_destroy(xfs_ifork_zone);
1840 kmem_zone_destroy(xfs_dabuf_zone); 1779 kmem_zone_destroy(xfs_dabuf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 519618e9279e..50a3266c999e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,23 +56,17 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_DMAPI
60# define XFS_DMAPI_STRING "dmapi support, "
61#else
62# define XFS_DMAPI_STRING
63#endif
64
65#ifdef DEBUG 59#ifdef DEBUG
66# define XFS_DBG_STRING "debug" 60# define XFS_DBG_STRING "debug"
67#else 61#else
68# define XFS_DBG_STRING "no debug" 62# define XFS_DBG_STRING "no debug"
69#endif 63#endif
70 64
65#define XFS_VERSION_STRING "SGI XFS"
71#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ 66#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
72 XFS_SECURITY_STRING \ 67 XFS_SECURITY_STRING \
73 XFS_REALTIME_STRING \ 68 XFS_REALTIME_STRING \
74 XFS_BIGFS_STRING \ 69 XFS_BIGFS_STRING \
75 XFS_DMAPI_STRING \
76 XFS_DBG_STRING /* DBG must be last */ 70 XFS_DBG_STRING /* DBG must be last */
77 71
78struct xfs_inode; 72struct xfs_inode;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a51a07c3a70c..37d33254981d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -24,67 +24,54 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_inode.h" 29#include "xfs_inode.h"
37#include "xfs_dinode.h" 30#include "xfs_dinode.h"
38#include "xfs_error.h" 31#include "xfs_error.h"
39#include "xfs_mru_cache.h"
40#include "xfs_filestream.h" 32#include "xfs_filestream.h"
41#include "xfs_vnodeops.h" 33#include "xfs_vnodeops.h"
42#include "xfs_utils.h"
43#include "xfs_buf_item.h"
44#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
45#include "xfs_rw.h"
46#include "xfs_quota.h" 35#include "xfs_quota.h"
47#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_fsops.h"
48 38
49#include <linux/kthread.h> 39#include <linux/kthread.h>
50#include <linux/freezer.h> 40#include <linux/freezer.h>
51 41
42/*
43 * The inode lookup is done in batches to keep the amount of lock traffic and
44 * radix tree lookups to a minimum. The batch size is a trade off between
45 * lookup reduction and stack usage. This is in the reclaim path, so we can't
46 * be too greedy.
47 */
48#define XFS_LOOKUP_BATCH 32
52 49
53STATIC xfs_inode_t * 50STATIC int
54xfs_inode_ag_lookup( 51xfs_inode_ag_walk_grab(
55 struct xfs_mount *mp, 52 struct xfs_inode *ip)
56 struct xfs_perag *pag,
57 uint32_t *first_index,
58 int tag)
59{ 53{
60 int nr_found; 54 struct inode *inode = VFS_I(ip);
61 struct xfs_inode *ip;
62 55
63 /* 56 /* nothing to sync during shutdown */
64 * use a gang lookup to find the next inode in the tree 57 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
65 * as the tree is sparse and a gang lookup walks to find 58 return EFSCORRUPTED;
66 * the number of objects requested. 59
67 */ 60 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
68 if (tag == XFS_ICI_NO_TAG) { 61 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 62 return ENOENT;
70 (void **)&ip, *first_index, 1); 63
71 } else { 64 /* If we can't grab the inode, it must on it's way to reclaim. */
72 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, 65 if (!igrab(inode))
73 (void **)&ip, *first_index, 1, tag); 66 return ENOENT;
67
68 if (is_bad_inode(inode)) {
69 IRELE(ip);
70 return ENOENT;
74 } 71 }
75 if (!nr_found)
76 return NULL;
77 72
78 /* 73 /* inode is valid */
79 * Update the index for the next lookup. Catch overflows 74 return 0;
80 * into the next AG range which can occur if we have inodes
81 * in the last block of the AG and we are currently
82 * pointing to the last inode.
83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 return NULL;
87 return ip;
88} 75}
89 76
90STATIC int 77STATIC int
@@ -93,49 +80,75 @@ xfs_inode_ag_walk(
93 struct xfs_perag *pag, 80 struct xfs_perag *pag,
94 int (*execute)(struct xfs_inode *ip, 81 int (*execute)(struct xfs_inode *ip,
95 struct xfs_perag *pag, int flags), 82 struct xfs_perag *pag, int flags),
96 int flags, 83 int flags)
97 int tag,
98 int exclusive,
99 int *nr_to_scan)
100{ 84{
101 uint32_t first_index; 85 uint32_t first_index;
102 int last_error = 0; 86 int last_error = 0;
103 int skipped; 87 int skipped;
88 int done;
89 int nr_found;
104 90
105restart: 91restart:
92 done = 0;
106 skipped = 0; 93 skipped = 0;
107 first_index = 0; 94 first_index = 0;
95 nr_found = 0;
108 do { 96 do {
97 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
109 int error = 0; 98 int error = 0;
110 xfs_inode_t *ip; 99 int i;
111 100
112 if (exclusive) 101 read_lock(&pag->pag_ici_lock);
113 write_lock(&pag->pag_ici_lock); 102 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
114 else 103 (void **)batch, first_index,
115 read_lock(&pag->pag_ici_lock); 104 XFS_LOOKUP_BATCH);
116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 105 if (!nr_found) {
117 if (!ip) { 106 read_unlock(&pag->pag_ici_lock);
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
122 break; 107 break;
123 } 108 }
124 109
125 /* execute releases pag->pag_ici_lock */ 110 /*
126 error = execute(ip, pag, flags); 111 * Grab the inodes before we drop the lock. if we found
127 if (error == EAGAIN) { 112 * nothing, nr == 0 and the loop will be skipped.
128 skipped++; 113 */
129 continue; 114 for (i = 0; i < nr_found; i++) {
115 struct xfs_inode *ip = batch[i];
116
117 if (done || xfs_inode_ag_walk_grab(ip))
118 batch[i] = NULL;
119
120 /*
121 * Update the index for the next lookup. Catch overflows
122 * into the next AG range which can occur if we have inodes
123 * in the last block of the AG and we are currently
124 * pointing to the last inode.
125 */
126 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
127 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
128 done = 1;
129 }
130
131 /* unlock now we've grabbed the inodes. */
132 read_unlock(&pag->pag_ici_lock);
133
134 for (i = 0; i < nr_found; i++) {
135 if (!batch[i])
136 continue;
137 error = execute(batch[i], pag, flags);
138 IRELE(batch[i]);
139 if (error == EAGAIN) {
140 skipped++;
141 continue;
142 }
143 if (error && last_error != EFSCORRUPTED)
144 last_error = error;
130 } 145 }
131 if (error)
132 last_error = error;
133 146
134 /* bail out if the filesystem is corrupted. */ 147 /* bail out if the filesystem is corrupted. */
135 if (error == EFSCORRUPTED) 148 if (error == EFSCORRUPTED)
136 break; 149 break;
137 150
138 } while ((*nr_to_scan)--); 151 } while (nr_found && !done);
139 152
140 if (skipped) { 153 if (skipped) {
141 delay(1); 154 delay(1);
@@ -144,110 +157,32 @@ restart:
144 return last_error; 157 return last_error;
145} 158}
146 159
147/*
148 * Select the next per-ag structure to iterate during the walk. The reclaim
149 * walk is optimised only to walk AGs with reclaimable inodes in them.
150 */
151static struct xfs_perag *
152xfs_inode_ag_iter_next_pag(
153 struct xfs_mount *mp,
154 xfs_agnumber_t *first,
155 int tag)
156{
157 struct xfs_perag *pag = NULL;
158
159 if (tag == XFS_ICI_RECLAIM_TAG) {
160 int found;
161 int ref;
162
163 spin_lock(&mp->m_perag_lock);
164 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
165 (void **)&pag, *first, 1, tag);
166 if (found <= 0) {
167 spin_unlock(&mp->m_perag_lock);
168 return NULL;
169 }
170 *first = pag->pag_agno + 1;
171 /* open coded pag reference increment */
172 ref = atomic_inc_return(&pag->pag_ref);
173 spin_unlock(&mp->m_perag_lock);
174 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
175 } else {
176 pag = xfs_perag_get(mp, *first);
177 (*first)++;
178 }
179 return pag;
180}
181
182int 160int
183xfs_inode_ag_iterator( 161xfs_inode_ag_iterator(
184 struct xfs_mount *mp, 162 struct xfs_mount *mp,
185 int (*execute)(struct xfs_inode *ip, 163 int (*execute)(struct xfs_inode *ip,
186 struct xfs_perag *pag, int flags), 164 struct xfs_perag *pag, int flags),
187 int flags, 165 int flags)
188 int tag,
189 int exclusive,
190 int *nr_to_scan)
191{ 166{
192 struct xfs_perag *pag; 167 struct xfs_perag *pag;
193 int error = 0; 168 int error = 0;
194 int last_error = 0; 169 int last_error = 0;
195 xfs_agnumber_t ag; 170 xfs_agnumber_t ag;
196 int nr;
197 171
198 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
199 ag = 0; 172 ag = 0;
200 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { 173 while ((pag = xfs_perag_get(mp, ag))) {
201 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 174 ag = pag->pag_agno + 1;
202 exclusive, &nr); 175 error = xfs_inode_ag_walk(mp, pag, execute, flags);
203 xfs_perag_put(pag); 176 xfs_perag_put(pag);
204 if (error) { 177 if (error) {
205 last_error = error; 178 last_error = error;
206 if (error == EFSCORRUPTED) 179 if (error == EFSCORRUPTED)
207 break; 180 break;
208 } 181 }
209 if (nr <= 0)
210 break;
211 } 182 }
212 if (nr_to_scan)
213 *nr_to_scan = nr;
214 return XFS_ERROR(last_error); 183 return XFS_ERROR(last_error);
215} 184}
216 185
217/* must be called with pag_ici_lock held and releases it */
218int
219xfs_sync_inode_valid(
220 struct xfs_inode *ip,
221 struct xfs_perag *pag)
222{
223 struct inode *inode = VFS_I(ip);
224 int error = EFSCORRUPTED;
225
226 /* nothing to sync during shutdown */
227 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
228 goto out_unlock;
229
230 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
231 error = ENOENT;
232 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
233 goto out_unlock;
234
235 /* If we can't grab the inode, it must on it's way to reclaim. */
236 if (!igrab(inode))
237 goto out_unlock;
238
239 if (is_bad_inode(inode)) {
240 IRELE(ip);
241 goto out_unlock;
242 }
243
244 /* inode is valid */
245 error = 0;
246out_unlock:
247 read_unlock(&pag->pag_ici_lock);
248 return error;
249}
250
251STATIC int 186STATIC int
252xfs_sync_inode_data( 187xfs_sync_inode_data(
253 struct xfs_inode *ip, 188 struct xfs_inode *ip,
@@ -258,10 +193,6 @@ xfs_sync_inode_data(
258 struct address_space *mapping = inode->i_mapping; 193 struct address_space *mapping = inode->i_mapping;
259 int error = 0; 194 int error = 0;
260 195
261 error = xfs_sync_inode_valid(ip, pag);
262 if (error)
263 return error;
264
265 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 196 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
266 goto out_wait; 197 goto out_wait;
267 198
@@ -278,7 +209,6 @@ xfs_sync_inode_data(
278 out_wait: 209 out_wait:
279 if (flags & SYNC_WAIT) 210 if (flags & SYNC_WAIT)
280 xfs_ioend_wait(ip); 211 xfs_ioend_wait(ip);
281 IRELE(ip);
282 return error; 212 return error;
283} 213}
284 214
@@ -290,10 +220,6 @@ xfs_sync_inode_attr(
290{ 220{
291 int error = 0; 221 int error = 0;
292 222
293 error = xfs_sync_inode_valid(ip, pag);
294 if (error)
295 return error;
296
297 xfs_ilock(ip, XFS_ILOCK_SHARED); 223 xfs_ilock(ip, XFS_ILOCK_SHARED);
298 if (xfs_inode_clean(ip)) 224 if (xfs_inode_clean(ip))
299 goto out_unlock; 225 goto out_unlock;
@@ -312,14 +238,13 @@ xfs_sync_inode_attr(
312 238
313 out_unlock: 239 out_unlock:
314 xfs_iunlock(ip, XFS_ILOCK_SHARED); 240 xfs_iunlock(ip, XFS_ILOCK_SHARED);
315 IRELE(ip);
316 return error; 241 return error;
317} 242}
318 243
319/* 244/*
320 * Write out pagecache data for the whole filesystem. 245 * Write out pagecache data for the whole filesystem.
321 */ 246 */
322int 247STATIC int
323xfs_sync_data( 248xfs_sync_data(
324 struct xfs_mount *mp, 249 struct xfs_mount *mp,
325 int flags) 250 int flags)
@@ -328,8 +253,7 @@ xfs_sync_data(
328 253
329 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 254 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
330 255
331 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 256 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
332 XFS_ICI_NO_TAG, 0, NULL);
333 if (error) 257 if (error)
334 return XFS_ERROR(error); 258 return XFS_ERROR(error);
335 259
@@ -340,48 +264,14 @@ xfs_sync_data(
340/* 264/*
341 * Write out inode metadata (attributes) for the whole filesystem. 265 * Write out inode metadata (attributes) for the whole filesystem.
342 */ 266 */
343int 267STATIC int
344xfs_sync_attr( 268xfs_sync_attr(
345 struct xfs_mount *mp, 269 struct xfs_mount *mp,
346 int flags) 270 int flags)
347{ 271{
348 ASSERT((flags & ~SYNC_WAIT) == 0); 272 ASSERT((flags & ~SYNC_WAIT) == 0);
349 273
350 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 274 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
351 XFS_ICI_NO_TAG, 0, NULL);
352}
353
354STATIC int
355xfs_commit_dummy_trans(
356 struct xfs_mount *mp,
357 uint flags)
358{
359 struct xfs_inode *ip = mp->m_rootip;
360 struct xfs_trans *tp;
361 int error;
362
363 /*
364 * Put a dummy transaction in the log to tell recovery
365 * that all others are OK.
366 */
367 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
368 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
369 if (error) {
370 xfs_trans_cancel(tp, 0);
371 return error;
372 }
373
374 xfs_ilock(ip, XFS_ILOCK_EXCL);
375
376 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
377 xfs_trans_ihold(tp, ip);
378 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
379 error = xfs_trans_commit(tp, 0);
380 xfs_iunlock(ip, XFS_ILOCK_EXCL);
381
382 /* the log force ensures this transaction is pushed to disk */
383 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
384 return error;
385} 275}
386 276
387STATIC int 277STATIC int
@@ -444,7 +334,7 @@ xfs_quiesce_data(
444 334
445 /* mark the log as covered if needed */ 335 /* mark the log as covered if needed */
446 if (xfs_log_need_covered(mp)) 336 if (xfs_log_need_covered(mp))
447 error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); 337 error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
448 338
449 /* flush data-only devices */ 339 /* flush data-only devices */
450 if (mp->m_rtdev_targp) 340 if (mp->m_rtdev_targp)
@@ -575,7 +465,7 @@ xfs_flush_inodes(
575/* 465/*
576 * Every sync period we need to unpin all items, reclaim inodes and sync 466 * Every sync period we need to unpin all items, reclaim inodes and sync
577 * disk quotas. We might need to cover the log to indicate that the 467 * disk quotas. We might need to cover the log to indicate that the
578 * filesystem is idle. 468 * filesystem is idle and not frozen.
579 */ 469 */
580STATIC void 470STATIC void
581xfs_sync_worker( 471xfs_sync_worker(
@@ -589,8 +479,9 @@ xfs_sync_worker(
589 xfs_reclaim_inodes(mp, 0); 479 xfs_reclaim_inodes(mp, 0);
590 /* dgc: errors ignored here */ 480 /* dgc: errors ignored here */
591 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 481 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
592 if (xfs_log_need_covered(mp)) 482 if (mp->m_super->s_frozen == SB_UNFROZEN &&
593 error = xfs_commit_dummy_trans(mp, 0); 483 xfs_log_need_covered(mp))
484 error = xfs_fs_log_dummy(mp, 0);
594 } 485 }
595 mp->m_sync_seq++; 486 mp->m_sync_seq++;
596 wake_up(&mp->m_wait_single_sync_task); 487 wake_up(&mp->m_wait_single_sync_task);
@@ -710,14 +601,11 @@ xfs_inode_set_reclaim_tag(
710 xfs_perag_put(pag); 601 xfs_perag_put(pag);
711} 602}
712 603
713void 604STATIC void
714__xfs_inode_clear_reclaim_tag( 605__xfs_inode_clear_reclaim(
715 xfs_mount_t *mp,
716 xfs_perag_t *pag, 606 xfs_perag_t *pag,
717 xfs_inode_t *ip) 607 xfs_inode_t *ip)
718{ 608{
719 radix_tree_tag_clear(&pag->pag_ici_root,
720 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
721 pag->pag_ici_reclaimable--; 609 pag->pag_ici_reclaimable--;
722 if (!pag->pag_ici_reclaimable) { 610 if (!pag->pag_ici_reclaimable) {
723 /* clear the reclaim tag from the perag radix tree */ 611 /* clear the reclaim tag from the perag radix tree */
@@ -731,6 +619,54 @@ __xfs_inode_clear_reclaim_tag(
731 } 619 }
732} 620}
733 621
622void
623__xfs_inode_clear_reclaim_tag(
624 xfs_mount_t *mp,
625 xfs_perag_t *pag,
626 xfs_inode_t *ip)
627{
628 radix_tree_tag_clear(&pag->pag_ici_root,
629 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
630 __xfs_inode_clear_reclaim(pag, ip);
631}
632
633/*
634 * Grab the inode for reclaim exclusively.
635 * Return 0 if we grabbed it, non-zero otherwise.
636 */
637STATIC int
638xfs_reclaim_inode_grab(
639 struct xfs_inode *ip,
640 int flags)
641{
642
643 /*
644 * do some unlocked checks first to avoid unnecceary lock traffic.
645 * The first is a flush lock check, the second is a already in reclaim
646 * check. Only do these checks if we are not going to block on locks.
647 */
648 if ((flags & SYNC_TRYLOCK) &&
649 (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
650 return 1;
651 }
652
653 /*
654 * The radix tree lock here protects a thread in xfs_iget from racing
655 * with us starting reclaim on the inode. Once we have the
656 * XFS_IRECLAIM flag set it will not touch us.
657 */
658 spin_lock(&ip->i_flags_lock);
659 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
660 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
661 /* ignore as it is already under reclaim */
662 spin_unlock(&ip->i_flags_lock);
663 return 1;
664 }
665 __xfs_iflags_set(ip, XFS_IRECLAIM);
666 spin_unlock(&ip->i_flags_lock);
667 return 0;
668}
669
734/* 670/*
735 * Inodes in different states need to be treated differently, and the return 671 * Inodes in different states need to be treated differently, and the return
736 * value of xfs_iflush is not sufficient to get this right. The following table 672 * value of xfs_iflush is not sufficient to get this right. The following table
@@ -789,23 +725,6 @@ xfs_reclaim_inode(
789{ 725{
790 int error = 0; 726 int error = 0;
791 727
792 /*
793 * The radix tree lock here protects a thread in xfs_iget from racing
794 * with us starting reclaim on the inode. Once we have the
795 * XFS_IRECLAIM flag set it will not touch us.
796 */
797 spin_lock(&ip->i_flags_lock);
798 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
799 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
800 /* ignore as it is already under reclaim */
801 spin_unlock(&ip->i_flags_lock);
802 write_unlock(&pag->pag_ici_lock);
803 return 0;
804 }
805 __xfs_iflags_set(ip, XFS_IRECLAIM);
806 spin_unlock(&ip->i_flags_lock);
807 write_unlock(&pag->pag_ici_lock);
808
809 xfs_ilock(ip, XFS_ILOCK_EXCL); 728 xfs_ilock(ip, XFS_ILOCK_EXCL);
810 if (!xfs_iflock_nowait(ip)) { 729 if (!xfs_iflock_nowait(ip)) {
811 if (!(sync_mode & SYNC_WAIT)) 730 if (!(sync_mode & SYNC_WAIT))
@@ -867,18 +786,161 @@ out:
867reclaim: 786reclaim:
868 xfs_ifunlock(ip); 787 xfs_ifunlock(ip);
869 xfs_iunlock(ip, XFS_ILOCK_EXCL); 788 xfs_iunlock(ip, XFS_ILOCK_EXCL);
870 xfs_ireclaim(ip); 789
790 XFS_STATS_INC(xs_ig_reclaims);
791 /*
792 * Remove the inode from the per-AG radix tree.
793 *
794 * Because radix_tree_delete won't complain even if the item was never
795 * added to the tree assert that it's been there before to catch
796 * problems with the inode life time early on.
797 */
798 write_lock(&pag->pag_ici_lock);
799 if (!radix_tree_delete(&pag->pag_ici_root,
800 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
801 ASSERT(0);
802 __xfs_inode_clear_reclaim(pag, ip);
803 write_unlock(&pag->pag_ici_lock);
804
805 /*
806 * Here we do an (almost) spurious inode lock in order to coordinate
807 * with inode cache radix tree lookups. This is because the lookup
808 * can reference the inodes in the cache without taking references.
809 *
810 * We make that OK here by ensuring that we wait until the inode is
811 * unlocked after the lookup before we go ahead and free it. We get
812 * both the ilock and the iolock because the code may need to drop the
813 * ilock one but will still hold the iolock.
814 */
815 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
816 xfs_qm_dqdetach(ip);
817 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
818
819 xfs_inode_free(ip);
871 return error; 820 return error;
872 821
873} 822}
874 823
824/*
825 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
826 * corrupted, we still want to try to reclaim all the inodes. If we don't,
827 * then a shut down during filesystem unmount reclaim walk leak all the
828 * unreclaimed inodes.
829 */
830int
831xfs_reclaim_inodes_ag(
832 struct xfs_mount *mp,
833 int flags,
834 int *nr_to_scan)
835{
836 struct xfs_perag *pag;
837 int error = 0;
838 int last_error = 0;
839 xfs_agnumber_t ag;
840 int trylock = flags & SYNC_TRYLOCK;
841 int skipped;
842
843restart:
844 ag = 0;
845 skipped = 0;
846 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
847 unsigned long first_index = 0;
848 int done = 0;
849 int nr_found = 0;
850
851 ag = pag->pag_agno + 1;
852
853 if (trylock) {
854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
855 skipped++;
856 continue;
857 }
858 first_index = pag->pag_ici_reclaim_cursor;
859 } else
860 mutex_lock(&pag->pag_ici_reclaim_lock);
861
862 do {
863 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
864 int i;
865
866 write_lock(&pag->pag_ici_lock);
867 nr_found = radix_tree_gang_lookup_tag(
868 &pag->pag_ici_root,
869 (void **)batch, first_index,
870 XFS_LOOKUP_BATCH,
871 XFS_ICI_RECLAIM_TAG);
872 if (!nr_found) {
873 write_unlock(&pag->pag_ici_lock);
874 break;
875 }
876
877 /*
878 * Grab the inodes before we drop the lock. if we found
879 * nothing, nr == 0 and the loop will be skipped.
880 */
881 for (i = 0; i < nr_found; i++) {
882 struct xfs_inode *ip = batch[i];
883
884 if (done || xfs_reclaim_inode_grab(ip, flags))
885 batch[i] = NULL;
886
887 /*
888 * Update the index for the next lookup. Catch
889 * overflows into the next AG range which can
890 * occur if we have inodes in the last block of
891 * the AG and we are currently pointing to the
892 * last inode.
893 */
894 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
895 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
896 done = 1;
897 }
898
899 /* unlock now we've grabbed the inodes. */
900 write_unlock(&pag->pag_ici_lock);
901
902 for (i = 0; i < nr_found; i++) {
903 if (!batch[i])
904 continue;
905 error = xfs_reclaim_inode(batch[i], pag, flags);
906 if (error && last_error != EFSCORRUPTED)
907 last_error = error;
908 }
909
910 *nr_to_scan -= XFS_LOOKUP_BATCH;
911
912 } while (nr_found && !done && *nr_to_scan > 0);
913
914 if (trylock && !done)
915 pag->pag_ici_reclaim_cursor = first_index;
916 else
917 pag->pag_ici_reclaim_cursor = 0;
918 mutex_unlock(&pag->pag_ici_reclaim_lock);
919 xfs_perag_put(pag);
920 }
921
922 /*
923 * if we skipped any AG, and we still have scan count remaining, do
924 * another pass this time using blocking reclaim semantics (i.e
925 * waiting on the reclaim locks and ignoring the reclaim cursors). This
926 * ensure that when we get more reclaimers than AGs we block rather
927 * than spin trying to execute reclaim.
928 */
929 if (trylock && skipped && *nr_to_scan > 0) {
930 trylock = 0;
931 goto restart;
932 }
933 return XFS_ERROR(last_error);
934}
935
875int 936int
876xfs_reclaim_inodes( 937xfs_reclaim_inodes(
877 xfs_mount_t *mp, 938 xfs_mount_t *mp,
878 int mode) 939 int mode)
879{ 940{
880 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 941 int nr_to_scan = INT_MAX;
881 XFS_ICI_RECLAIM_TAG, 1, NULL); 942
943 return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
882} 944}
883 945
884/* 946/*
@@ -900,17 +962,16 @@ xfs_reclaim_inode_shrink(
900 if (!(gfp_mask & __GFP_FS)) 962 if (!(gfp_mask & __GFP_FS))
901 return -1; 963 return -1;
902 964
903 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, 965 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
904 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 966 /* terminate if we don't exhaust the scan */
905 /* if we don't exhaust the scan, don't bother coming back */
906 if (nr_to_scan > 0) 967 if (nr_to_scan > 0)
907 return -1; 968 return -1;
908 } 969 }
909 970
910 reclaimable = 0; 971 reclaimable = 0;
911 ag = 0; 972 ag = 0;
912 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, 973 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
913 XFS_ICI_RECLAIM_TAG))) { 974 ag = pag->pag_agno + 1;
914 reclaimable += pag->pag_ici_reclaimable; 975 reclaimable += pag->pag_ici_reclaimable;
915 xfs_perag_put(pag); 976 xfs_perag_put(pag);
916 } 977 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e28139aaa4aa..32ba6628290c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -35,9 +35,6 @@ typedef struct xfs_sync_work {
35int xfs_syncd_init(struct xfs_mount *mp); 35int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 36void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags);
40
41int xfs_quiesce_data(struct xfs_mount *mp); 38int xfs_quiesce_data(struct xfs_mount *mp);
42void xfs_quiesce_attr(struct xfs_mount *mp); 39void xfs_quiesce_attr(struct xfs_mount *mp);
43 40
@@ -50,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
50void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 47void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
51 struct xfs_inode *ip); 48 struct xfs_inode *ip);
52 49
53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 50int xfs_sync_inode_grab(struct xfs_inode *ip);
54int xfs_inode_ag_iterator(struct xfs_mount *mp, 51int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 52 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock, int *nr_to_scan); 53 int flags);
57 54
58void xfs_inode_shrinker_register(struct xfs_mount *mp); 55void xfs_inode_shrinker_register(struct xfs_mount *mp);
59void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 56void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index d12be8470cba..88d25d4aa56e 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -24,17 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 31#include "xfs_dinode.h"
35#include "xfs_inode.h" 32#include "xfs_inode.h"
36#include "xfs_btree.h" 33#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h" 34#include "xfs_mount.h"
39#include "xfs_ialloc.h" 35#include "xfs_ialloc.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 302820690904..acef2e98c594 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
124 unsigned long caller_ip), \ 124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip)) 125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get); 126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); 127DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
128DEFINE_PERAG_REF_EVENT(xfs_perag_put); 128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); 129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); 130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@ -317,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init);
317DEFINE_BUF_EVENT(xfs_buf_free); 317DEFINE_BUF_EVENT(xfs_buf_free);
318DEFINE_BUF_EVENT(xfs_buf_hold); 318DEFINE_BUF_EVENT(xfs_buf_hold);
319DEFINE_BUF_EVENT(xfs_buf_rele); 319DEFINE_BUF_EVENT(xfs_buf_rele);
320DEFINE_BUF_EVENT(xfs_buf_pin);
321DEFINE_BUF_EVENT(xfs_buf_unpin);
322DEFINE_BUF_EVENT(xfs_buf_iodone); 320DEFINE_BUF_EVENT(xfs_buf_iodone);
323DEFINE_BUF_EVENT(xfs_buf_iorequest); 321DEFINE_BUF_EVENT(xfs_buf_iorequest);
324DEFINE_BUF_EVENT(xfs_buf_bawrite); 322DEFINE_BUF_EVENT(xfs_buf_bawrite);
@@ -327,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
327DEFINE_BUF_EVENT(xfs_buf_lock_done); 325DEFINE_BUF_EVENT(xfs_buf_lock_done);
328DEFINE_BUF_EVENT(xfs_buf_cond_lock); 326DEFINE_BUF_EVENT(xfs_buf_cond_lock);
329DEFINE_BUF_EVENT(xfs_buf_unlock); 327DEFINE_BUF_EVENT(xfs_buf_unlock);
330DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
331DEFINE_BUF_EVENT(xfs_buf_iowait); 328DEFINE_BUF_EVENT(xfs_buf_iowait);
332DEFINE_BUF_EVENT(xfs_buf_iowait_done); 329DEFINE_BUF_EVENT(xfs_buf_iowait_done);
333DEFINE_BUF_EVENT(xfs_buf_delwri_queue); 330DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
334DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); 331DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
335DEFINE_BUF_EVENT(xfs_buf_delwri_split); 332DEFINE_BUF_EVENT(xfs_buf_delwri_split);
336DEFINE_BUF_EVENT(xfs_buf_get_noaddr); 333DEFINE_BUF_EVENT(xfs_buf_get_uncached);
337DEFINE_BUF_EVENT(xfs_bdstrat_shut); 334DEFINE_BUF_EVENT(xfs_bdstrat_shut);
338DEFINE_BUF_EVENT(xfs_buf_item_relse); 335DEFINE_BUF_EVENT(xfs_buf_item_relse);
339DEFINE_BUF_EVENT(xfs_buf_item_iodone); 336DEFINE_BUF_EVENT(xfs_buf_item_iodone);
@@ -541,7 +538,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait);
541DEFINE_LOCK_EVENT(xfs_ilock_demote); 538DEFINE_LOCK_EVENT(xfs_ilock_demote);
542DEFINE_LOCK_EVENT(xfs_iunlock); 539DEFINE_LOCK_EVENT(xfs_iunlock);
543 540
544DECLARE_EVENT_CLASS(xfs_iget_class, 541DECLARE_EVENT_CLASS(xfs_inode_class,
545 TP_PROTO(struct xfs_inode *ip), 542 TP_PROTO(struct xfs_inode *ip),
546 TP_ARGS(ip), 543 TP_ARGS(ip),
547 TP_STRUCT__entry( 544 TP_STRUCT__entry(
@@ -557,16 +554,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class,
557 __entry->ino) 554 __entry->ino)
558) 555)
559 556
560#define DEFINE_IGET_EVENT(name) \ 557#define DEFINE_INODE_EVENT(name) \
561DEFINE_EVENT(xfs_iget_class, name, \ 558DEFINE_EVENT(xfs_inode_class, name, \
562 TP_PROTO(struct xfs_inode *ip), \ 559 TP_PROTO(struct xfs_inode *ip), \
563 TP_ARGS(ip)) 560 TP_ARGS(ip))
564DEFINE_IGET_EVENT(xfs_iget_skip); 561DEFINE_INODE_EVENT(xfs_iget_skip);
565DEFINE_IGET_EVENT(xfs_iget_reclaim); 562DEFINE_INODE_EVENT(xfs_iget_reclaim);
566DEFINE_IGET_EVENT(xfs_iget_found); 563DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
567DEFINE_IGET_EVENT(xfs_iget_alloc); 564DEFINE_INODE_EVENT(xfs_iget_hit);
568 565DEFINE_INODE_EVENT(xfs_iget_miss);
569DECLARE_EVENT_CLASS(xfs_inode_class, 566
567DEFINE_INODE_EVENT(xfs_getattr);
568DEFINE_INODE_EVENT(xfs_setattr);
569DEFINE_INODE_EVENT(xfs_readlink);
570DEFINE_INODE_EVENT(xfs_alloc_file_space);
571DEFINE_INODE_EVENT(xfs_free_file_space);
572DEFINE_INODE_EVENT(xfs_readdir);
573#ifdef CONFIG_XFS_POSIX_ACL
574DEFINE_INODE_EVENT(xfs_check_acl);
575#endif
576DEFINE_INODE_EVENT(xfs_vm_bmap);
577DEFINE_INODE_EVENT(xfs_file_ioctl);
578DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
579DEFINE_INODE_EVENT(xfs_ioctl_setattr);
580DEFINE_INODE_EVENT(xfs_file_fsync);
581DEFINE_INODE_EVENT(xfs_destroy_inode);
582DEFINE_INODE_EVENT(xfs_write_inode);
583DEFINE_INODE_EVENT(xfs_evict_inode);
584
585DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
586DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
587
588DECLARE_EVENT_CLASS(xfs_iref_class,
570 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 589 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
571 TP_ARGS(ip, caller_ip), 590 TP_ARGS(ip, caller_ip),
572 TP_STRUCT__entry( 591 TP_STRUCT__entry(
@@ -591,20 +610,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
591 (char *)__entry->caller_ip) 610 (char *)__entry->caller_ip)
592) 611)
593 612
594#define DEFINE_INODE_EVENT(name) \ 613#define DEFINE_IREF_EVENT(name) \
595DEFINE_EVENT(xfs_inode_class, name, \ 614DEFINE_EVENT(xfs_iref_class, name, \
596 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 615 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
597 TP_ARGS(ip, caller_ip)) 616 TP_ARGS(ip, caller_ip))
598DEFINE_INODE_EVENT(xfs_ihold); 617DEFINE_IREF_EVENT(xfs_ihold);
599DEFINE_INODE_EVENT(xfs_irele); 618DEFINE_IREF_EVENT(xfs_irele);
600DEFINE_INODE_EVENT(xfs_inode_pin); 619DEFINE_IREF_EVENT(xfs_inode_pin);
601DEFINE_INODE_EVENT(xfs_inode_unpin); 620DEFINE_IREF_EVENT(xfs_inode_unpin);
602DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); 621DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
622
623DECLARE_EVENT_CLASS(xfs_namespace_class,
624 TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
625 TP_ARGS(dp, name),
626 TP_STRUCT__entry(
627 __field(dev_t, dev)
628 __field(xfs_ino_t, dp_ino)
629 __dynamic_array(char, name, name->len)
630 ),
631 TP_fast_assign(
632 __entry->dev = VFS_I(dp)->i_sb->s_dev;
633 __entry->dp_ino = dp->i_ino;
634 memcpy(__get_str(name), name->name, name->len);
635 ),
636 TP_printk("dev %d:%d dp ino 0x%llx name %s",
637 MAJOR(__entry->dev), MINOR(__entry->dev),
638 __entry->dp_ino,
639 __get_str(name))
640)
603 641
604/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 642#define DEFINE_NAMESPACE_EVENT(name) \
605DEFINE_INODE_EVENT(xfs_inode); 643DEFINE_EVENT(xfs_namespace_class, name, \
606#define xfs_itrace_entry(ip) \ 644 TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
607 trace_xfs_inode(ip, _THIS_IP_) 645 TP_ARGS(dp, name))
646DEFINE_NAMESPACE_EVENT(xfs_remove);
647DEFINE_NAMESPACE_EVENT(xfs_link);
648DEFINE_NAMESPACE_EVENT(xfs_lookup);
649DEFINE_NAMESPACE_EVENT(xfs_create);
650DEFINE_NAMESPACE_EVENT(xfs_symlink);
651
652TRACE_EVENT(xfs_rename,
653 TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
654 struct xfs_name *src_name, struct xfs_name *target_name),
655 TP_ARGS(src_dp, target_dp, src_name, target_name),
656 TP_STRUCT__entry(
657 __field(dev_t, dev)
658 __field(xfs_ino_t, src_dp_ino)
659 __field(xfs_ino_t, target_dp_ino)
660 __dynamic_array(char, src_name, src_name->len)
661 __dynamic_array(char, target_name, target_name->len)
662 ),
663 TP_fast_assign(
664 __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
665 __entry->src_dp_ino = src_dp->i_ino;
666 __entry->target_dp_ino = target_dp->i_ino;
667 memcpy(__get_str(src_name), src_name->name, src_name->len);
668 memcpy(__get_str(target_name), target_name->name, target_name->len);
669 ),
670 TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
671 " src name %s target name %s",
672 MAJOR(__entry->dev), MINOR(__entry->dev),
673 __entry->src_dp_ino,
674 __entry->target_dp_ino,
675 __get_str(src_name),
676 __get_str(target_name))
677)
608 678
609DECLARE_EVENT_CLASS(xfs_dquot_class, 679DECLARE_EVENT_CLASS(xfs_dquot_class,
610 TP_PROTO(struct xfs_dquot *dqp), 680 TP_PROTO(struct xfs_dquot *dqp),
@@ -684,9 +754,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele);
684DEFINE_DQUOT_EVENT(xfs_dqflush); 754DEFINE_DQUOT_EVENT(xfs_dqflush);
685DEFINE_DQUOT_EVENT(xfs_dqflush_force); 755DEFINE_DQUOT_EVENT(xfs_dqflush_force);
686DEFINE_DQUOT_EVENT(xfs_dqflush_done); 756DEFINE_DQUOT_EVENT(xfs_dqflush_done);
687/* not really iget events, but we re-use the format */
688DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
689DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
690 757
691DECLARE_EVENT_CLASS(xfs_loggrant_class, 758DECLARE_EVENT_CLASS(xfs_loggrant_class,
692 TP_PROTO(struct log *log, struct xlog_ticket *tic), 759 TP_PROTO(struct log *log, struct xlog_ticket *tic),
@@ -834,33 +901,29 @@ DECLARE_EVENT_CLASS(xfs_page_class,
834 __field(loff_t, size) 901 __field(loff_t, size)
835 __field(unsigned long, offset) 902 __field(unsigned long, offset)
836 __field(int, delalloc) 903 __field(int, delalloc)
837 __field(int, unmapped)
838 __field(int, unwritten) 904 __field(int, unwritten)
839 ), 905 ),
840 TP_fast_assign( 906 TP_fast_assign(
841 int delalloc = -1, unmapped = -1, unwritten = -1; 907 int delalloc = -1, unwritten = -1;
842 908
843 if (page_has_buffers(page)) 909 if (page_has_buffers(page))
844 xfs_count_page_state(page, &delalloc, 910 xfs_count_page_state(page, &delalloc, &unwritten);
845 &unmapped, &unwritten);
846 __entry->dev = inode->i_sb->s_dev; 911 __entry->dev = inode->i_sb->s_dev;
847 __entry->ino = XFS_I(inode)->i_ino; 912 __entry->ino = XFS_I(inode)->i_ino;
848 __entry->pgoff = page_offset(page); 913 __entry->pgoff = page_offset(page);
849 __entry->size = i_size_read(inode); 914 __entry->size = i_size_read(inode);
850 __entry->offset = off; 915 __entry->offset = off;
851 __entry->delalloc = delalloc; 916 __entry->delalloc = delalloc;
852 __entry->unmapped = unmapped;
853 __entry->unwritten = unwritten; 917 __entry->unwritten = unwritten;
854 ), 918 ),
855 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " 919 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
856 "delalloc %d unmapped %d unwritten %d", 920 "delalloc %d unwritten %d",
857 MAJOR(__entry->dev), MINOR(__entry->dev), 921 MAJOR(__entry->dev), MINOR(__entry->dev),
858 __entry->ino, 922 __entry->ino,
859 __entry->pgoff, 923 __entry->pgoff,
860 __entry->size, 924 __entry->size,
861 __entry->offset, 925 __entry->offset,
862 __entry->delalloc, 926 __entry->delalloc,
863 __entry->unmapped,
864 __entry->unwritten) 927 __entry->unwritten)
865) 928)
866 929
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
deleted file mode 100644
index f8d279d7563a..000000000000
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_VERSION_H__
19#define __XFS_VERSION_H__
20
21/*
22 * Dummy file that can contain a timestamp to put into the
23 * XFS init string, to help users keep track of what they're
24 * running
25 */
26
27#define XFS_VERSION_STRING "SGI XFS"
28
29#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 585e7633dfc7..faf8e1a83a12 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 31#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
42#include "xfs_error.h" 33#include "xfs_error.h"
43#include "xfs_itable.h" 34#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_space.h" 37#include "xfs_trans_space.h"
@@ -64,8 +54,6 @@
64 flush lock - ditto. 54 flush lock - ditto.
65*/ 55*/
66 56
67STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
68
69#ifdef DEBUG 57#ifdef DEBUG
70xfs_buftarg_t *xfs_dqerror_target; 58xfs_buftarg_t *xfs_dqerror_target;
71int xfs_do_dqerror; 59int xfs_do_dqerror;
@@ -390,21 +378,14 @@ xfs_qm_dqalloc(
390 return (ESRCH); 378 return (ESRCH);
391 } 379 }
392 380
393 /* 381 xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
394 * xfs_trans_commit normally decrements the vnode ref count
395 * when it unlocks the inode. Since we want to keep the quota
396 * inode around, we bump the vnode ref count now.
397 */
398 IHOLD(quotip);
399
400 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
401 nmaps = 1; 382 nmaps = 1;
402 if ((error = xfs_bmapi(tp, quotip, 383 if ((error = xfs_bmapi(tp, quotip,
403 offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, 384 offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
404 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, 385 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
405 &firstblock, 386 &firstblock,
406 XFS_QM_DQALLOC_SPACE_RES(mp), 387 XFS_QM_DQALLOC_SPACE_RES(mp),
407 &map, &nmaps, &flist, NULL))) { 388 &map, &nmaps, &flist))) {
408 goto error0; 389 goto error0;
409 } 390 }
410 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 391 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -482,87 +463,68 @@ xfs_qm_dqtobp(
482 uint flags) 463 uint flags)
483{ 464{
484 xfs_bmbt_irec_t map; 465 xfs_bmbt_irec_t map;
485 int nmaps, error; 466 int nmaps = 1, error;
486 xfs_buf_t *bp; 467 xfs_buf_t *bp;
487 xfs_inode_t *quotip; 468 xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp);
488 xfs_mount_t *mp; 469 xfs_mount_t *mp = dqp->q_mount;
489 xfs_disk_dquot_t *ddq; 470 xfs_disk_dquot_t *ddq;
490 xfs_dqid_t id; 471 xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
491 boolean_t newdquot;
492 xfs_trans_t *tp = (tpp ? *tpp : NULL); 472 xfs_trans_t *tp = (tpp ? *tpp : NULL);
493 473
494 mp = dqp->q_mount; 474 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
495 id = be32_to_cpu(dqp->q_core.d_id);
496 nmaps = 1;
497 newdquot = B_FALSE;
498 475
499 /* 476 xfs_ilock(quotip, XFS_ILOCK_SHARED);
500 * If we don't know where the dquot lives, find out. 477 if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
501 */
502 if (dqp->q_blkno == (xfs_daddr_t) 0) {
503 /* We use the id as an index */
504 dqp->q_fileoffset = (xfs_fileoff_t)id /
505 mp->m_quotainfo->qi_dqperchunk;
506 nmaps = 1;
507 quotip = XFS_DQ_TO_QIP(dqp);
508 xfs_ilock(quotip, XFS_ILOCK_SHARED);
509 /* 478 /*
510 * Return if this type of quotas is turned off while we didn't 479 * Return if this type of quotas is turned off while we
511 * have an inode lock 480 * didn't have the quota inode lock.
512 */ 481 */
513 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 482 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
514 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 483 return ESRCH;
515 return (ESRCH); 484 }
516 } 485
486 /*
487 * Find the block map; no allocations yet
488 */
489 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
490 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
491 NULL, 0, &map, &nmaps, NULL);
492
493 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
494 if (error)
495 return error;
496
497 ASSERT(nmaps == 1);
498 ASSERT(map.br_blockcount == 1);
499
500 /*
501 * Offset of dquot in the (fixed sized) dquot chunk.
502 */
503 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
504 sizeof(xfs_dqblk_t);
505
506 ASSERT(map.br_startblock != DELAYSTARTBLOCK);
507 if (map.br_startblock == HOLESTARTBLOCK) {
517 /* 508 /*
518 * Find the block map; no allocations yet 509 * We don't allocate unless we're asked to
519 */ 510 */
520 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, 511 if (!(flags & XFS_QMOPT_DQALLOC))
521 XFS_DQUOT_CLUSTER_SIZE_FSB, 512 return ENOENT;
522 XFS_BMAPI_METADATA,
523 NULL, 0, &map, &nmaps, NULL, NULL);
524 513
525 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 514 ASSERT(tp);
515 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
516 dqp->q_fileoffset, &bp);
526 if (error) 517 if (error)
527 return (error); 518 return error;
528 ASSERT(nmaps == 1); 519 tp = *tpp;
529 ASSERT(map.br_blockcount == 1); 520 } else {
521 trace_xfs_dqtobp_read(dqp);
530 522
531 /* 523 /*
532 * offset of dquot in the (fixed sized) dquot chunk. 524 * store the blkno etc so that we don't have to do the
525 * mapping all the time
533 */ 526 */
534 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * 527 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
535 sizeof(xfs_dqblk_t);
536 if (map.br_startblock == HOLESTARTBLOCK) {
537 /*
538 * We don't allocate unless we're asked to
539 */
540 if (!(flags & XFS_QMOPT_DQALLOC))
541 return (ENOENT);
542
543 ASSERT(tp);
544 if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
545 dqp->q_fileoffset, &bp)))
546 return (error);
547 tp = *tpp;
548 newdquot = B_TRUE;
549 } else {
550 /*
551 * store the blkno etc so that we don't have to do the
552 * mapping all the time
553 */
554 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
555 }
556 }
557 ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
558 ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
559
560 /*
561 * Read in the buffer, unless we've just done the allocation
562 * (in which case we already have the buf).
563 */
564 if (!newdquot) {
565 trace_xfs_dqtobp_read(dqp);
566 528
567 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 529 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
568 dqp->q_blkno, 530 dqp->q_blkno,
@@ -571,13 +533,14 @@ xfs_qm_dqtobp(
571 if (error || !bp) 533 if (error || !bp)
572 return XFS_ERROR(error); 534 return XFS_ERROR(error);
573 } 535 }
536
574 ASSERT(XFS_BUF_ISBUSY(bp)); 537 ASSERT(XFS_BUF_ISBUSY(bp));
575 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 538 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
576 539
577 /* 540 /*
578 * calculate the location of the dquot inside the buffer. 541 * calculate the location of the dquot inside the buffer.
579 */ 542 */
580 ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); 543 ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
581 544
582 /* 545 /*
583 * A simple sanity check in case we got a corrupted dquot... 546 * A simple sanity check in case we got a corrupted dquot...
@@ -1141,6 +1104,46 @@ xfs_qm_dqrele(
1141 xfs_qm_dqput(dqp); 1104 xfs_qm_dqput(dqp);
1142} 1105}
1143 1106
1107/*
1108 * This is the dquot flushing I/O completion routine. It is called
1109 * from interrupt level when the buffer containing the dquot is
1110 * flushed to disk. It is responsible for removing the dquot logitem
1111 * from the AIL if it has not been re-logged, and unlocking the dquot's
1112 * flush lock. This behavior is very similar to that of inodes..
1113 */
1114STATIC void
1115xfs_qm_dqflush_done(
1116 struct xfs_buf *bp,
1117 struct xfs_log_item *lip)
1118{
1119 xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
1120 xfs_dquot_t *dqp = qip->qli_dquot;
1121 struct xfs_ail *ailp = lip->li_ailp;
1122
1123 /*
1124 * We only want to pull the item from the AIL if its
1125 * location in the log has not changed since we started the flush.
1126 * Thus, we only bother if the dquot's lsn has
1127 * not changed. First we check the lsn outside the lock
1128 * since it's cheaper, and then we recheck while
1129 * holding the lock before removing the dquot from the AIL.
1130 */
1131 if ((lip->li_flags & XFS_LI_IN_AIL) &&
1132 lip->li_lsn == qip->qli_flush_lsn) {
1133
1134 /* xfs_trans_ail_delete() drops the AIL lock. */
1135 spin_lock(&ailp->xa_lock);
1136 if (lip->li_lsn == qip->qli_flush_lsn)
1137 xfs_trans_ail_delete(ailp, lip);
1138 else
1139 spin_unlock(&ailp->xa_lock);
1140 }
1141
1142 /*
1143 * Release the dq's flush lock since we're done with it.
1144 */
1145 xfs_dqfunlock(dqp);
1146}
1144 1147
1145/* 1148/*
1146 * Write a modified dquot to disk. 1149 * Write a modified dquot to disk.
@@ -1155,18 +1158,18 @@ xfs_qm_dqflush(
1155 xfs_dquot_t *dqp, 1158 xfs_dquot_t *dqp,
1156 uint flags) 1159 uint flags)
1157{ 1160{
1158 xfs_mount_t *mp; 1161 struct xfs_mount *mp = dqp->q_mount;
1159 xfs_buf_t *bp; 1162 struct xfs_buf *bp;
1160 xfs_disk_dquot_t *ddqp; 1163 struct xfs_disk_dquot *ddqp;
1161 int error; 1164 int error;
1162 1165
1163 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1166 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1164 ASSERT(!completion_done(&dqp->q_flush)); 1167 ASSERT(!completion_done(&dqp->q_flush));
1168
1165 trace_xfs_dqflush(dqp); 1169 trace_xfs_dqflush(dqp);
1166 1170
1167 /* 1171 /*
1168 * If not dirty, or it's pinned and we are not supposed to 1172 * If not dirty, or it's pinned and we are not supposed to block, nada.
1169 * block, nada.
1170 */ 1173 */
1171 if (!XFS_DQ_IS_DIRTY(dqp) || 1174 if (!XFS_DQ_IS_DIRTY(dqp) ||
1172 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { 1175 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
@@ -1180,40 +1183,46 @@ xfs_qm_dqflush(
1180 * down forcibly. If that's the case we must not write this dquot 1183 * down forcibly. If that's the case we must not write this dquot
1181 * to disk, because the log record didn't make it to disk! 1184 * to disk, because the log record didn't make it to disk!
1182 */ 1185 */
1183 if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { 1186 if (XFS_FORCED_SHUTDOWN(mp)) {
1184 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1187 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1185 xfs_dqfunlock(dqp); 1188 xfs_dqfunlock(dqp);
1186 return XFS_ERROR(EIO); 1189 return XFS_ERROR(EIO);
1187 } 1190 }
1188 1191
1189 /* 1192 /*
1190 * Get the buffer containing the on-disk dquot 1193 * Get the buffer containing the on-disk dquot
1191 * We don't need a transaction envelope because we know that the
1192 * the ondisk-dquot has already been allocated for.
1193 */ 1194 */
1194 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { 1195 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
1196 mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1197 if (error) {
1195 ASSERT(error != ENOENT); 1198 ASSERT(error != ENOENT);
1196 /*
1197 * Quotas could have gotten turned off (ESRCH)
1198 */
1199 xfs_dqfunlock(dqp); 1199 xfs_dqfunlock(dqp);
1200 return (error); 1200 return error;
1201 } 1201 }
1202 1202
1203 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 1203 /*
1204 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { 1204 * Calculate the location of the dquot inside the buffer.
1205 xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); 1205 */
1206 ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
1207
1208 /*
1209 * A simple sanity check in case we got a corrupted dquot..
1210 */
1211 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1212 XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
1213 xfs_buf_relse(bp);
1214 xfs_dqfunlock(dqp);
1215 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1206 return XFS_ERROR(EIO); 1216 return XFS_ERROR(EIO);
1207 } 1217 }
1208 1218
1209 /* This is the only portion of data that needs to persist */ 1219 /* This is the only portion of data that needs to persist */
1210 memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); 1220 memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
1211 1221
1212 /* 1222 /*
1213 * Clear the dirty field and remember the flush lsn for later use. 1223 * Clear the dirty field and remember the flush lsn for later use.
1214 */ 1224 */
1215 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1225 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1216 mp = dqp->q_mount;
1217 1226
1218 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, 1227 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1219 &dqp->q_logitem.qli_item.li_lsn); 1228 &dqp->q_logitem.qli_item.li_lsn);
@@ -1222,8 +1231,9 @@ xfs_qm_dqflush(
1222 * Attach an iodone routine so that we can remove this dquot from the 1231 * Attach an iodone routine so that we can remove this dquot from the
1223 * AIL and release the flush lock once the dquot is synced to disk. 1232 * AIL and release the flush lock once the dquot is synced to disk.
1224 */ 1233 */
1225 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *)) 1234 xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
1226 xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item)); 1235 &dqp->q_logitem.qli_item);
1236
1227 /* 1237 /*
1228 * If the buffer is pinned then push on the log so we won't 1238 * If the buffer is pinned then push on the log so we won't
1229 * get stuck waiting in the write for too long. 1239 * get stuck waiting in the write for too long.
@@ -1247,50 +1257,6 @@ xfs_qm_dqflush(
1247 1257
1248} 1258}
1249 1259
1250/*
1251 * This is the dquot flushing I/O completion routine. It is called
1252 * from interrupt level when the buffer containing the dquot is
1253 * flushed to disk. It is responsible for removing the dquot logitem
1254 * from the AIL if it has not been re-logged, and unlocking the dquot's
1255 * flush lock. This behavior is very similar to that of inodes..
1256 */
1257/*ARGSUSED*/
1258STATIC void
1259xfs_qm_dqflush_done(
1260 xfs_buf_t *bp,
1261 xfs_dq_logitem_t *qip)
1262{
1263 xfs_dquot_t *dqp;
1264 struct xfs_ail *ailp;
1265
1266 dqp = qip->qli_dquot;
1267 ailp = qip->qli_item.li_ailp;
1268
1269 /*
1270 * We only want to pull the item from the AIL if its
1271 * location in the log has not changed since we started the flush.
1272 * Thus, we only bother if the dquot's lsn has
1273 * not changed. First we check the lsn outside the lock
1274 * since it's cheaper, and then we recheck while
1275 * holding the lock before removing the dquot from the AIL.
1276 */
1277 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1278 qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1279
1280 /* xfs_trans_ail_delete() drops the AIL lock. */
1281 spin_lock(&ailp->xa_lock);
1282 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1283 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
1284 else
1285 spin_unlock(&ailp->xa_lock);
1286 }
1287
1288 /*
1289 * Release the dq's flush lock since we're done with it.
1290 */
1291 xfs_dqfunlock(dqp);
1292}
1293
1294int 1260int
1295xfs_qm_dqlock_nowait( 1261xfs_qm_dqlock_nowait(
1296 xfs_dquot_t *dqp) 1262 xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 8d89a24ae324..2a1f3dc10a02 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -23,42 +23,36 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_bmap.h" 31#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
42#include "xfs_error.h" 33#include "xfs_error.h"
43#include "xfs_itable.h" 34#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
48#include "xfs_qm.h" 38#include "xfs_qm.h"
49 39
40static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
41{
42 return container_of(lip, struct xfs_dq_logitem, qli_item);
43}
44
50/* 45/*
51 * returns the number of iovecs needed to log the given dquot item. 46 * returns the number of iovecs needed to log the given dquot item.
52 */ 47 */
53/* ARGSUSED */
54STATIC uint 48STATIC uint
55xfs_qm_dquot_logitem_size( 49xfs_qm_dquot_logitem_size(
56 xfs_dq_logitem_t *logitem) 50 struct xfs_log_item *lip)
57{ 51{
58 /* 52 /*
59 * we need only two iovecs, one for the format, one for the real thing 53 * we need only two iovecs, one for the format, one for the real thing
60 */ 54 */
61 return (2); 55 return 2;
62} 56}
63 57
64/* 58/*
@@ -66,22 +60,21 @@ xfs_qm_dquot_logitem_size(
66 */ 60 */
67STATIC void 61STATIC void
68xfs_qm_dquot_logitem_format( 62xfs_qm_dquot_logitem_format(
69 xfs_dq_logitem_t *logitem, 63 struct xfs_log_item *lip,
70 xfs_log_iovec_t *logvec) 64 struct xfs_log_iovec *logvec)
71{ 65{
72 ASSERT(logitem); 66 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
73 ASSERT(logitem->qli_dquot);
74 67
75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; 68 logvec->i_addr = &qlip->qli_format;
76 logvec->i_len = sizeof(xfs_dq_logformat_t); 69 logvec->i_len = sizeof(xfs_dq_logformat_t);
77 logvec->i_type = XLOG_REG_TYPE_QFORMAT; 70 logvec->i_type = XLOG_REG_TYPE_QFORMAT;
78 logvec++; 71 logvec++;
79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; 72 logvec->i_addr = &qlip->qli_dquot->q_core;
80 logvec->i_len = sizeof(xfs_disk_dquot_t); 73 logvec->i_len = sizeof(xfs_disk_dquot_t);
81 logvec->i_type = XLOG_REG_TYPE_DQUOT; 74 logvec->i_type = XLOG_REG_TYPE_DQUOT;
82 75
83 ASSERT(2 == logitem->qli_item.li_desc->lid_size); 76 ASSERT(2 == lip->li_desc->lid_size);
84 logitem->qli_format.qlf_size = 2; 77 qlip->qli_format.qlf_size = 2;
85 78
86} 79}
87 80
@@ -90,9 +83,9 @@ xfs_qm_dquot_logitem_format(
90 */ 83 */
91STATIC void 84STATIC void
92xfs_qm_dquot_logitem_pin( 85xfs_qm_dquot_logitem_pin(
93 xfs_dq_logitem_t *logitem) 86 struct xfs_log_item *lip)
94{ 87{
95 xfs_dquot_t *dqp = logitem->qli_dquot; 88 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
96 89
97 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 90 ASSERT(XFS_DQ_IS_LOCKED(dqp));
98 atomic_inc(&dqp->q_pincount); 91 atomic_inc(&dqp->q_pincount);
@@ -104,27 +97,18 @@ xfs_qm_dquot_logitem_pin(
104 * dquot must have been previously pinned with a call to 97 * dquot must have been previously pinned with a call to
105 * xfs_qm_dquot_logitem_pin(). 98 * xfs_qm_dquot_logitem_pin().
106 */ 99 */
107/* ARGSUSED */
108STATIC void 100STATIC void
109xfs_qm_dquot_logitem_unpin( 101xfs_qm_dquot_logitem_unpin(
110 xfs_dq_logitem_t *logitem) 102 struct xfs_log_item *lip,
103 int remove)
111{ 104{
112 xfs_dquot_t *dqp = logitem->qli_dquot; 105 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
113 106
114 ASSERT(atomic_read(&dqp->q_pincount) > 0); 107 ASSERT(atomic_read(&dqp->q_pincount) > 0);
115 if (atomic_dec_and_test(&dqp->q_pincount)) 108 if (atomic_dec_and_test(&dqp->q_pincount))
116 wake_up(&dqp->q_pinwait); 109 wake_up(&dqp->q_pinwait);
117} 110}
118 111
119/* ARGSUSED */
120STATIC void
121xfs_qm_dquot_logitem_unpin_remove(
122 xfs_dq_logitem_t *logitem,
123 xfs_trans_t *tp)
124{
125 xfs_qm_dquot_logitem_unpin(logitem);
126}
127
128/* 112/*
129 * Given the logitem, this writes the corresponding dquot entry to disk 113 * Given the logitem, this writes the corresponding dquot entry to disk
130 * asynchronously. This is called with the dquot entry securely locked; 114 * asynchronously. This is called with the dquot entry securely locked;
@@ -133,12 +117,10 @@ xfs_qm_dquot_logitem_unpin_remove(
133 */ 117 */
134STATIC void 118STATIC void
135xfs_qm_dquot_logitem_push( 119xfs_qm_dquot_logitem_push(
136 xfs_dq_logitem_t *logitem) 120 struct xfs_log_item *lip)
137{ 121{
138 xfs_dquot_t *dqp; 122 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
139 int error; 123 int error;
140
141 dqp = logitem->qli_dquot;
142 124
143 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 125 ASSERT(XFS_DQ_IS_LOCKED(dqp));
144 ASSERT(!completion_done(&dqp->q_flush)); 126 ASSERT(!completion_done(&dqp->q_flush));
@@ -160,27 +142,25 @@ xfs_qm_dquot_logitem_push(
160 xfs_dqunlock(dqp); 142 xfs_dqunlock(dqp);
161} 143}
162 144
163/*ARGSUSED*/
164STATIC xfs_lsn_t 145STATIC xfs_lsn_t
165xfs_qm_dquot_logitem_committed( 146xfs_qm_dquot_logitem_committed(
166 xfs_dq_logitem_t *l, 147 struct xfs_log_item *lip,
167 xfs_lsn_t lsn) 148 xfs_lsn_t lsn)
168{ 149{
169 /* 150 /*
170 * We always re-log the entire dquot when it becomes dirty, 151 * We always re-log the entire dquot when it becomes dirty,
171 * so, the latest copy _is_ the only one that matters. 152 * so, the latest copy _is_ the only one that matters.
172 */ 153 */
173 return (lsn); 154 return lsn;
174} 155}
175 156
176
177/* 157/*
178 * This is called to wait for the given dquot to be unpinned. 158 * This is called to wait for the given dquot to be unpinned.
179 * Most of these pin/unpin routines are plagiarized from inode code. 159 * Most of these pin/unpin routines are plagiarized from inode code.
180 */ 160 */
181void 161void
182xfs_qm_dqunpin_wait( 162xfs_qm_dqunpin_wait(
183 xfs_dquot_t *dqp) 163 struct xfs_dquot *dqp)
184{ 164{
185 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 165 ASSERT(XFS_DQ_IS_LOCKED(dqp));
186 if (atomic_read(&dqp->q_pincount) == 0) 166 if (atomic_read(&dqp->q_pincount) == 0)
@@ -206,13 +186,12 @@ xfs_qm_dqunpin_wait(
206 */ 186 */
207STATIC void 187STATIC void
208xfs_qm_dquot_logitem_pushbuf( 188xfs_qm_dquot_logitem_pushbuf(
209 xfs_dq_logitem_t *qip) 189 struct xfs_log_item *lip)
210{ 190{
211 xfs_dquot_t *dqp; 191 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
212 xfs_mount_t *mp; 192 struct xfs_dquot *dqp = qlip->qli_dquot;
213 xfs_buf_t *bp; 193 struct xfs_buf *bp;
214 194
215 dqp = qip->qli_dquot;
216 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 195 ASSERT(XFS_DQ_IS_LOCKED(dqp));
217 196
218 /* 197 /*
@@ -220,22 +199,20 @@ xfs_qm_dquot_logitem_pushbuf(
220 * inode flush completed and the inode was taken off the AIL. 199 * inode flush completed and the inode was taken off the AIL.
221 * So, just get out. 200 * So, just get out.
222 */ 201 */
223 if (completion_done(&dqp->q_flush) || 202 if (completion_done(&dqp->q_flush) ||
224 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 203 !(lip->li_flags & XFS_LI_IN_AIL)) {
225 xfs_dqunlock(dqp); 204 xfs_dqunlock(dqp);
226 return; 205 return;
227 } 206 }
228 mp = dqp->q_mount; 207
229 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 208 bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
230 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); 209 dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
231 xfs_dqunlock(dqp); 210 xfs_dqunlock(dqp);
232 if (!bp) 211 if (!bp)
233 return; 212 return;
234 if (XFS_BUF_ISDELAYWRITE(bp)) 213 if (XFS_BUF_ISDELAYWRITE(bp))
235 xfs_buf_delwri_promote(bp); 214 xfs_buf_delwri_promote(bp);
236 xfs_buf_relse(bp); 215 xfs_buf_relse(bp);
237 return;
238
239} 216}
240 217
241/* 218/*
@@ -250,15 +227,14 @@ xfs_qm_dquot_logitem_pushbuf(
250 */ 227 */
251STATIC uint 228STATIC uint
252xfs_qm_dquot_logitem_trylock( 229xfs_qm_dquot_logitem_trylock(
253 xfs_dq_logitem_t *qip) 230 struct xfs_log_item *lip)
254{ 231{
255 xfs_dquot_t *dqp; 232 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
256 233
257 dqp = qip->qli_dquot;
258 if (atomic_read(&dqp->q_pincount) > 0) 234 if (atomic_read(&dqp->q_pincount) > 0)
259 return XFS_ITEM_PINNED; 235 return XFS_ITEM_PINNED;
260 236
261 if (! xfs_qm_dqlock_nowait(dqp)) 237 if (!xfs_qm_dqlock_nowait(dqp))
262 return XFS_ITEM_LOCKED; 238 return XFS_ITEM_LOCKED;
263 239
264 if (!xfs_dqflock_nowait(dqp)) { 240 if (!xfs_dqflock_nowait(dqp)) {
@@ -269,11 +245,10 @@ xfs_qm_dquot_logitem_trylock(
269 return XFS_ITEM_PUSHBUF; 245 return XFS_ITEM_PUSHBUF;
270 } 246 }
271 247
272 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); 248 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
273 return XFS_ITEM_SUCCESS; 249 return XFS_ITEM_SUCCESS;
274} 250}
275 251
276
277/* 252/*
278 * Unlock the dquot associated with the log item. 253 * Unlock the dquot associated with the log item.
279 * Clear the fields of the dquot and dquot log item that 254 * Clear the fields of the dquot and dquot log item that
@@ -282,12 +257,10 @@ xfs_qm_dquot_logitem_trylock(
282 */ 257 */
283STATIC void 258STATIC void
284xfs_qm_dquot_logitem_unlock( 259xfs_qm_dquot_logitem_unlock(
285 xfs_dq_logitem_t *ql) 260 struct xfs_log_item *lip)
286{ 261{
287 xfs_dquot_t *dqp; 262 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
288 263
289 ASSERT(ql != NULL);
290 dqp = ql->qli_dquot;
291 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 264 ASSERT(XFS_DQ_IS_LOCKED(dqp));
292 265
293 /* 266 /*
@@ -304,43 +277,32 @@ xfs_qm_dquot_logitem_unlock(
304 xfs_dqunlock(dqp); 277 xfs_dqunlock(dqp);
305} 278}
306 279
307
308/* 280/*
309 * this needs to stamp an lsn into the dquot, I think. 281 * this needs to stamp an lsn into the dquot, I think.
310 * rpc's that look at user dquot's would then have to 282 * rpc's that look at user dquot's would then have to
311 * push on the dependency recorded in the dquot 283 * push on the dependency recorded in the dquot
312 */ 284 */
313/* ARGSUSED */
314STATIC void 285STATIC void
315xfs_qm_dquot_logitem_committing( 286xfs_qm_dquot_logitem_committing(
316 xfs_dq_logitem_t *l, 287 struct xfs_log_item *lip,
317 xfs_lsn_t lsn) 288 xfs_lsn_t lsn)
318{ 289{
319 return;
320} 290}
321 291
322
323/* 292/*
324 * This is the ops vector for dquots 293 * This is the ops vector for dquots
325 */ 294 */
326static struct xfs_item_ops xfs_dquot_item_ops = { 295static struct xfs_item_ops xfs_dquot_item_ops = {
327 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, 296 .iop_size = xfs_qm_dquot_logitem_size,
328 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 297 .iop_format = xfs_qm_dquot_logitem_format,
329 xfs_qm_dquot_logitem_format, 298 .iop_pin = xfs_qm_dquot_logitem_pin,
330 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, 299 .iop_unpin = xfs_qm_dquot_logitem_unpin,
331 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin, 300 .iop_trylock = xfs_qm_dquot_logitem_trylock,
332 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 301 .iop_unlock = xfs_qm_dquot_logitem_unlock,
333 xfs_qm_dquot_logitem_unpin_remove, 302 .iop_committed = xfs_qm_dquot_logitem_committed,
334 .iop_trylock = (uint(*)(xfs_log_item_t*)) 303 .iop_push = xfs_qm_dquot_logitem_push,
335 xfs_qm_dquot_logitem_trylock, 304 .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf,
336 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock, 305 .iop_committing = xfs_qm_dquot_logitem_committing
337 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
338 xfs_qm_dquot_logitem_committed,
339 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
340 .iop_pushbuf = (void(*)(xfs_log_item_t*))
341 xfs_qm_dquot_logitem_pushbuf,
342 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
343 xfs_qm_dquot_logitem_committing
344}; 306};
345 307
346/* 308/*
@@ -350,10 +312,9 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
350 */ 312 */
351void 313void
352xfs_qm_dquot_logitem_init( 314xfs_qm_dquot_logitem_init(
353 struct xfs_dquot *dqp) 315 struct xfs_dquot *dqp)
354{ 316{
355 xfs_dq_logitem_t *lp; 317 struct xfs_dq_logitem *lp = &dqp->q_logitem;
356 lp = &dqp->q_logitem;
357 318
358 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, 319 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
359 &xfs_dquot_item_ops); 320 &xfs_dquot_item_ops);
@@ -374,16 +335,22 @@ xfs_qm_dquot_logitem_init(
374 335
375/*------------------ QUOTAOFF LOG ITEMS -------------------*/ 336/*------------------ QUOTAOFF LOG ITEMS -------------------*/
376 337
338static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
339{
340 return container_of(lip, struct xfs_qoff_logitem, qql_item);
341}
342
343
377/* 344/*
378 * This returns the number of iovecs needed to log the given quotaoff item. 345 * This returns the number of iovecs needed to log the given quotaoff item.
379 * We only need 1 iovec for an quotaoff item. It just logs the 346 * We only need 1 iovec for an quotaoff item. It just logs the
380 * quotaoff_log_format structure. 347 * quotaoff_log_format structure.
381 */ 348 */
382/*ARGSUSED*/
383STATIC uint 349STATIC uint
384xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) 350xfs_qm_qoff_logitem_size(
351 struct xfs_log_item *lip)
385{ 352{
386 return (1); 353 return 1;
387} 354}
388 355
389/* 356/*
@@ -394,53 +361,46 @@ xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
394 * slots in the quotaoff item have been filled. 361 * slots in the quotaoff item have been filled.
395 */ 362 */
396STATIC void 363STATIC void
397xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, 364xfs_qm_qoff_logitem_format(
398 xfs_log_iovec_t *log_vector) 365 struct xfs_log_item *lip,
366 struct xfs_log_iovec *log_vector)
399{ 367{
400 ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF); 368 struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
369
370 ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
401 371
402 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 372 log_vector->i_addr = &qflip->qql_format;
403 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 373 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
404 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; 374 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
405 qf->qql_format.qf_size = 1; 375 qflip->qql_format.qf_size = 1;
406} 376}
407 377
408
409/* 378/*
410 * Pinning has no meaning for an quotaoff item, so just return. 379 * Pinning has no meaning for an quotaoff item, so just return.
411 */ 380 */
412/*ARGSUSED*/
413STATIC void 381STATIC void
414xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) 382xfs_qm_qoff_logitem_pin(
383 struct xfs_log_item *lip)
415{ 384{
416 return;
417} 385}
418 386
419
420/* 387/*
421 * Since pinning has no meaning for an quotaoff item, unpinning does 388 * Since pinning has no meaning for an quotaoff item, unpinning does
422 * not either. 389 * not either.
423 */ 390 */
424/*ARGSUSED*/
425STATIC void 391STATIC void
426xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf) 392xfs_qm_qoff_logitem_unpin(
393 struct xfs_log_item *lip,
394 int remove)
427{ 395{
428 return;
429}
430
431/*ARGSUSED*/
432STATIC void
433xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
434{
435 return;
436} 396}
437 397
438/* 398/*
439 * Quotaoff items have no locking, so just return success. 399 * Quotaoff items have no locking, so just return success.
440 */ 400 */
441/*ARGSUSED*/
442STATIC uint 401STATIC uint
443xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) 402xfs_qm_qoff_logitem_trylock(
403 struct xfs_log_item *lip)
444{ 404{
445 return XFS_ITEM_LOCKED; 405 return XFS_ITEM_LOCKED;
446} 406}
@@ -449,53 +409,51 @@ xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
449 * Quotaoff items have no locking or pushing, so return failure 409 * Quotaoff items have no locking or pushing, so return failure
450 * so that the caller doesn't bother with us. 410 * so that the caller doesn't bother with us.
451 */ 411 */
452/*ARGSUSED*/
453STATIC void 412STATIC void
454xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf) 413xfs_qm_qoff_logitem_unlock(
414 struct xfs_log_item *lip)
455{ 415{
456 return;
457} 416}
458 417
459/* 418/*
460 * The quotaoff-start-item is logged only once and cannot be moved in the log, 419 * The quotaoff-start-item is logged only once and cannot be moved in the log,
461 * so simply return the lsn at which it's been logged. 420 * so simply return the lsn at which it's been logged.
462 */ 421 */
463/*ARGSUSED*/
464STATIC xfs_lsn_t 422STATIC xfs_lsn_t
465xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) 423xfs_qm_qoff_logitem_committed(
424 struct xfs_log_item *lip,
425 xfs_lsn_t lsn)
466{ 426{
467 return (lsn); 427 return lsn;
468} 428}
469 429
470/* 430/*
471 * There isn't much you can do to push on an quotaoff item. It is simply 431 * There isn't much you can do to push on an quotaoff item. It is simply
472 * stuck waiting for the log to be flushed to disk. 432 * stuck waiting for the log to be flushed to disk.
473 */ 433 */
474/*ARGSUSED*/
475STATIC void 434STATIC void
476xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf) 435xfs_qm_qoff_logitem_push(
436 struct xfs_log_item *lip)
477{ 437{
478 return;
479} 438}
480 439
481 440
482/*ARGSUSED*/
483STATIC xfs_lsn_t 441STATIC xfs_lsn_t
484xfs_qm_qoffend_logitem_committed( 442xfs_qm_qoffend_logitem_committed(
485 xfs_qoff_logitem_t *qfe, 443 struct xfs_log_item *lip,
486 xfs_lsn_t lsn) 444 xfs_lsn_t lsn)
487{ 445{
488 xfs_qoff_logitem_t *qfs; 446 struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
489 struct xfs_ail *ailp; 447 struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
448 struct xfs_ail *ailp = qfs->qql_item.li_ailp;
490 449
491 qfs = qfe->qql_start_lip;
492 ailp = qfs->qql_item.li_ailp;
493 spin_lock(&ailp->xa_lock);
494 /* 450 /*
495 * Delete the qoff-start logitem from the AIL. 451 * Delete the qoff-start logitem from the AIL.
496 * xfs_trans_ail_delete() drops the AIL lock. 452 * xfs_trans_ail_delete() drops the AIL lock.
497 */ 453 */
454 spin_lock(&ailp->xa_lock);
498 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); 455 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
456
499 kmem_free(qfs); 457 kmem_free(qfs);
500 kmem_free(qfe); 458 kmem_free(qfe);
501 return (xfs_lsn_t)-1; 459 return (xfs_lsn_t)-1;
@@ -515,71 +473,52 @@ xfs_qm_qoffend_logitem_committed(
515 * (truly makes the quotaoff irrevocable). If we do something else, 473 * (truly makes the quotaoff irrevocable). If we do something else,
516 * then maybe we don't need two. 474 * then maybe we don't need two.
517 */ 475 */
518/* ARGSUSED */
519STATIC void
520xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
521{
522 return;
523}
524
525/* ARGSUSED */
526STATIC void 476STATIC void
527xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) 477xfs_qm_qoff_logitem_committing(
478 struct xfs_log_item *lip,
479 xfs_lsn_t commit_lsn)
528{ 480{
529 return;
530} 481}
531 482
532static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 483static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
533 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 484 .iop_size = xfs_qm_qoff_logitem_size,
534 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 485 .iop_format = xfs_qm_qoff_logitem_format,
535 xfs_qm_qoff_logitem_format, 486 .iop_pin = xfs_qm_qoff_logitem_pin,
536 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 487 .iop_unpin = xfs_qm_qoff_logitem_unpin,
537 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, 488 .iop_trylock = xfs_qm_qoff_logitem_trylock,
538 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 489 .iop_unlock = xfs_qm_qoff_logitem_unlock,
539 xfs_qm_qoff_logitem_unpin_remove, 490 .iop_committed = xfs_qm_qoffend_logitem_committed,
540 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 491 .iop_push = xfs_qm_qoff_logitem_push,
541 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, 492 .iop_committing = xfs_qm_qoff_logitem_committing
542 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
543 xfs_qm_qoffend_logitem_committed,
544 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
545 .iop_pushbuf = NULL,
546 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
547 xfs_qm_qoffend_logitem_committing
548}; 493};
549 494
550/* 495/*
551 * This is the ops vector shared by all quotaoff-start log items. 496 * This is the ops vector shared by all quotaoff-start log items.
552 */ 497 */
553static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 498static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
554 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 499 .iop_size = xfs_qm_qoff_logitem_size,
555 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 500 .iop_format = xfs_qm_qoff_logitem_format,
556 xfs_qm_qoff_logitem_format, 501 .iop_pin = xfs_qm_qoff_logitem_pin,
557 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 502 .iop_unpin = xfs_qm_qoff_logitem_unpin,
558 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, 503 .iop_trylock = xfs_qm_qoff_logitem_trylock,
559 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 504 .iop_unlock = xfs_qm_qoff_logitem_unlock,
560 xfs_qm_qoff_logitem_unpin_remove, 505 .iop_committed = xfs_qm_qoff_logitem_committed,
561 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 506 .iop_push = xfs_qm_qoff_logitem_push,
562 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, 507 .iop_committing = xfs_qm_qoff_logitem_committing
563 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
564 xfs_qm_qoff_logitem_committed,
565 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
566 .iop_pushbuf = NULL,
567 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
568 xfs_qm_qoff_logitem_committing
569}; 508};
570 509
571/* 510/*
572 * Allocate and initialize an quotaoff item of the correct quota type(s). 511 * Allocate and initialize an quotaoff item of the correct quota type(s).
573 */ 512 */
574xfs_qoff_logitem_t * 513struct xfs_qoff_logitem *
575xfs_qm_qoff_logitem_init( 514xfs_qm_qoff_logitem_init(
576 struct xfs_mount *mp, 515 struct xfs_mount *mp,
577 xfs_qoff_logitem_t *start, 516 struct xfs_qoff_logitem *start,
578 uint flags) 517 uint flags)
579{ 518{
580 xfs_qoff_logitem_t *qf; 519 struct xfs_qoff_logitem *qf;
581 520
582 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); 521 qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
583 522
584 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? 523 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
585 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); 524 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
@@ -587,5 +526,5 @@ xfs_qm_qoff_logitem_init(
587 qf->qql_format.qf_type = XFS_LI_QUOTAOFF; 526 qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
588 qf->qql_format.qf_flags = flags; 527 qf->qql_format.qf_flags = flags;
589 qf->qql_start_lip = start; 528 qf->qql_start_lip = start;
590 return (qf); 529 return qf;
591} 530}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 67c018392d62..f8e854b4fde8 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -23,25 +23,18 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 31#include "xfs_dinode.h"
37#include "xfs_inode.h" 32#include "xfs_inode.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
40#include "xfs_itable.h" 34#include "xfs_itable.h"
41#include "xfs_rtalloc.h" 35#include "xfs_rtalloc.h"
42#include "xfs_error.h" 36#include "xfs_error.h"
43#include "xfs_bmap.h" 37#include "xfs_bmap.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 38#include "xfs_attr.h"
46#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
47#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
@@ -62,8 +55,6 @@ uint ndquot;
62kmem_zone_t *qm_dqzone; 55kmem_zone_t *qm_dqzone;
63kmem_zone_t *qm_dqtrxzone; 56kmem_zone_t *qm_dqtrxzone;
64 57
65static cred_t xfs_zerocr;
66
67STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 58STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
68STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 59STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 60
@@ -844,7 +835,7 @@ xfs_qm_dqattach_locked(
844 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 835 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
845 flags & XFS_QMOPT_DQALLOC, 836 flags & XFS_QMOPT_DQALLOC,
846 ip->i_udquot, &ip->i_gdquot) : 837 ip->i_udquot, &ip->i_gdquot) :
847 xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, 838 xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
848 flags & XFS_QMOPT_DQALLOC, 839 flags & XFS_QMOPT_DQALLOC,
849 ip->i_udquot, &ip->i_gdquot); 840 ip->i_udquot, &ip->i_gdquot);
850 /* 841 /*
@@ -1206,87 +1197,6 @@ xfs_qm_list_destroy(
1206 mutex_destroy(&(list->qh_lock)); 1197 mutex_destroy(&(list->qh_lock));
1207} 1198}
1208 1199
1209
1210/*
1211 * Stripped down version of dqattach. This doesn't attach, or even look at the
1212 * dquots attached to the inode. The rationale is that there won't be any
1213 * attached at the time this is called from quotacheck.
1214 */
1215STATIC int
1216xfs_qm_dqget_noattach(
1217 xfs_inode_t *ip,
1218 xfs_dquot_t **O_udqpp,
1219 xfs_dquot_t **O_gdqpp)
1220{
1221 int error;
1222 xfs_mount_t *mp;
1223 xfs_dquot_t *udqp, *gdqp;
1224
1225 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1226 mp = ip->i_mount;
1227 udqp = NULL;
1228 gdqp = NULL;
1229
1230 if (XFS_IS_UQUOTA_ON(mp)) {
1231 ASSERT(ip->i_udquot == NULL);
1232 /*
1233 * We want the dquot allocated if it doesn't exist.
1234 */
1235 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1236 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1237 &udqp))) {
1238 /*
1239 * Shouldn't be able to turn off quotas here.
1240 */
1241 ASSERT(error != ESRCH);
1242 ASSERT(error != ENOENT);
1243 return error;
1244 }
1245 ASSERT(udqp);
1246 }
1247
1248 if (XFS_IS_OQUOTA_ON(mp)) {
1249 ASSERT(ip->i_gdquot == NULL);
1250 if (udqp)
1251 xfs_dqunlock(udqp);
1252 error = XFS_IS_GQUOTA_ON(mp) ?
1253 xfs_qm_dqget(mp, ip,
1254 ip->i_d.di_gid, XFS_DQ_GROUP,
1255 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1256 &gdqp) :
1257 xfs_qm_dqget(mp, ip,
1258 ip->i_d.di_projid, XFS_DQ_PROJ,
1259 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1260 &gdqp);
1261 if (error) {
1262 if (udqp)
1263 xfs_qm_dqrele(udqp);
1264 ASSERT(error != ESRCH);
1265 ASSERT(error != ENOENT);
1266 return error;
1267 }
1268 ASSERT(gdqp);
1269
1270 /* Reacquire the locks in the right order */
1271 if (udqp) {
1272 if (! xfs_qm_dqlock_nowait(udqp)) {
1273 xfs_dqunlock(gdqp);
1274 xfs_dqlock(udqp);
1275 xfs_dqlock(gdqp);
1276 }
1277 }
1278 }
1279
1280 *O_udqpp = udqp;
1281 *O_gdqpp = gdqp;
1282
1283#ifdef QUOTADEBUG
1284 if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1285 if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1286#endif
1287 return 0;
1288}
1289
1290/* 1200/*
1291 * Create an inode and return with a reference already taken, but unlocked 1201 * Create an inode and return with a reference already taken, but unlocked
1292 * This is how we create quota inodes 1202 * This is how we create quota inodes
@@ -1312,8 +1222,8 @@ xfs_qm_qino_alloc(
1312 return error; 1222 return error;
1313 } 1223 }
1314 1224
1315 if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 1225 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
1316 &xfs_zerocr, 0, 1, ip, &committed))) { 1226 if (error) {
1317 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1227 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1318 XFS_TRANS_ABORT); 1228 XFS_TRANS_ABORT);
1319 return error; 1229 return error;
@@ -1497,7 +1407,7 @@ xfs_qm_dqiterate(
1497 maxlblkcnt - lblkno, 1407 maxlblkcnt - lblkno,
1498 XFS_BMAPI_METADATA, 1408 XFS_BMAPI_METADATA,
1499 NULL, 1409 NULL,
1500 0, map, &nmaps, NULL, NULL); 1410 0, map, &nmaps, NULL);
1501 xfs_iunlock(qip, XFS_ILOCK_SHARED); 1411 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1502 if (error) 1412 if (error)
1503 break; 1413 break;
@@ -1523,7 +1433,7 @@ xfs_qm_dqiterate(
1523 rablkcnt = map[i+1].br_blockcount; 1433 rablkcnt = map[i+1].br_blockcount;
1524 rablkno = map[i+1].br_startblock; 1434 rablkno = map[i+1].br_startblock;
1525 while (rablkcnt--) { 1435 while (rablkcnt--) {
1526 xfs_baread(mp->m_ddev_targp, 1436 xfs_buf_readahead(mp->m_ddev_targp,
1527 XFS_FSB_TO_DADDR(mp, rablkno), 1437 XFS_FSB_TO_DADDR(mp, rablkno),
1528 mp->m_quotainfo->qi_dqchunklen); 1438 mp->m_quotainfo->qi_dqchunklen);
1529 rablkno++; 1439 rablkno++;
@@ -1553,18 +1463,34 @@ xfs_qm_dqiterate(
1553 1463
1554/* 1464/*
1555 * Called by dqusage_adjust in doing a quotacheck. 1465 * Called by dqusage_adjust in doing a quotacheck.
1556 * Given the inode, and a dquot (either USR or GRP, doesn't matter), 1466 *
1557 * this updates its incore copy as well as the buffer copy. This is 1467 * Given the inode, and a dquot id this updates both the incore dqout as well
1558 * so that once the quotacheck is done, we can just log all the buffers, 1468 * as the buffer copy. This is so that once the quotacheck is done, we can
1559 * as opposed to logging numerous updates to individual dquots. 1469 * just log all the buffers, as opposed to logging numerous updates to
1470 * individual dquots.
1560 */ 1471 */
1561STATIC void 1472STATIC int
1562xfs_qm_quotacheck_dqadjust( 1473xfs_qm_quotacheck_dqadjust(
1563 xfs_dquot_t *dqp, 1474 struct xfs_inode *ip,
1475 xfs_dqid_t id,
1476 uint type,
1564 xfs_qcnt_t nblks, 1477 xfs_qcnt_t nblks,
1565 xfs_qcnt_t rtblks) 1478 xfs_qcnt_t rtblks)
1566{ 1479{
1567 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1480 struct xfs_mount *mp = ip->i_mount;
1481 struct xfs_dquot *dqp;
1482 int error;
1483
1484 error = xfs_qm_dqget(mp, ip, id, type,
1485 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1486 if (error) {
1487 /*
1488 * Shouldn't be able to turn off quotas here.
1489 */
1490 ASSERT(error != ESRCH);
1491 ASSERT(error != ENOENT);
1492 return error;
1493 }
1568 1494
1569 trace_xfs_dqadjust(dqp); 1495 trace_xfs_dqadjust(dqp);
1570 1496
@@ -1589,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust(
1589 * There are no timers for the default values set in the root dquot. 1515 * There are no timers for the default values set in the root dquot.
1590 */ 1516 */
1591 if (dqp->q_core.d_id) { 1517 if (dqp->q_core.d_id) {
1592 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); 1518 xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
1593 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); 1519 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1594 } 1520 }
1595 1521
1596 dqp->dq_flags |= XFS_DQ_DIRTY; 1522 dqp->dq_flags |= XFS_DQ_DIRTY;
1523 xfs_qm_dqput(dqp);
1524 return 0;
1597} 1525}
1598 1526
1599STATIC int 1527STATIC int
@@ -1636,8 +1564,7 @@ xfs_qm_dqusage_adjust(
1636 int *res) /* result code value */ 1564 int *res) /* result code value */
1637{ 1565{
1638 xfs_inode_t *ip; 1566 xfs_inode_t *ip;
1639 xfs_dquot_t *udqp, *gdqp; 1567 xfs_qcnt_t nblks, rtblks = 0;
1640 xfs_qcnt_t nblks, rtblks;
1641 int error; 1568 int error;
1642 1569
1643 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1570 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1657,49 +1584,24 @@ xfs_qm_dqusage_adjust(
1657 * the case in all other instances. It's OK that we do this because 1584 * the case in all other instances. It's OK that we do this because
1658 * quotacheck is done only at mount time. 1585 * quotacheck is done only at mount time.
1659 */ 1586 */
1660 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { 1587 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1588 if (error) {
1661 *res = BULKSTAT_RV_NOTHING; 1589 *res = BULKSTAT_RV_NOTHING;
1662 return error; 1590 return error;
1663 } 1591 }
1664 1592
1665 /* 1593 ASSERT(ip->i_delayed_blks == 0);
1666 * Obtain the locked dquots. In case of an error (eg. allocation
1667 * fails for ENOSPC), we return the negative of the error number
1668 * to bulkstat, so that it can get propagated to quotacheck() and
1669 * making us disable quotas for the file system.
1670 */
1671 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1672 xfs_iput(ip, XFS_ILOCK_EXCL);
1673 *res = BULKSTAT_RV_GIVEUP;
1674 return error;
1675 }
1676 1594
1677 rtblks = 0; 1595 if (XFS_IS_REALTIME_INODE(ip)) {
1678 if (! XFS_IS_REALTIME_INODE(ip)) {
1679 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1680 } else {
1681 /* 1596 /*
1682 * Walk thru the extent list and count the realtime blocks. 1597 * Walk thru the extent list and count the realtime blocks.
1683 */ 1598 */
1684 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { 1599 error = xfs_qm_get_rtblks(ip, &rtblks);
1685 xfs_iput(ip, XFS_ILOCK_EXCL); 1600 if (error)
1686 if (udqp) 1601 goto error0;
1687 xfs_qm_dqput(udqp);
1688 if (gdqp)
1689 xfs_qm_dqput(gdqp);
1690 *res = BULKSTAT_RV_GIVEUP;
1691 return error;
1692 }
1693 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1694 } 1602 }
1695 ASSERT(ip->i_delayed_blks == 0);
1696 1603
1697 /* 1604 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1698 * We can't release the inode while holding its dquot locks.
1699 * The inode can go into inactive and might try to acquire the dquotlocks.
1700 * So, just unlock here and do a vn_rele at the end.
1701 */
1702 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1703 1605
1704 /* 1606 /*
1705 * Add the (disk blocks and inode) resources occupied by this 1607 * Add the (disk blocks and inode) resources occupied by this
@@ -1714,26 +1616,36 @@ xfs_qm_dqusage_adjust(
1714 * and quotaoffs don't race. (Quotachecks happen at mount time only). 1616 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1715 */ 1617 */
1716 if (XFS_IS_UQUOTA_ON(mp)) { 1618 if (XFS_IS_UQUOTA_ON(mp)) {
1717 ASSERT(udqp); 1619 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1718 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); 1620 XFS_DQ_USER, nblks, rtblks);
1719 xfs_qm_dqput(udqp); 1621 if (error)
1622 goto error0;
1720 } 1623 }
1721 if (XFS_IS_OQUOTA_ON(mp)) { 1624
1722 ASSERT(gdqp); 1625 if (XFS_IS_GQUOTA_ON(mp)) {
1723 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); 1626 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1724 xfs_qm_dqput(gdqp); 1627 XFS_DQ_GROUP, nblks, rtblks);
1628 if (error)
1629 goto error0;
1725 } 1630 }
1726 /*
1727 * Now release the inode. This will send it to 'inactive', and
1728 * possibly even free blocks.
1729 */
1730 IRELE(ip);
1731 1631
1732 /* 1632 if (XFS_IS_PQUOTA_ON(mp)) {
1733 * Goto next inode. 1633 error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
1734 */ 1634 XFS_DQ_PROJ, nblks, rtblks);
1635 if (error)
1636 goto error0;
1637 }
1638
1639 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1640 IRELE(ip);
1735 *res = BULKSTAT_RV_DIDONE; 1641 *res = BULKSTAT_RV_DIDONE;
1736 return 0; 1642 return 0;
1643
1644error0:
1645 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1646 IRELE(ip);
1647 *res = BULKSTAT_RV_GIVEUP;
1648 return error;
1737} 1649}
1738 1650
1739/* 1651/*
@@ -2229,7 +2141,7 @@ xfs_qm_write_sb_changes(
2229 2141
2230 2142
2231/* 2143/*
2232 * Given an inode, a uid and gid (from cred_t) make sure that we have 2144 * Given an inode, a uid, gid and prid make sure that we have
2233 * allocated relevant dquot(s) on disk, and that we won't exceed inode 2145 * allocated relevant dquot(s) on disk, and that we won't exceed inode
2234 * quotas by creating this file. 2146 * quotas by creating this file.
2235 * This also attaches dquot(s) to the given inode after locking it, 2147 * This also attaches dquot(s) to the given inode after locking it,
@@ -2337,7 +2249,7 @@ xfs_qm_vop_dqalloc(
2337 xfs_dqunlock(gq); 2249 xfs_dqunlock(gq);
2338 } 2250 }
2339 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { 2251 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2340 if (ip->i_d.di_projid != prid) { 2252 if (xfs_get_projid(ip) != prid) {
2341 xfs_iunlock(ip, lockflags); 2253 xfs_iunlock(ip, lockflags);
2342 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, 2254 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2343 XFS_DQ_PROJ, 2255 XFS_DQ_PROJ,
@@ -2459,7 +2371,7 @@ xfs_qm_vop_chown_reserve(
2459 } 2371 }
2460 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { 2372 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2461 if (XFS_IS_PQUOTA_ON(ip->i_mount) && 2373 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2462 ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) 2374 xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
2463 prjflags = XFS_QMOPT_ENOSPC; 2375 prjflags = XFS_QMOPT_ENOSPC;
2464 2376
2465 if (prjflags || 2377 if (prjflags ||
@@ -2563,7 +2475,7 @@ xfs_qm_vop_create_dqattach(
2563 ip->i_gdquot = gdqp; 2475 ip->i_gdquot = gdqp;
2564 ASSERT(XFS_IS_OQUOTA_ON(mp)); 2476 ASSERT(XFS_IS_OQUOTA_ON(mp));
2565 ASSERT((XFS_IS_GQUOTA_ON(mp) ? 2477 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2566 ip->i_d.di_gid : ip->i_d.di_projid) == 2478 ip->i_d.di_gid : xfs_get_projid(ip)) ==
2567 be32_to_cpu(gdqp->q_core.d_id)); 2479 be32_to_cpu(gdqp->q_core.d_id));
2568 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2480 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2569 } 2481 }
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97b410c12794..45b5cb1788ab 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 32#include "xfs_bmap.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_qm.h" 37#include "xfs_qm.h"
@@ -91,7 +81,7 @@ xfs_qm_statvfs(
91 xfs_mount_t *mp = ip->i_mount; 81 xfs_mount_t *mp = ip->i_mount;
92 xfs_dquot_t *dqp; 82 xfs_dquot_t *dqp;
93 83
94 if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { 84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
95 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); 85 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
96 xfs_qm_dqput(dqp); 86 xfs_qm_dqput(dqp);
97 } 87 }
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 3d1fc79532e2..8671a0b32644 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_bmap.h" 32#include "xfs_bmap.h"
41#include "xfs_btree.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_qm.h" 37#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index b4487764e923..bdebc183223e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -26,25 +26,15 @@
26#include "xfs_trans.h" 26#include "xfs_trans.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_alloc.h" 29#include "xfs_alloc.h"
31#include "xfs_dmapi.h"
32#include "xfs_quota.h" 30#include "xfs_quota.h"
33#include "xfs_mount.h" 31#include "xfs_mount.h"
34#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
35#include "xfs_alloc_btree.h"
36#include "xfs_ialloc_btree.h"
37#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h"
39#include "xfs_dinode.h"
40#include "xfs_inode.h" 33#include "xfs_inode.h"
41#include "xfs_ialloc.h"
42#include "xfs_itable.h" 34#include "xfs_itable.h"
43#include "xfs_bmap.h" 35#include "xfs_bmap.h"
44#include "xfs_btree.h"
45#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
46#include "xfs_error.h" 37#include "xfs_error.h"
47#include "xfs_rw.h"
48#include "xfs_attr.h" 38#include "xfs_attr.h"
49#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
50#include "xfs_utils.h" 40#include "xfs_utils.h"
@@ -248,40 +238,74 @@ out_unlock:
248 return error; 238 return error;
249} 239}
250 240
241STATIC int
242xfs_qm_scall_trunc_qfile(
243 struct xfs_mount *mp,
244 xfs_ino_t ino)
245{
246 struct xfs_inode *ip;
247 struct xfs_trans *tp;
248 int error;
249
250 if (ino == NULLFSINO)
251 return 0;
252
253 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
254 if (error)
255 return error;
256
257 xfs_ilock(ip, XFS_IOLOCK_EXCL);
258
259 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
260 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
261 XFS_TRANS_PERM_LOG_RES,
262 XFS_ITRUNCATE_LOG_COUNT);
263 if (error) {
264 xfs_trans_cancel(tp, 0);
265 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
266 goto out_put;
267 }
268
269 xfs_ilock(ip, XFS_ILOCK_EXCL);
270 xfs_trans_ijoin(tp, ip);
271
272 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
273 if (error) {
274 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
275 XFS_TRANS_ABORT);
276 goto out_unlock;
277 }
278
279 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
280 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
281
282out_unlock:
283 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
284out_put:
285 IRELE(ip);
286 return error;
287}
288
251int 289int
252xfs_qm_scall_trunc_qfiles( 290xfs_qm_scall_trunc_qfiles(
253 xfs_mount_t *mp, 291 xfs_mount_t *mp,
254 uint flags) 292 uint flags)
255{ 293{
256 int error = 0, error2 = 0; 294 int error = 0, error2 = 0;
257 xfs_inode_t *qip;
258 295
259 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 296 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
260 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 297 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
261 return XFS_ERROR(EINVAL); 298 return XFS_ERROR(EINVAL);
262 } 299 }
263 300
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 301 if (flags & XFS_DQ_USER)
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip); 302 error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
266 if (!error) { 303 if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
267 error = xfs_truncate_file(mp, qip); 304 error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
268 IRELE(qip);
269 }
270 }
271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip);
278 }
279 }
280 305
281 return error ? error : error2; 306 return error ? error : error2;
282} 307}
283 308
284
285/* 309/*
286 * Switch on (a given) quota enforcement for a filesystem. This takes 310 * Switch on (a given) quota enforcement for a filesystem. This takes
287 * effect immediately. 311 * effect immediately.
@@ -786,9 +810,9 @@ xfs_qm_export_dquot(
786 } 810 }
787 811
788#ifdef DEBUG 812#ifdef DEBUG
789 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || 813 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
790 (XFS_IS_OQUOTA_ENFORCED(mp) && 814 (XFS_IS_OQUOTA_ENFORCED(mp) &&
791 (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && 815 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
792 dst->d_id != 0) { 816 dst->d_id != 0) {
793 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && 817 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
794 (dst->d_blk_softlimit > 0)) { 818 (dst->d_blk_softlimit > 0)) {
@@ -809,17 +833,17 @@ xfs_qm_export_qtype_flags(
809 /* 833 /*
810 * Can't be more than one, or none. 834 * Can't be more than one, or none.
811 */ 835 */
812 ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != 836 ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
813 (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); 837 (FS_PROJ_QUOTA | FS_USER_QUOTA));
814 ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != 838 ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
815 (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); 839 (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
816 ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != 840 ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
817 (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); 841 (FS_USER_QUOTA | FS_GROUP_QUOTA));
818 ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); 842 ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
819 843
820 return (flags & XFS_DQ_USER) ? 844 return (flags & XFS_DQ_USER) ?
821 XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? 845 FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
822 XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; 846 FS_PROJ_QUOTA : FS_GROUP_QUOTA;
823} 847}
824 848
825STATIC uint 849STATIC uint
@@ -830,16 +854,16 @@ xfs_qm_export_flags(
830 854
831 uflags = 0; 855 uflags = 0;
832 if (flags & XFS_UQUOTA_ACCT) 856 if (flags & XFS_UQUOTA_ACCT)
833 uflags |= XFS_QUOTA_UDQ_ACCT; 857 uflags |= FS_QUOTA_UDQ_ACCT;
834 if (flags & XFS_PQUOTA_ACCT) 858 if (flags & XFS_PQUOTA_ACCT)
835 uflags |= XFS_QUOTA_PDQ_ACCT; 859 uflags |= FS_QUOTA_PDQ_ACCT;
836 if (flags & XFS_GQUOTA_ACCT) 860 if (flags & XFS_GQUOTA_ACCT)
837 uflags |= XFS_QUOTA_GDQ_ACCT; 861 uflags |= FS_QUOTA_GDQ_ACCT;
838 if (flags & XFS_UQUOTA_ENFD) 862 if (flags & XFS_UQUOTA_ENFD)
839 uflags |= XFS_QUOTA_UDQ_ENFD; 863 uflags |= FS_QUOTA_UDQ_ENFD;
840 if (flags & (XFS_OQUOTA_ENFD)) { 864 if (flags & (XFS_OQUOTA_ENFD)) {
841 uflags |= (flags & XFS_GQUOTA_ACCT) ? 865 uflags |= (flags & XFS_GQUOTA_ACCT) ?
842 XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; 866 FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
843 } 867 }
844 return (uflags); 868 return (uflags);
845} 869}
@@ -851,21 +875,14 @@ xfs_dqrele_inode(
851 struct xfs_perag *pag, 875 struct xfs_perag *pag,
852 int flags) 876 int flags)
853{ 877{
854 int error;
855
856 /* skip quota inodes */ 878 /* skip quota inodes */
857 if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || 879 if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
858 ip == ip->i_mount->m_quotainfo->qi_gquotaip) { 880 ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
859 ASSERT(ip->i_udquot == NULL); 881 ASSERT(ip->i_udquot == NULL);
860 ASSERT(ip->i_gdquot == NULL); 882 ASSERT(ip->i_gdquot == NULL);
861 read_unlock(&pag->pag_ici_lock);
862 return 0; 883 return 0;
863 } 884 }
864 885
865 error = xfs_sync_inode_valid(ip, pag);
866 if (error)
867 return error;
868
869 xfs_ilock(ip, XFS_ILOCK_EXCL); 886 xfs_ilock(ip, XFS_ILOCK_EXCL);
870 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { 887 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
871 xfs_qm_dqrele(ip->i_udquot); 888 xfs_qm_dqrele(ip->i_udquot);
@@ -875,8 +892,7 @@ xfs_dqrele_inode(
875 xfs_qm_dqrele(ip->i_gdquot); 892 xfs_qm_dqrele(ip->i_gdquot);
876 ip->i_gdquot = NULL; 893 ip->i_gdquot = NULL;
877 } 894 }
878 xfs_iput(ip, XFS_ILOCK_EXCL); 895 xfs_iunlock(ip, XFS_ILOCK_EXCL);
879
880 return 0; 896 return 0;
881} 897}
882 898
@@ -893,8 +909,7 @@ xfs_qm_dqrele_all_inodes(
893 uint flags) 909 uint flags)
894{ 910{
895 ASSERT(mp->m_quotainfo); 911 ASSERT(mp->m_quotainfo);
896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, 912 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
897 XFS_ICI_NO_TAG, 0, NULL);
898} 913}
899 914
900/*------------------------------------------------------------------------*/ 915/*------------------------------------------------------------------------*/
@@ -1143,13 +1158,14 @@ xfs_qm_internalqcheck_adjust(
1143 * of those now. 1158 * of those now.
1144 */ 1159 */
1145 if (! ipreleased) { 1160 if (! ipreleased) {
1146 xfs_iput(ip, lock_flags); 1161 xfs_iunlock(ip, lock_flags);
1162 IRELE(ip);
1147 ipreleased = B_TRUE; 1163 ipreleased = B_TRUE;
1148 goto again; 1164 goto again;
1149 } 1165 }
1150 xfs_qm_internalqcheck_get_dquots(mp, 1166 xfs_qm_internalqcheck_get_dquots(mp,
1151 (xfs_dqid_t) ip->i_d.di_uid, 1167 (xfs_dqid_t) ip->i_d.di_uid,
1152 (xfs_dqid_t) ip->i_d.di_projid, 1168 (xfs_dqid_t) xfs_get_projid(ip),
1153 (xfs_dqid_t) ip->i_d.di_gid, 1169 (xfs_dqid_t) ip->i_d.di_gid,
1154 &ud, &gd); 1170 &ud, &gd);
1155 if (XFS_IS_UQUOTA_ON(mp)) { 1171 if (XFS_IS_UQUOTA_ON(mp)) {
@@ -1160,7 +1176,8 @@ xfs_qm_internalqcheck_adjust(
1160 ASSERT(gd); 1176 ASSERT(gd);
1161 xfs_qm_internalqcheck_dqadjust(ip, gd); 1177 xfs_qm_internalqcheck_dqadjust(ip, gd);
1162 } 1178 }
1163 xfs_iput(ip, lock_flags); 1179 xfs_iunlock(ip, lock_flags);
1180 IRELE(ip);
1164 *res = BULKSTAT_RV_DIDONE; 1181 *res = BULKSTAT_RV_DIDONE;
1165 return (0); 1182 return (0);
1166} 1183}
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 061d827da33c..7de91d1b75c0 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 32#include "xfs_bmap.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
@@ -59,16 +49,14 @@ xfs_trans_dqjoin(
59 xfs_trans_t *tp, 49 xfs_trans_t *tp,
60 xfs_dquot_t *dqp) 50 xfs_dquot_t *dqp)
61{ 51{
62 xfs_dq_logitem_t *lp = &dqp->q_logitem;
63
64 ASSERT(dqp->q_transp != tp); 52 ASSERT(dqp->q_transp != tp);
65 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 53 ASSERT(XFS_DQ_IS_LOCKED(dqp));
66 ASSERT(lp->qli_dquot == dqp); 54 ASSERT(dqp->q_logitem.qli_dquot == dqp);
67 55
68 /* 56 /*
69 * Get a log_item_desc to point at the new item. 57 * Get a log_item_desc to point at the new item.
70 */ 58 */
71 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp)); 59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
72 60
73 /* 61 /*
74 * Initialize i_transp so we can later determine if this dquot is 62 * Initialize i_transp so we can later determine if this dquot is
@@ -93,16 +81,11 @@ xfs_trans_log_dquot(
93 xfs_trans_t *tp, 81 xfs_trans_t *tp,
94 xfs_dquot_t *dqp) 82 xfs_dquot_t *dqp)
95{ 83{
96 xfs_log_item_desc_t *lidp;
97
98 ASSERT(dqp->q_transp == tp); 84 ASSERT(dqp->q_transp == tp);
99 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 85 ASSERT(XFS_DQ_IS_LOCKED(dqp));
100 86
101 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
102 ASSERT(lidp != NULL);
103
104 tp->t_flags |= XFS_TRANS_DIRTY; 87 tp->t_flags |= XFS_TRANS_DIRTY;
105 lidp->lid_flags |= XFS_LID_DIRTY; 88 dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
106} 89}
107 90
108/* 91/*
@@ -874,9 +857,8 @@ xfs_trans_get_qoff_item(
874 /* 857 /*
875 * Get a log_item_desc to point at the new item. 858 * Get a log_item_desc to point at the new item.
876 */ 859 */
877 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q); 860 xfs_trans_add_item(tp, &q->qql_item);
878 861 return q;
879 return (q);
880} 862}
881 863
882 864
@@ -890,13 +872,8 @@ xfs_trans_log_quotaoff_item(
890 xfs_trans_t *tp, 872 xfs_trans_t *tp,
891 xfs_qoff_logitem_t *qlp) 873 xfs_qoff_logitem_t *qlp)
892{ 874{
893 xfs_log_item_desc_t *lidp;
894
895 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
896 ASSERT(lidp != NULL);
897
898 tp->t_flags |= XFS_TRANS_DIRTY; 875 tp->t_flags |= XFS_TRANS_DIRTY;
899 lidp->lid_flags |= XFS_LID_DIRTY; 876 qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
900} 877}
901 878
902STATIC void 879STATIC void
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 3f3610a7ee05..975aa10e1a47 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -22,7 +22,6 @@
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dmapi.h"
26#include "xfs_mount.h" 25#include "xfs_mount.h"
27#include "xfs_error.h" 26#include "xfs_error.h"
28 27
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 4917d4eed4ed..63c7a1a6c022 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -230,6 +230,15 @@ typedef struct xfs_perag {
230 rwlock_t pag_ici_lock; /* incore inode lock */ 230 rwlock_t pag_ici_lock; /* incore inode lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
233 struct mutex pag_ici_reclaim_lock; /* serialisation point */
234 unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
235
236 /* buffer cache index */
237 spinlock_t pag_buf_lock; /* lock for pag_buf_tree */
238 struct rb_root pag_buf_tree; /* ordered tree of active buffers */
239
240 /* for rcu-safe freeing */
241 struct rcu_head rcu_head;
233#endif 242#endif
234 int pagb_count; /* pagb slots in use */ 243 int pagb_count; /* pagb slots in use */
235} xfs_perag_t; 244} xfs_perag_t;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a7fbe8a99b12..112abc439ca5 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -24,18 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 34#include "xfs_alloc.h"
40#include "xfs_error.h" 35#include "xfs_error.h"
41#include "xfs_trace.h" 36#include "xfs_trace.h"
@@ -680,7 +675,7 @@ xfs_alloc_ag_vextent_near(
680 xfs_agblock_t gtbnoa; /* aligned ... */ 675 xfs_agblock_t gtbnoa; /* aligned ... */
681 xfs_extlen_t gtdiff; /* difference to right side entry */ 676 xfs_extlen_t gtdiff; /* difference to right side entry */
682 xfs_extlen_t gtlen; /* length of right side entry */ 677 xfs_extlen_t gtlen; /* length of right side entry */
683 xfs_extlen_t gtlena; /* aligned ... */ 678 xfs_extlen_t gtlena = 0; /* aligned ... */
684 xfs_agblock_t gtnew; /* useful start bno of right side */ 679 xfs_agblock_t gtnew; /* useful start bno of right side */
685 int error; /* error code */ 680 int error; /* error code */
686 int i; /* result code, temporary */ 681 int i; /* result code, temporary */
@@ -688,10 +683,8 @@ xfs_alloc_ag_vextent_near(
688 xfs_agblock_t ltbno; /* start bno of left side entry */ 683 xfs_agblock_t ltbno; /* start bno of left side entry */
689 xfs_agblock_t ltbnoa; /* aligned ... */ 684 xfs_agblock_t ltbnoa; /* aligned ... */
690 xfs_extlen_t ltdiff; /* difference to left side entry */ 685 xfs_extlen_t ltdiff; /* difference to left side entry */
691 /*REFERENCED*/
692 xfs_agblock_t ltend; /* end bno of left side entry */
693 xfs_extlen_t ltlen; /* length of left side entry */ 686 xfs_extlen_t ltlen; /* length of left side entry */
694 xfs_extlen_t ltlena; /* aligned ... */ 687 xfs_extlen_t ltlena = 0; /* aligned ... */
695 xfs_agblock_t ltnew; /* useful start bno of left side */ 688 xfs_agblock_t ltnew; /* useful start bno of left side */
696 xfs_extlen_t rlen; /* length of returned extent */ 689 xfs_extlen_t rlen; /* length of returned extent */
697#if defined(DEBUG) && defined(__KERNEL__) 690#if defined(DEBUG) && defined(__KERNEL__)
@@ -814,8 +807,7 @@ xfs_alloc_ag_vextent_near(
814 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 807 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
815 goto error0; 808 goto error0;
816 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 809 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
817 ltend = ltbno + ltlen; 810 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
818 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
819 args->len = blen; 811 args->len = blen;
820 if (!xfs_alloc_fix_minleft(args)) { 812 if (!xfs_alloc_fix_minleft(args)) {
821 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 813 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -828,7 +820,7 @@ xfs_alloc_ag_vextent_near(
828 */ 820 */
829 args->agbno = bnew; 821 args->agbno = bnew;
830 ASSERT(bnew >= ltbno); 822 ASSERT(bnew >= ltbno);
831 ASSERT(bnew + blen <= ltend); 823 ASSERT(bnew + blen <= ltbno + ltlen);
832 /* 824 /*
833 * Set up a cursor for the by-bno tree. 825 * Set up a cursor for the by-bno tree.
834 */ 826 */
@@ -1157,7 +1149,6 @@ xfs_alloc_ag_vextent_near(
1157 /* 1149 /*
1158 * Fix up the length and compute the useful address. 1150 * Fix up the length and compute the useful address.
1159 */ 1151 */
1160 ltend = ltbno + ltlen;
1161 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1152 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1162 xfs_alloc_fix_len(args); 1153 xfs_alloc_fix_len(args);
1163 if (!xfs_alloc_fix_minleft(args)) { 1154 if (!xfs_alloc_fix_minleft(args)) {
@@ -1170,7 +1161,7 @@ xfs_alloc_ag_vextent_near(
1170 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1161 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1171 ltlen, &ltnew); 1162 ltlen, &ltnew);
1172 ASSERT(ltnew >= ltbno); 1163 ASSERT(ltnew >= ltbno);
1173 ASSERT(ltnew + rlen <= ltend); 1164 ASSERT(ltnew + rlen <= ltbno + ltlen);
1174 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1165 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1175 args->agbno = ltnew; 1166 args->agbno = ltnew;
1176 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1167 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 6d05199b667c..895009a97271 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -27,16 +27,16 @@ struct xfs_busy_extent;
27/* 27/*
28 * Freespace allocation types. Argument to xfs_alloc_[v]extent. 28 * Freespace allocation types. Argument to xfs_alloc_[v]extent.
29 */ 29 */
30typedef enum xfs_alloctype 30#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
31{ 31#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
32 XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ 32#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
33 XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */ 33#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
34 XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ 34#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
35 XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */ 35#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
36 XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */ 36#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
37 XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */ 37
38 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ 38/* this should become an enum again when the tracing code is fixed */
39} xfs_alloctype_t; 39typedef unsigned int xfs_alloctype_t;
40 40
41#define XFS_ALLOC_TYPES \ 41#define XFS_ALLOC_TYPES \
42 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ 42 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 83f494218759..3916925e2584 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -24,19 +24,14 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_btree_trace.h" 34#include "xfs_btree_trace.h"
39#include "xfs_ialloc.h"
40#include "xfs_alloc.h" 35#include "xfs_alloc.h"
41#include "xfs_error.h" 36#include "xfs_error.h"
42#include "xfs_trace.h" 37#include "xfs_trace.h"
@@ -285,38 +280,6 @@ xfs_allocbt_key_diff(
285 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; 280 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
286} 281}
287 282
288STATIC int
289xfs_allocbt_kill_root(
290 struct xfs_btree_cur *cur,
291 struct xfs_buf *bp,
292 int level,
293 union xfs_btree_ptr *newroot)
294{
295 int error;
296
297 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
298 XFS_BTREE_STATS_INC(cur, killroot);
299
300 /*
301 * Update the root pointer, decreasing the level by 1 and then
302 * free the old root.
303 */
304 xfs_allocbt_set_root(cur, newroot, -1);
305 error = xfs_allocbt_free_block(cur, bp);
306 if (error) {
307 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
308 return error;
309 }
310
311 XFS_BTREE_STATS_INC(cur, free);
312
313 xfs_btree_setbuf(cur, level, NULL);
314 cur->bc_nlevels--;
315
316 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
317 return 0;
318}
319
320#ifdef DEBUG 283#ifdef DEBUG
321STATIC int 284STATIC int
322xfs_allocbt_keys_inorder( 285xfs_allocbt_keys_inorder(
@@ -428,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
428 391
429 .dup_cursor = xfs_allocbt_dup_cursor, 392 .dup_cursor = xfs_allocbt_dup_cursor,
430 .set_root = xfs_allocbt_set_root, 393 .set_root = xfs_allocbt_set_root,
431 .kill_root = xfs_allocbt_kill_root,
432 .alloc_block = xfs_allocbt_alloc_block, 394 .alloc_block = xfs_allocbt_alloc_block,
433 .free_block = xfs_allocbt_free_block, 395 .free_block = xfs_allocbt_free_block,
434 .update_lastrec = xfs_allocbt_update_lastrec, 396 .update_lastrec = xfs_allocbt_update_lastrec,
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b9c196a53c42..c86375378810 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -25,19 +25,13 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 32#include "xfs_dinode.h"
38#include "xfs_inode.h" 33#include "xfs_inode.h"
39#include "xfs_alloc.h" 34#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
42#include "xfs_bmap.h" 36#include "xfs_bmap.h"
43#include "xfs_attr.h" 37#include "xfs_attr.h"
@@ -325,8 +319,7 @@ xfs_attr_set_int(
325 return (error); 319 return (error);
326 } 320 }
327 321
328 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 322 xfs_trans_ijoin(args.trans, dp);
329 xfs_trans_ihold(args.trans, dp);
330 323
331 /* 324 /*
332 * If the attribute list is non-existent or a shortform list, 325 * If the attribute list is non-existent or a shortform list,
@@ -362,16 +355,15 @@ xfs_attr_set_int(
362 if (mp->m_flags & XFS_MOUNT_WSYNC) { 355 if (mp->m_flags & XFS_MOUNT_WSYNC) {
363 xfs_trans_set_sync(args.trans); 356 xfs_trans_set_sync(args.trans);
364 } 357 }
358
359 if (!error && (flags & ATTR_KERNOTIME) == 0) {
360 xfs_trans_ichgtime(args.trans, dp,
361 XFS_ICHGTIME_CHG);
362 }
365 err2 = xfs_trans_commit(args.trans, 363 err2 = xfs_trans_commit(args.trans,
366 XFS_TRANS_RELEASE_LOG_RES); 364 XFS_TRANS_RELEASE_LOG_RES);
367 xfs_iunlock(dp, XFS_ILOCK_EXCL); 365 xfs_iunlock(dp, XFS_ILOCK_EXCL);
368 366
369 /*
370 * Hit the inode change time.
371 */
372 if (!error && (flags & ATTR_KERNOTIME) == 0) {
373 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
374 }
375 return(error == 0 ? err2 : error); 367 return(error == 0 ? err2 : error);
376 } 368 }
377 369
@@ -396,10 +388,8 @@ xfs_attr_set_int(
396 * bmap_finish() may have committed the last trans and started 388 * bmap_finish() may have committed the last trans and started
397 * a new one. We need the inode to be in all transactions. 389 * a new one. We need the inode to be in all transactions.
398 */ 390 */
399 if (committed) { 391 if (committed)
400 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 392 xfs_trans_ijoin(args.trans, dp);
401 xfs_trans_ihold(args.trans, dp);
402 }
403 393
404 /* 394 /*
405 * Commit the leaf transformation. We'll need another (linked) 395 * Commit the leaf transformation. We'll need another (linked)
@@ -429,6 +419,9 @@ xfs_attr_set_int(
429 xfs_trans_set_sync(args.trans); 419 xfs_trans_set_sync(args.trans);
430 } 420 }
431 421
422 if ((flags & ATTR_KERNOTIME) == 0)
423 xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
424
432 /* 425 /*
433 * Commit the last in the sequence of transactions. 426 * Commit the last in the sequence of transactions.
434 */ 427 */
@@ -436,13 +429,6 @@ xfs_attr_set_int(
436 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); 429 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
437 xfs_iunlock(dp, XFS_ILOCK_EXCL); 430 xfs_iunlock(dp, XFS_ILOCK_EXCL);
438 431
439 /*
440 * Hit the inode change time.
441 */
442 if (!error && (flags & ATTR_KERNOTIME) == 0) {
443 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
444 }
445
446 return(error); 432 return(error);
447 433
448out: 434out:
@@ -544,8 +530,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
544 * No need to make quota reservations here. We expect to release some 530 * No need to make quota reservations here. We expect to release some
545 * blocks not allocate in the common case. 531 * blocks not allocate in the common case.
546 */ 532 */
547 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 533 xfs_trans_ijoin(args.trans, dp);
548 xfs_trans_ihold(args.trans, dp);
549 534
550 /* 535 /*
551 * Decide on what work routines to call based on the inode size. 536 * Decide on what work routines to call based on the inode size.
@@ -577,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
577 xfs_trans_set_sync(args.trans); 562 xfs_trans_set_sync(args.trans);
578 } 563 }
579 564
565 if ((flags & ATTR_KERNOTIME) == 0)
566 xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
567
580 /* 568 /*
581 * Commit the last in the sequence of transactions. 569 * Commit the last in the sequence of transactions.
582 */ 570 */
@@ -584,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
584 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); 572 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
585 xfs_iunlock(dp, XFS_ILOCK_EXCL); 573 xfs_iunlock(dp, XFS_ILOCK_EXCL);
586 574
587 /*
588 * Hit the inode change time.
589 */
590 if (!error && (flags & ATTR_KERNOTIME) == 0) {
591 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
592 }
593
594 return(error); 575 return(error);
595 576
596out: 577out:
@@ -821,8 +802,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
821 * No need to make quota reservations here. We expect to release some 802 * No need to make quota reservations here. We expect to release some
822 * blocks, not allocate, in the common case. 803 * blocks, not allocate, in the common case.
823 */ 804 */
824 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 805 xfs_trans_ijoin(trans, dp);
825 xfs_trans_ihold(trans, dp);
826 806
827 /* 807 /*
828 * Decide on what work routines to call based on the inode size. 808 * Decide on what work routines to call based on the inode size.
@@ -981,10 +961,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
981 * bmap_finish() may have committed the last trans and started 961 * bmap_finish() may have committed the last trans and started
982 * a new one. We need the inode to be in all transactions. 962 * a new one. We need the inode to be in all transactions.
983 */ 963 */
984 if (committed) { 964 if (committed)
985 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 965 xfs_trans_ijoin(args->trans, dp);
986 xfs_trans_ihold(args->trans, dp);
987 }
988 966
989 /* 967 /*
990 * Commit the current trans (including the inode) and start 968 * Commit the current trans (including the inode) and start
@@ -1085,10 +1063,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1085 * and started a new one. We need the inode to be 1063 * and started a new one. We need the inode to be
1086 * in all transactions. 1064 * in all transactions.
1087 */ 1065 */
1088 if (committed) { 1066 if (committed)
1089 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1067 xfs_trans_ijoin(args->trans, dp);
1090 xfs_trans_ihold(args->trans, dp);
1091 }
1092 } else 1068 } else
1093 xfs_da_buf_done(bp); 1069 xfs_da_buf_done(bp);
1094 1070
@@ -1161,10 +1137,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1161 * bmap_finish() may have committed the last trans and started 1137 * bmap_finish() may have committed the last trans and started
1162 * a new one. We need the inode to be in all transactions. 1138 * a new one. We need the inode to be in all transactions.
1163 */ 1139 */
1164 if (committed) { 1140 if (committed)
1165 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1141 xfs_trans_ijoin(args->trans, dp);
1166 xfs_trans_ihold(args->trans, dp);
1167 }
1168 } else 1142 } else
1169 xfs_da_buf_done(bp); 1143 xfs_da_buf_done(bp);
1170 return(0); 1144 return(0);
@@ -1317,10 +1291,8 @@ restart:
1317 * and started a new one. We need the inode to be 1291 * and started a new one. We need the inode to be
1318 * in all transactions. 1292 * in all transactions.
1319 */ 1293 */
1320 if (committed) { 1294 if (committed)
1321 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1295 xfs_trans_ijoin(args->trans, dp);
1322 xfs_trans_ihold(args->trans, dp);
1323 }
1324 1296
1325 /* 1297 /*
1326 * Commit the node conversion and start the next 1298 * Commit the node conversion and start the next
@@ -1356,10 +1328,8 @@ restart:
1356 * bmap_finish() may have committed the last trans and started 1328 * bmap_finish() may have committed the last trans and started
1357 * a new one. We need the inode to be in all transactions. 1329 * a new one. We need the inode to be in all transactions.
1358 */ 1330 */
1359 if (committed) { 1331 if (committed)
1360 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1332 xfs_trans_ijoin(args->trans, dp);
1361 xfs_trans_ihold(args->trans, dp);
1362 }
1363 } else { 1333 } else {
1364 /* 1334 /*
1365 * Addition succeeded, update Btree hashvals. 1335 * Addition succeeded, update Btree hashvals.
@@ -1470,10 +1440,8 @@ restart:
1470 * and started a new one. We need the inode to be 1440 * and started a new one. We need the inode to be
1471 * in all transactions. 1441 * in all transactions.
1472 */ 1442 */
1473 if (committed) { 1443 if (committed)
1474 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1444 xfs_trans_ijoin(args->trans, dp);
1475 xfs_trans_ihold(args->trans, dp);
1476 }
1477 } 1445 }
1478 1446
1479 /* 1447 /*
@@ -1604,10 +1572,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1604 * bmap_finish() may have committed the last trans and started 1572 * bmap_finish() may have committed the last trans and started
1605 * a new one. We need the inode to be in all transactions. 1573 * a new one. We need the inode to be in all transactions.
1606 */ 1574 */
1607 if (committed) { 1575 if (committed)
1608 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1576 xfs_trans_ijoin(args->trans, dp);
1609 xfs_trans_ihold(args->trans, dp);
1610 }
1611 1577
1612 /* 1578 /*
1613 * Commit the Btree join operation and start a new trans. 1579 * Commit the Btree join operation and start a new trans.
@@ -1658,10 +1624,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1658 * and started a new one. We need the inode to be 1624 * and started a new one. We need the inode to be
1659 * in all transactions. 1625 * in all transactions.
1660 */ 1626 */
1661 if (committed) { 1627 if (committed)
1662 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1628 xfs_trans_ijoin(args->trans, dp);
1663 xfs_trans_ihold(args->trans, dp);
1664 }
1665 } else 1629 } else
1666 xfs_da_brelse(args->trans, bp); 1630 xfs_da_brelse(args->trans, bp);
1667 } 1631 }
@@ -2004,7 +1968,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2004 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, 1968 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2005 args->rmtblkcnt, 1969 args->rmtblkcnt,
2006 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 1970 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2007 NULL, 0, map, &nmap, NULL, NULL); 1971 NULL, 0, map, &nmap, NULL);
2008 if (error) 1972 if (error)
2009 return(error); 1973 return(error);
2010 ASSERT(nmap >= 1); 1974 ASSERT(nmap >= 1);
@@ -2022,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2022 1986
2023 tmp = (valuelen < XFS_BUF_SIZE(bp)) 1987 tmp = (valuelen < XFS_BUF_SIZE(bp))
2024 ? valuelen : XFS_BUF_SIZE(bp); 1988 ? valuelen : XFS_BUF_SIZE(bp);
2025 xfs_biomove(bp, 0, tmp, dst, XBF_READ); 1989 xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
2026 xfs_buf_relse(bp); 1990 xfs_buf_relse(bp);
2027 dst += tmp; 1991 dst += tmp;
2028 valuelen -= tmp; 1992 valuelen -= tmp;
@@ -2083,7 +2047,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2083 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | 2047 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2084 XFS_BMAPI_WRITE, 2048 XFS_BMAPI_WRITE,
2085 args->firstblock, args->total, &map, &nmap, 2049 args->firstblock, args->total, &map, &nmap,
2086 args->flist, NULL); 2050 args->flist);
2087 if (!error) { 2051 if (!error) {
2088 error = xfs_bmap_finish(&args->trans, args->flist, 2052 error = xfs_bmap_finish(&args->trans, args->flist,
2089 &committed); 2053 &committed);
@@ -2099,10 +2063,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2099 * bmap_finish() may have committed the last trans and started 2063 * bmap_finish() may have committed the last trans and started
2100 * a new one. We need the inode to be in all transactions. 2064 * a new one. We need the inode to be in all transactions.
2101 */ 2065 */
2102 if (committed) { 2066 if (committed)
2103 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 2067 xfs_trans_ijoin(args->trans, dp);
2104 xfs_trans_ihold(args->trans, dp);
2105 }
2106 2068
2107 ASSERT(nmap == 1); 2069 ASSERT(nmap == 1);
2108 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 2070 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -2136,7 +2098,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2136 args->rmtblkcnt, 2098 args->rmtblkcnt,
2137 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2099 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2138 args->firstblock, 0, &map, &nmap, 2100 args->firstblock, 0, &map, &nmap,
2139 NULL, NULL); 2101 NULL);
2140 if (error) { 2102 if (error) {
2141 return(error); 2103 return(error);
2142 } 2104 }
@@ -2154,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2154 2116
2155 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : 2117 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2156 XFS_BUF_SIZE(bp); 2118 XFS_BUF_SIZE(bp);
2157 xfs_biomove(bp, 0, tmp, src, XBF_WRITE); 2119 xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
2158 if (tmp < XFS_BUF_SIZE(bp)) 2120 if (tmp < XFS_BUF_SIZE(bp))
2159 xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); 2121 xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2160 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ 2122 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2161 return (error); 2123 return (error);
2162 } 2124 }
@@ -2201,7 +2163,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2201 args->rmtblkcnt, 2163 args->rmtblkcnt,
2202 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2164 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2203 args->firstblock, 0, &map, &nmap, 2165 args->firstblock, 0, &map, &nmap,
2204 args->flist, NULL); 2166 args->flist);
2205 if (error) { 2167 if (error) {
2206 return(error); 2168 return(error);
2207 } 2169 }
@@ -2239,7 +2201,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2239 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 2201 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2240 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2202 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2241 1, args->firstblock, args->flist, 2203 1, args->firstblock, args->flist,
2242 NULL, &done); 2204 &done);
2243 if (!error) { 2205 if (!error) {
2244 error = xfs_bmap_finish(&args->trans, args->flist, 2206 error = xfs_bmap_finish(&args->trans, args->flist,
2245 &committed); 2207 &committed);
@@ -2255,10 +2217,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2255 * bmap_finish() may have committed the last trans and started 2217 * bmap_finish() may have committed the last trans and started
2256 * a new one. We need the inode to be in all transactions. 2218 * a new one. We need the inode to be in all transactions.
2257 */ 2219 */
2258 if (committed) { 2220 if (committed)
2259 xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL); 2221 xfs_trans_ijoin(args->trans, args->dp);
2260 xfs_trans_ihold(args->trans, args->dp);
2261 }
2262 2222
2263 /* 2223 /*
2264 * Close out trans and start the next one in the chain. 2224 * Close out trans and start the next one in the chain.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a90ce74fc256..a6cff8edcdb6 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -24,8 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
@@ -33,7 +31,6 @@
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_alloc.h" 32#include "xfs_alloc.h"
35#include "xfs_btree.h" 33#include "xfs_btree.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 35#include "xfs_dinode.h"
39#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -2931,7 +2928,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2931 nmap = 1; 2928 nmap = 1;
2932 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, 2929 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
2933 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2930 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2934 NULL, 0, &map, &nmap, NULL, NULL); 2931 NULL, 0, &map, &nmap, NULL);
2935 if (error) { 2932 if (error) {
2936 return(error); 2933 return(error);
2937 } 2934 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 99587ded043f..8abd12e32e13 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -30,13 +30,10 @@
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 33#include "xfs_dinode.h"
35#include "xfs_inode.h" 34#include "xfs_inode.h"
36#include "xfs_btree.h" 35#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h" 36#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h" 37#include "xfs_itable.h"
41#include "xfs_dir2_data.h" 38#include "xfs_dir2_data.h"
42#include "xfs_dir2_leaf.h" 39#include "xfs_dir2_leaf.h"
@@ -104,7 +101,6 @@ xfs_bmap_add_extent(
104 xfs_fsblock_t *first, /* pointer to firstblock variable */ 101 xfs_fsblock_t *first, /* pointer to firstblock variable */
105 xfs_bmap_free_t *flist, /* list of extents to be freed */ 102 xfs_bmap_free_t *flist, /* list of extents to be freed */
106 int *logflagsp, /* inode logging flags */ 103 int *logflagsp, /* inode logging flags */
107 xfs_extdelta_t *delta, /* Change made to incore extents */
108 int whichfork, /* data or attr fork */ 104 int whichfork, /* data or attr fork */
109 int rsvd); /* OK to allocate reserved blocks */ 105 int rsvd); /* OK to allocate reserved blocks */
110 106
@@ -122,7 +118,6 @@ xfs_bmap_add_extent_delay_real(
122 xfs_fsblock_t *first, /* pointer to firstblock variable */ 118 xfs_fsblock_t *first, /* pointer to firstblock variable */
123 xfs_bmap_free_t *flist, /* list of extents to be freed */ 119 xfs_bmap_free_t *flist, /* list of extents to be freed */
124 int *logflagsp, /* inode logging flags */ 120 int *logflagsp, /* inode logging flags */
125 xfs_extdelta_t *delta, /* Change made to incore extents */
126 int rsvd); /* OK to allocate reserved blocks */ 121 int rsvd); /* OK to allocate reserved blocks */
127 122
128/* 123/*
@@ -135,7 +130,6 @@ xfs_bmap_add_extent_hole_delay(
135 xfs_extnum_t idx, /* extent number to update/insert */ 130 xfs_extnum_t idx, /* extent number to update/insert */
136 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 131 xfs_bmbt_irec_t *new, /* new data to add to file extents */
137 int *logflagsp,/* inode logging flags */ 132 int *logflagsp,/* inode logging flags */
138 xfs_extdelta_t *delta, /* Change made to incore extents */
139 int rsvd); /* OK to allocate reserved blocks */ 133 int rsvd); /* OK to allocate reserved blocks */
140 134
141/* 135/*
@@ -149,7 +143,6 @@ xfs_bmap_add_extent_hole_real(
149 xfs_btree_cur_t *cur, /* if null, not a btree */ 143 xfs_btree_cur_t *cur, /* if null, not a btree */
150 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 144 xfs_bmbt_irec_t *new, /* new data to add to file extents */
151 int *logflagsp, /* inode logging flags */ 145 int *logflagsp, /* inode logging flags */
152 xfs_extdelta_t *delta, /* Change made to incore extents */
153 int whichfork); /* data or attr fork */ 146 int whichfork); /* data or attr fork */
154 147
155/* 148/*
@@ -162,8 +155,7 @@ xfs_bmap_add_extent_unwritten_real(
162 xfs_extnum_t idx, /* extent number to update/insert */ 155 xfs_extnum_t idx, /* extent number to update/insert */
163 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 156 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
164 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 157 xfs_bmbt_irec_t *new, /* new data to add to file extents */
165 int *logflagsp, /* inode logging flags */ 158 int *logflagsp); /* inode logging flags */
166 xfs_extdelta_t *delta); /* Change made to incore extents */
167 159
168/* 160/*
169 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 161 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
@@ -200,7 +192,6 @@ xfs_bmap_del_extent(
200 xfs_btree_cur_t *cur, /* if null, not a btree */ 192 xfs_btree_cur_t *cur, /* if null, not a btree */
201 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 193 xfs_bmbt_irec_t *new, /* new data to add to file extents */
202 int *logflagsp,/* inode logging flags */ 194 int *logflagsp,/* inode logging flags */
203 xfs_extdelta_t *delta, /* Change made to incore extents */
204 int whichfork, /* data or attr fork */ 195 int whichfork, /* data or attr fork */
205 int rsvd); /* OK to allocate reserved blocks */ 196 int rsvd); /* OK to allocate reserved blocks */
206 197
@@ -489,7 +480,6 @@ xfs_bmap_add_extent(
489 xfs_fsblock_t *first, /* pointer to firstblock variable */ 480 xfs_fsblock_t *first, /* pointer to firstblock variable */
490 xfs_bmap_free_t *flist, /* list of extents to be freed */ 481 xfs_bmap_free_t *flist, /* list of extents to be freed */
491 int *logflagsp, /* inode logging flags */ 482 int *logflagsp, /* inode logging flags */
492 xfs_extdelta_t *delta, /* Change made to incore extents */
493 int whichfork, /* data or attr fork */ 483 int whichfork, /* data or attr fork */
494 int rsvd) /* OK to use reserved data blocks */ 484 int rsvd) /* OK to use reserved data blocks */
495{ 485{
@@ -524,15 +514,6 @@ xfs_bmap_add_extent(
524 logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 514 logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
525 } else 515 } else
526 logflags = 0; 516 logflags = 0;
527 /* DELTA: single new extent */
528 if (delta) {
529 if (delta->xed_startoff > new->br_startoff)
530 delta->xed_startoff = new->br_startoff;
531 if (delta->xed_blockcount <
532 new->br_startoff + new->br_blockcount)
533 delta->xed_blockcount = new->br_startoff +
534 new->br_blockcount;
535 }
536 } 517 }
537 /* 518 /*
538 * Any kind of new delayed allocation goes here. 519 * Any kind of new delayed allocation goes here.
@@ -542,7 +523,7 @@ xfs_bmap_add_extent(
542 ASSERT((cur->bc_private.b.flags & 523 ASSERT((cur->bc_private.b.flags &
543 XFS_BTCUR_BPRV_WASDEL) == 0); 524 XFS_BTCUR_BPRV_WASDEL) == 0);
544 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, 525 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
545 &logflags, delta, rsvd))) 526 &logflags, rsvd)))
546 goto done; 527 goto done;
547 } 528 }
548 /* 529 /*
@@ -553,7 +534,7 @@ xfs_bmap_add_extent(
553 ASSERT((cur->bc_private.b.flags & 534 ASSERT((cur->bc_private.b.flags &
554 XFS_BTCUR_BPRV_WASDEL) == 0); 535 XFS_BTCUR_BPRV_WASDEL) == 0);
555 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, 536 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
556 &logflags, delta, whichfork))) 537 &logflags, whichfork)))
557 goto done; 538 goto done;
558 } else { 539 } else {
559 xfs_bmbt_irec_t prev; /* old extent at offset idx */ 540 xfs_bmbt_irec_t prev; /* old extent at offset idx */
@@ -578,17 +559,17 @@ xfs_bmap_add_extent(
578 XFS_BTCUR_BPRV_WASDEL); 559 XFS_BTCUR_BPRV_WASDEL);
579 if ((error = xfs_bmap_add_extent_delay_real(ip, 560 if ((error = xfs_bmap_add_extent_delay_real(ip,
580 idx, &cur, new, &da_new, first, flist, 561 idx, &cur, new, &da_new, first, flist,
581 &logflags, delta, rsvd))) 562 &logflags, rsvd)))
582 goto done; 563 goto done;
583 } else if (new->br_state == XFS_EXT_NORM) { 564 } else if (new->br_state == XFS_EXT_NORM) {
584 ASSERT(new->br_state == XFS_EXT_NORM); 565 ASSERT(new->br_state == XFS_EXT_NORM);
585 if ((error = xfs_bmap_add_extent_unwritten_real( 566 if ((error = xfs_bmap_add_extent_unwritten_real(
586 ip, idx, &cur, new, &logflags, delta))) 567 ip, idx, &cur, new, &logflags)))
587 goto done; 568 goto done;
588 } else { 569 } else {
589 ASSERT(new->br_state == XFS_EXT_UNWRITTEN); 570 ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
590 if ((error = xfs_bmap_add_extent_unwritten_real( 571 if ((error = xfs_bmap_add_extent_unwritten_real(
591 ip, idx, &cur, new, &logflags, delta))) 572 ip, idx, &cur, new, &logflags)))
592 goto done; 573 goto done;
593 } 574 }
594 ASSERT(*curp == cur || *curp == NULL); 575 ASSERT(*curp == cur || *curp == NULL);
@@ -601,7 +582,7 @@ xfs_bmap_add_extent(
601 ASSERT((cur->bc_private.b.flags & 582 ASSERT((cur->bc_private.b.flags &
602 XFS_BTCUR_BPRV_WASDEL) == 0); 583 XFS_BTCUR_BPRV_WASDEL) == 0);
603 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, 584 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
604 new, &logflags, delta, whichfork))) 585 new, &logflags, whichfork)))
605 goto done; 586 goto done;
606 } 587 }
607 } 588 }
@@ -633,7 +614,7 @@ xfs_bmap_add_extent(
633 nblks += cur->bc_private.b.allocated; 614 nblks += cur->bc_private.b.allocated;
634 ASSERT(nblks <= da_old); 615 ASSERT(nblks <= da_old);
635 if (nblks < da_old) 616 if (nblks < da_old)
636 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, 617 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
637 (int64_t)(da_old - nblks), rsvd); 618 (int64_t)(da_old - nblks), rsvd);
638 } 619 }
639 /* 620 /*
@@ -666,7 +647,6 @@ xfs_bmap_add_extent_delay_real(
666 xfs_fsblock_t *first, /* pointer to firstblock variable */ 647 xfs_fsblock_t *first, /* pointer to firstblock variable */
667 xfs_bmap_free_t *flist, /* list of extents to be freed */ 648 xfs_bmap_free_t *flist, /* list of extents to be freed */
668 int *logflagsp, /* inode logging flags */ 649 int *logflagsp, /* inode logging flags */
669 xfs_extdelta_t *delta, /* Change made to incore extents */
670 int rsvd) /* OK to use reserved data block allocation */ 650 int rsvd) /* OK to use reserved data block allocation */
671{ 651{
672 xfs_btree_cur_t *cur; /* btree cursor */ 652 xfs_btree_cur_t *cur; /* btree cursor */
@@ -797,11 +777,6 @@ xfs_bmap_add_extent_delay_real(
797 goto done; 777 goto done;
798 } 778 }
799 *dnew = 0; 779 *dnew = 0;
800 /* DELTA: Three in-core extents are replaced by one. */
801 temp = LEFT.br_startoff;
802 temp2 = LEFT.br_blockcount +
803 PREV.br_blockcount +
804 RIGHT.br_blockcount;
805 break; 780 break;
806 781
807 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 782 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -832,10 +807,6 @@ xfs_bmap_add_extent_delay_real(
832 goto done; 807 goto done;
833 } 808 }
834 *dnew = 0; 809 *dnew = 0;
835 /* DELTA: Two in-core extents are replaced by one. */
836 temp = LEFT.br_startoff;
837 temp2 = LEFT.br_blockcount +
838 PREV.br_blockcount;
839 break; 810 break;
840 811
841 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 812 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -867,10 +838,6 @@ xfs_bmap_add_extent_delay_real(
867 goto done; 838 goto done;
868 } 839 }
869 *dnew = 0; 840 *dnew = 0;
870 /* DELTA: Two in-core extents are replaced by one. */
871 temp = PREV.br_startoff;
872 temp2 = PREV.br_blockcount +
873 RIGHT.br_blockcount;
874 break; 841 break;
875 842
876 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 843 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -900,9 +867,6 @@ xfs_bmap_add_extent_delay_real(
900 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 867 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
901 } 868 }
902 *dnew = 0; 869 *dnew = 0;
903 /* DELTA: The in-core extent described by new changed type. */
904 temp = new->br_startoff;
905 temp2 = new->br_blockcount;
906 break; 870 break;
907 871
908 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 872 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -942,10 +906,6 @@ xfs_bmap_add_extent_delay_real(
942 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 906 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
943 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 907 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
944 *dnew = temp; 908 *dnew = temp;
945 /* DELTA: The boundary between two in-core extents moved. */
946 temp = LEFT.br_startoff;
947 temp2 = LEFT.br_blockcount +
948 PREV.br_blockcount;
949 break; 909 break;
950 910
951 case BMAP_LEFT_FILLING: 911 case BMAP_LEFT_FILLING:
@@ -990,9 +950,6 @@ xfs_bmap_add_extent_delay_real(
990 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 950 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
991 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); 951 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
992 *dnew = temp; 952 *dnew = temp;
993 /* DELTA: One in-core extent is split in two. */
994 temp = PREV.br_startoff;
995 temp2 = PREV.br_blockcount;
996 break; 953 break;
997 954
998 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 955 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1031,10 +988,6 @@ xfs_bmap_add_extent_delay_real(
1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 988 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 989 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1033 *dnew = temp; 990 *dnew = temp;
1034 /* DELTA: The boundary between two in-core extents moved. */
1035 temp = PREV.br_startoff;
1036 temp2 = PREV.br_blockcount +
1037 RIGHT.br_blockcount;
1038 break; 991 break;
1039 992
1040 case BMAP_RIGHT_FILLING: 993 case BMAP_RIGHT_FILLING:
@@ -1078,9 +1031,6 @@ xfs_bmap_add_extent_delay_real(
1078 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1079 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1080 *dnew = temp; 1033 *dnew = temp;
1081 /* DELTA: One in-core extent is split in two. */
1082 temp = PREV.br_startoff;
1083 temp2 = PREV.br_blockcount;
1084 break; 1034 break;
1085 1035
1086 case 0: 1036 case 0:
@@ -1129,7 +1079,8 @@ xfs_bmap_add_extent_delay_real(
1129 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - 1079 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
1130 (cur ? cur->bc_private.b.allocated : 0)); 1080 (cur ? cur->bc_private.b.allocated : 0));
1131 if (diff > 0 && 1081 if (diff > 0 &&
1132 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) { 1082 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
1083 -((int64_t)diff), rsvd)) {
1133 /* 1084 /*
1134 * Ick gross gag me with a spoon. 1085 * Ick gross gag me with a spoon.
1135 */ 1086 */
@@ -1139,16 +1090,18 @@ xfs_bmap_add_extent_delay_real(
1139 temp--; 1090 temp--;
1140 diff--; 1091 diff--;
1141 if (!diff || 1092 if (!diff ||
1142 !xfs_mod_incore_sb(ip->i_mount, 1093 !xfs_icsb_modify_counters(ip->i_mount,
1143 XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) 1094 XFS_SBS_FDBLOCKS,
1095 -((int64_t)diff), rsvd))
1144 break; 1096 break;
1145 } 1097 }
1146 if (temp2) { 1098 if (temp2) {
1147 temp2--; 1099 temp2--;
1148 diff--; 1100 diff--;
1149 if (!diff || 1101 if (!diff ||
1150 !xfs_mod_incore_sb(ip->i_mount, 1102 !xfs_icsb_modify_counters(ip->i_mount,
1151 XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) 1103 XFS_SBS_FDBLOCKS,
1104 -((int64_t)diff), rsvd))
1152 break; 1105 break;
1153 } 1106 }
1154 } 1107 }
@@ -1161,9 +1114,6 @@ xfs_bmap_add_extent_delay_real(
1161 nullstartblock((int)temp2)); 1114 nullstartblock((int)temp2));
1162 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); 1115 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
1163 *dnew = temp + temp2; 1116 *dnew = temp + temp2;
1164 /* DELTA: One in-core extent is split in three. */
1165 temp = PREV.br_startoff;
1166 temp2 = PREV.br_blockcount;
1167 break; 1117 break;
1168 1118
1169 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1119 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1179,13 +1129,6 @@ xfs_bmap_add_extent_delay_real(
1179 ASSERT(0); 1129 ASSERT(0);
1180 } 1130 }
1181 *curp = cur; 1131 *curp = cur;
1182 if (delta) {
1183 temp2 += temp;
1184 if (delta->xed_startoff > temp)
1185 delta->xed_startoff = temp;
1186 if (delta->xed_blockcount < temp2)
1187 delta->xed_blockcount = temp2;
1188 }
1189done: 1132done:
1190 *logflagsp = rval; 1133 *logflagsp = rval;
1191 return error; 1134 return error;
@@ -1204,8 +1147,7 @@ xfs_bmap_add_extent_unwritten_real(
1204 xfs_extnum_t idx, /* extent number to update/insert */ 1147 xfs_extnum_t idx, /* extent number to update/insert */
1205 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 1148 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
1206 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1149 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1207 int *logflagsp, /* inode logging flags */ 1150 int *logflagsp) /* inode logging flags */
1208 xfs_extdelta_t *delta) /* Change made to incore extents */
1209{ 1151{
1210 xfs_btree_cur_t *cur; /* btree cursor */ 1152 xfs_btree_cur_t *cur; /* btree cursor */
1211 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ 1153 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
@@ -1219,8 +1161,6 @@ xfs_bmap_add_extent_unwritten_real(
1219 /* left is 0, right is 1, prev is 2 */ 1161 /* left is 0, right is 1, prev is 2 */
1220 int rval=0; /* return value (logging flags) */ 1162 int rval=0; /* return value (logging flags) */
1221 int state = 0;/* state bits, accessed thru macros */ 1163 int state = 0;/* state bits, accessed thru macros */
1222 xfs_filblks_t temp=0;
1223 xfs_filblks_t temp2=0;
1224 1164
1225#define LEFT r[0] 1165#define LEFT r[0]
1226#define RIGHT r[1] 1166#define RIGHT r[1]
@@ -1341,11 +1281,6 @@ xfs_bmap_add_extent_unwritten_real(
1341 RIGHT.br_blockcount, LEFT.br_state))) 1281 RIGHT.br_blockcount, LEFT.br_state)))
1342 goto done; 1282 goto done;
1343 } 1283 }
1344 /* DELTA: Three in-core extents are replaced by one. */
1345 temp = LEFT.br_startoff;
1346 temp2 = LEFT.br_blockcount +
1347 PREV.br_blockcount +
1348 RIGHT.br_blockcount;
1349 break; 1284 break;
1350 1285
1351 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 1286 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1382,10 +1317,6 @@ xfs_bmap_add_extent_unwritten_real(
1382 LEFT.br_state))) 1317 LEFT.br_state)))
1383 goto done; 1318 goto done;
1384 } 1319 }
1385 /* DELTA: Two in-core extents are replaced by one. */
1386 temp = LEFT.br_startoff;
1387 temp2 = LEFT.br_blockcount +
1388 PREV.br_blockcount;
1389 break; 1320 break;
1390 1321
1391 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1322 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1422,10 +1353,6 @@ xfs_bmap_add_extent_unwritten_real(
1422 newext))) 1353 newext)))
1423 goto done; 1354 goto done;
1424 } 1355 }
1425 /* DELTA: Two in-core extents are replaced by one. */
1426 temp = PREV.br_startoff;
1427 temp2 = PREV.br_blockcount +
1428 RIGHT.br_blockcount;
1429 break; 1356 break;
1430 1357
1431 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 1358 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1453,9 +1380,6 @@ xfs_bmap_add_extent_unwritten_real(
1453 newext))) 1380 newext)))
1454 goto done; 1381 goto done;
1455 } 1382 }
1456 /* DELTA: The in-core extent described by new changed type. */
1457 temp = new->br_startoff;
1458 temp2 = new->br_blockcount;
1459 break; 1383 break;
1460 1384
1461 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 1385 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1501,10 +1425,6 @@ xfs_bmap_add_extent_unwritten_real(
1501 LEFT.br_state)) 1425 LEFT.br_state))
1502 goto done; 1426 goto done;
1503 } 1427 }
1504 /* DELTA: The boundary between two in-core extents moved. */
1505 temp = LEFT.br_startoff;
1506 temp2 = LEFT.br_blockcount +
1507 PREV.br_blockcount;
1508 break; 1428 break;
1509 1429
1510 case BMAP_LEFT_FILLING: 1430 case BMAP_LEFT_FILLING:
@@ -1544,9 +1464,6 @@ xfs_bmap_add_extent_unwritten_real(
1544 goto done; 1464 goto done;
1545 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1465 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1546 } 1466 }
1547 /* DELTA: One in-core extent is split in two. */
1548 temp = PREV.br_startoff;
1549 temp2 = PREV.br_blockcount;
1550 break; 1467 break;
1551 1468
1552 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1469 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1587,10 +1504,6 @@ xfs_bmap_add_extent_unwritten_real(
1587 newext))) 1504 newext)))
1588 goto done; 1505 goto done;
1589 } 1506 }
1590 /* DELTA: The boundary between two in-core extents moved. */
1591 temp = PREV.br_startoff;
1592 temp2 = PREV.br_blockcount +
1593 RIGHT.br_blockcount;
1594 break; 1507 break;
1595 1508
1596 case BMAP_RIGHT_FILLING: 1509 case BMAP_RIGHT_FILLING:
@@ -1630,9 +1543,6 @@ xfs_bmap_add_extent_unwritten_real(
1630 goto done; 1543 goto done;
1631 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1544 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1632 } 1545 }
1633 /* DELTA: One in-core extent is split in two. */
1634 temp = PREV.br_startoff;
1635 temp2 = PREV.br_blockcount;
1636 break; 1546 break;
1637 1547
1638 case 0: 1548 case 0:
@@ -1692,9 +1602,6 @@ xfs_bmap_add_extent_unwritten_real(
1692 goto done; 1602 goto done;
1693 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1603 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1694 } 1604 }
1695 /* DELTA: One in-core extent is split in three. */
1696 temp = PREV.br_startoff;
1697 temp2 = PREV.br_blockcount;
1698 break; 1605 break;
1699 1606
1700 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1607 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1710,13 +1617,6 @@ xfs_bmap_add_extent_unwritten_real(
1710 ASSERT(0); 1617 ASSERT(0);
1711 } 1618 }
1712 *curp = cur; 1619 *curp = cur;
1713 if (delta) {
1714 temp2 += temp;
1715 if (delta->xed_startoff > temp)
1716 delta->xed_startoff = temp;
1717 if (delta->xed_blockcount < temp2)
1718 delta->xed_blockcount = temp2;
1719 }
1720done: 1620done:
1721 *logflagsp = rval; 1621 *logflagsp = rval;
1722 return error; 1622 return error;
@@ -1736,7 +1636,6 @@ xfs_bmap_add_extent_hole_delay(
1736 xfs_extnum_t idx, /* extent number to update/insert */ 1636 xfs_extnum_t idx, /* extent number to update/insert */
1737 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1637 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1738 int *logflagsp, /* inode logging flags */ 1638 int *logflagsp, /* inode logging flags */
1739 xfs_extdelta_t *delta, /* Change made to incore extents */
1740 int rsvd) /* OK to allocate reserved blocks */ 1639 int rsvd) /* OK to allocate reserved blocks */
1741{ 1640{
1742 xfs_bmbt_rec_host_t *ep; /* extent record for idx */ 1641 xfs_bmbt_rec_host_t *ep; /* extent record for idx */
@@ -1747,7 +1646,6 @@ xfs_bmap_add_extent_hole_delay(
1747 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1646 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1748 int state; /* state bits, accessed thru macros */ 1647 int state; /* state bits, accessed thru macros */
1749 xfs_filblks_t temp=0; /* temp for indirect calculations */ 1648 xfs_filblks_t temp=0; /* temp for indirect calculations */
1750 xfs_filblks_t temp2=0;
1751 1649
1752 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1650 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1753 ep = xfs_iext_get_ext(ifp, idx); 1651 ep = xfs_iext_get_ext(ifp, idx);
@@ -1819,9 +1717,6 @@ xfs_bmap_add_extent_hole_delay(
1819 1717
1820 xfs_iext_remove(ip, idx, 1, state); 1718 xfs_iext_remove(ip, idx, 1, state);
1821 ip->i_df.if_lastex = idx - 1; 1719 ip->i_df.if_lastex = idx - 1;
1822 /* DELTA: Two in-core extents were replaced by one. */
1823 temp2 = temp;
1824 temp = left.br_startoff;
1825 break; 1720 break;
1826 1721
1827 case BMAP_LEFT_CONTIG: 1722 case BMAP_LEFT_CONTIG:
@@ -1841,9 +1736,6 @@ xfs_bmap_add_extent_hole_delay(
1841 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); 1736 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1842 1737
1843 ip->i_df.if_lastex = idx - 1; 1738 ip->i_df.if_lastex = idx - 1;
1844 /* DELTA: One in-core extent grew into a hole. */
1845 temp2 = temp;
1846 temp = left.br_startoff;
1847 break; 1739 break;
1848 1740
1849 case BMAP_RIGHT_CONTIG: 1741 case BMAP_RIGHT_CONTIG:
@@ -1862,9 +1754,6 @@ xfs_bmap_add_extent_hole_delay(
1862 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 1754 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1863 1755
1864 ip->i_df.if_lastex = idx; 1756 ip->i_df.if_lastex = idx;
1865 /* DELTA: One in-core extent grew into a hole. */
1866 temp2 = temp;
1867 temp = new->br_startoff;
1868 break; 1757 break;
1869 1758
1870 case 0: 1759 case 0:
@@ -1876,26 +1765,16 @@ xfs_bmap_add_extent_hole_delay(
1876 oldlen = newlen = 0; 1765 oldlen = newlen = 0;
1877 xfs_iext_insert(ip, idx, 1, new, state); 1766 xfs_iext_insert(ip, idx, 1, new, state);
1878 ip->i_df.if_lastex = idx; 1767 ip->i_df.if_lastex = idx;
1879 /* DELTA: A new in-core extent was added in a hole. */
1880 temp2 = new->br_blockcount;
1881 temp = new->br_startoff;
1882 break; 1768 break;
1883 } 1769 }
1884 if (oldlen != newlen) { 1770 if (oldlen != newlen) {
1885 ASSERT(oldlen > newlen); 1771 ASSERT(oldlen > newlen);
1886 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, 1772 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
1887 (int64_t)(oldlen - newlen), rsvd); 1773 (int64_t)(oldlen - newlen), rsvd);
1888 /* 1774 /*
1889 * Nothing to do for disk quota accounting here. 1775 * Nothing to do for disk quota accounting here.
1890 */ 1776 */
1891 } 1777 }
1892 if (delta) {
1893 temp2 += temp;
1894 if (delta->xed_startoff > temp)
1895 delta->xed_startoff = temp;
1896 if (delta->xed_blockcount < temp2)
1897 delta->xed_blockcount = temp2;
1898 }
1899 *logflagsp = 0; 1778 *logflagsp = 0;
1900 return 0; 1779 return 0;
1901} 1780}
@@ -1911,7 +1790,6 @@ xfs_bmap_add_extent_hole_real(
1911 xfs_btree_cur_t *cur, /* if null, not a btree */ 1790 xfs_btree_cur_t *cur, /* if null, not a btree */
1912 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1791 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1913 int *logflagsp, /* inode logging flags */ 1792 int *logflagsp, /* inode logging flags */
1914 xfs_extdelta_t *delta, /* Change made to incore extents */
1915 int whichfork) /* data or attr fork */ 1793 int whichfork) /* data or attr fork */
1916{ 1794{
1917 xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ 1795 xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */
@@ -1922,8 +1800,6 @@ xfs_bmap_add_extent_hole_real(
1922 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1800 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1923 int rval=0; /* return value (logging flags) */ 1801 int rval=0; /* return value (logging flags) */
1924 int state; /* state bits, accessed thru macros */ 1802 int state; /* state bits, accessed thru macros */
1925 xfs_filblks_t temp=0;
1926 xfs_filblks_t temp2=0;
1927 1803
1928 ifp = XFS_IFORK_PTR(ip, whichfork); 1804 ifp = XFS_IFORK_PTR(ip, whichfork);
1929 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 1805 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
@@ -2020,11 +1896,6 @@ xfs_bmap_add_extent_hole_real(
2020 left.br_state))) 1896 left.br_state)))
2021 goto done; 1897 goto done;
2022 } 1898 }
2023 /* DELTA: Two in-core extents were replaced by one. */
2024 temp = left.br_startoff;
2025 temp2 = left.br_blockcount +
2026 new->br_blockcount +
2027 right.br_blockcount;
2028 break; 1899 break;
2029 1900
2030 case BMAP_LEFT_CONTIG: 1901 case BMAP_LEFT_CONTIG:
@@ -2056,10 +1927,6 @@ xfs_bmap_add_extent_hole_real(
2056 left.br_state))) 1927 left.br_state)))
2057 goto done; 1928 goto done;
2058 } 1929 }
2059 /* DELTA: One in-core extent grew. */
2060 temp = left.br_startoff;
2061 temp2 = left.br_blockcount +
2062 new->br_blockcount;
2063 break; 1930 break;
2064 1931
2065 case BMAP_RIGHT_CONTIG: 1932 case BMAP_RIGHT_CONTIG:
@@ -2092,10 +1959,6 @@ xfs_bmap_add_extent_hole_real(
2092 right.br_state))) 1959 right.br_state)))
2093 goto done; 1960 goto done;
2094 } 1961 }
2095 /* DELTA: One in-core extent grew. */
2096 temp = new->br_startoff;
2097 temp2 = new->br_blockcount +
2098 right.br_blockcount;
2099 break; 1962 break;
2100 1963
2101 case 0: 1964 case 0:
@@ -2123,18 +1986,8 @@ xfs_bmap_add_extent_hole_real(
2123 goto done; 1986 goto done;
2124 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1987 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2125 } 1988 }
2126 /* DELTA: A new extent was added in a hole. */
2127 temp = new->br_startoff;
2128 temp2 = new->br_blockcount;
2129 break; 1989 break;
2130 } 1990 }
2131 if (delta) {
2132 temp2 += temp;
2133 if (delta->xed_startoff > temp)
2134 delta->xed_startoff = temp;
2135 if (delta->xed_blockcount < temp2)
2136 delta->xed_blockcount = temp2;
2137 }
2138done: 1991done:
2139 *logflagsp = rval; 1992 *logflagsp = rval;
2140 return error; 1993 return error;
@@ -2959,7 +2812,6 @@ xfs_bmap_del_extent(
2959 xfs_btree_cur_t *cur, /* if null, not a btree */ 2812 xfs_btree_cur_t *cur, /* if null, not a btree */
2960 xfs_bmbt_irec_t *del, /* data to remove from extents */ 2813 xfs_bmbt_irec_t *del, /* data to remove from extents */
2961 int *logflagsp, /* inode logging flags */ 2814 int *logflagsp, /* inode logging flags */
2962 xfs_extdelta_t *delta, /* Change made to incore extents */
2963 int whichfork, /* data or attr fork */ 2815 int whichfork, /* data or attr fork */
2964 int rsvd) /* OK to allocate reserved blocks */ 2816 int rsvd) /* OK to allocate reserved blocks */
2965{ 2817{
@@ -3262,16 +3114,9 @@ xfs_bmap_del_extent(
3262 * Nothing to do for disk quota accounting here. 3114 * Nothing to do for disk quota accounting here.
3263 */ 3115 */
3264 ASSERT(da_old >= da_new); 3116 ASSERT(da_old >= da_new);
3265 if (da_old > da_new) 3117 if (da_old > da_new) {
3266 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), 3118 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
3267 rsvd); 3119 (int64_t)(da_old - da_new), rsvd);
3268 if (delta) {
3269 /* DELTA: report the original extent. */
3270 if (delta->xed_startoff > got.br_startoff)
3271 delta->xed_startoff = got.br_startoff;
3272 if (delta->xed_blockcount < got.br_startoff+got.br_blockcount)
3273 delta->xed_blockcount = got.br_startoff +
3274 got.br_blockcount;
3275 } 3120 }
3276done: 3121done:
3277 *logflagsp = flags; 3122 *logflagsp = flags;
@@ -3754,9 +3599,10 @@ xfs_bmap_add_attrfork(
3754 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 3599 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
3755 } 3600 }
3756 ASSERT(ip->i_d.di_anextents == 0); 3601 ASSERT(ip->i_d.di_anextents == 0);
3757 IHOLD(ip); 3602
3758 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3603 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
3759 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3604 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3605
3760 switch (ip->i_d.di_format) { 3606 switch (ip->i_d.di_format) {
3761 case XFS_DINODE_FMT_DEV: 3607 case XFS_DINODE_FMT_DEV:
3762 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; 3608 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
@@ -4483,8 +4329,7 @@ xfs_bmapi(
4483 xfs_extlen_t total, /* total blocks needed */ 4329 xfs_extlen_t total, /* total blocks needed */
4484 xfs_bmbt_irec_t *mval, /* output: map values */ 4330 xfs_bmbt_irec_t *mval, /* output: map values */
4485 int *nmap, /* i/o: mval size/count */ 4331 int *nmap, /* i/o: mval size/count */
4486 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4332 xfs_bmap_free_t *flist) /* i/o: list extents to free */
4487 xfs_extdelta_t *delta) /* o: change made to incore extents */
4488{ 4333{
4489 xfs_fsblock_t abno; /* allocated block number */ 4334 xfs_fsblock_t abno; /* allocated block number */
4490 xfs_extlen_t alen; /* allocated extent length */ 4335 xfs_extlen_t alen; /* allocated extent length */
@@ -4596,10 +4441,7 @@ xfs_bmapi(
4596 end = bno + len; 4441 end = bno + len;
4597 obno = bno; 4442 obno = bno;
4598 bma.ip = NULL; 4443 bma.ip = NULL;
4599 if (delta) { 4444
4600 delta->xed_startoff = NULLFILEOFF;
4601 delta->xed_blockcount = 0;
4602 }
4603 while (bno < end && n < *nmap) { 4445 while (bno < end && n < *nmap) {
4604 /* 4446 /*
4605 * Reading past eof, act as though there's a hole 4447 * Reading past eof, act as though there's a hole
@@ -4620,19 +4462,13 @@ xfs_bmapi(
4620 * allocate the stuff asked for in this bmap call 4462 * allocate the stuff asked for in this bmap call
4621 * but that wouldn't be as good. 4463 * but that wouldn't be as good.
4622 */ 4464 */
4623 if (wasdelay && !(flags & XFS_BMAPI_EXACT)) { 4465 if (wasdelay) {
4624 alen = (xfs_extlen_t)got.br_blockcount; 4466 alen = (xfs_extlen_t)got.br_blockcount;
4625 aoff = got.br_startoff; 4467 aoff = got.br_startoff;
4626 if (lastx != NULLEXTNUM && lastx) { 4468 if (lastx != NULLEXTNUM && lastx) {
4627 ep = xfs_iext_get_ext(ifp, lastx - 1); 4469 ep = xfs_iext_get_ext(ifp, lastx - 1);
4628 xfs_bmbt_get_all(ep, &prev); 4470 xfs_bmbt_get_all(ep, &prev);
4629 } 4471 }
4630 } else if (wasdelay) {
4631 alen = (xfs_extlen_t)
4632 XFS_FILBLKS_MIN(len,
4633 (got.br_startoff +
4634 got.br_blockcount) - bno);
4635 aoff = bno;
4636 } else { 4472 } else {
4637 alen = (xfs_extlen_t) 4473 alen = (xfs_extlen_t)
4638 XFS_FILBLKS_MIN(len, MAXEXTLEN); 4474 XFS_FILBLKS_MIN(len, MAXEXTLEN);
@@ -4694,13 +4530,13 @@ xfs_bmapi(
4694 -((int64_t)extsz), (flags & 4530 -((int64_t)extsz), (flags &
4695 XFS_BMAPI_RSVBLOCKS)); 4531 XFS_BMAPI_RSVBLOCKS));
4696 } else { 4532 } else {
4697 error = xfs_mod_incore_sb(mp, 4533 error = xfs_icsb_modify_counters(mp,
4698 XFS_SBS_FDBLOCKS, 4534 XFS_SBS_FDBLOCKS,
4699 -((int64_t)alen), (flags & 4535 -((int64_t)alen), (flags &
4700 XFS_BMAPI_RSVBLOCKS)); 4536 XFS_BMAPI_RSVBLOCKS));
4701 } 4537 }
4702 if (!error) { 4538 if (!error) {
4703 error = xfs_mod_incore_sb(mp, 4539 error = xfs_icsb_modify_counters(mp,
4704 XFS_SBS_FDBLOCKS, 4540 XFS_SBS_FDBLOCKS,
4705 -((int64_t)indlen), (flags & 4541 -((int64_t)indlen), (flags &
4706 XFS_BMAPI_RSVBLOCKS)); 4542 XFS_BMAPI_RSVBLOCKS));
@@ -4710,7 +4546,7 @@ xfs_bmapi(
4710 (int64_t)extsz, (flags & 4546 (int64_t)extsz, (flags &
4711 XFS_BMAPI_RSVBLOCKS)); 4547 XFS_BMAPI_RSVBLOCKS));
4712 else if (error) 4548 else if (error)
4713 xfs_mod_incore_sb(mp, 4549 xfs_icsb_modify_counters(mp,
4714 XFS_SBS_FDBLOCKS, 4550 XFS_SBS_FDBLOCKS,
4715 (int64_t)alen, (flags & 4551 (int64_t)alen, (flags &
4716 XFS_BMAPI_RSVBLOCKS)); 4552 XFS_BMAPI_RSVBLOCKS));
@@ -4831,7 +4667,7 @@ xfs_bmapi(
4831 got.br_state = XFS_EXT_UNWRITTEN; 4667 got.br_state = XFS_EXT_UNWRITTEN;
4832 } 4668 }
4833 error = xfs_bmap_add_extent(ip, lastx, &cur, &got, 4669 error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
4834 firstblock, flist, &tmp_logflags, delta, 4670 firstblock, flist, &tmp_logflags,
4835 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4671 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4836 logflags |= tmp_logflags; 4672 logflags |= tmp_logflags;
4837 if (error) 4673 if (error)
@@ -4912,8 +4748,12 @@ xfs_bmapi(
4912 * Check if writing previously allocated but 4748 * Check if writing previously allocated but
4913 * unwritten extents. 4749 * unwritten extents.
4914 */ 4750 */
4915 if (wr && mval->br_state == XFS_EXT_UNWRITTEN && 4751 if (wr &&
4916 ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { 4752 ((mval->br_state == XFS_EXT_UNWRITTEN &&
4753 ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
4754 (mval->br_state == XFS_EXT_NORM &&
4755 ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
4756 (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
4917 /* 4757 /*
4918 * Modify (by adding) the state flag, if writing. 4758 * Modify (by adding) the state flag, if writing.
4919 */ 4759 */
@@ -4925,9 +4765,11 @@ xfs_bmapi(
4925 *firstblock; 4765 *firstblock;
4926 cur->bc_private.b.flist = flist; 4766 cur->bc_private.b.flist = flist;
4927 } 4767 }
4928 mval->br_state = XFS_EXT_NORM; 4768 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4769 ? XFS_EXT_NORM
4770 : XFS_EXT_UNWRITTEN;
4929 error = xfs_bmap_add_extent(ip, lastx, &cur, mval, 4771 error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
4930 firstblock, flist, &tmp_logflags, delta, 4772 firstblock, flist, &tmp_logflags,
4931 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4773 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4932 logflags |= tmp_logflags; 4774 logflags |= tmp_logflags;
4933 if (error) 4775 if (error)
@@ -5017,14 +4859,6 @@ xfs_bmapi(
5017 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4859 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
5018 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); 4860 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
5019 error = 0; 4861 error = 0;
5020 if (delta && delta->xed_startoff != NULLFILEOFF) {
5021 /* A change was actually made.
5022 * Note that delta->xed_blockount is an offset at this
5023 * point and needs to be converted to a block count.
5024 */
5025 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5026 delta->xed_blockcount -= delta->xed_startoff;
5027 }
5028error0: 4862error0:
5029 /* 4863 /*
5030 * Log everything. Do this after conversion, there's no point in 4864 * Log everything. Do this after conversion, there's no point in
@@ -5136,8 +4970,6 @@ xfs_bunmapi(
5136 xfs_fsblock_t *firstblock, /* first allocated block 4970 xfs_fsblock_t *firstblock, /* first allocated block
5137 controls a.g. for allocs */ 4971 controls a.g. for allocs */
5138 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4972 xfs_bmap_free_t *flist, /* i/o: list extents to free */
5139 xfs_extdelta_t *delta, /* o: change made to incore
5140 extents */
5141 int *done) /* set if not done yet */ 4973 int *done) /* set if not done yet */
5142{ 4974{
5143 xfs_btree_cur_t *cur; /* bmap btree cursor */ 4975 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5196,10 +5028,7 @@ xfs_bunmapi(
5196 bno = start + len - 1; 5028 bno = start + len - 1;
5197 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5029 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5198 &prev); 5030 &prev);
5199 if (delta) { 5031
5200 delta->xed_startoff = NULLFILEOFF;
5201 delta->xed_blockcount = 0;
5202 }
5203 /* 5032 /*
5204 * Check to see if the given block number is past the end of the 5033 * Check to see if the given block number is past the end of the
5205 * file, back up to the last block if so... 5034 * file, back up to the last block if so...
@@ -5297,7 +5126,7 @@ xfs_bunmapi(
5297 } 5126 }
5298 del.br_state = XFS_EXT_UNWRITTEN; 5127 del.br_state = XFS_EXT_UNWRITTEN;
5299 error = xfs_bmap_add_extent(ip, lastx, &cur, &del, 5128 error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
5300 firstblock, flist, &logflags, delta, 5129 firstblock, flist, &logflags,
5301 XFS_DATA_FORK, 0); 5130 XFS_DATA_FORK, 0);
5302 if (error) 5131 if (error)
5303 goto error0; 5132 goto error0;
@@ -5352,7 +5181,7 @@ xfs_bunmapi(
5352 prev.br_state = XFS_EXT_UNWRITTEN; 5181 prev.br_state = XFS_EXT_UNWRITTEN;
5353 error = xfs_bmap_add_extent(ip, lastx - 1, &cur, 5182 error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
5354 &prev, firstblock, flist, &logflags, 5183 &prev, firstblock, flist, &logflags,
5355 delta, XFS_DATA_FORK, 0); 5184 XFS_DATA_FORK, 0);
5356 if (error) 5185 if (error)
5357 goto error0; 5186 goto error0;
5358 goto nodelete; 5187 goto nodelete;
@@ -5361,7 +5190,7 @@ xfs_bunmapi(
5361 del.br_state = XFS_EXT_UNWRITTEN; 5190 del.br_state = XFS_EXT_UNWRITTEN;
5362 error = xfs_bmap_add_extent(ip, lastx, &cur, 5191 error = xfs_bmap_add_extent(ip, lastx, &cur,
5363 &del, firstblock, flist, &logflags, 5192 &del, firstblock, flist, &logflags,
5364 delta, XFS_DATA_FORK, 0); 5193 XFS_DATA_FORK, 0);
5365 if (error) 5194 if (error)
5366 goto error0; 5195 goto error0;
5367 goto nodelete; 5196 goto nodelete;
@@ -5381,7 +5210,7 @@ xfs_bunmapi(
5381 ip, -((long)del.br_blockcount), 0, 5210 ip, -((long)del.br_blockcount), 0,
5382 XFS_QMOPT_RES_RTBLKS); 5211 XFS_QMOPT_RES_RTBLKS);
5383 } else { 5212 } else {
5384 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, 5213 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
5385 (int64_t)del.br_blockcount, rsvd); 5214 (int64_t)del.br_blockcount, rsvd);
5386 (void)xfs_trans_reserve_quota_nblks(NULL, 5215 (void)xfs_trans_reserve_quota_nblks(NULL,
5387 ip, -((long)del.br_blockcount), 0, 5216 ip, -((long)del.br_blockcount), 0,
@@ -5414,7 +5243,7 @@ xfs_bunmapi(
5414 goto error0; 5243 goto error0;
5415 } 5244 }
5416 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, 5245 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
5417 &tmp_logflags, delta, whichfork, rsvd); 5246 &tmp_logflags, whichfork, rsvd);
5418 logflags |= tmp_logflags; 5247 logflags |= tmp_logflags;
5419 if (error) 5248 if (error)
5420 goto error0; 5249 goto error0;
@@ -5471,14 +5300,6 @@ nodelete:
5471 ASSERT(ifp->if_ext_max == 5300 ASSERT(ifp->if_ext_max ==
5472 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 5301 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5473 error = 0; 5302 error = 0;
5474 if (delta && delta->xed_startoff != NULLFILEOFF) {
5475 /* A change was actually made.
5476 * Note that delta->xed_blockount is an offset at this
5477 * point and needs to be converted to a block count.
5478 */
5479 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5480 delta->xed_blockcount -= delta->xed_startoff;
5481 }
5482error0: 5303error0:
5483 /* 5304 /*
5484 * Log everything. Do this after conversion, there's no point in 5305 * Log everything. Do this after conversion, there's no point in
@@ -5605,28 +5426,6 @@ xfs_getbmap(
5605 prealloced = 0; 5426 prealloced = 0;
5606 fixlen = 1LL << 32; 5427 fixlen = 1LL << 32;
5607 } else { 5428 } else {
5608 /*
5609 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
5610 * not generate a DMAPI read event. Otherwise, if the
5611 * DM_EVENT_READ bit is set for the file, generate a read
5612 * event in order that the DMAPI application may do its thing
5613 * before we return the extents. Usually this means restoring
5614 * user file data to regions of the file that look like holes.
5615 *
5616 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5617 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5618 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
5619 * could misinterpret holes in a DMAPI file as true holes,
5620 * when in fact they may represent offline user data.
5621 */
5622 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5623 !(iflags & BMV_IF_NO_DMAPI_READ)) {
5624 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
5625 0, 0, 0, NULL);
5626 if (error)
5627 return XFS_ERROR(error);
5628 }
5629
5630 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 5429 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5631 ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 5430 ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5632 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5431 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
@@ -5713,7 +5512,7 @@ xfs_getbmap(
5713 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 5512 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
5714 XFS_BB_TO_FSB(mp, bmv->bmv_length), 5513 XFS_BB_TO_FSB(mp, bmv->bmv_length),
5715 bmapi_flags, NULL, 0, map, &nmap, 5514 bmapi_flags, NULL, 0, map, &nmap,
5716 NULL, NULL); 5515 NULL);
5717 if (error) 5516 if (error)
5718 goto out_free_map; 5517 goto out_free_map;
5719 ASSERT(nmap <= subnex); 5518 ASSERT(nmap <= subnex);
@@ -5744,12 +5543,24 @@ xfs_getbmap(
5744 map[i].br_startblock)) 5543 map[i].br_startblock))
5745 goto out_free_map; 5544 goto out_free_map;
5746 5545
5747 nexleft--;
5748 bmv->bmv_offset = 5546 bmv->bmv_offset =
5749 out[cur_ext].bmv_offset + 5547 out[cur_ext].bmv_offset +
5750 out[cur_ext].bmv_length; 5548 out[cur_ext].bmv_length;
5751 bmv->bmv_length = 5549 bmv->bmv_length =
5752 max_t(__int64_t, 0, bmvend - bmv->bmv_offset); 5550 max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
5551
5552 /*
5553 * In case we don't want to return the hole,
5554 * don't increase cur_ext so that we can reuse
5555 * it in the next loop.
5556 */
5557 if ((iflags & BMV_IF_NO_HOLES) &&
5558 map[i].br_startblock == HOLESTARTBLOCK) {
5559 memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
5560 continue;
5561 }
5562
5563 nexleft--;
5753 bmv->bmv_entries++; 5564 bmv->bmv_entries++;
5754 cur_ext++; 5565 cur_ext++;
5755 } 5566 }
@@ -5859,66 +5670,34 @@ xfs_bmap_eof(
5859} 5670}
5860 5671
5861#ifdef DEBUG 5672#ifdef DEBUG
5862STATIC 5673STATIC struct xfs_buf *
5863xfs_buf_t *
5864xfs_bmap_get_bp( 5674xfs_bmap_get_bp(
5865 xfs_btree_cur_t *cur, 5675 struct xfs_btree_cur *cur,
5866 xfs_fsblock_t bno) 5676 xfs_fsblock_t bno)
5867{ 5677{
5868 int i; 5678 struct xfs_log_item_desc *lidp;
5869 xfs_buf_t *bp; 5679 int i;
5870 5680
5871 if (!cur) 5681 if (!cur)
5872 return(NULL); 5682 return NULL;
5873
5874 bp = NULL;
5875 for(i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5876 bp = cur->bc_bufs[i];
5877 if (!bp) break;
5878 if (XFS_BUF_ADDR(bp) == bno)
5879 break; /* Found it */
5880 }
5881 if (i == XFS_BTREE_MAXLEVELS)
5882 bp = NULL;
5883
5884 if (!bp) { /* Chase down all the log items to see if the bp is there */
5885 xfs_log_item_chunk_t *licp;
5886 xfs_trans_t *tp;
5887
5888 tp = cur->bc_tp;
5889 licp = &tp->t_items;
5890 while (!bp && licp != NULL) {
5891 if (xfs_lic_are_all_free(licp)) {
5892 licp = licp->lic_next;
5893 continue;
5894 }
5895 for (i = 0; i < licp->lic_unused; i++) {
5896 xfs_log_item_desc_t *lidp;
5897 xfs_log_item_t *lip;
5898 xfs_buf_log_item_t *bip;
5899 xfs_buf_t *lbp;
5900
5901 if (xfs_lic_isfree(licp, i)) {
5902 continue;
5903 }
5904
5905 lidp = xfs_lic_slot(licp, i);
5906 lip = lidp->lid_item;
5907 if (lip->li_type != XFS_LI_BUF)
5908 continue;
5909 5683
5910 bip = (xfs_buf_log_item_t *)lip; 5684 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5911 lbp = bip->bli_buf; 5685 if (!cur->bc_bufs[i])
5686 break;
5687 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
5688 return cur->bc_bufs[i];
5689 }
5912 5690
5913 if (XFS_BUF_ADDR(lbp) == bno) { 5691 /* Chase down all the log items to see if the bp is there */
5914 bp = lbp; 5692 list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
5915 break; /* Found it */ 5693 struct xfs_buf_log_item *bip;
5916 } 5694 bip = (struct xfs_buf_log_item *)lidp->lid_item;
5917 } 5695 if (bip->bli_item.li_type == XFS_LI_BUF &&
5918 licp = licp->lic_next; 5696 XFS_BUF_ADDR(bip->bli_buf) == bno)
5919 } 5697 return bip->bli_buf;
5920 } 5698 }
5921 return(bp); 5699
5700 return NULL;
5922} 5701}
5923 5702
5924STATIC void 5703STATIC void
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 419dafb9d87d..71ec9b6ecdfc 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -28,20 +28,6 @@ struct xfs_trans;
28extern kmem_zone_t *xfs_bmap_free_item_zone; 28extern kmem_zone_t *xfs_bmap_free_item_zone;
29 29
30/* 30/*
31 * DELTA: describe a change to the in-core extent list.
32 *
33 * Internally the use of xed_blockount is somewhat funky.
34 * xed_blockcount contains an offset much of the time because this
35 * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are
36 * the same underlying type).
37 */
38typedef struct xfs_extdelta
39{
40 xfs_fileoff_t xed_startoff; /* offset of range */
41 xfs_filblks_t xed_blockcount; /* blocks in range */
42} xfs_extdelta_t;
43
44/*
45 * List of extents to be free "later". 31 * List of extents to be free "later".
46 * The list is kept sorted on xbf_startblock. 32 * The list is kept sorted on xbf_startblock.
47 */ 33 */
@@ -82,27 +68,25 @@ typedef struct xfs_bmap_free
82#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ 68#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
83#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ 69#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
84#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ 70#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
85#define XFS_BMAPI_EXACT 0x010 /* allocate only to spec'd bounds */ 71#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
86#define XFS_BMAPI_ATTRFORK 0x020 /* use attribute fork not data */ 72#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */
87#define XFS_BMAPI_ASYNC 0x040 /* bunmapi xactions can be async */ 73#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
88#define XFS_BMAPI_RSVBLOCKS 0x080 /* OK to alloc. reserved data blocks */ 74#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
89#define XFS_BMAPI_PREALLOC 0x100 /* preallocation op: unwritten space */
90#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
91 /* combine contig. space */ 75 /* combine contig. space */
92#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ 76#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
93/* XFS_BMAPI_DIRECT_IO 0x800 */ 77/*
94#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */ 78 * unwritten extent conversion - this needs write cache flushing and no additional
95 /* need write cache flushing and no */ 79 * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
96 /* additional allocation alignments */ 80 * from written to unwritten, otherwise convert from unwritten to written.
81 */
82#define XFS_BMAPI_CONVERT 0x200
97 83
98#define XFS_BMAPI_FLAGS \ 84#define XFS_BMAPI_FLAGS \
99 { XFS_BMAPI_WRITE, "WRITE" }, \ 85 { XFS_BMAPI_WRITE, "WRITE" }, \
100 { XFS_BMAPI_DELAY, "DELAY" }, \ 86 { XFS_BMAPI_DELAY, "DELAY" }, \
101 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 87 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102 { XFS_BMAPI_METADATA, "METADATA" }, \ 88 { XFS_BMAPI_METADATA, "METADATA" }, \
103 { XFS_BMAPI_EXACT, "EXACT" }, \
104 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ 89 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
105 { XFS_BMAPI_ASYNC, "ASYNC" }, \
106 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ 90 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
107 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 91 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
108 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 92 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
@@ -310,9 +294,7 @@ xfs_bmapi(
310 xfs_extlen_t total, /* total blocks needed */ 294 xfs_extlen_t total, /* total blocks needed */
311 struct xfs_bmbt_irec *mval, /* output: map values */ 295 struct xfs_bmbt_irec *mval, /* output: map values */
312 int *nmap, /* i/o: mval size/count */ 296 int *nmap, /* i/o: mval size/count */
313 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 297 xfs_bmap_free_t *flist); /* i/o: list extents to free */
314 xfs_extdelta_t *delta); /* o: change made to incore
315 extents */
316 298
317/* 299/*
318 * Map file blocks to filesystem blocks, simple version. 300 * Map file blocks to filesystem blocks, simple version.
@@ -346,8 +328,6 @@ xfs_bunmapi(
346 xfs_fsblock_t *firstblock, /* first allocated block 328 xfs_fsblock_t *firstblock, /* first allocated block
347 controls a.g. for allocs */ 329 controls a.g. for allocs */
348 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 330 xfs_bmap_free_t *flist, /* i/o: list extents to free */
349 xfs_extdelta_t *delta, /* o: change made to incore
350 extents */
351 int *done); /* set if not done yet */ 331 int *done); /* set if not done yet */
352 332
353/* 333/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 416e47e54b83..87d3c10b6954 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -24,21 +24,16 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
38#include "xfs_alloc.h" 34#include "xfs_alloc.h"
39#include "xfs_btree.h" 35#include "xfs_btree.h"
40#include "xfs_btree_trace.h" 36#include "xfs_btree_trace.h"
41#include "xfs_ialloc.h"
42#include "xfs_itable.h" 37#include "xfs_itable.h"
43#include "xfs_bmap.h" 38#include "xfs_bmap.h"
44#include "xfs_error.h" 39#include "xfs_error.h"
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 96be4b0f2496..04f9cca8da7e 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -24,20 +24,15 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
38#include "xfs_btree.h" 34#include "xfs_btree.h"
39#include "xfs_btree_trace.h" 35#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h"
41#include "xfs_error.h" 36#include "xfs_error.h"
42#include "xfs_trace.h" 37#include "xfs_trace.h"
43 38
@@ -222,7 +217,7 @@ xfs_btree_del_cursor(
222 */ 217 */
223 for (i = 0; i < cur->bc_nlevels; i++) { 218 for (i = 0; i < cur->bc_nlevels; i++) {
224 if (cur->bc_bufs[i]) 219 if (cur->bc_bufs[i])
225 xfs_btree_setbuf(cur, i, NULL); 220 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
226 else if (!error) 221 else if (!error)
227 break; 222 break;
228 } 223 }
@@ -661,7 +656,7 @@ xfs_btree_reada_bufl(
661 656
662 ASSERT(fsbno != NULLFSBLOCK); 657 ASSERT(fsbno != NULLFSBLOCK);
663 d = XFS_FSB_TO_DADDR(mp, fsbno); 658 d = XFS_FSB_TO_DADDR(mp, fsbno);
664 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); 659 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
665} 660}
666 661
667/* 662/*
@@ -681,7 +676,7 @@ xfs_btree_reada_bufs(
681 ASSERT(agno != NULLAGNUMBER); 676 ASSERT(agno != NULLAGNUMBER);
682 ASSERT(agbno != NULLAGBLOCK); 677 ASSERT(agbno != NULLAGBLOCK);
683 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 678 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
684 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); 679 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
685} 680}
686 681
687STATIC int 682STATIC int
@@ -768,22 +763,19 @@ xfs_btree_readahead(
768 * Set the buffer for level "lev" in the cursor to bp, releasing 763 * Set the buffer for level "lev" in the cursor to bp, releasing
769 * any previous buffer. 764 * any previous buffer.
770 */ 765 */
771void 766STATIC void
772xfs_btree_setbuf( 767xfs_btree_setbuf(
773 xfs_btree_cur_t *cur, /* btree cursor */ 768 xfs_btree_cur_t *cur, /* btree cursor */
774 int lev, /* level in btree */ 769 int lev, /* level in btree */
775 xfs_buf_t *bp) /* new buffer to set */ 770 xfs_buf_t *bp) /* new buffer to set */
776{ 771{
777 struct xfs_btree_block *b; /* btree block */ 772 struct xfs_btree_block *b; /* btree block */
778 xfs_buf_t *obp; /* old buffer pointer */
779 773
780 obp = cur->bc_bufs[lev]; 774 if (cur->bc_bufs[lev])
781 if (obp) 775 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
782 xfs_trans_brelse(cur->bc_tp, obp);
783 cur->bc_bufs[lev] = bp; 776 cur->bc_bufs[lev] = bp;
784 cur->bc_ra[lev] = 0; 777 cur->bc_ra[lev] = 0;
785 if (!bp) 778
786 return;
787 b = XFS_BUF_TO_BLOCK(bp); 779 b = XFS_BUF_TO_BLOCK(bp);
788 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 780 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
789 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) 781 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
@@ -3016,6 +3008,43 @@ out0:
3016 return 0; 3008 return 0;
3017} 3009}
3018 3010
3011/*
3012 * Kill the current root node, and replace it with its only child node.
3013 */
3014STATIC int
3015xfs_btree_kill_root(
3016 struct xfs_btree_cur *cur,
3017 struct xfs_buf *bp,
3018 int level,
3019 union xfs_btree_ptr *newroot)
3020{
3021 int error;
3022
3023 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3024 XFS_BTREE_STATS_INC(cur, killroot);
3025
3026 /*
3027 * Update the root pointer, decreasing the level by 1 and then
3028 * free the old root.
3029 */
3030 cur->bc_ops->set_root(cur, newroot, -1);
3031
3032 error = cur->bc_ops->free_block(cur, bp);
3033 if (error) {
3034 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3035 return error;
3036 }
3037
3038 XFS_BTREE_STATS_INC(cur, free);
3039
3040 cur->bc_bufs[level] = NULL;
3041 cur->bc_ra[level] = 0;
3042 cur->bc_nlevels--;
3043
3044 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3045 return 0;
3046}
3047
3019STATIC int 3048STATIC int
3020xfs_btree_dec_cursor( 3049xfs_btree_dec_cursor(
3021 struct xfs_btree_cur *cur, 3050 struct xfs_btree_cur *cur,
@@ -3200,7 +3229,7 @@ xfs_btree_delrec(
3200 * Make it the new root of the btree. 3229 * Make it the new root of the btree.
3201 */ 3230 */
3202 pp = xfs_btree_ptr_addr(cur, 1, block); 3231 pp = xfs_btree_ptr_addr(cur, 1, block);
3203 error = cur->bc_ops->kill_root(cur, bp, level, pp); 3232 error = xfs_btree_kill_root(cur, bp, level, pp);
3204 if (error) 3233 if (error)
3205 goto error0; 3234 goto error0;
3206 } else if (level > 0) { 3235 } else if (level > 0) {
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 7fa07062bdda..82fafc66bd1f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -152,9 +152,7 @@ struct xfs_btree_ops {
152 152
153 /* update btree root pointer */ 153 /* update btree root pointer */
154 void (*set_root)(struct xfs_btree_cur *cur, 154 void (*set_root)(struct xfs_btree_cur *cur,
155 union xfs_btree_ptr *nptr, int level_change); 155 union xfs_btree_ptr *nptr, int level_change);
156 int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
157 int level, union xfs_btree_ptr *newroot);
158 156
159 /* block allocation / freeing */ 157 /* block allocation / freeing */
160 int (*alloc_block)(struct xfs_btree_cur *cur, 158 int (*alloc_block)(struct xfs_btree_cur *cur,
@@ -399,16 +397,6 @@ xfs_btree_reada_bufs(
399 xfs_agblock_t agbno, /* allocation group block number */ 397 xfs_agblock_t agbno, /* allocation group block number */
400 xfs_extlen_t count); /* count of filesystem blocks */ 398 xfs_extlen_t count); /* count of filesystem blocks */
401 399
402/*
403 * Set the buffer for level "lev" in the cursor to bp, releasing
404 * any previous buffer.
405 */
406void
407xfs_btree_setbuf(
408 xfs_btree_cur_t *cur, /* btree cursor */
409 int lev, /* level in btree */
410 struct xfs_buf *bp); /* new buffer to set */
411
412 400
413/* 401/*
414 * Common btree core entry points. 402 * Common btree core entry points.
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 02a80984aa05..2686d0d54c5b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_buf_item.h" 28#include "xfs_buf_item.h"
30#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
@@ -34,6 +33,12 @@
34 33
35kmem_zone_t *xfs_buf_item_zone; 34kmem_zone_t *xfs_buf_item_zone;
36 35
36static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37{
38 return container_of(lip, struct xfs_buf_log_item, bli_item);
39}
40
41
37#ifdef XFS_TRANS_DEBUG 42#ifdef XFS_TRANS_DEBUG
38/* 43/*
39 * This function uses an alternate strategy for tracking the bytes 44 * This function uses an alternate strategy for tracking the bytes
@@ -151,12 +156,13 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
151 */ 156 */
152STATIC uint 157STATIC uint
153xfs_buf_item_size( 158xfs_buf_item_size(
154 xfs_buf_log_item_t *bip) 159 struct xfs_log_item *lip)
155{ 160{
156 uint nvecs; 161 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
157 int next_bit; 162 struct xfs_buf *bp = bip->bli_buf;
158 int last_bit; 163 uint nvecs;
159 xfs_buf_t *bp; 164 int next_bit;
165 int last_bit;
160 166
161 ASSERT(atomic_read(&bip->bli_refcount) > 0); 167 ASSERT(atomic_read(&bip->bli_refcount) > 0);
162 if (bip->bli_flags & XFS_BLI_STALE) { 168 if (bip->bli_flags & XFS_BLI_STALE) {
@@ -170,7 +176,6 @@ xfs_buf_item_size(
170 return 1; 176 return 1;
171 } 177 }
172 178
173 bp = bip->bli_buf;
174 ASSERT(bip->bli_flags & XFS_BLI_LOGGED); 179 ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
175 nvecs = 1; 180 nvecs = 1;
176 last_bit = xfs_next_bit(bip->bli_format.blf_data_map, 181 last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
@@ -219,13 +224,13 @@ xfs_buf_item_size(
219 */ 224 */
220STATIC void 225STATIC void
221xfs_buf_item_format( 226xfs_buf_item_format(
222 xfs_buf_log_item_t *bip, 227 struct xfs_log_item *lip,
223 xfs_log_iovec_t *log_vector) 228 struct xfs_log_iovec *vecp)
224{ 229{
230 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
231 struct xfs_buf *bp = bip->bli_buf;
225 uint base_size; 232 uint base_size;
226 uint nvecs; 233 uint nvecs;
227 xfs_log_iovec_t *vecp;
228 xfs_buf_t *bp;
229 int first_bit; 234 int first_bit;
230 int last_bit; 235 int last_bit;
231 int next_bit; 236 int next_bit;
@@ -235,8 +240,6 @@ xfs_buf_item_format(
235 ASSERT(atomic_read(&bip->bli_refcount) > 0); 240 ASSERT(atomic_read(&bip->bli_refcount) > 0);
236 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 241 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
237 (bip->bli_flags & XFS_BLI_STALE)); 242 (bip->bli_flags & XFS_BLI_STALE));
238 bp = bip->bli_buf;
239 vecp = log_vector;
240 243
241 /* 244 /*
242 * The size of the base structure is the size of the 245 * The size of the base structure is the size of the
@@ -248,7 +251,7 @@ xfs_buf_item_format(
248 base_size = 251 base_size =
249 (uint)(sizeof(xfs_buf_log_format_t) + 252 (uint)(sizeof(xfs_buf_log_format_t) +
250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 253 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 254 vecp->i_addr = &bip->bli_format;
252 vecp->i_len = base_size; 255 vecp->i_len = base_size;
253 vecp->i_type = XLOG_REG_TYPE_BFORMAT; 256 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
254 vecp++; 257 vecp++;
@@ -263,7 +266,7 @@ xfs_buf_item_format(
263 */ 266 */
264 if (bip->bli_flags & XFS_BLI_INODE_BUF) { 267 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
265 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 268 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
266 xfs_log_item_in_current_chkpt(&bip->bli_item))) 269 xfs_log_item_in_current_chkpt(lip)))
267 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; 270 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
268 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 271 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
269 } 272 }
@@ -356,66 +359,90 @@ xfs_buf_item_format(
356 359
357/* 360/*
358 * This is called to pin the buffer associated with the buf log item in memory 361 * This is called to pin the buffer associated with the buf log item in memory
359 * so it cannot be written out. Simply call bpin() on the buffer to do this. 362 * so it cannot be written out.
360 * 363 *
361 * We also always take a reference to the buffer log item here so that the bli 364 * We also always take a reference to the buffer log item here so that the bli
362 * is held while the item is pinned in memory. This means that we can 365 * is held while the item is pinned in memory. This means that we can
363 * unconditionally drop the reference count a transaction holds when the 366 * unconditionally drop the reference count a transaction holds when the
364 * transaction is completed. 367 * transaction is completed.
365 */ 368 */
366
367STATIC void 369STATIC void
368xfs_buf_item_pin( 370xfs_buf_item_pin(
369 xfs_buf_log_item_t *bip) 371 struct xfs_log_item *lip)
370{ 372{
371 xfs_buf_t *bp; 373 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
372 374
373 bp = bip->bli_buf; 375 ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
374 ASSERT(XFS_BUF_ISBUSY(bp));
375 ASSERT(atomic_read(&bip->bli_refcount) > 0); 376 ASSERT(atomic_read(&bip->bli_refcount) > 0);
376 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 377 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
377 (bip->bli_flags & XFS_BLI_STALE)); 378 (bip->bli_flags & XFS_BLI_STALE));
378 atomic_inc(&bip->bli_refcount); 379
379 trace_xfs_buf_item_pin(bip); 380 trace_xfs_buf_item_pin(bip);
380 xfs_bpin(bp);
381}
382 381
382 atomic_inc(&bip->bli_refcount);
383 atomic_inc(&bip->bli_buf->b_pin_count);
384}
383 385
384/* 386/*
385 * This is called to unpin the buffer associated with the buf log 387 * This is called to unpin the buffer associated with the buf log
386 * item which was previously pinned with a call to xfs_buf_item_pin(). 388 * item which was previously pinned with a call to xfs_buf_item_pin().
387 * Just call bunpin() on the buffer to do this.
388 * 389 *
389 * Also drop the reference to the buf item for the current transaction. 390 * Also drop the reference to the buf item for the current transaction.
390 * If the XFS_BLI_STALE flag is set and we are the last reference, 391 * If the XFS_BLI_STALE flag is set and we are the last reference,
391 * then free up the buf log item and unlock the buffer. 392 * then free up the buf log item and unlock the buffer.
393 *
394 * If the remove flag is set we are called from uncommit in the
395 * forced-shutdown path. If that is true and the reference count on
396 * the log item is going to drop to zero we need to free the item's
397 * descriptor in the transaction.
392 */ 398 */
393STATIC void 399STATIC void
394xfs_buf_item_unpin( 400xfs_buf_item_unpin(
395 xfs_buf_log_item_t *bip) 401 struct xfs_log_item *lip,
402 int remove)
396{ 403{
397 struct xfs_ail *ailp; 404 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
398 xfs_buf_t *bp; 405 xfs_buf_t *bp = bip->bli_buf;
399 int freed; 406 struct xfs_ail *ailp = lip->li_ailp;
400 int stale = bip->bli_flags & XFS_BLI_STALE; 407 int stale = bip->bli_flags & XFS_BLI_STALE;
408 int freed;
401 409
402 bp = bip->bli_buf;
403 ASSERT(bp != NULL);
404 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 410 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
405 ASSERT(atomic_read(&bip->bli_refcount) > 0); 411 ASSERT(atomic_read(&bip->bli_refcount) > 0);
412
406 trace_xfs_buf_item_unpin(bip); 413 trace_xfs_buf_item_unpin(bip);
407 414
408 freed = atomic_dec_and_test(&bip->bli_refcount); 415 freed = atomic_dec_and_test(&bip->bli_refcount);
409 ailp = bip->bli_item.li_ailp; 416
410 xfs_bunpin(bp); 417 if (atomic_dec_and_test(&bp->b_pin_count))
418 wake_up_all(&bp->b_waiters);
419
411 if (freed && stale) { 420 if (freed && stale) {
412 ASSERT(bip->bli_flags & XFS_BLI_STALE); 421 ASSERT(bip->bli_flags & XFS_BLI_STALE);
413 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 422 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
414 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 423 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
415 ASSERT(XFS_BUF_ISSTALE(bp)); 424 ASSERT(XFS_BUF_ISSTALE(bp));
416 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 425 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
426
417 trace_xfs_buf_item_unpin_stale(bip); 427 trace_xfs_buf_item_unpin_stale(bip);
418 428
429 if (remove) {
430 /*
431 * We have to remove the log item from the transaction
432 * as we are about to release our reference to the
433 * buffer. If we don't, the unlock that occurs later
434 * in xfs_trans_uncommit() will try to reference the
435 * buffer which we no longer have a hold on.
436 */
437 xfs_trans_del_item(lip);
438
439 /*
440 * Since the transaction no longer refers to the buffer,
441 * the buffer should no longer refer to the transaction.
442 */
443 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
444 }
445
419 /* 446 /*
420 * If we get called here because of an IO error, we may 447 * If we get called here because of an IO error, we may
421 * or may not have the item on the AIL. xfs_trans_ail_delete() 448 * or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -437,48 +464,6 @@ xfs_buf_item_unpin(
437} 464}
438 465
439/* 466/*
440 * this is called from uncommit in the forced-shutdown path.
441 * we need to check to see if the reference count on the log item
442 * is going to drop to zero. If so, unpin will free the log item
443 * so we need to free the item's descriptor (that points to the item)
444 * in the transaction.
445 */
446STATIC void
447xfs_buf_item_unpin_remove(
448 xfs_buf_log_item_t *bip,
449 xfs_trans_t *tp)
450{
451 /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
452 if ((atomic_read(&bip->bli_refcount) == 1) &&
453 (bip->bli_flags & XFS_BLI_STALE)) {
454 /*
455 * yes -- We can safely do some work here and then call
456 * buf_item_unpin to do the rest because we are
457 * are holding the buffer locked so no one else will be
458 * able to bump up the refcount. We have to remove the
459 * log item from the transaction as we are about to release
460 * our reference to the buffer. If we don't, the unlock that
461 * occurs later in the xfs_trans_uncommit() will try to
462 * reference the buffer which we no longer have a hold on.
463 */
464 struct xfs_log_item_desc *lidp;
465
466 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
467 trace_xfs_buf_item_unpin_stale(bip);
468
469 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
470 xfs_trans_free_item(tp, lidp);
471
472 /*
473 * Since the transaction no longer refers to the buffer, the
474 * buffer should no longer refer to the transaction.
475 */
476 XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
477 }
478 xfs_buf_item_unpin(bip);
479}
480
481/*
482 * This is called to attempt to lock the buffer associated with this 467 * This is called to attempt to lock the buffer associated with this
483 * buf log item. Don't sleep on the buffer lock. If we can't get 468 * buf log item. Don't sleep on the buffer lock. If we can't get
484 * the lock right away, return 0. If we can get the lock, take a 469 * the lock right away, return 0. If we can get the lock, take a
@@ -488,11 +473,11 @@ xfs_buf_item_unpin_remove(
488 */ 473 */
489STATIC uint 474STATIC uint
490xfs_buf_item_trylock( 475xfs_buf_item_trylock(
491 xfs_buf_log_item_t *bip) 476 struct xfs_log_item *lip)
492{ 477{
493 xfs_buf_t *bp; 478 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
479 struct xfs_buf *bp = bip->bli_buf;
494 480
495 bp = bip->bli_buf;
496 if (XFS_BUF_ISPINNED(bp)) 481 if (XFS_BUF_ISPINNED(bp))
497 return XFS_ITEM_PINNED; 482 return XFS_ITEM_PINNED;
498 if (!XFS_BUF_CPSEMA(bp)) 483 if (!XFS_BUF_CPSEMA(bp))
@@ -529,13 +514,12 @@ xfs_buf_item_trylock(
529 */ 514 */
530STATIC void 515STATIC void
531xfs_buf_item_unlock( 516xfs_buf_item_unlock(
532 xfs_buf_log_item_t *bip) 517 struct xfs_log_item *lip)
533{ 518{
534 int aborted; 519 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
535 xfs_buf_t *bp; 520 struct xfs_buf *bp = bip->bli_buf;
536 uint hold; 521 int aborted;
537 522 uint hold;
538 bp = bip->bli_buf;
539 523
540 /* Clear the buffer's association with this transaction. */ 524 /* Clear the buffer's association with this transaction. */
541 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 525 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
@@ -546,7 +530,7 @@ xfs_buf_item_unlock(
546 * (cancelled) buffers at unpin time, but we'll never go through the 530 * (cancelled) buffers at unpin time, but we'll never go through the
547 * pin/unpin cycle if we abort inside commit. 531 * pin/unpin cycle if we abort inside commit.
548 */ 532 */
549 aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; 533 aborted = (lip->li_flags & XFS_LI_ABORTED) != 0;
550 534
551 /* 535 /*
552 * Before possibly freeing the buf item, determine if we should 536 * Before possibly freeing the buf item, determine if we should
@@ -607,16 +591,16 @@ xfs_buf_item_unlock(
607 */ 591 */
608STATIC xfs_lsn_t 592STATIC xfs_lsn_t
609xfs_buf_item_committed( 593xfs_buf_item_committed(
610 xfs_buf_log_item_t *bip, 594 struct xfs_log_item *lip,
611 xfs_lsn_t lsn) 595 xfs_lsn_t lsn)
612{ 596{
597 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
598
613 trace_xfs_buf_item_committed(bip); 599 trace_xfs_buf_item_committed(bip);
614 600
615 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 601 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0)
616 (bip->bli_item.li_lsn != 0)) { 602 return lip->li_lsn;
617 return bip->bli_item.li_lsn; 603 return lsn;
618 }
619 return (lsn);
620} 604}
621 605
622/* 606/*
@@ -626,15 +610,16 @@ xfs_buf_item_committed(
626 */ 610 */
627STATIC void 611STATIC void
628xfs_buf_item_push( 612xfs_buf_item_push(
629 xfs_buf_log_item_t *bip) 613 struct xfs_log_item *lip)
630{ 614{
631 xfs_buf_t *bp; 615 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
616 struct xfs_buf *bp = bip->bli_buf;
632 617
633 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 618 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
619 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
620
634 trace_xfs_buf_item_push(bip); 621 trace_xfs_buf_item_push(bip);
635 622
636 bp = bip->bli_buf;
637 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
638 xfs_buf_relse(bp); 623 xfs_buf_relse(bp);
639} 624}
640 625
@@ -646,22 +631,24 @@ xfs_buf_item_push(
646 */ 631 */
647STATIC void 632STATIC void
648xfs_buf_item_pushbuf( 633xfs_buf_item_pushbuf(
649 xfs_buf_log_item_t *bip) 634 struct xfs_log_item *lip)
650{ 635{
651 xfs_buf_t *bp; 636 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
637 struct xfs_buf *bp = bip->bli_buf;
652 638
653 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 639 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
640 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
641
654 trace_xfs_buf_item_pushbuf(bip); 642 trace_xfs_buf_item_pushbuf(bip);
655 643
656 bp = bip->bli_buf;
657 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
658 xfs_buf_delwri_promote(bp); 644 xfs_buf_delwri_promote(bp);
659 xfs_buf_relse(bp); 645 xfs_buf_relse(bp);
660} 646}
661 647
662/* ARGSUSED */
663STATIC void 648STATIC void
664xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) 649xfs_buf_item_committing(
650 struct xfs_log_item *lip,
651 xfs_lsn_t commit_lsn)
665{ 652{
666} 653}
667 654
@@ -669,21 +656,16 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
669 * This is the ops vector shared by all buf log items. 656 * This is the ops vector shared by all buf log items.
670 */ 657 */
671static struct xfs_item_ops xfs_buf_item_ops = { 658static struct xfs_item_ops xfs_buf_item_ops = {
672 .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, 659 .iop_size = xfs_buf_item_size,
673 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 660 .iop_format = xfs_buf_item_format,
674 xfs_buf_item_format, 661 .iop_pin = xfs_buf_item_pin,
675 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, 662 .iop_unpin = xfs_buf_item_unpin,
676 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin, 663 .iop_trylock = xfs_buf_item_trylock,
677 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 664 .iop_unlock = xfs_buf_item_unlock,
678 xfs_buf_item_unpin_remove, 665 .iop_committed = xfs_buf_item_committed,
679 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, 666 .iop_push = xfs_buf_item_push,
680 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock, 667 .iop_pushbuf = xfs_buf_item_pushbuf,
681 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 668 .iop_committing = xfs_buf_item_committing
682 xfs_buf_item_committed,
683 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
684 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
685 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
686 xfs_buf_item_committing
687}; 669};
688 670
689 671
@@ -710,9 +692,7 @@ xfs_buf_item_init(
710 * the first. If we do already have one, there is 692 * the first. If we do already have one, there is
711 * nothing to do here so return. 693 * nothing to do here so return.
712 */ 694 */
713 if (bp->b_mount != mp) 695 ASSERT(bp->b_target->bt_mount == mp);
714 bp->b_mount = mp;
715 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
716 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 696 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
717 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 697 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
718 if (lip->li_type == XFS_LI_BUF) { 698 if (lip->li_type == XFS_LI_BUF) {
@@ -993,7 +973,7 @@ xfs_buf_iodone_callbacks(
993 xfs_buf_do_callbacks(bp, lip); 973 xfs_buf_do_callbacks(bp, lip);
994 XFS_BUF_SET_FSPRIVATE(bp, NULL); 974 XFS_BUF_SET_FSPRIVATE(bp, NULL);
995 XFS_BUF_CLR_IODONE_FUNC(bp); 975 XFS_BUF_CLR_IODONE_FUNC(bp);
996 xfs_biodone(bp); 976 xfs_buf_ioend(bp, 0);
997 return; 977 return;
998 } 978 }
999 979
@@ -1052,7 +1032,7 @@ xfs_buf_iodone_callbacks(
1052 xfs_buf_do_callbacks(bp, lip); 1032 xfs_buf_do_callbacks(bp, lip);
1053 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1033 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1054 XFS_BUF_CLR_IODONE_FUNC(bp); 1034 XFS_BUF_CLR_IODONE_FUNC(bp);
1055 xfs_biodone(bp); 1035 xfs_buf_ioend(bp, 0);
1056} 1036}
1057 1037
1058/* 1038/*
@@ -1098,15 +1078,14 @@ xfs_buf_error_relse(
1098 * It is called by xfs_buf_iodone_callbacks() above which will take 1078 * It is called by xfs_buf_iodone_callbacks() above which will take
1099 * care of cleaning up the buffer itself. 1079 * care of cleaning up the buffer itself.
1100 */ 1080 */
1101/* ARGSUSED */
1102void 1081void
1103xfs_buf_iodone( 1082xfs_buf_iodone(
1104 xfs_buf_t *bp, 1083 struct xfs_buf *bp,
1105 xfs_buf_log_item_t *bip) 1084 struct xfs_log_item *lip)
1106{ 1085{
1107 struct xfs_ail *ailp = bip->bli_item.li_ailp; 1086 struct xfs_ail *ailp = lip->li_ailp;
1108 1087
1109 ASSERT(bip->bli_buf == bp); 1088 ASSERT(BUF_ITEM(lip)->bli_buf == bp);
1110 1089
1111 xfs_buf_rele(bp); 1090 xfs_buf_rele(bp);
1112 1091
@@ -1120,6 +1099,6 @@ xfs_buf_iodone(
1120 * Either way, AIL is useless if we're forcing a shutdown. 1099 * Either way, AIL is useless if we're forcing a shutdown.
1121 */ 1100 */
1122 spin_lock(&ailp->xa_lock); 1101 spin_lock(&ailp->xa_lock);
1123 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 1102 xfs_trans_ail_delete(ailp, lip);
1124 xfs_buf_item_free(bip); 1103 xfs_buf_item_free(BUF_ITEM(lip));
1125} 1104}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index f20bb472d582..0e2ed43f16c7 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -124,7 +124,7 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
124 void(*)(struct xfs_buf *, xfs_log_item_t *), 124 void(*)(struct xfs_buf *, xfs_log_item_t *),
125 xfs_log_item_t *); 125 xfs_log_item_t *);
126void xfs_buf_iodone_callbacks(struct xfs_buf *); 126void xfs_buf_iodone_callbacks(struct xfs_buf *);
127void xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *); 127void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
128 128
129#ifdef XFS_TRANS_DEBUG 129#ifdef XFS_TRANS_DEBUG
130void 130void
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0ca556b4bf31..1c00bedb3175 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -25,19 +25,14 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
39#include "xfs_alloc.h" 35#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 36#include "xfs_bmap.h"
42#include "xfs_attr.h" 37#include "xfs_attr.h"
43#include "xfs_attr_leaf.h" 38#include "xfs_attr_leaf.h"
@@ -581,16 +576,14 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
581 xfs_da_intnode_t *node; 576 xfs_da_intnode_t *node;
582 xfs_da_node_entry_t *btree; 577 xfs_da_node_entry_t *btree;
583 int tmp; 578 int tmp;
584 xfs_mount_t *mp;
585 579
586 node = oldblk->bp->data; 580 node = oldblk->bp->data;
587 mp = state->mp;
588 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 581 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
589 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 582 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
590 ASSERT(newblk->blkno != 0); 583 ASSERT(newblk->blkno != 0);
591 if (state->args->whichfork == XFS_DATA_FORK) 584 if (state->args->whichfork == XFS_DATA_FORK)
592 ASSERT(newblk->blkno >= mp->m_dirleafblk && 585 ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
593 newblk->blkno < mp->m_dirfreeblk); 586 newblk->blkno < state->mp->m_dirfreeblk);
594 587
595 /* 588 /*
596 * We may need to make some room before we insert the new node. 589 * We may need to make some room before we insert the new node.
@@ -1601,7 +1594,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1601 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1594 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1602 XFS_BMAPI_CONTIG, 1595 XFS_BMAPI_CONTIG,
1603 args->firstblock, args->total, &map, &nmap, 1596 args->firstblock, args->total, &map, &nmap,
1604 args->flist, NULL))) { 1597 args->flist))) {
1605 return error; 1598 return error;
1606 } 1599 }
1607 ASSERT(nmap <= 1); 1600 ASSERT(nmap <= 1);
@@ -1622,8 +1615,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1622 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| 1615 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
1623 XFS_BMAPI_METADATA, 1616 XFS_BMAPI_METADATA,
1624 args->firstblock, args->total, 1617 args->firstblock, args->total,
1625 &mapp[mapi], &nmap, args->flist, 1618 &mapp[mapi], &nmap, args->flist))) {
1626 NULL))) {
1627 kmem_free(mapp); 1619 kmem_free(mapp);
1628 return error; 1620 return error;
1629 } 1621 }
@@ -1884,7 +1876,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1884 */ 1876 */
1885 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 1877 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1886 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 1878 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1887 0, args->firstblock, args->flist, NULL, 1879 0, args->firstblock, args->flist,
1888 &done)) == ENOSPC) { 1880 &done)) == ENOSPC) {
1889 if (w != XFS_DATA_FORK) 1881 if (w != XFS_DATA_FORK)
1890 break; 1882 break;
@@ -1989,7 +1981,7 @@ xfs_da_do_buf(
1989 nfsb, 1981 nfsb,
1990 XFS_BMAPI_METADATA | 1982 XFS_BMAPI_METADATA |
1991 xfs_bmapi_aflag(whichfork), 1983 xfs_bmapi_aflag(whichfork),
1992 NULL, 0, mapp, &nmap, NULL, NULL))) 1984 NULL, 0, mapp, &nmap, NULL)))
1993 goto exit0; 1985 goto exit0;
1994 } 1986 }
1995 } else { 1987 } else {
@@ -2050,7 +2042,7 @@ xfs_da_do_buf(
2050 mappedbno, nmapped, 0, &bp); 2042 mappedbno, nmapped, 0, &bp);
2051 break; 2043 break;
2052 case 3: 2044 case 3:
2053 xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); 2045 xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
2054 error = 0; 2046 error = 0;
2055 bp = NULL; 2047 bp = NULL;
2056 break; 2048 break;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 7f159d2a429a..3b9582c60a22 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -24,24 +24,15 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
38#include "xfs_bmap.h" 32#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_itable.h" 33#include "xfs_itable.h"
42#include "xfs_dfrag.h" 34#include "xfs_dfrag.h"
43#include "xfs_error.h" 35#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_vnodeops.h" 36#include "xfs_vnodeops.h"
46#include "xfs_trace.h" 37#include "xfs_trace.h"
47 38
@@ -425,11 +416,8 @@ xfs_swap_extents(
425 } 416 }
426 417
427 418
428 IHOLD(ip); 419 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
429 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 420 xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
430
431 IHOLD(tip);
432 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
433 421
434 xfs_trans_log_inode(tp, ip, ilf_fields); 422 xfs_trans_log_inode(tp, ip, ilf_fields);
435 xfs_trans_log_inode(tp, tip, tilf_fields); 423 xfs_trans_log_inode(tp, tip, tilf_fields);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5b153b2e6a3..dffba9ba0db6 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -49,8 +49,9 @@ typedef struct xfs_dinode {
49 __be32 di_uid; /* owner's user id */ 49 __be32 di_uid; /* owner's user id */
50 __be32 di_gid; /* owner's group id */ 50 __be32 di_gid; /* owner's group id */
51 __be32 di_nlink; /* number of links to file */ 51 __be32 di_nlink; /* number of links to file */
52 __be16 di_projid; /* owner's project id */ 52 __be16 di_projid_lo; /* lower part of owner's project id */
53 __u8 di_pad[8]; /* unused, zeroed space */ 53 __be16 di_projid_hi; /* higher part owner's project id */
54 __u8 di_pad[6]; /* unused, zeroed space */
54 __be16 di_flushiter; /* incremented on flush */ 55 __be16 di_flushiter; /* incremented on flush */
55 xfs_timestamp_t di_atime; /* time last accessed */ 56 xfs_timestamp_t di_atime; /* time last accessed */
56 xfs_timestamp_t di_mtime; /* time last modified */ 57 xfs_timestamp_t di_mtime; /* time last modified */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 42520f041265..a1321bc7f192 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -25,13 +25,11 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 33#include "xfs_dinode.h"
36#include "xfs_inode.h" 34#include "xfs_inode.h"
37#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -382,7 +380,7 @@ xfs_readdir(
382 int rval; /* return value */ 380 int rval; /* return value */
383 int v; /* type-checking value */ 381 int v; /* type-checking value */
384 382
385 xfs_itrace_entry(dp); 383 trace_xfs_readdir(dp);
386 384
387 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 385 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
388 return XFS_ERROR(EIO); 386 return XFS_ERROR(EIO);
@@ -549,7 +547,7 @@ xfs_dir2_grow_inode(
549 if ((error = xfs_bmapi(tp, dp, bno, count, 547 if ((error = xfs_bmapi(tp, dp, bno, count,
550 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 548 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
551 args->firstblock, args->total, &map, &nmap, 549 args->firstblock, args->total, &map, &nmap,
552 args->flist, NULL))) 550 args->flist)))
553 return error; 551 return error;
554 ASSERT(nmap <= 1); 552 ASSERT(nmap <= 1);
555 if (nmap == 1) { 553 if (nmap == 1) {
@@ -581,8 +579,7 @@ xfs_dir2_grow_inode(
581 if ((error = xfs_bmapi(tp, dp, b, c, 579 if ((error = xfs_bmapi(tp, dp, b, c,
582 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, 580 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
583 args->firstblock, args->total, 581 args->firstblock, args->total,
584 &mapp[mapi], &nmap, args->flist, 582 &mapp[mapi], &nmap, args->flist))) {
585 NULL))) {
586 kmem_free(mapp); 583 kmem_free(mapp);
587 return error; 584 return error;
588 } 585 }
@@ -715,7 +712,7 @@ xfs_dir2_shrink_inode(
715 */ 712 */
716 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, 713 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
717 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, 714 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
718 NULL, &done))) { 715 &done))) {
719 /* 716 /*
720 * ENOSPC actually can happen if we're in a removename with 717 * ENOSPC actually can happen if we're in a removename with
721 * no space reservation, and the resulting block removal 718 * no space reservation, and the resulting block removal
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 779a267b0a84..580d99cef9e7 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
@@ -1073,10 +1071,10 @@ xfs_dir2_sf_to_block(
1073 */ 1071 */
1074 1072
1075 buf_len = dp->i_df.if_bytes; 1073 buf_len = dp->i_df.if_bytes;
1076 buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); 1074 buf = kmem_alloc(buf_len, KM_SLEEP);
1077 1075
1078 memcpy(buf, sfp, dp->i_df.if_bytes); 1076 memcpy(buf, sfp, buf_len);
1079 xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); 1077 xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
1080 dp->i_d.di_size = 0; 1078 dp->i_d.di_size = 0;
1081 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1079 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1082 /* 1080 /*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 498f8d694330..921595b84f5b 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2_data.h"
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index e2d89854ec9e..ae891223be90 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -25,11 +25,9 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
35#include "xfs_inode.h" 33#include "xfs_inode.h"
@@ -875,7 +873,7 @@ xfs_dir2_leaf_getdents(
875 xfs_dir2_byte_to_da(mp, 873 xfs_dir2_byte_to_da(mp,
876 XFS_DIR2_LEAF_OFFSET) - map_off, 874 XFS_DIR2_LEAF_OFFSET) - map_off,
877 XFS_BMAPI_METADATA, NULL, 0, 875 XFS_BMAPI_METADATA, NULL, 0,
878 &map[map_valid], &nmap, NULL, NULL); 876 &map[map_valid], &nmap, NULL);
879 /* 877 /*
880 * Don't know if we should ignore this or 878 * Don't know if we should ignore this or
881 * try to return an error. 879 * try to return an error.
@@ -963,7 +961,7 @@ xfs_dir2_leaf_getdents(
963 if (i > ra_current && 961 if (i > ra_current &&
964 map[ra_index].br_blockcount >= 962 map[ra_index].br_blockcount >=
965 mp->m_dirblkfsbs) { 963 mp->m_dirblkfsbs) {
966 xfs_baread(mp->m_ddev_targp, 964 xfs_buf_readahead(mp->m_ddev_targp,
967 XFS_FSB_TO_DADDR(mp, 965 XFS_FSB_TO_DADDR(mp,
968 map[ra_index].br_startblock + 966 map[ra_index].br_startblock +
969 ra_offset), 967 ra_offset),
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 78fc4d9ae756..f9a0864b696a 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_bmap.h" 33#include "xfs_bmap.h"
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c1a5945d463a..b1bae6b1eed9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
deleted file mode 100644
index 2813cdd72375..000000000000
--- a/fs/xfs/xfs_dmapi.h
+++ /dev/null
@@ -1,170 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__
20
21/* Values used to define the on-disk version of dm_attrname_t. All
22 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
23 *
24 * In the on-disk inode, DMAPI attribute names consist of the user-provided
25 * name with the DMATTR_PREFIXSTRING pre-pended. This string must NEVER be
26 * changed.
27 */
28
29#define DMATTR_PREFIXLEN 8
30#define DMATTR_PREFIXSTRING "SGI_DMI_"
31
32typedef enum {
33 DM_EVENT_INVALID = -1,
34 DM_EVENT_CANCEL = 0, /* not supported */
35 DM_EVENT_MOUNT = 1,
36 DM_EVENT_PREUNMOUNT = 2,
37 DM_EVENT_UNMOUNT = 3,
38 DM_EVENT_DEBUT = 4, /* not supported */
39 DM_EVENT_CREATE = 5,
40 DM_EVENT_CLOSE = 6, /* not supported */
41 DM_EVENT_POSTCREATE = 7,
42 DM_EVENT_REMOVE = 8,
43 DM_EVENT_POSTREMOVE = 9,
44 DM_EVENT_RENAME = 10,
45 DM_EVENT_POSTRENAME = 11,
46 DM_EVENT_LINK = 12,
47 DM_EVENT_POSTLINK = 13,
48 DM_EVENT_SYMLINK = 14,
49 DM_EVENT_POSTSYMLINK = 15,
50 DM_EVENT_READ = 16,
51 DM_EVENT_WRITE = 17,
52 DM_EVENT_TRUNCATE = 18,
53 DM_EVENT_ATTRIBUTE = 19,
54 DM_EVENT_DESTROY = 20,
55 DM_EVENT_NOSPACE = 21,
56 DM_EVENT_USER = 22,
57 DM_EVENT_MAX = 23
58} dm_eventtype_t;
59#define HAVE_DM_EVENTTYPE_T
60
61typedef enum {
62 DM_RIGHT_NULL,
63 DM_RIGHT_SHARED,
64 DM_RIGHT_EXCL
65} dm_right_t;
66#define HAVE_DM_RIGHT_T
67
68/* Defines for determining if an event message should be sent. */
69#ifdef HAVE_DMAPI
70#define DM_EVENT_ENABLED(ip, event) ( \
71 unlikely ((ip)->i_mount->m_flags & XFS_MOUNT_DMAPI) && \
72 ( ((ip)->i_d.di_dmevmask & (1 << event)) || \
73 ((ip)->i_mount->m_dmevmask & (1 << event)) ) \
74 )
75#else
76#define DM_EVENT_ENABLED(ip, event) (0)
77#endif
78
79#define DM_XFS_VALID_FS_EVENTS ( \
80 (1 << DM_EVENT_PREUNMOUNT) | \
81 (1 << DM_EVENT_UNMOUNT) | \
82 (1 << DM_EVENT_NOSPACE) | \
83 (1 << DM_EVENT_DEBUT) | \
84 (1 << DM_EVENT_CREATE) | \
85 (1 << DM_EVENT_POSTCREATE) | \
86 (1 << DM_EVENT_REMOVE) | \
87 (1 << DM_EVENT_POSTREMOVE) | \
88 (1 << DM_EVENT_RENAME) | \
89 (1 << DM_EVENT_POSTRENAME) | \
90 (1 << DM_EVENT_LINK) | \
91 (1 << DM_EVENT_POSTLINK) | \
92 (1 << DM_EVENT_SYMLINK) | \
93 (1 << DM_EVENT_POSTSYMLINK) | \
94 (1 << DM_EVENT_ATTRIBUTE) | \
95 (1 << DM_EVENT_DESTROY) )
96
97/* Events valid in dm_set_eventlist() when called with a file handle for
98 a regular file or a symlink. These events are persistent.
99*/
100
101#define DM_XFS_VALID_FILE_EVENTS ( \
102 (1 << DM_EVENT_ATTRIBUTE) | \
103 (1 << DM_EVENT_DESTROY) )
104
105/* Events valid in dm_set_eventlist() when called with a file handle for
106 a directory. These events are persistent.
107*/
108
109#define DM_XFS_VALID_DIRECTORY_EVENTS ( \
110 (1 << DM_EVENT_CREATE) | \
111 (1 << DM_EVENT_POSTCREATE) | \
112 (1 << DM_EVENT_REMOVE) | \
113 (1 << DM_EVENT_POSTREMOVE) | \
114 (1 << DM_EVENT_RENAME) | \
115 (1 << DM_EVENT_POSTRENAME) | \
116 (1 << DM_EVENT_LINK) | \
117 (1 << DM_EVENT_POSTLINK) | \
118 (1 << DM_EVENT_SYMLINK) | \
119 (1 << DM_EVENT_POSTSYMLINK) | \
120 (1 << DM_EVENT_ATTRIBUTE) | \
121 (1 << DM_EVENT_DESTROY) )
122
123/* Events supported by the XFS filesystem. */
124#define DM_XFS_SUPPORTED_EVENTS ( \
125 (1 << DM_EVENT_MOUNT) | \
126 (1 << DM_EVENT_PREUNMOUNT) | \
127 (1 << DM_EVENT_UNMOUNT) | \
128 (1 << DM_EVENT_NOSPACE) | \
129 (1 << DM_EVENT_CREATE) | \
130 (1 << DM_EVENT_POSTCREATE) | \
131 (1 << DM_EVENT_REMOVE) | \
132 (1 << DM_EVENT_POSTREMOVE) | \
133 (1 << DM_EVENT_RENAME) | \
134 (1 << DM_EVENT_POSTRENAME) | \
135 (1 << DM_EVENT_LINK) | \
136 (1 << DM_EVENT_POSTLINK) | \
137 (1 << DM_EVENT_SYMLINK) | \
138 (1 << DM_EVENT_POSTSYMLINK) | \
139 (1 << DM_EVENT_READ) | \
140 (1 << DM_EVENT_WRITE) | \
141 (1 << DM_EVENT_TRUNCATE) | \
142 (1 << DM_EVENT_ATTRIBUTE) | \
143 (1 << DM_EVENT_DESTROY) )
144
145
146/*
147 * Definitions used for the flags field on dm_send_*_event().
148 */
149
150#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */
151#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */
152#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */
153#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */
154#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
155
156/*
157 * Pull in platform specific event flags defines
158 */
159#include "xfs_dmapi_priv.h"
160
161/*
162 * Macros to turn caller specified delay/block flags into
163 * dm_send_xxxx_event flag DM_FLAGS_NDELAY.
164 */
165
166#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
167 DM_FLAGS_NDELAY : 0)
168#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
169
170#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
deleted file mode 100644
index e71e2581c0c3..000000000000
--- a/fs/xfs/xfs_dmops.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_trans.h"
23#include "xfs_sb.h"
24#include "xfs_dmapi.h"
25#include "xfs_inum.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28
29
30static struct xfs_dmops xfs_dmcore_stub = {
31 .xfs_send_data = (xfs_send_data_t)fs_nosys,
32 .xfs_send_mmap = (xfs_send_mmap_t)fs_noerr,
33 .xfs_send_destroy = (xfs_send_destroy_t)fs_nosys,
34 .xfs_send_namesp = (xfs_send_namesp_t)fs_nosys,
35 .xfs_send_mount = (xfs_send_mount_t)fs_nosys,
36 .xfs_send_unmount = (xfs_send_unmount_t)fs_noerr,
37};
38
39int
40xfs_dmops_get(struct xfs_mount *mp)
41{
42 if (mp->m_flags & XFS_MOUNT_DMAPI) {
43 cmn_err(CE_WARN,
44 "XFS: dmapi support not available in this kernel.");
45 return EINVAL;
46 }
47
48 mp->m_dm_ops = &xfs_dmcore_stub;
49 return 0;
50}
51
52void
53xfs_dmops_put(struct xfs_mount *mp)
54{
55}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 047b8a8e5c29..ed9990267661 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -23,12 +23,8 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 26#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_attr_sf.h"
32#include "xfs_dinode.h" 28#include "xfs_dinode.h"
33#include "xfs_inode.h" 29#include "xfs_inode.h"
34#include "xfs_utils.h" 30#include "xfs_utils.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 409fe81585fd..a55e687bf562 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -24,7 +24,6 @@
24#include "xfs_buf_item.h" 24#include "xfs_buf_item.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
30#include "xfs_extfree_item.h" 29#include "xfs_extfree_item.h"
@@ -33,18 +32,19 @@
33kmem_zone_t *xfs_efi_zone; 32kmem_zone_t *xfs_efi_zone;
34kmem_zone_t *xfs_efd_zone; 33kmem_zone_t *xfs_efd_zone;
35 34
36STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *); 35static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
36{
37 return container_of(lip, struct xfs_efi_log_item, efi_item);
38}
37 39
38void 40void
39xfs_efi_item_free(xfs_efi_log_item_t *efip) 41xfs_efi_item_free(
42 struct xfs_efi_log_item *efip)
40{ 43{
41 int nexts = efip->efi_format.efi_nextents; 44 if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
42
43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
44 kmem_free(efip); 45 kmem_free(efip);
45 } else { 46 else
46 kmem_zone_free(xfs_efi_zone, efip); 47 kmem_zone_free(xfs_efi_zone, efip);
47 }
48} 48}
49 49
50/* 50/*
@@ -52,9 +52,9 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
52 * We only need 1 iovec for an efi item. It just logs the efi_log_format 52 * We only need 1 iovec for an efi item. It just logs the efi_log_format
53 * structure. 53 * structure.
54 */ 54 */
55/*ARGSUSED*/
56STATIC uint 55STATIC uint
57xfs_efi_item_size(xfs_efi_log_item_t *efip) 56xfs_efi_item_size(
57 struct xfs_log_item *lip)
58{ 58{
59 return 1; 59 return 1;
60} 60}
@@ -67,10 +67,12 @@ xfs_efi_item_size(xfs_efi_log_item_t *efip)
67 * slots in the efi item have been filled. 67 * slots in the efi item have been filled.
68 */ 68 */
69STATIC void 69STATIC void
70xfs_efi_item_format(xfs_efi_log_item_t *efip, 70xfs_efi_item_format(
71 xfs_log_iovec_t *log_vector) 71 struct xfs_log_item *lip,
72 struct xfs_log_iovec *log_vector)
72{ 73{
73 uint size; 74 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
75 uint size;
74 76
75 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); 77 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
76 78
@@ -80,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
80 size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); 82 size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
81 efip->efi_format.efi_size = 1; 83 efip->efi_format.efi_size = 1;
82 84
83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 85 log_vector->i_addr = &efip->efi_format;
84 log_vector->i_len = size; 86 log_vector->i_len = size;
85 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; 87 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
86 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 88 ASSERT(size >= sizeof(xfs_efi_log_format_t));
@@ -90,60 +92,33 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
90/* 92/*
91 * Pinning has no meaning for an efi item, so just return. 93 * Pinning has no meaning for an efi item, so just return.
92 */ 94 */
93/*ARGSUSED*/
94STATIC void 95STATIC void
95xfs_efi_item_pin(xfs_efi_log_item_t *efip) 96xfs_efi_item_pin(
97 struct xfs_log_item *lip)
96{ 98{
97 return;
98} 99}
99 100
100
101/* 101/*
102 * While EFIs cannot really be pinned, the unpin operation is the 102 * While EFIs cannot really be pinned, the unpin operation is the
103 * last place at which the EFI is manipulated during a transaction. 103 * last place at which the EFI is manipulated during a transaction.
104 * Here we coordinate with xfs_efi_cancel() to determine who gets to 104 * Here we coordinate with xfs_efi_cancel() to determine who gets to
105 * free the EFI. 105 * free the EFI.
106 */ 106 */
107/*ARGSUSED*/
108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
110{
111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112
113 spin_lock(&ailp->xa_lock);
114 if (efip->efi_flags & XFS_EFI_CANCELED) {
115 /* xfs_trans_ail_delete() drops the AIL lock. */
116 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
117 xfs_efi_item_free(efip);
118 } else {
119 efip->efi_flags |= XFS_EFI_COMMITTED;
120 spin_unlock(&ailp->xa_lock);
121 }
122}
123
124/*
125 * like unpin only we have to also clear the xaction descriptor
126 * pointing the log item if we free the item. This routine duplicates
127 * unpin because efi_flags is protected by the AIL lock. Freeing
128 * the descriptor and then calling unpin would force us to drop the AIL
129 * lock which would open up a race condition.
130 */
131STATIC void 107STATIC void
132xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) 108xfs_efi_item_unpin(
109 struct xfs_log_item *lip,
110 int remove)
133{ 111{
134 struct xfs_ail *ailp = efip->efi_item.li_ailp; 112 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
135 xfs_log_item_desc_t *lidp; 113 struct xfs_ail *ailp = lip->li_ailp;
136 114
137 spin_lock(&ailp->xa_lock); 115 spin_lock(&ailp->xa_lock);
138 if (efip->efi_flags & XFS_EFI_CANCELED) { 116 if (efip->efi_flags & XFS_EFI_CANCELED) {
139 /* 117 if (remove)
140 * free the xaction descriptor pointing to this item 118 xfs_trans_del_item(lip);
141 */
142 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
143 xfs_trans_free_item(tp, lidp);
144 119
145 /* xfs_trans_ail_delete() drops the AIL lock. */ 120 /* xfs_trans_ail_delete() drops the AIL lock. */
146 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); 121 xfs_trans_ail_delete(ailp, lip);
147 xfs_efi_item_free(efip); 122 xfs_efi_item_free(efip);
148 } else { 123 } else {
149 efip->efi_flags |= XFS_EFI_COMMITTED; 124 efip->efi_flags |= XFS_EFI_COMMITTED;
@@ -158,9 +133,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
158 * XFS_ITEM_PINNED so that the caller will eventually flush the log. 133 * XFS_ITEM_PINNED so that the caller will eventually flush the log.
159 * This should help in getting the EFI out of the AIL. 134 * This should help in getting the EFI out of the AIL.
160 */ 135 */
161/*ARGSUSED*/
162STATIC uint 136STATIC uint
163xfs_efi_item_trylock(xfs_efi_log_item_t *efip) 137xfs_efi_item_trylock(
138 struct xfs_log_item *lip)
164{ 139{
165 return XFS_ITEM_PINNED; 140 return XFS_ITEM_PINNED;
166} 141}
@@ -168,13 +143,12 @@ xfs_efi_item_trylock(xfs_efi_log_item_t *efip)
168/* 143/*
169 * Efi items have no locking, so just return. 144 * Efi items have no locking, so just return.
170 */ 145 */
171/*ARGSUSED*/
172STATIC void 146STATIC void
173xfs_efi_item_unlock(xfs_efi_log_item_t *efip) 147xfs_efi_item_unlock(
148 struct xfs_log_item *lip)
174{ 149{
175 if (efip->efi_item.li_flags & XFS_LI_ABORTED) 150 if (lip->li_flags & XFS_LI_ABORTED)
176 xfs_efi_item_free(efip); 151 xfs_efi_item_free(EFI_ITEM(lip));
177 return;
178} 152}
179 153
180/* 154/*
@@ -183,9 +157,10 @@ xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
183 * flag is not paid any attention here. Checking for that is delayed 157 * flag is not paid any attention here. Checking for that is delayed
184 * until the EFI is unpinned. 158 * until the EFI is unpinned.
185 */ 159 */
186/*ARGSUSED*/
187STATIC xfs_lsn_t 160STATIC xfs_lsn_t
188xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) 161xfs_efi_item_committed(
162 struct xfs_log_item *lip,
163 xfs_lsn_t lsn)
189{ 164{
190 return lsn; 165 return lsn;
191} 166}
@@ -195,11 +170,10 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
195 * stuck waiting for all of its corresponding efd items to be 170 * stuck waiting for all of its corresponding efd items to be
196 * committed to disk. 171 * committed to disk.
197 */ 172 */
198/*ARGSUSED*/
199STATIC void 173STATIC void
200xfs_efi_item_push(xfs_efi_log_item_t *efip) 174xfs_efi_item_push(
175 struct xfs_log_item *lip)
201{ 176{
202 return;
203} 177}
204 178
205/* 179/*
@@ -209,61 +183,55 @@ xfs_efi_item_push(xfs_efi_log_item_t *efip)
209 * example, for inodes, the inode is locked throughout the extent freeing 183 * example, for inodes, the inode is locked throughout the extent freeing
210 * so the dependency should be recorded there. 184 * so the dependency should be recorded there.
211 */ 185 */
212/*ARGSUSED*/
213STATIC void 186STATIC void
214xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) 187xfs_efi_item_committing(
188 struct xfs_log_item *lip,
189 xfs_lsn_t lsn)
215{ 190{
216 return;
217} 191}
218 192
219/* 193/*
220 * This is the ops vector shared by all efi log items. 194 * This is the ops vector shared by all efi log items.
221 */ 195 */
222static struct xfs_item_ops xfs_efi_item_ops = { 196static struct xfs_item_ops xfs_efi_item_ops = {
223 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, 197 .iop_size = xfs_efi_item_size,
224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 198 .iop_format = xfs_efi_item_format,
225 xfs_efi_item_format, 199 .iop_pin = xfs_efi_item_pin,
226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, 200 .iop_unpin = xfs_efi_item_unpin,
227 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin, 201 .iop_trylock = xfs_efi_item_trylock,
228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 202 .iop_unlock = xfs_efi_item_unlock,
229 xfs_efi_item_unpin_remove, 203 .iop_committed = xfs_efi_item_committed,
230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, 204 .iop_push = xfs_efi_item_push,
231 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efi_item_unlock, 205 .iop_committing = xfs_efi_item_committing
232 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
233 xfs_efi_item_committed,
234 .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push,
235 .iop_pushbuf = NULL,
236 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
237 xfs_efi_item_committing
238}; 206};
239 207
240 208
241/* 209/*
242 * Allocate and initialize an efi item with the given number of extents. 210 * Allocate and initialize an efi item with the given number of extents.
243 */ 211 */
244xfs_efi_log_item_t * 212struct xfs_efi_log_item *
245xfs_efi_init(xfs_mount_t *mp, 213xfs_efi_init(
246 uint nextents) 214 struct xfs_mount *mp,
215 uint nextents)
247 216
248{ 217{
249 xfs_efi_log_item_t *efip; 218 struct xfs_efi_log_item *efip;
250 uint size; 219 uint size;
251 220
252 ASSERT(nextents > 0); 221 ASSERT(nextents > 0);
253 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { 222 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
254 size = (uint)(sizeof(xfs_efi_log_item_t) + 223 size = (uint)(sizeof(xfs_efi_log_item_t) +
255 ((nextents - 1) * sizeof(xfs_extent_t))); 224 ((nextents - 1) * sizeof(xfs_extent_t)));
256 efip = (xfs_efi_log_item_t*)kmem_zalloc(size, KM_SLEEP); 225 efip = kmem_zalloc(size, KM_SLEEP);
257 } else { 226 } else {
258 efip = (xfs_efi_log_item_t*)kmem_zone_zalloc(xfs_efi_zone, 227 efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP);
259 KM_SLEEP);
260 } 228 }
261 229
262 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); 230 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
263 efip->efi_format.efi_nextents = nextents; 231 efip->efi_format.efi_nextents = nextents;
264 efip->efi_format.efi_id = (__psint_t)(void*)efip; 232 efip->efi_format.efi_id = (__psint_t)(void*)efip;
265 233
266 return (efip); 234 return efip;
267} 235}
268 236
269/* 237/*
@@ -276,7 +244,7 @@ xfs_efi_init(xfs_mount_t *mp,
276int 244int
277xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) 245xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
278{ 246{
279 xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr; 247 xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
280 uint i; 248 uint i;
281 uint len = sizeof(xfs_efi_log_format_t) + 249 uint len = sizeof(xfs_efi_log_format_t) +
282 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); 250 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
@@ -289,8 +257,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
289 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); 257 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
290 return 0; 258 return 0;
291 } else if (buf->i_len == len32) { 259 } else if (buf->i_len == len32) {
292 xfs_efi_log_format_32_t *src_efi_fmt_32 = 260 xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
293 (xfs_efi_log_format_32_t *)buf->i_addr;
294 261
295 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; 262 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
296 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; 263 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
@@ -304,8 +271,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
304 } 271 }
305 return 0; 272 return 0;
306 } else if (buf->i_len == len64) { 273 } else if (buf->i_len == len64) {
307 xfs_efi_log_format_64_t *src_efi_fmt_64 = 274 xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr;
308 (xfs_efi_log_format_64_t *)buf->i_addr;
309 275
310 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; 276 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
311 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; 277 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
@@ -356,16 +322,18 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
356 } 322 }
357} 323}
358 324
359STATIC void 325static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
360xfs_efd_item_free(xfs_efd_log_item_t *efdp)
361{ 326{
362 int nexts = efdp->efd_format.efd_nextents; 327 return container_of(lip, struct xfs_efd_log_item, efd_item);
328}
363 329
364 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { 330STATIC void
331xfs_efd_item_free(struct xfs_efd_log_item *efdp)
332{
333 if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
365 kmem_free(efdp); 334 kmem_free(efdp);
366 } else { 335 else
367 kmem_zone_free(xfs_efd_zone, efdp); 336 kmem_zone_free(xfs_efd_zone, efdp);
368 }
369} 337}
370 338
371/* 339/*
@@ -373,9 +341,9 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
373 * We only need 1 iovec for an efd item. It just logs the efd_log_format 341 * We only need 1 iovec for an efd item. It just logs the efd_log_format
374 * structure. 342 * structure.
375 */ 343 */
376/*ARGSUSED*/
377STATIC uint 344STATIC uint
378xfs_efd_item_size(xfs_efd_log_item_t *efdp) 345xfs_efd_item_size(
346 struct xfs_log_item *lip)
379{ 347{
380 return 1; 348 return 1;
381} 349}
@@ -388,10 +356,12 @@ xfs_efd_item_size(xfs_efd_log_item_t *efdp)
388 * slots in the efd item have been filled. 356 * slots in the efd item have been filled.
389 */ 357 */
390STATIC void 358STATIC void
391xfs_efd_item_format(xfs_efd_log_item_t *efdp, 359xfs_efd_item_format(
392 xfs_log_iovec_t *log_vector) 360 struct xfs_log_item *lip,
361 struct xfs_log_iovec *log_vector)
393{ 362{
394 uint size; 363 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
364 uint size;
395 365
396 ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); 366 ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
397 367
@@ -401,48 +371,38 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
401 size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); 371 size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
402 efdp->efd_format.efd_size = 1; 372 efdp->efd_format.efd_size = 1;
403 373
404 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 374 log_vector->i_addr = &efdp->efd_format;
405 log_vector->i_len = size; 375 log_vector->i_len = size;
406 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; 376 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
407 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 377 ASSERT(size >= sizeof(xfs_efd_log_format_t));
408} 378}
409 379
410
411/* 380/*
412 * Pinning has no meaning for an efd item, so just return. 381 * Pinning has no meaning for an efd item, so just return.
413 */ 382 */
414/*ARGSUSED*/
415STATIC void 383STATIC void
416xfs_efd_item_pin(xfs_efd_log_item_t *efdp) 384xfs_efd_item_pin(
385 struct xfs_log_item *lip)
417{ 386{
418 return;
419} 387}
420 388
421
422/* 389/*
423 * Since pinning has no meaning for an efd item, unpinning does 390 * Since pinning has no meaning for an efd item, unpinning does
424 * not either. 391 * not either.
425 */ 392 */
426/*ARGSUSED*/
427STATIC void
428xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
429{
430 return;
431}
432
433/*ARGSUSED*/
434STATIC void 393STATIC void
435xfs_efd_item_unpin_remove(xfs_efd_log_item_t *efdp, xfs_trans_t *tp) 394xfs_efd_item_unpin(
395 struct xfs_log_item *lip,
396 int remove)
436{ 397{
437 return;
438} 398}
439 399
440/* 400/*
441 * Efd items have no locking, so just return success. 401 * Efd items have no locking, so just return success.
442 */ 402 */
443/*ARGSUSED*/
444STATIC uint 403STATIC uint
445xfs_efd_item_trylock(xfs_efd_log_item_t *efdp) 404xfs_efd_item_trylock(
405 struct xfs_log_item *lip)
446{ 406{
447 return XFS_ITEM_LOCKED; 407 return XFS_ITEM_LOCKED;
448} 408}
@@ -451,13 +411,12 @@ xfs_efd_item_trylock(xfs_efd_log_item_t *efdp)
451 * Efd items have no locking or pushing, so return failure 411 * Efd items have no locking or pushing, so return failure
452 * so that the caller doesn't bother with us. 412 * so that the caller doesn't bother with us.
453 */ 413 */
454/*ARGSUSED*/
455STATIC void 414STATIC void
456xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) 415xfs_efd_item_unlock(
416 struct xfs_log_item *lip)
457{ 417{
458 if (efdp->efd_item.li_flags & XFS_LI_ABORTED) 418 if (lip->li_flags & XFS_LI_ABORTED)
459 xfs_efd_item_free(efdp); 419 xfs_efd_item_free(EFD_ITEM(lip));
460 return;
461} 420}
462 421
463/* 422/*
@@ -467,15 +426,18 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
467 * return -1 to keep the transaction code from further referencing 426 * return -1 to keep the transaction code from further referencing
468 * this item. 427 * this item.
469 */ 428 */
470/*ARGSUSED*/
471STATIC xfs_lsn_t 429STATIC xfs_lsn_t
472xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) 430xfs_efd_item_committed(
431 struct xfs_log_item *lip,
432 xfs_lsn_t lsn)
473{ 433{
434 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
435
474 /* 436 /*
475 * If we got a log I/O error, it's always the case that the LR with the 437 * If we got a log I/O error, it's always the case that the LR with the
476 * EFI got unpinned and freed before the EFD got aborted. 438 * EFI got unpinned and freed before the EFD got aborted.
477 */ 439 */
478 if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) 440 if (!(lip->li_flags & XFS_LI_ABORTED))
479 xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); 441 xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
480 442
481 xfs_efd_item_free(efdp); 443 xfs_efd_item_free(efdp);
@@ -486,11 +448,10 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
486 * There isn't much you can do to push on an efd item. It is simply 448 * There isn't much you can do to push on an efd item. It is simply
487 * stuck waiting for the log to be flushed to disk. 449 * stuck waiting for the log to be flushed to disk.
488 */ 450 */
489/*ARGSUSED*/
490STATIC void 451STATIC void
491xfs_efd_item_push(xfs_efd_log_item_t *efdp) 452xfs_efd_item_push(
453 struct xfs_log_item *lip)
492{ 454{
493 return;
494} 455}
495 456
496/* 457/*
@@ -500,55 +461,48 @@ xfs_efd_item_push(xfs_efd_log_item_t *efdp)
500 * example, for inodes, the inode is locked throughout the extent freeing 461 * example, for inodes, the inode is locked throughout the extent freeing
501 * so the dependency should be recorded there. 462 * so the dependency should be recorded there.
502 */ 463 */
503/*ARGSUSED*/
504STATIC void 464STATIC void
505xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn) 465xfs_efd_item_committing(
466 struct xfs_log_item *lip,
467 xfs_lsn_t lsn)
506{ 468{
507 return;
508} 469}
509 470
510/* 471/*
511 * This is the ops vector shared by all efd log items. 472 * This is the ops vector shared by all efd log items.
512 */ 473 */
513static struct xfs_item_ops xfs_efd_item_ops = { 474static struct xfs_item_ops xfs_efd_item_ops = {
514 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, 475 .iop_size = xfs_efd_item_size,
515 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 476 .iop_format = xfs_efd_item_format,
516 xfs_efd_item_format, 477 .iop_pin = xfs_efd_item_pin,
517 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, 478 .iop_unpin = xfs_efd_item_unpin,
518 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin, 479 .iop_trylock = xfs_efd_item_trylock,
519 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 480 .iop_unlock = xfs_efd_item_unlock,
520 xfs_efd_item_unpin_remove, 481 .iop_committed = xfs_efd_item_committed,
521 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, 482 .iop_push = xfs_efd_item_push,
522 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efd_item_unlock, 483 .iop_committing = xfs_efd_item_committing
523 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
524 xfs_efd_item_committed,
525 .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push,
526 .iop_pushbuf = NULL,
527 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
528 xfs_efd_item_committing
529}; 484};
530 485
531
532/* 486/*
533 * Allocate and initialize an efd item with the given number of extents. 487 * Allocate and initialize an efd item with the given number of extents.
534 */ 488 */
535xfs_efd_log_item_t * 489struct xfs_efd_log_item *
536xfs_efd_init(xfs_mount_t *mp, 490xfs_efd_init(
537 xfs_efi_log_item_t *efip, 491 struct xfs_mount *mp,
538 uint nextents) 492 struct xfs_efi_log_item *efip,
493 uint nextents)
539 494
540{ 495{
541 xfs_efd_log_item_t *efdp; 496 struct xfs_efd_log_item *efdp;
542 uint size; 497 uint size;
543 498
544 ASSERT(nextents > 0); 499 ASSERT(nextents > 0);
545 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { 500 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
546 size = (uint)(sizeof(xfs_efd_log_item_t) + 501 size = (uint)(sizeof(xfs_efd_log_item_t) +
547 ((nextents - 1) * sizeof(xfs_extent_t))); 502 ((nextents - 1) * sizeof(xfs_extent_t)));
548 efdp = (xfs_efd_log_item_t*)kmem_zalloc(size, KM_SLEEP); 503 efdp = kmem_zalloc(size, KM_SLEEP);
549 } else { 504 } else {
550 efdp = (xfs_efd_log_item_t*)kmem_zone_zalloc(xfs_efd_zone, 505 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
551 KM_SLEEP);
552 } 506 }
553 507
554 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); 508 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
@@ -556,5 +510,5 @@ xfs_efd_init(xfs_mount_t *mp,
556 efdp->efd_format.efd_nextents = nextents; 510 efdp->efd_format.efd_nextents = nextents;
557 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 511 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
558 512
559 return (efdp); 513 return efdp;
560} 514}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 390850ee6603..9b715dce5699 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -18,13 +18,9 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_bmap_btree.h" 19#include "xfs_bmap_btree.h"
20#include "xfs_inum.h" 20#include "xfs_inum.h"
21#include "xfs_dir2.h"
22#include "xfs_dir2_sf.h"
23#include "xfs_attr_sf.h"
24#include "xfs_dinode.h" 21#include "xfs_dinode.h"
25#include "xfs_inode.h" 22#include "xfs_inode.h"
26#include "xfs_ag.h" 23#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_log.h" 24#include "xfs_log.h"
29#include "xfs_trans.h" 25#include "xfs_trans.h"
30#include "xfs_sb.h" 26#include "xfs_sb.h"
@@ -127,6 +123,82 @@ typedef struct fstrm_item
127 xfs_inode_t *pip; /* Parent directory inode pointer. */ 123 xfs_inode_t *pip; /* Parent directory inode pointer. */
128} fstrm_item_t; 124} fstrm_item_t;
129 125
126/*
127 * Allocation group filestream associations are tracked with per-ag atomic
128 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
129 * particular AG already has active filestreams associated with it. The mount
130 * point's m_peraglock is used to protect these counters from per-ag array
131 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
132 * about to reallocate the array, it calls xfs_filestream_flush() with the
133 * m_peraglock held in write mode.
134 *
135 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
136 * the cache elements have finished executing before it returns, it's safe for
137 * the free functions to use the atomic counters without m_peraglock protection.
138 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
139 * whether it was called with the m_peraglock held in read mode, write mode or
140 * not held at all. The race condition this addresses is the following:
141 *
142 * - The work queue scheduler fires and pulls a filestream directory cache
143 * element off the LRU end of the cache for deletion, then gets pre-empted.
144 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
145 * remaining items from the cache and reallocates the mount point's per-ag
146 * array, resetting all the counters to zero.
147 * - The work queue thread resumes and calls the free function for the element
148 * it started cleaning up earlier. In the process it decrements the
149 * filestreams counter for an AG that now has no references.
150 *
151 * With a shrinkfs feature, the above scenario could panic the system.
152 *
153 * All other uses of the following macros should be protected by either the
154 * m_peraglock held in read mode, or the cache's internal locking exposed by the
155 * interval between a call to xfs_mru_cache_lookup() and a call to
156 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
157 * when new elements are added to the cache.
158 *
159 * Combined, these locking rules ensure that no associations will ever exist in
160 * the cache that reference per-ag array elements that have since been
161 * reallocated.
162 */
163static int
164xfs_filestream_peek_ag(
165 xfs_mount_t *mp,
166 xfs_agnumber_t agno)
167{
168 struct xfs_perag *pag;
169 int ret;
170
171 pag = xfs_perag_get(mp, agno);
172 ret = atomic_read(&pag->pagf_fstrms);
173 xfs_perag_put(pag);
174 return ret;
175}
176
177static int
178xfs_filestream_get_ag(
179 xfs_mount_t *mp,
180 xfs_agnumber_t agno)
181{
182 struct xfs_perag *pag;
183 int ret;
184
185 pag = xfs_perag_get(mp, agno);
186 ret = atomic_inc_return(&pag->pagf_fstrms);
187 xfs_perag_put(pag);
188 return ret;
189}
190
191static void
192xfs_filestream_put_ag(
193 xfs_mount_t *mp,
194 xfs_agnumber_t agno)
195{
196 struct xfs_perag *pag;
197
198 pag = xfs_perag_get(mp, agno);
199 atomic_dec(&pag->pagf_fstrms);
200 xfs_perag_put(pag);
201}
130 202
131/* 203/*
132 * Scan the AGs starting at startag looking for an AG that isn't in use and has 204 * Scan the AGs starting at startag looking for an AG that isn't in use and has
@@ -355,16 +427,14 @@ xfs_fstrm_free_func(
355{ 427{
356 fstrm_item_t *item = (fstrm_item_t *)data; 428 fstrm_item_t *item = (fstrm_item_t *)data;
357 xfs_inode_t *ip = item->ip; 429 xfs_inode_t *ip = item->ip;
358 int ref;
359 430
360 ASSERT(ip->i_ino == ino); 431 ASSERT(ip->i_ino == ino);
361 432
362 xfs_iflags_clear(ip, XFS_IFILESTREAM); 433 xfs_iflags_clear(ip, XFS_IFILESTREAM);
363 434
364 /* Drop the reference taken on the AG when the item was added. */ 435 /* Drop the reference taken on the AG when the item was added. */
365 ref = xfs_filestream_put_ag(ip->i_mount, item->ag); 436 xfs_filestream_put_ag(ip->i_mount, item->ag);
366 437
367 ASSERT(ref >= 0);
368 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, 438 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
369 xfs_filestream_peek_ag(ip->i_mount, item->ag)); 439 xfs_filestream_peek_ag(ip->i_mount, item->ag));
370 440
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 260f757bbc5d..09dd9af45434 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -42,88 +42,6 @@ extern ktrace_t *xfs_filestreams_trace_buf;
42 42
43#endif 43#endif
44 44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
85static inline int
86xfs_filestream_peek_ag(
87 xfs_mount_t *mp,
88 xfs_agnumber_t agno)
89{
90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
97}
98
99static inline int
100xfs_filestream_get_ag(
101 xfs_mount_t *mp,
102 xfs_agnumber_t agno)
103{
104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
111}
112
113static inline int
114xfs_filestream_put_ag(
115 xfs_mount_t *mp,
116 xfs_agnumber_t agno)
117{
118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
125}
126
127/* allocation selection flags */ 45/* allocation selection flags */
128typedef enum xfs_fstrm_alloc { 46typedef enum xfs_fstrm_alloc {
129 XFS_PICK_USERDATA = 1, 47 XFS_PICK_USERDATA = 1,
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..8f6fc1a96386 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ 114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */
115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ 115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */
116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ 116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */
117#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */
117#define BMV_IF_VALID \ 118#define BMV_IF_VALID \
118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) 119 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \
120 BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
119 121
120/* bmv_oflags values - returned for each non-header segment */ 122/* bmv_oflags values - returned for each non-header segment */
121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ 123#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
@@ -291,9 +293,11 @@ typedef struct xfs_bstat {
291 __s32 bs_extsize; /* extent size */ 293 __s32 bs_extsize; /* extent size */
292 __s32 bs_extents; /* number of extents */ 294 __s32 bs_extents; /* number of extents */
293 __u32 bs_gen; /* generation count */ 295 __u32 bs_gen; /* generation count */
294 __u16 bs_projid; /* project id */ 296 __u16 bs_projid_lo; /* lower part of project id */
297#define bs_projid bs_projid_lo /* (previously just bs_projid) */
295 __u16 bs_forkoff; /* inode fork offset in bytes */ 298 __u16 bs_forkoff; /* inode fork offset in bytes */
296 unsigned char bs_pad[12]; /* pad space, unused */ 299 __u16 bs_projid_hi; /* higher part of project id */
300 unsigned char bs_pad[10]; /* pad space, unused */
297 __u32 bs_dmevmask; /* DMIG event mask */ 301 __u32 bs_dmevmask; /* DMIG event mask */
298 __u16 bs_dmstate; /* DMIG state info */ 302 __u16 bs_dmstate; /* DMIG state info */
299 __u16 bs_aextents; /* attribute number of extents */ 303 __u16 bs_aextents; /* attribute number of extents */
@@ -446,6 +450,7 @@ typedef struct xfs_handle {
446/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ 450/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */
447/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ 451/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
448#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) 452#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
453#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
449 454
450/* 455/*
451 * ioctl commands that replace IRIX syssgi()'s 456 * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 37a6f62c57b6..a7c116e814af 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
@@ -148,12 +144,11 @@ xfs_growfs_data_private(
148 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) 144 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
149 return error; 145 return error;
150 dpct = pct - mp->m_sb.sb_imax_pct; 146 dpct = pct - mp->m_sb.sb_imax_pct;
151 error = xfs_read_buf(mp, mp->m_ddev_targp, 147 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
152 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 148 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
153 XFS_FSS_TO_BB(mp, 1), 0, &bp); 149 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
154 if (error) 150 if (!bp)
155 return error; 151 return EIO;
156 ASSERT(bp);
157 xfs_buf_relse(bp); 152 xfs_buf_relse(bp);
158 153
159 new = nb; /* use new as a temporary here */ 154 new = nb; /* use new as a temporary here */
@@ -601,39 +596,44 @@ out:
601 * the extra reserve blocks from the reserve..... 596 * the extra reserve blocks from the reserve.....
602 */ 597 */
603 int error; 598 int error;
604 error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); 599 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
600 fdblks_delta, 0);
605 if (error == ENOSPC) 601 if (error == ENOSPC)
606 goto retry; 602 goto retry;
607 } 603 }
608 return 0; 604 return 0;
609} 605}
610 606
607/*
608 * Dump a transaction into the log that contains no real change. This is needed
609 * to be able to make the log dirty or stamp the current tail LSN into the log
610 * during the covering operation.
611 *
612 * We cannot use an inode here for this - that will push dirty state back up
613 * into the VFS and then periodic inode flushing will prevent log covering from
614 * making progress. Hence we log a field in the superblock instead.
615 */
611int 616int
612xfs_fs_log_dummy( 617xfs_fs_log_dummy(
613 xfs_mount_t *mp) 618 xfs_mount_t *mp,
619 int flags)
614{ 620{
615 xfs_trans_t *tp; 621 xfs_trans_t *tp;
616 xfs_inode_t *ip;
617 int error; 622 int error;
618 623
619 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 624 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
620 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 625 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
626 XFS_DEFAULT_LOG_COUNT);
621 if (error) { 627 if (error) {
622 xfs_trans_cancel(tp, 0); 628 xfs_trans_cancel(tp, 0);
623 return error; 629 return error;
624 } 630 }
625 631
626 ip = mp->m_rootip; 632 /* log the UUID because it is an unchanging field */
627 xfs_ilock(ip, XFS_ILOCK_EXCL); 633 xfs_mod_sb(tp, XFS_SB_UUID);
628 634 if (flags & SYNC_WAIT)
629 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 635 xfs_trans_set_sync(tp);
630 xfs_trans_ihold(tp, ip); 636 return xfs_trans_commit(tp, 0);
631 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
632 xfs_trans_set_sync(tp);
633 error = xfs_trans_commit(tp, 0);
634
635 xfs_iunlock(ip, XFS_ILOCK_EXCL);
636 return error;
637} 637}
638 638
639int 639int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9..a786c5212c1e 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern int xfs_fs_log_dummy(xfs_mount_t *mp); 28extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
29 29
30#endif /* __XFS_FSOPS_H__ */ 30#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index c7142a064c48..0626a32c3447 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
@@ -216,7 +212,7 @@ xfs_ialloc_inode_init(
216 * to log a whole cluster of inodes instead of all the 212 * to log a whole cluster of inodes instead of all the
217 * individual transactions causing a lot of log traffic. 213 * individual transactions causing a lot of log traffic.
218 */ 214 */
219 xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); 215 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
220 for (i = 0; i < ninodes; i++) { 216 for (i = 0; i < ninodes; i++) {
221 int ioffset = i << mp->m_sb.sb_inodelog; 217 int ioffset = i << mp->m_sb.sb_inodelog;
222 uint isize = sizeof(struct xfs_dinode); 218 uint isize = sizeof(struct xfs_dinode);
@@ -1217,7 +1213,6 @@ xfs_imap_lookup(
1217 struct xfs_inobt_rec_incore rec; 1213 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur; 1214 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp; 1215 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error; 1216 int error;
1222 int i; 1217 int i;
1223 1218
@@ -1231,13 +1226,13 @@ xfs_imap_lookup(
1231 } 1226 }
1232 1227
1233 /* 1228 /*
1234 * derive and lookup the exact inode record for the given agino. If the 1229 * Lookup the inode record for the given agino. If the record cannot be
1235 * record cannot be found, then it's an invalid inode number and we 1230 * found, then it's an invalid inode number and we should abort. Once
1236 * should abort. 1231 * we have a record, we need to ensure it contains the inode number
1232 * we are looking up.
1237 */ 1233 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1234 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); 1235 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) { 1236 if (!error) {
1242 if (i) 1237 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i); 1238 error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1250,6 +1245,11 @@ xfs_imap_lookup(
1250 if (error) 1245 if (error)
1251 return error; 1246 return error;
1252 1247
1248 /* check that the returned record contains the required inode */
1249 if (rec.ir_startino > agino ||
1250 rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
1251 return EINVAL;
1252
1253 /* for untrusted inodes check it is allocated first */ 1253 /* for untrusted inodes check it is allocated first */
1254 if ((flags & XFS_IGET_UNTRUSTED) && 1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c282a9af5393..16921f55c542 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
@@ -187,38 +183,6 @@ xfs_inobt_key_diff(
187 cur->bc_rec.i.ir_startino; 183 cur->bc_rec.i.ir_startino;
188} 184}
189 185
190STATIC int
191xfs_inobt_kill_root(
192 struct xfs_btree_cur *cur,
193 struct xfs_buf *bp,
194 int level,
195 union xfs_btree_ptr *newroot)
196{
197 int error;
198
199 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
200 XFS_BTREE_STATS_INC(cur, killroot);
201
202 /*
203 * Update the root pointer, decreasing the level by 1 and then
204 * free the old root.
205 */
206 xfs_inobt_set_root(cur, newroot, -1);
207 error = xfs_inobt_free_block(cur, bp);
208 if (error) {
209 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
210 return error;
211 }
212
213 XFS_BTREE_STATS_INC(cur, free);
214
215 cur->bc_bufs[level] = NULL;
216 cur->bc_nlevels--;
217
218 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
219 return 0;
220}
221
222#ifdef DEBUG 186#ifdef DEBUG
223STATIC int 187STATIC int
224xfs_inobt_keys_inorder( 188xfs_inobt_keys_inorder(
@@ -313,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
313 277
314 .dup_cursor = xfs_inobt_dup_cursor, 278 .dup_cursor = xfs_inobt_dup_cursor,
315 .set_root = xfs_inobt_set_root, 279 .set_root = xfs_inobt_set_root,
316 .kill_root = xfs_inobt_kill_root,
317 .alloc_block = xfs_inobt_alloc_block, 280 .alloc_block = xfs_inobt_alloc_block,
318 .free_block = xfs_inobt_free_block, 281 .free_block = xfs_inobt_free_block,
319 .get_minrecs = xfs_inobt_get_minrecs, 282 .get_minrecs = xfs_inobt_get_minrecs,
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8f8b91be2c99..0cdd26932d8e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -25,14 +25,10 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_btree.h" 34#include "xfs_btree.h"
@@ -95,7 +91,7 @@ xfs_inode_alloc(
95 return ip; 91 return ip;
96} 92}
97 93
98STATIC void 94void
99xfs_inode_free( 95xfs_inode_free(
100 struct xfs_inode *ip) 96 struct xfs_inode *ip)
101{ 97{
@@ -212,7 +208,7 @@ xfs_iget_cache_hit(
212 ip->i_flags &= ~XFS_INEW; 208 ip->i_flags &= ~XFS_INEW;
213 ip->i_flags |= XFS_IRECLAIMABLE; 209 ip->i_flags |= XFS_IRECLAIMABLE;
214 __xfs_inode_set_reclaim_tag(pag, ip); 210 __xfs_inode_set_reclaim_tag(pag, ip);
215 trace_xfs_iget_reclaim(ip); 211 trace_xfs_iget_reclaim_fail(ip);
216 goto out_error; 212 goto out_error;
217 } 213 }
218 214
@@ -227,6 +223,7 @@ xfs_iget_cache_hit(
227 } else { 223 } else {
228 /* If the VFS inode is being torn down, pause and try again. */ 224 /* If the VFS inode is being torn down, pause and try again. */
229 if (!igrab(inode)) { 225 if (!igrab(inode)) {
226 trace_xfs_iget_skip(ip);
230 error = EAGAIN; 227 error = EAGAIN;
231 goto out_error; 228 goto out_error;
232 } 229 }
@@ -234,6 +231,7 @@ xfs_iget_cache_hit(
234 /* We've got a live one. */ 231 /* We've got a live one. */
235 spin_unlock(&ip->i_flags_lock); 232 spin_unlock(&ip->i_flags_lock);
236 read_unlock(&pag->pag_ici_lock); 233 read_unlock(&pag->pag_ici_lock);
234 trace_xfs_iget_hit(ip);
237 } 235 }
238 236
239 if (lock_flags != 0) 237 if (lock_flags != 0)
@@ -242,7 +240,6 @@ xfs_iget_cache_hit(
242 xfs_iflags_clear(ip, XFS_ISTALE); 240 xfs_iflags_clear(ip, XFS_ISTALE);
243 XFS_STATS_INC(xs_ig_found); 241 XFS_STATS_INC(xs_ig_found);
244 242
245 trace_xfs_iget_found(ip);
246 return 0; 243 return 0;
247 244
248out_error: 245out_error:
@@ -264,7 +261,6 @@ xfs_iget_cache_miss(
264{ 261{
265 struct xfs_inode *ip; 262 struct xfs_inode *ip;
266 int error; 263 int error;
267 unsigned long first_index, mask;
268 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 264 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
269 265
270 ip = xfs_inode_alloc(mp, ino); 266 ip = xfs_inode_alloc(mp, ino);
@@ -275,7 +271,7 @@ xfs_iget_cache_miss(
275 if (error) 271 if (error)
276 goto out_destroy; 272 goto out_destroy;
277 273
278 xfs_itrace_entry(ip); 274 trace_xfs_iget_miss(ip);
279 275
280 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 276 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
281 error = ENOENT; 277 error = ENOENT;
@@ -301,8 +297,6 @@ xfs_iget_cache_miss(
301 BUG(); 297 BUG();
302 } 298 }
303 299
304 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
305 first_index = agino & mask;
306 write_lock(&pag->pag_ici_lock); 300 write_lock(&pag->pag_ici_lock);
307 301
308 /* insert the new inode */ 302 /* insert the new inode */
@@ -321,7 +315,6 @@ xfs_iget_cache_miss(
321 write_unlock(&pag->pag_ici_lock); 315 write_unlock(&pag->pag_ici_lock);
322 radix_tree_preload_end(); 316 radix_tree_preload_end();
323 317
324 trace_xfs_iget_alloc(ip);
325 *ipp = ip; 318 *ipp = ip;
326 return 0; 319 return 0;
327 320
@@ -372,8 +365,8 @@ xfs_iget(
372 xfs_perag_t *pag; 365 xfs_perag_t *pag;
373 xfs_agino_t agino; 366 xfs_agino_t agino;
374 367
375 /* the radix tree exists only in inode capable AGs */ 368 /* reject inode numbers outside existing AGs */
376 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) 369 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
377 return EINVAL; 370 return EINVAL;
378 371
379 /* get the perag structure and ensure that it's inode capable */ 372 /* get the perag structure and ensure that it's inode capable */
@@ -422,97 +415,6 @@ out_error_or_again:
422} 415}
423 416
424/* 417/*
425 * Decrement reference count of an inode structure and unlock it.
426 *
427 * ip -- the inode being released
428 * lock_flags -- this parameter indicates the inode's locks to be
429 * to be released. See the comment on xfs_iunlock() for a list
430 * of valid values.
431 */
432void
433xfs_iput(xfs_inode_t *ip,
434 uint lock_flags)
435{
436 xfs_itrace_entry(ip);
437 xfs_iunlock(ip, lock_flags);
438 IRELE(ip);
439}
440
441/*
442 * Special iput for brand-new inodes that are still locked
443 */
444void
445xfs_iput_new(
446 xfs_inode_t *ip,
447 uint lock_flags)
448{
449 struct inode *inode = VFS_I(ip);
450
451 xfs_itrace_entry(ip);
452
453 if ((ip->i_d.di_mode == 0)) {
454 ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
455 make_bad_inode(inode);
456 }
457 if (inode->i_state & I_NEW)
458 unlock_new_inode(inode);
459 if (lock_flags)
460 xfs_iunlock(ip, lock_flags);
461 IRELE(ip);
462}
463
464/*
465 * This is called free all the memory associated with an inode.
466 * It must free the inode itself and any buffers allocated for
467 * if_extents/if_data and if_broot. It must also free the lock
468 * associated with the inode.
469 *
470 * Note: because we don't initialise everything on reallocation out
471 * of the zone, we must ensure we nullify everything correctly before
472 * freeing the structure.
473 */
474void
475xfs_ireclaim(
476 struct xfs_inode *ip)
477{
478 struct xfs_mount *mp = ip->i_mount;
479 struct xfs_perag *pag;
480 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
481
482 XFS_STATS_INC(xs_ig_reclaims);
483
484 /*
485 * Remove the inode from the per-AG radix tree.
486 *
487 * Because radix_tree_delete won't complain even if the item was never
488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on.
490 */
491 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
492 write_lock(&pag->pag_ici_lock);
493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0);
495 write_unlock(&pag->pag_ici_lock);
496 xfs_perag_put(pag);
497
498 /*
499 * Here we do an (almost) spurious inode lock in order to coordinate
500 * with inode cache radix tree lookups. This is because the lookup
501 * can reference the inodes in the cache without taking references.
502 *
503 * We make that OK here by ensuring that we wait until the inode is
504 * unlocked after the lookup before we go ahead and free it. We get
505 * both the ilock and the iolock because the code may need to drop the
506 * ilock one but will still hold the iolock.
507 */
508 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
509 xfs_qm_dqdetach(ip);
510 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
511
512 xfs_inode_free(ip);
513}
514
515/*
516 * This is a wrapper routine around the xfs_ilock() routine 418 * This is a wrapper routine around the xfs_ilock() routine
517 * used to centralize some grungy code. It is used in places 419 * used to centralize some grungy code. It is used in places
518 * that wish to lock the inode solely for reading the extents. 420 * that wish to lock the inode solely for reading the extents.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b76a829d7e20..108c7a085f94 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -27,13 +27,10 @@
27#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
28#include "xfs_sb.h" 28#include "xfs_sb.h"
29#include "xfs_ag.h" 29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h" 30#include "xfs_mount.h"
33#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 35#include "xfs_dinode.h"
39#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -44,7 +41,6 @@
44#include "xfs_alloc.h" 41#include "xfs_alloc.h"
45#include "xfs_ialloc.h" 42#include "xfs_ialloc.h"
46#include "xfs_bmap.h" 43#include "xfs_bmap.h"
47#include "xfs_rw.h"
48#include "xfs_error.h" 44#include "xfs_error.h"
49#include "xfs_utils.h" 45#include "xfs_utils.h"
50#include "xfs_quota.h" 46#include "xfs_quota.h"
@@ -426,7 +422,7 @@ xfs_iformat(
426 if (!XFS_DFORK_Q(dip)) 422 if (!XFS_DFORK_Q(dip))
427 return 0; 423 return 0;
428 ASSERT(ip->i_afp == NULL); 424 ASSERT(ip->i_afp == NULL);
429 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 425 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
430 ip->i_afp->if_ext_max = 426 ip->i_afp->if_ext_max =
431 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 427 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
432 switch (dip->di_aformat) { 428 switch (dip->di_aformat) {
@@ -509,7 +505,7 @@ xfs_iformat_local(
509 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 505 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
510 else { 506 else {
511 real_size = roundup(size, 4); 507 real_size = roundup(size, 4);
512 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 508 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
513 } 509 }
514 ifp->if_bytes = size; 510 ifp->if_bytes = size;
515 ifp->if_real_bytes = real_size; 511 ifp->if_real_bytes = real_size;
@@ -636,7 +632,7 @@ xfs_iformat_btree(
636 } 632 }
637 633
638 ifp->if_broot_bytes = size; 634 ifp->if_broot_bytes = size;
639 ifp->if_broot = kmem_alloc(size, KM_SLEEP); 635 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
640 ASSERT(ifp->if_broot != NULL); 636 ASSERT(ifp->if_broot != NULL);
641 /* 637 /*
642 * Copy and convert from the on-disk structure 638 * Copy and convert from the on-disk structure
@@ -664,7 +660,8 @@ xfs_dinode_from_disk(
664 to->di_uid = be32_to_cpu(from->di_uid); 660 to->di_uid = be32_to_cpu(from->di_uid);
665 to->di_gid = be32_to_cpu(from->di_gid); 661 to->di_gid = be32_to_cpu(from->di_gid);
666 to->di_nlink = be32_to_cpu(from->di_nlink); 662 to->di_nlink = be32_to_cpu(from->di_nlink);
667 to->di_projid = be16_to_cpu(from->di_projid); 663 to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
664 to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
668 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 665 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
669 to->di_flushiter = be16_to_cpu(from->di_flushiter); 666 to->di_flushiter = be16_to_cpu(from->di_flushiter);
670 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); 667 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
@@ -699,7 +696,8 @@ xfs_dinode_to_disk(
699 to->di_uid = cpu_to_be32(from->di_uid); 696 to->di_uid = cpu_to_be32(from->di_uid);
700 to->di_gid = cpu_to_be32(from->di_gid); 697 to->di_gid = cpu_to_be32(from->di_gid);
701 to->di_nlink = cpu_to_be32(from->di_nlink); 698 to->di_nlink = cpu_to_be32(from->di_nlink);
702 to->di_projid = cpu_to_be16(from->di_projid); 699 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
700 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
703 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 701 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
704 to->di_flushiter = cpu_to_be16(from->di_flushiter); 702 to->di_flushiter = cpu_to_be16(from->di_flushiter);
705 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 703 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
@@ -878,7 +876,7 @@ xfs_iread(
878 if (ip->i_d.di_version == 1) { 876 if (ip->i_d.di_version == 1) {
879 ip->i_d.di_nlink = ip->i_d.di_onlink; 877 ip->i_d.di_nlink = ip->i_d.di_onlink;
880 ip->i_d.di_onlink = 0; 878 ip->i_d.di_onlink = 0;
881 ip->i_d.di_projid = 0; 879 xfs_set_projid(ip, 0);
882 } 880 }
883 881
884 ip->i_delayed_blks = 0; 882 ip->i_delayed_blks = 0;
@@ -922,7 +920,6 @@ xfs_iread_extents(
922 int error; 920 int error;
923 xfs_ifork_t *ifp; 921 xfs_ifork_t *ifp;
924 xfs_extnum_t nextents; 922 xfs_extnum_t nextents;
925 size_t size;
926 923
927 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 924 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
928 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 925 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
@@ -930,7 +927,6 @@ xfs_iread_extents(
930 return XFS_ERROR(EFSCORRUPTED); 927 return XFS_ERROR(EFSCORRUPTED);
931 } 928 }
932 nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 929 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
933 size = nextents * sizeof(xfs_bmbt_rec_t);
934 ifp = XFS_IFORK_PTR(ip, whichfork); 930 ifp = XFS_IFORK_PTR(ip, whichfork);
935 931
936 /* 932 /*
@@ -988,8 +984,7 @@ xfs_ialloc(
988 mode_t mode, 984 mode_t mode,
989 xfs_nlink_t nlink, 985 xfs_nlink_t nlink,
990 xfs_dev_t rdev, 986 xfs_dev_t rdev,
991 cred_t *cr, 987 prid_t prid,
992 xfs_prid_t prid,
993 int okalloc, 988 int okalloc,
994 xfs_buf_t **ialloc_context, 989 xfs_buf_t **ialloc_context,
995 boolean_t *call_again, 990 boolean_t *call_again,
@@ -1033,7 +1028,7 @@ xfs_ialloc(
1033 ASSERT(ip->i_d.di_nlink == nlink); 1028 ASSERT(ip->i_d.di_nlink == nlink);
1034 ip->i_d.di_uid = current_fsuid(); 1029 ip->i_d.di_uid = current_fsuid();
1035 ip->i_d.di_gid = current_fsgid(); 1030 ip->i_d.di_gid = current_fsgid();
1036 ip->i_d.di_projid = prid; 1031 xfs_set_projid(ip, prid);
1037 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1032 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
1038 1033
1039 /* 1034 /*
@@ -1226,7 +1221,7 @@ xfs_isize_check(
1226 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 1221 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
1227 map_first), 1222 map_first),
1228 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 1223 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
1229 NULL, NULL)) 1224 NULL))
1230 return; 1225 return;
1231 ASSERT(nimaps == 1); 1226 ASSERT(nimaps == 1);
1232 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1227 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
@@ -1460,7 +1455,7 @@ xfs_itruncate_finish(
1460 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1455 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1461 ASSERT(ip->i_transp == *tp); 1456 ASSERT(ip->i_transp == *tp);
1462 ASSERT(ip->i_itemp != NULL); 1457 ASSERT(ip->i_itemp != NULL);
1463 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 1458 ASSERT(ip->i_itemp->ili_lock_flags == 0);
1464 1459
1465 1460
1466 ntp = *tp; 1461 ntp = *tp;
@@ -1589,11 +1584,10 @@ xfs_itruncate_finish(
1589 xfs_bmap_init(&free_list, &first_block); 1584 xfs_bmap_init(&free_list, &first_block);
1590 error = xfs_bunmapi(ntp, ip, 1585 error = xfs_bunmapi(ntp, ip,
1591 first_unmap_block, unmap_len, 1586 first_unmap_block, unmap_len,
1592 xfs_bmapi_aflag(fork) | 1587 xfs_bmapi_aflag(fork),
1593 (sync ? 0 : XFS_BMAPI_ASYNC),
1594 XFS_ITRUNC_MAX_EXTENTS, 1588 XFS_ITRUNC_MAX_EXTENTS,
1595 &first_block, &free_list, 1589 &first_block, &free_list,
1596 NULL, &done); 1590 &done);
1597 if (error) { 1591 if (error) {
1598 /* 1592 /*
1599 * If the bunmapi call encounters an error, 1593 * If the bunmapi call encounters an error,
@@ -1612,12 +1606,8 @@ xfs_itruncate_finish(
1612 */ 1606 */
1613 error = xfs_bmap_finish(tp, &free_list, &committed); 1607 error = xfs_bmap_finish(tp, &free_list, &committed);
1614 ntp = *tp; 1608 ntp = *tp;
1615 if (committed) { 1609 if (committed)
1616 /* link the inode into the next xact in the chain */ 1610 xfs_trans_ijoin(ntp, ip);
1617 xfs_trans_ijoin(ntp, ip,
1618 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1619 xfs_trans_ihold(ntp, ip);
1620 }
1621 1611
1622 if (error) { 1612 if (error) {
1623 /* 1613 /*
@@ -1646,9 +1636,7 @@ xfs_itruncate_finish(
1646 error = xfs_trans_commit(*tp, 0); 1636 error = xfs_trans_commit(*tp, 0);
1647 *tp = ntp; 1637 *tp = ntp;
1648 1638
1649 /* link the inode into the next transaction in the chain */ 1639 xfs_trans_ijoin(ntp, ip);
1650 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1651 xfs_trans_ihold(ntp, ip);
1652 1640
1653 if (error) 1641 if (error)
1654 return error; 1642 return error;
@@ -1927,6 +1915,11 @@ xfs_iunlink_remove(
1927 return 0; 1915 return 0;
1928} 1916}
1929 1917
1918/*
1919 * A big issue when freeing the inode cluster is is that we _cannot_ skip any
1920 * inodes that are in memory - they all must be marked stale and attached to
1921 * the cluster buffer.
1922 */
1930STATIC void 1923STATIC void
1931xfs_ifree_cluster( 1924xfs_ifree_cluster(
1932 xfs_inode_t *free_ip, 1925 xfs_inode_t *free_ip,
@@ -1958,8 +1951,6 @@ xfs_ifree_cluster(
1958 } 1951 }
1959 1952
1960 for (j = 0; j < nbufs; j++, inum += ninodes) { 1953 for (j = 0; j < nbufs; j++, inum += ninodes) {
1961 int found = 0;
1962
1963 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 1954 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1964 XFS_INO_TO_AGBNO(mp, inum)); 1955 XFS_INO_TO_AGBNO(mp, inum));
1965 1956
@@ -1978,23 +1969,25 @@ xfs_ifree_cluster(
1978 /* 1969 /*
1979 * Walk the inodes already attached to the buffer and mark them 1970 * Walk the inodes already attached to the buffer and mark them
1980 * stale. These will all have the flush locks held, so an 1971 * stale. These will all have the flush locks held, so an
1981 * in-memory inode walk can't lock them. 1972 * in-memory inode walk can't lock them. By marking them all
1973 * stale first, we will not attempt to lock them in the loop
1974 * below as the XFS_ISTALE flag will be set.
1982 */ 1975 */
1983 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 1976 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1984 while (lip) { 1977 while (lip) {
1985 if (lip->li_type == XFS_LI_INODE) { 1978 if (lip->li_type == XFS_LI_INODE) {
1986 iip = (xfs_inode_log_item_t *)lip; 1979 iip = (xfs_inode_log_item_t *)lip;
1987 ASSERT(iip->ili_logged == 1); 1980 ASSERT(iip->ili_logged == 1);
1988 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 1981 lip->li_cb = xfs_istale_done;
1989 xfs_trans_ail_copy_lsn(mp->m_ail, 1982 xfs_trans_ail_copy_lsn(mp->m_ail,
1990 &iip->ili_flush_lsn, 1983 &iip->ili_flush_lsn,
1991 &iip->ili_item.li_lsn); 1984 &iip->ili_item.li_lsn);
1992 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 1985 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
1993 found++;
1994 } 1986 }
1995 lip = lip->li_bio_list; 1987 lip = lip->li_bio_list;
1996 } 1988 }
1997 1989
1990
1998 /* 1991 /*
1999 * For each inode in memory attempt to add it to the inode 1992 * For each inode in memory attempt to add it to the inode
2000 * buffer and set it up for being staled on buffer IO 1993 * buffer and set it up for being staled on buffer IO
@@ -2006,6 +1999,7 @@ xfs_ifree_cluster(
2006 * even trying to lock them. 1999 * even trying to lock them.
2007 */ 2000 */
2008 for (i = 0; i < ninodes; i++) { 2001 for (i = 0; i < ninodes; i++) {
2002retry:
2009 read_lock(&pag->pag_ici_lock); 2003 read_lock(&pag->pag_ici_lock);
2010 ip = radix_tree_lookup(&pag->pag_ici_root, 2004 ip = radix_tree_lookup(&pag->pag_ici_root,
2011 XFS_INO_TO_AGINO(mp, (inum + i))); 2005 XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2016,38 +2010,36 @@ xfs_ifree_cluster(
2016 continue; 2010 continue;
2017 } 2011 }
2018 2012
2019 /* don't try to lock/unlock the current inode */ 2013 /*
2014 * Don't try to lock/unlock the current inode, but we
2015 * _cannot_ skip the other inodes that we did not find
2016 * in the list attached to the buffer and are not
2017 * already marked stale. If we can't lock it, back off
2018 * and retry.
2019 */
2020 if (ip != free_ip && 2020 if (ip != free_ip &&
2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2022 read_unlock(&pag->pag_ici_lock); 2022 read_unlock(&pag->pag_ici_lock);
2023 continue; 2023 delay(1);
2024 goto retry;
2024 } 2025 }
2025 read_unlock(&pag->pag_ici_lock); 2026 read_unlock(&pag->pag_ici_lock);
2026 2027
2027 if (!xfs_iflock_nowait(ip)) { 2028 xfs_iflock(ip);
2028 if (ip != free_ip)
2029 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2030 continue;
2031 }
2032
2033 xfs_iflags_set(ip, XFS_ISTALE); 2029 xfs_iflags_set(ip, XFS_ISTALE);
2034 if (xfs_inode_clean(ip)) {
2035 ASSERT(ip != free_ip);
2036 xfs_ifunlock(ip);
2037 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2038 continue;
2039 }
2040 2030
2031 /*
2032 * we don't need to attach clean inodes or those only
2033 * with unlogged changes (which we throw away, anyway).
2034 */
2041 iip = ip->i_itemp; 2035 iip = ip->i_itemp;
2042 if (!iip) { 2036 if (!iip || xfs_inode_clean(ip)) {
2043 /* inode with unlogged changes only */
2044 ASSERT(ip != free_ip); 2037 ASSERT(ip != free_ip);
2045 ip->i_update_core = 0; 2038 ip->i_update_core = 0;
2046 xfs_ifunlock(ip); 2039 xfs_ifunlock(ip);
2047 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2040 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2048 continue; 2041 continue;
2049 } 2042 }
2050 found++;
2051 2043
2052 iip->ili_last_fields = iip->ili_format.ilf_fields; 2044 iip->ili_last_fields = iip->ili_format.ilf_fields;
2053 iip->ili_format.ilf_fields = 0; 2045 iip->ili_format.ilf_fields = 0;
@@ -2055,16 +2047,14 @@ xfs_ifree_cluster(
2055 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2047 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2056 &iip->ili_item.li_lsn); 2048 &iip->ili_item.li_lsn);
2057 2049
2058 xfs_buf_attach_iodone(bp, 2050 xfs_buf_attach_iodone(bp, xfs_istale_done,
2059 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2051 &iip->ili_item);
2060 xfs_istale_done, (xfs_log_item_t *)iip);
2061 2052
2062 if (ip != free_ip) 2053 if (ip != free_ip)
2063 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2054 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2064 } 2055 }
2065 2056
2066 if (found) 2057 xfs_trans_stale_inode_buf(tp, bp);
2067 xfs_trans_stale_inode_buf(tp, bp);
2068 xfs_trans_binval(tp, bp); 2058 xfs_trans_binval(tp, bp);
2069 } 2059 }
2070 2060
@@ -2203,7 +2193,7 @@ xfs_iroot_realloc(
2203 */ 2193 */
2204 if (ifp->if_broot_bytes == 0) { 2194 if (ifp->if_broot_bytes == 0) {
2205 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2195 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
2206 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); 2196 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2207 ifp->if_broot_bytes = (int)new_size; 2197 ifp->if_broot_bytes = (int)new_size;
2208 return; 2198 return;
2209 } 2199 }
@@ -2219,7 +2209,7 @@ xfs_iroot_realloc(
2219 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2209 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
2220 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 2210 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2221 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2211 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
2222 KM_SLEEP); 2212 KM_SLEEP | KM_NOFS);
2223 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2213 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2224 ifp->if_broot_bytes); 2214 ifp->if_broot_bytes);
2225 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2215 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -2245,7 +2235,7 @@ xfs_iroot_realloc(
2245 else 2235 else
2246 new_size = 0; 2236 new_size = 0;
2247 if (new_size > 0) { 2237 if (new_size > 0) {
2248 new_broot = kmem_alloc(new_size, KM_SLEEP); 2238 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2249 /* 2239 /*
2250 * First copy over the btree block header. 2240 * First copy over the btree block header.
2251 */ 2241 */
@@ -2349,7 +2339,8 @@ xfs_idata_realloc(
2349 real_size = roundup(new_size, 4); 2339 real_size = roundup(new_size, 4);
2350 if (ifp->if_u1.if_data == NULL) { 2340 if (ifp->if_u1.if_data == NULL) {
2351 ASSERT(ifp->if_real_bytes == 0); 2341 ASSERT(ifp->if_real_bytes == 0);
2352 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2342 ifp->if_u1.if_data = kmem_alloc(real_size,
2343 KM_SLEEP | KM_NOFS);
2353 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2344 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2354 /* 2345 /*
2355 * Only do the realloc if the underlying size 2346 * Only do the realloc if the underlying size
@@ -2360,11 +2351,12 @@ xfs_idata_realloc(
2360 kmem_realloc(ifp->if_u1.if_data, 2351 kmem_realloc(ifp->if_u1.if_data,
2361 real_size, 2352 real_size,
2362 ifp->if_real_bytes, 2353 ifp->if_real_bytes,
2363 KM_SLEEP); 2354 KM_SLEEP | KM_NOFS);
2364 } 2355 }
2365 } else { 2356 } else {
2366 ASSERT(ifp->if_real_bytes == 0); 2357 ASSERT(ifp->if_real_bytes == 0);
2367 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2358 ifp->if_u1.if_data = kmem_alloc(real_size,
2359 KM_SLEEP | KM_NOFS);
2368 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 2360 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
2369 ifp->if_bytes); 2361 ifp->if_bytes);
2370 } 2362 }
@@ -2731,11 +2723,10 @@ cluster_corrupt_out:
2731 * mark it as stale and brelse. 2723 * mark it as stale and brelse.
2732 */ 2724 */
2733 if (XFS_BUF_IODONE_FUNC(bp)) { 2725 if (XFS_BUF_IODONE_FUNC(bp)) {
2734 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
2735 XFS_BUF_UNDONE(bp); 2726 XFS_BUF_UNDONE(bp);
2736 XFS_BUF_STALE(bp); 2727 XFS_BUF_STALE(bp);
2737 XFS_BUF_ERROR(bp,EIO); 2728 XFS_BUF_ERROR(bp,EIO);
2738 xfs_biodone(bp); 2729 xfs_buf_ioend(bp, 0);
2739 } else { 2730 } else {
2740 XFS_BUF_STALE(bp); 2731 XFS_BUF_STALE(bp);
2741 xfs_buf_relse(bp); 2732 xfs_buf_relse(bp);
@@ -3018,7 +3009,7 @@ xfs_iflush_int(
3018 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 3009 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
3019 memset(&(dip->di_pad[0]), 0, 3010 memset(&(dip->di_pad[0]), 0,
3020 sizeof(dip->di_pad)); 3011 sizeof(dip->di_pad));
3021 ASSERT(ip->i_d.di_projid == 0); 3012 ASSERT(xfs_get_projid(ip) == 0);
3022 } 3013 }
3023 } 3014 }
3024 3015
@@ -3069,8 +3060,7 @@ xfs_iflush_int(
3069 * and unlock the inode's flush lock when the inode is 3060 * and unlock the inode's flush lock when the inode is
3070 * completely written to disk. 3061 * completely written to disk.
3071 */ 3062 */
3072 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 3063 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
3073 xfs_iflush_done, (xfs_log_item_t *)iip);
3074 3064
3075 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 3065 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
3076 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 3066 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
@@ -3514,13 +3504,11 @@ xfs_iext_remove_indirect(
3514 xfs_extnum_t ext_diff; /* extents to remove in current list */ 3504 xfs_extnum_t ext_diff; /* extents to remove in current list */
3515 xfs_extnum_t nex1; /* number of extents before idx */ 3505 xfs_extnum_t nex1; /* number of extents before idx */
3516 xfs_extnum_t nex2; /* extents after idx + count */ 3506 xfs_extnum_t nex2; /* extents after idx + count */
3517 int nlists; /* entries in indirection array */
3518 int page_idx = idx; /* index in target extent list */ 3507 int page_idx = idx; /* index in target extent list */
3519 3508
3520 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3509 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3521 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3510 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3522 ASSERT(erp != NULL); 3511 ASSERT(erp != NULL);
3523 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3524 nex1 = page_idx; 3512 nex1 = page_idx;
3525 ext_cnt = count; 3513 ext_cnt = count;
3526 while (ext_cnt) { 3514 while (ext_cnt) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 78550df13cd6..fb2ca2e4cdc9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -134,8 +134,9 @@ typedef struct xfs_icdinode {
134 __uint32_t di_uid; /* owner's user id */ 134 __uint32_t di_uid; /* owner's user id */
135 __uint32_t di_gid; /* owner's group id */ 135 __uint32_t di_gid; /* owner's group id */
136 __uint32_t di_nlink; /* number of links to file */ 136 __uint32_t di_nlink; /* number of links to file */
137 __uint16_t di_projid; /* owner's project id */ 137 __uint16_t di_projid_lo; /* lower part of owner's project id */
138 __uint8_t di_pad[8]; /* unused, zeroed space */ 138 __uint16_t di_projid_hi; /* higher part of owner's project id */
139 __uint8_t di_pad[6]; /* unused, zeroed space */
139 __uint16_t di_flushiter; /* incremented on flush */ 140 __uint16_t di_flushiter; /* incremented on flush */
140 xfs_ictimestamp_t di_atime; /* time last accessed */ 141 xfs_ictimestamp_t di_atime; /* time last accessed */
141 xfs_ictimestamp_t di_mtime; /* time last modified */ 142 xfs_ictimestamp_t di_mtime; /* time last modified */
@@ -212,7 +213,6 @@ typedef struct xfs_icdinode {
212#ifdef __KERNEL__ 213#ifdef __KERNEL__
213 214
214struct bhv_desc; 215struct bhv_desc;
215struct cred;
216struct xfs_buf; 216struct xfs_buf;
217struct xfs_bmap_free; 217struct xfs_bmap_free;
218struct xfs_bmbt_irec; 218struct xfs_bmbt_irec;
@@ -335,6 +335,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
335} 335}
336 336
337/* 337/*
338 * Project quota id helpers (previously projid was 16bit only
339 * and using two 16bit values to hold new 32bit projid was choosen
340 * to retain compatibility with "old" filesystems).
341 */
342static inline prid_t
343xfs_get_projid(struct xfs_inode *ip)
344{
345 return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo;
346}
347
348static inline void
349xfs_set_projid(struct xfs_inode *ip,
350 prid_t projid)
351{
352 ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
353 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
354}
355
356/*
338 * Manage the i_flush queue embedded in the inode. This completion 357 * Manage the i_flush queue embedded in the inode. This completion
339 * queue synchronizes processes attempting to flush the in-core 358 * queue synchronizes processes attempting to flush the in-core
340 * inode back to disk. 359 * inode back to disk.
@@ -443,8 +462,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
443 */ 462 */
444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 463int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
445 uint, uint, xfs_inode_t **); 464 uint, uint, xfs_inode_t **);
446void xfs_iput(xfs_inode_t *, uint);
447void xfs_iput_new(xfs_inode_t *, uint);
448void xfs_ilock(xfs_inode_t *, uint); 465void xfs_ilock(xfs_inode_t *, uint);
449int xfs_ilock_nowait(xfs_inode_t *, uint); 466int xfs_ilock_nowait(xfs_inode_t *, uint);
450void xfs_iunlock(xfs_inode_t *, uint); 467void xfs_iunlock(xfs_inode_t *, uint);
@@ -452,14 +469,14 @@ void xfs_ilock_demote(xfs_inode_t *, uint);
452int xfs_isilocked(xfs_inode_t *, uint); 469int xfs_isilocked(xfs_inode_t *, uint);
453uint xfs_ilock_map_shared(xfs_inode_t *); 470uint xfs_ilock_map_shared(xfs_inode_t *);
454void xfs_iunlock_map_shared(xfs_inode_t *, uint); 471void xfs_iunlock_map_shared(xfs_inode_t *, uint);
455void xfs_ireclaim(xfs_inode_t *); 472void xfs_inode_free(struct xfs_inode *ip);
456 473
457/* 474/*
458 * xfs_inode.c prototypes. 475 * xfs_inode.c prototypes.
459 */ 476 */
460int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 477int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
461 xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, 478 xfs_nlink_t, xfs_dev_t, prid_t, int,
462 int, struct xfs_buf **, boolean_t *, xfs_inode_t **); 479 struct xfs_buf **, boolean_t *, xfs_inode_t **);
463 480
464uint xfs_ip2xflags(struct xfs_inode *); 481uint xfs_ip2xflags(struct xfs_inode *);
465uint xfs_dic2xflags(struct xfs_dinode *); 482uint xfs_dic2xflags(struct xfs_dinode *);
@@ -473,7 +490,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
473void xfs_iext_realloc(xfs_inode_t *, int, int); 490void xfs_iext_realloc(xfs_inode_t *, int, int);
474void xfs_iunpin_wait(xfs_inode_t *); 491void xfs_iunpin_wait(xfs_inode_t *);
475int xfs_iflush(xfs_inode_t *, uint); 492int xfs_iflush(xfs_inode_t *, uint);
476void xfs_ichgtime(xfs_inode_t *, int);
477void xfs_lock_inodes(xfs_inode_t **, int, uint); 493void xfs_lock_inodes(xfs_inode_t **, int, uint);
478void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 494void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
479 495
@@ -484,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
484#define IHOLD(ip) \ 500#define IHOLD(ip) \
485do { \ 501do { \
486 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 502 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
487 atomic_inc(&(VFS_I(ip)->i_count)); \ 503 ihold(VFS_I(ip)); \
488 trace_xfs_ihold(ip, _THIS_IP_); \ 504 trace_xfs_ihold(ip, _THIS_IP_); \
489} while (0) 505} while (0)
490 506
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index cf8249a60004..c7ac020705df 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -22,30 +22,26 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_buf_item.h"
26#include "xfs_sb.h" 25#include "xfs_sb.h"
27#include "xfs_ag.h" 26#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 27#include "xfs_mount.h"
31#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
32#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 30#include "xfs_dinode.h"
38#include "xfs_inode.h" 31#include "xfs_inode.h"
39#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
40#include "xfs_btree.h"
41#include "xfs_ialloc.h"
42#include "xfs_rw.h"
43#include "xfs_error.h" 33#include "xfs_error.h"
44#include "xfs_trace.h" 34#include "xfs_trace.h"
45 35
46 36
47kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 37kmem_zone_t *xfs_ili_zone; /* inode log item zone */
48 38
39static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
40{
41 return container_of(lip, struct xfs_inode_log_item, ili_item);
42}
43
44
49/* 45/*
50 * This returns the number of iovecs needed to log the given inode item. 46 * This returns the number of iovecs needed to log the given inode item.
51 * 47 *
@@ -55,13 +51,11 @@ kmem_zone_t *xfs_ili_zone; /* inode log item zone */
55 */ 51 */
56STATIC uint 52STATIC uint
57xfs_inode_item_size( 53xfs_inode_item_size(
58 xfs_inode_log_item_t *iip) 54 struct xfs_log_item *lip)
59{ 55{
60 uint nvecs; 56 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
61 xfs_inode_t *ip; 57 struct xfs_inode *ip = iip->ili_inode;
62 58 uint nvecs = 2;
63 ip = iip->ili_inode;
64 nvecs = 2;
65 59
66 /* 60 /*
67 * Only log the data/extents/b-tree root if there is something 61 * Only log the data/extents/b-tree root if there is something
@@ -212,36 +206,23 @@ xfs_inode_item_size(
212 */ 206 */
213STATIC void 207STATIC void
214xfs_inode_item_format( 208xfs_inode_item_format(
215 xfs_inode_log_item_t *iip, 209 struct xfs_log_item *lip,
216 xfs_log_iovec_t *log_vector) 210 struct xfs_log_iovec *vecp)
217{ 211{
212 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
213 struct xfs_inode *ip = iip->ili_inode;
218 uint nvecs; 214 uint nvecs;
219 xfs_log_iovec_t *vecp;
220 xfs_inode_t *ip;
221 size_t data_bytes; 215 size_t data_bytes;
222 xfs_bmbt_rec_t *ext_buffer; 216 xfs_bmbt_rec_t *ext_buffer;
223 int nrecs;
224 xfs_mount_t *mp; 217 xfs_mount_t *mp;
225 218
226 ip = iip->ili_inode; 219 vecp->i_addr = &iip->ili_format;
227 vecp = log_vector;
228
229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
230 vecp->i_len = sizeof(xfs_inode_log_format_t); 220 vecp->i_len = sizeof(xfs_inode_log_format_t);
231 vecp->i_type = XLOG_REG_TYPE_IFORMAT; 221 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
232 vecp++; 222 vecp++;
233 nvecs = 1; 223 nvecs = 1;
234 224
235 /* 225 /*
236 * Make sure the linux inode is dirty. We do this before
237 * clearing i_update_core as the VFS will call back into
238 * XFS here and set i_update_core, so we need to dirty the
239 * inode first so that the ordering of i_update_core and
240 * unlogged modifications still works as described below.
241 */
242 xfs_mark_inode_dirty_sync(ip);
243
244 /*
245 * Clear i_update_core if the timestamps (or any other 226 * Clear i_update_core if the timestamps (or any other
246 * non-transactional modification) need flushing/logging 227 * non-transactional modification) need flushing/logging
247 * and we're about to log them with the rest of the core. 228 * and we're about to log them with the rest of the core.
@@ -277,7 +258,7 @@ xfs_inode_item_format(
277 */ 258 */
278 xfs_synchronize_times(ip); 259 xfs_synchronize_times(ip);
279 260
280 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 261 vecp->i_addr = &ip->i_d;
281 vecp->i_len = sizeof(struct xfs_icdinode); 262 vecp->i_len = sizeof(struct xfs_icdinode);
282 vecp->i_type = XLOG_REG_TYPE_ICORE; 263 vecp->i_type = XLOG_REG_TYPE_ICORE;
283 vecp++; 264 vecp++;
@@ -323,18 +304,17 @@ xfs_inode_item_format(
323 ASSERT(ip->i_df.if_u1.if_extents != NULL); 304 ASSERT(ip->i_df.if_u1.if_extents != NULL);
324 ASSERT(ip->i_d.di_nextents > 0); 305 ASSERT(ip->i_d.di_nextents > 0);
325 ASSERT(iip->ili_extents_buf == NULL); 306 ASSERT(iip->ili_extents_buf == NULL);
326 nrecs = ip->i_df.if_bytes / 307 ASSERT((ip->i_df.if_bytes /
327 (uint)sizeof(xfs_bmbt_rec_t); 308 (uint)sizeof(xfs_bmbt_rec_t)) > 0);
328 ASSERT(nrecs > 0);
329#ifdef XFS_NATIVE_HOST 309#ifdef XFS_NATIVE_HOST
330 if (nrecs == ip->i_d.di_nextents) { 310 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
311 (uint)sizeof(xfs_bmbt_rec_t)) {
331 /* 312 /*
332 * There are no delayed allocation 313 * There are no delayed allocation
333 * extents, so just point to the 314 * extents, so just point to the
334 * real extents array. 315 * real extents array.
335 */ 316 */
336 vecp->i_addr = 317 vecp->i_addr = ip->i_df.if_u1.if_extents;
337 (char *)(ip->i_df.if_u1.if_extents);
338 vecp->i_len = ip->i_df.if_bytes; 318 vecp->i_len = ip->i_df.if_bytes;
339 vecp->i_type = XLOG_REG_TYPE_IEXT; 319 vecp->i_type = XLOG_REG_TYPE_IEXT;
340 } else 320 } else
@@ -352,7 +332,7 @@ xfs_inode_item_format(
352 ext_buffer = kmem_alloc(ip->i_df.if_bytes, 332 ext_buffer = kmem_alloc(ip->i_df.if_bytes,
353 KM_SLEEP); 333 KM_SLEEP);
354 iip->ili_extents_buf = ext_buffer; 334 iip->ili_extents_buf = ext_buffer;
355 vecp->i_addr = (xfs_caddr_t)ext_buffer; 335 vecp->i_addr = ext_buffer;
356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 336 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
357 XFS_DATA_FORK); 337 XFS_DATA_FORK);
358 vecp->i_type = XLOG_REG_TYPE_IEXT; 338 vecp->i_type = XLOG_REG_TYPE_IEXT;
@@ -371,7 +351,7 @@ xfs_inode_item_format(
371 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 351 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
372 ASSERT(ip->i_df.if_broot_bytes > 0); 352 ASSERT(ip->i_df.if_broot_bytes > 0);
373 ASSERT(ip->i_df.if_broot != NULL); 353 ASSERT(ip->i_df.if_broot != NULL);
374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 354 vecp->i_addr = ip->i_df.if_broot;
375 vecp->i_len = ip->i_df.if_broot_bytes; 355 vecp->i_len = ip->i_df.if_broot_bytes;
376 vecp->i_type = XLOG_REG_TYPE_IBROOT; 356 vecp->i_type = XLOG_REG_TYPE_IBROOT;
377 vecp++; 357 vecp++;
@@ -389,7 +369,7 @@ xfs_inode_item_format(
389 ASSERT(ip->i_df.if_u1.if_data != NULL); 369 ASSERT(ip->i_df.if_u1.if_data != NULL);
390 ASSERT(ip->i_d.di_size > 0); 370 ASSERT(ip->i_d.di_size > 0);
391 371
392 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; 372 vecp->i_addr = ip->i_df.if_u1.if_data;
393 /* 373 /*
394 * Round i_bytes up to a word boundary. 374 * Round i_bytes up to a word boundary.
395 * The underlying memory is guaranteed to 375 * The underlying memory is guaranteed to
@@ -437,7 +417,7 @@ xfs_inode_item_format(
437 * Assert that no attribute-related log flags are set. 417 * Assert that no attribute-related log flags are set.
438 */ 418 */
439 if (!XFS_IFORK_Q(ip)) { 419 if (!XFS_IFORK_Q(ip)) {
440 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 420 ASSERT(nvecs == lip->li_desc->lid_size);
441 iip->ili_format.ilf_size = nvecs; 421 iip->ili_format.ilf_size = nvecs;
442 ASSERT(!(iip->ili_format.ilf_fields & 422 ASSERT(!(iip->ili_format.ilf_fields &
443 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 423 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -449,21 +429,21 @@ xfs_inode_item_format(
449 ASSERT(!(iip->ili_format.ilf_fields & 429 ASSERT(!(iip->ili_format.ilf_fields &
450 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 430 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
451 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 431 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
452 ASSERT(ip->i_afp->if_bytes > 0);
453 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
454 ASSERT(ip->i_d.di_anextents > 0);
455#ifdef DEBUG 432#ifdef DEBUG
456 nrecs = ip->i_afp->if_bytes / 433 int nrecs = ip->i_afp->if_bytes /
457 (uint)sizeof(xfs_bmbt_rec_t); 434 (uint)sizeof(xfs_bmbt_rec_t);
458#endif
459 ASSERT(nrecs > 0); 435 ASSERT(nrecs > 0);
460 ASSERT(nrecs == ip->i_d.di_anextents); 436 ASSERT(nrecs == ip->i_d.di_anextents);
437 ASSERT(ip->i_afp->if_bytes > 0);
438 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
439 ASSERT(ip->i_d.di_anextents > 0);
440#endif
461#ifdef XFS_NATIVE_HOST 441#ifdef XFS_NATIVE_HOST
462 /* 442 /*
463 * There are not delayed allocation extents 443 * There are not delayed allocation extents
464 * for attributes, so just point at the array. 444 * for attributes, so just point at the array.
465 */ 445 */
466 vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); 446 vecp->i_addr = ip->i_afp->if_u1.if_extents;
467 vecp->i_len = ip->i_afp->if_bytes; 447 vecp->i_len = ip->i_afp->if_bytes;
468#else 448#else
469 ASSERT(iip->ili_aextents_buf == NULL); 449 ASSERT(iip->ili_aextents_buf == NULL);
@@ -473,7 +453,7 @@ xfs_inode_item_format(
473 ext_buffer = kmem_alloc(ip->i_afp->if_bytes, 453 ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
474 KM_SLEEP); 454 KM_SLEEP);
475 iip->ili_aextents_buf = ext_buffer; 455 iip->ili_aextents_buf = ext_buffer;
476 vecp->i_addr = (xfs_caddr_t)ext_buffer; 456 vecp->i_addr = ext_buffer;
477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 457 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
478 XFS_ATTR_FORK); 458 XFS_ATTR_FORK);
479#endif 459#endif
@@ -490,7 +470,7 @@ xfs_inode_item_format(
490 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 470 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
491 ASSERT(ip->i_afp->if_broot_bytes > 0); 471 ASSERT(ip->i_afp->if_broot_bytes > 0);
492 ASSERT(ip->i_afp->if_broot != NULL); 472 ASSERT(ip->i_afp->if_broot != NULL);
493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 473 vecp->i_addr = ip->i_afp->if_broot;
494 vecp->i_len = ip->i_afp->if_broot_bytes; 474 vecp->i_len = ip->i_afp->if_broot_bytes;
495 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; 475 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
496 vecp++; 476 vecp++;
@@ -506,7 +486,7 @@ xfs_inode_item_format(
506 ASSERT(ip->i_afp->if_bytes > 0); 486 ASSERT(ip->i_afp->if_bytes > 0);
507 ASSERT(ip->i_afp->if_u1.if_data != NULL); 487 ASSERT(ip->i_afp->if_u1.if_data != NULL);
508 488
509 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; 489 vecp->i_addr = ip->i_afp->if_u1.if_data;
510 /* 490 /*
511 * Round i_bytes up to a word boundary. 491 * Round i_bytes up to a word boundary.
512 * The underlying memory is guaranteed to 492 * The underlying memory is guaranteed to
@@ -528,7 +508,7 @@ xfs_inode_item_format(
528 break; 508 break;
529 } 509 }
530 510
531 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 511 ASSERT(nvecs == lip->li_desc->lid_size);
532 iip->ili_format.ilf_size = nvecs; 512 iip->ili_format.ilf_size = nvecs;
533} 513}
534 514
@@ -539,12 +519,14 @@ xfs_inode_item_format(
539 */ 519 */
540STATIC void 520STATIC void
541xfs_inode_item_pin( 521xfs_inode_item_pin(
542 xfs_inode_log_item_t *iip) 522 struct xfs_log_item *lip)
543{ 523{
544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 524 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
525
526 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
545 527
546 trace_xfs_inode_pin(iip->ili_inode, _RET_IP_); 528 trace_xfs_inode_pin(ip, _RET_IP_);
547 atomic_inc(&iip->ili_inode->i_pincount); 529 atomic_inc(&ip->i_pincount);
548} 530}
549 531
550 532
@@ -554,12 +536,12 @@ xfs_inode_item_pin(
554 * 536 *
555 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. 537 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
556 */ 538 */
557/* ARGSUSED */
558STATIC void 539STATIC void
559xfs_inode_item_unpin( 540xfs_inode_item_unpin(
560 xfs_inode_log_item_t *iip) 541 struct xfs_log_item *lip,
542 int remove)
561{ 543{
562 struct xfs_inode *ip = iip->ili_inode; 544 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
563 545
564 trace_xfs_inode_unpin(ip, _RET_IP_); 546 trace_xfs_inode_unpin(ip, _RET_IP_);
565 ASSERT(atomic_read(&ip->i_pincount) > 0); 547 ASSERT(atomic_read(&ip->i_pincount) > 0);
@@ -567,15 +549,6 @@ xfs_inode_item_unpin(
567 wake_up(&ip->i_ipin_wait); 549 wake_up(&ip->i_ipin_wait);
568} 550}
569 551
570/* ARGSUSED */
571STATIC void
572xfs_inode_item_unpin_remove(
573 xfs_inode_log_item_t *iip,
574 xfs_trans_t *tp)
575{
576 xfs_inode_item_unpin(iip);
577}
578
579/* 552/*
580 * This is called to attempt to lock the inode associated with this 553 * This is called to attempt to lock the inode associated with this
581 * inode log item, in preparation for the push routine which does the actual 554 * inode log item, in preparation for the push routine which does the actual
@@ -591,19 +564,16 @@ xfs_inode_item_unpin_remove(
591 */ 564 */
592STATIC uint 565STATIC uint
593xfs_inode_item_trylock( 566xfs_inode_item_trylock(
594 xfs_inode_log_item_t *iip) 567 struct xfs_log_item *lip)
595{ 568{
596 register xfs_inode_t *ip; 569 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
597 570 struct xfs_inode *ip = iip->ili_inode;
598 ip = iip->ili_inode;
599 571
600 if (xfs_ipincount(ip) > 0) { 572 if (xfs_ipincount(ip) > 0)
601 return XFS_ITEM_PINNED; 573 return XFS_ITEM_PINNED;
602 }
603 574
604 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 575 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
605 return XFS_ITEM_LOCKED; 576 return XFS_ITEM_LOCKED;
606 }
607 577
608 if (!xfs_iflock_nowait(ip)) { 578 if (!xfs_iflock_nowait(ip)) {
609 /* 579 /*
@@ -629,7 +599,7 @@ xfs_inode_item_trylock(
629 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 599 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
630 ASSERT(iip->ili_format.ilf_fields != 0); 600 ASSERT(iip->ili_format.ilf_fields != 0);
631 ASSERT(iip->ili_logged == 0); 601 ASSERT(iip->ili_logged == 0);
632 ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL); 602 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
633 } 603 }
634#endif 604#endif
635 return XFS_ITEM_SUCCESS; 605 return XFS_ITEM_SUCCESS;
@@ -643,26 +613,18 @@ xfs_inode_item_trylock(
643 */ 613 */
644STATIC void 614STATIC void
645xfs_inode_item_unlock( 615xfs_inode_item_unlock(
646 xfs_inode_log_item_t *iip) 616 struct xfs_log_item *lip)
647{ 617{
648 uint hold; 618 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
649 uint iolocked; 619 struct xfs_inode *ip = iip->ili_inode;
650 uint lock_flags; 620 unsigned short lock_flags;
651 xfs_inode_t *ip;
652 621
653 ASSERT(iip != NULL);
654 ASSERT(iip->ili_inode->i_itemp != NULL); 622 ASSERT(iip->ili_inode->i_itemp != NULL);
655 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 623 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
656 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 624
657 XFS_ILI_IOLOCKED_EXCL)) ||
658 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
659 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
660 XFS_ILI_IOLOCKED_SHARED)) ||
661 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
662 /* 625 /*
663 * Clear the transaction pointer in the inode. 626 * Clear the transaction pointer in the inode.
664 */ 627 */
665 ip = iip->ili_inode;
666 ip->i_transp = NULL; 628 ip->i_transp = NULL;
667 629
668 /* 630 /*
@@ -686,34 +648,11 @@ xfs_inode_item_unlock(
686 iip->ili_aextents_buf = NULL; 648 iip->ili_aextents_buf = NULL;
687 } 649 }
688 650
689 /* 651 lock_flags = iip->ili_lock_flags;
690 * Figure out if we should unlock the inode or not. 652 iip->ili_lock_flags = 0;
691 */ 653 if (lock_flags) {
692 hold = iip->ili_flags & XFS_ILI_HOLD; 654 xfs_iunlock(iip->ili_inode, lock_flags);
693 655 IRELE(iip->ili_inode);
694 /*
695 * Before clearing out the flags, remember whether we
696 * are holding the inode's IO lock.
697 */
698 iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;
699
700 /*
701 * Clear out the fields of the inode log item particular
702 * to the current transaction.
703 */
704 iip->ili_flags = 0;
705
706 /*
707 * Unlock the inode if XFS_ILI_HOLD was not set.
708 */
709 if (!hold) {
710 lock_flags = XFS_ILOCK_EXCL;
711 if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
712 lock_flags |= XFS_IOLOCK_EXCL;
713 } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
714 lock_flags |= XFS_IOLOCK_SHARED;
715 }
716 xfs_iput(iip->ili_inode, lock_flags);
717 } 656 }
718} 657}
719 658
@@ -725,13 +664,12 @@ xfs_inode_item_unlock(
725 * is the only one that matters. Therefore, simply return the 664 * is the only one that matters. Therefore, simply return the
726 * given lsn. 665 * given lsn.
727 */ 666 */
728/*ARGSUSED*/
729STATIC xfs_lsn_t 667STATIC xfs_lsn_t
730xfs_inode_item_committed( 668xfs_inode_item_committed(
731 xfs_inode_log_item_t *iip, 669 struct xfs_log_item *lip,
732 xfs_lsn_t lsn) 670 xfs_lsn_t lsn)
733{ 671{
734 return (lsn); 672 return lsn;
735} 673}
736 674
737/* 675/*
@@ -743,13 +681,12 @@ xfs_inode_item_committed(
743 */ 681 */
744STATIC void 682STATIC void
745xfs_inode_item_pushbuf( 683xfs_inode_item_pushbuf(
746 xfs_inode_log_item_t *iip) 684 struct xfs_log_item *lip)
747{ 685{
748 xfs_inode_t *ip; 686 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
749 xfs_mount_t *mp; 687 struct xfs_inode *ip = iip->ili_inode;
750 xfs_buf_t *bp; 688 struct xfs_buf *bp;
751 689
752 ip = iip->ili_inode;
753 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 690 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
754 691
755 /* 692 /*
@@ -757,14 +694,13 @@ xfs_inode_item_pushbuf(
757 * inode was taken off the AIL. So, just get out. 694 * inode was taken off the AIL. So, just get out.
758 */ 695 */
759 if (completion_done(&ip->i_flush) || 696 if (completion_done(&ip->i_flush) ||
760 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 697 !(lip->li_flags & XFS_LI_IN_AIL)) {
761 xfs_iunlock(ip, XFS_ILOCK_SHARED); 698 xfs_iunlock(ip, XFS_ILOCK_SHARED);
762 return; 699 return;
763 } 700 }
764 701
765 mp = ip->i_mount; 702 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
766 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 703 iip->ili_format.ilf_len, XBF_TRYLOCK);
767 iip->ili_format.ilf_len, XBF_TRYLOCK);
768 704
769 xfs_iunlock(ip, XFS_ILOCK_SHARED); 705 xfs_iunlock(ip, XFS_ILOCK_SHARED);
770 if (!bp) 706 if (!bp)
@@ -772,10 +708,8 @@ xfs_inode_item_pushbuf(
772 if (XFS_BUF_ISDELAYWRITE(bp)) 708 if (XFS_BUF_ISDELAYWRITE(bp))
773 xfs_buf_delwri_promote(bp); 709 xfs_buf_delwri_promote(bp);
774 xfs_buf_relse(bp); 710 xfs_buf_relse(bp);
775 return;
776} 711}
777 712
778
779/* 713/*
780 * This is called to asynchronously write the inode associated with this 714 * This is called to asynchronously write the inode associated with this
781 * inode log item out to disk. The inode will already have been locked by 715 * inode log item out to disk. The inode will already have been locked by
@@ -783,14 +717,14 @@ xfs_inode_item_pushbuf(
783 */ 717 */
784STATIC void 718STATIC void
785xfs_inode_item_push( 719xfs_inode_item_push(
786 xfs_inode_log_item_t *iip) 720 struct xfs_log_item *lip)
787{ 721{
788 xfs_inode_t *ip; 722 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
789 723 struct xfs_inode *ip = iip->ili_inode;
790 ip = iip->ili_inode;
791 724
792 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
793 ASSERT(!completion_done(&ip->i_flush)); 726 ASSERT(!completion_done(&ip->i_flush));
727
794 /* 728 /*
795 * Since we were able to lock the inode's flush lock and 729 * Since we were able to lock the inode's flush lock and
796 * we found it on the AIL, the inode must be dirty. This 730 * we found it on the AIL, the inode must be dirty. This
@@ -813,43 +747,34 @@ xfs_inode_item_push(
813 */ 747 */
814 (void) xfs_iflush(ip, 0); 748 (void) xfs_iflush(ip, 0);
815 xfs_iunlock(ip, XFS_ILOCK_SHARED); 749 xfs_iunlock(ip, XFS_ILOCK_SHARED);
816
817 return;
818} 750}
819 751
820/* 752/*
821 * XXX rcc - this one really has to do something. Probably needs 753 * XXX rcc - this one really has to do something. Probably needs
822 * to stamp in a new field in the incore inode. 754 * to stamp in a new field in the incore inode.
823 */ 755 */
824/* ARGSUSED */
825STATIC void 756STATIC void
826xfs_inode_item_committing( 757xfs_inode_item_committing(
827 xfs_inode_log_item_t *iip, 758 struct xfs_log_item *lip,
828 xfs_lsn_t lsn) 759 xfs_lsn_t lsn)
829{ 760{
830 iip->ili_last_lsn = lsn; 761 INODE_ITEM(lip)->ili_last_lsn = lsn;
831 return;
832} 762}
833 763
834/* 764/*
835 * This is the ops vector shared by all buf log items. 765 * This is the ops vector shared by all buf log items.
836 */ 766 */
837static struct xfs_item_ops xfs_inode_item_ops = { 767static struct xfs_item_ops xfs_inode_item_ops = {
838 .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, 768 .iop_size = xfs_inode_item_size,
839 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 769 .iop_format = xfs_inode_item_format,
840 xfs_inode_item_format, 770 .iop_pin = xfs_inode_item_pin,
841 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, 771 .iop_unpin = xfs_inode_item_unpin,
842 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin, 772 .iop_trylock = xfs_inode_item_trylock,
843 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 773 .iop_unlock = xfs_inode_item_unlock,
844 xfs_inode_item_unpin_remove, 774 .iop_committed = xfs_inode_item_committed,
845 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, 775 .iop_push = xfs_inode_item_push,
846 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_inode_item_unlock, 776 .iop_pushbuf = xfs_inode_item_pushbuf,
847 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 777 .iop_committing = xfs_inode_item_committing
848 xfs_inode_item_committed,
849 .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push,
850 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
851 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
852 xfs_inode_item_committing
853}; 778};
854 779
855 780
@@ -858,10 +783,10 @@ static struct xfs_item_ops xfs_inode_item_ops = {
858 */ 783 */
859void 784void
860xfs_inode_item_init( 785xfs_inode_item_init(
861 xfs_inode_t *ip, 786 struct xfs_inode *ip,
862 xfs_mount_t *mp) 787 struct xfs_mount *mp)
863{ 788{
864 xfs_inode_log_item_t *iip; 789 struct xfs_inode_log_item *iip;
865 790
866 ASSERT(ip->i_itemp == NULL); 791 ASSERT(ip->i_itemp == NULL);
867 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); 792 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
@@ -899,14 +824,14 @@ xfs_inode_item_destroy(
899 * from the AIL if it has not been re-logged, and unlocking the inode's 824 * from the AIL if it has not been re-logged, and unlocking the inode's
900 * flush lock. 825 * flush lock.
901 */ 826 */
902/*ARGSUSED*/
903void 827void
904xfs_iflush_done( 828xfs_iflush_done(
905 xfs_buf_t *bp, 829 struct xfs_buf *bp,
906 xfs_inode_log_item_t *iip) 830 struct xfs_log_item *lip)
907{ 831{
832 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
908 xfs_inode_t *ip = iip->ili_inode; 833 xfs_inode_t *ip = iip->ili_inode;
909 struct xfs_ail *ailp = iip->ili_item.li_ailp; 834 struct xfs_ail *ailp = lip->li_ailp;
910 835
911 /* 836 /*
912 * We only want to pull the item from the AIL if it is 837 * We only want to pull the item from the AIL if it is
@@ -917,12 +842,11 @@ xfs_iflush_done(
917 * the lock since it's cheaper, and then we recheck while 842 * the lock since it's cheaper, and then we recheck while
918 * holding the lock before removing the inode from the AIL. 843 * holding the lock before removing the inode from the AIL.
919 */ 844 */
920 if (iip->ili_logged && 845 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) {
921 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
922 spin_lock(&ailp->xa_lock); 846 spin_lock(&ailp->xa_lock);
923 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { 847 if (lip->li_lsn == iip->ili_flush_lsn) {
924 /* xfs_trans_ail_delete() drops the AIL lock. */ 848 /* xfs_trans_ail_delete() drops the AIL lock. */
925 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); 849 xfs_trans_ail_delete(ailp, lip);
926 } else { 850 } else {
927 spin_unlock(&ailp->xa_lock); 851 spin_unlock(&ailp->xa_lock);
928 } 852 }
@@ -940,8 +864,6 @@ xfs_iflush_done(
940 * Release the inode's flush lock since we're done with it. 864 * Release the inode's flush lock since we're done with it.
941 */ 865 */
942 xfs_ifunlock(ip); 866 xfs_ifunlock(ip);
943
944 return;
945} 867}
946 868
947/* 869/*
@@ -957,10 +879,8 @@ xfs_iflush_abort(
957 xfs_inode_t *ip) 879 xfs_inode_t *ip)
958{ 880{
959 xfs_inode_log_item_t *iip = ip->i_itemp; 881 xfs_inode_log_item_t *iip = ip->i_itemp;
960 xfs_mount_t *mp;
961 882
962 iip = ip->i_itemp; 883 iip = ip->i_itemp;
963 mp = ip->i_mount;
964 if (iip) { 884 if (iip) {
965 struct xfs_ail *ailp = iip->ili_item.li_ailp; 885 struct xfs_ail *ailp = iip->ili_item.li_ailp;
966 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 886 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
@@ -991,10 +911,10 @@ xfs_iflush_abort(
991 911
992void 912void
993xfs_istale_done( 913xfs_istale_done(
994 xfs_buf_t *bp, 914 struct xfs_buf *bp,
995 xfs_inode_log_item_t *iip) 915 struct xfs_log_item *lip)
996{ 916{
997 xfs_iflush_abort(iip->ili_inode); 917 xfs_iflush_abort(INODE_ITEM(lip)->ili_inode);
998} 918}
999 919
1000/* 920/*
@@ -1007,9 +927,8 @@ xfs_inode_item_format_convert(
1007 xfs_inode_log_format_t *in_f) 927 xfs_inode_log_format_t *in_f)
1008{ 928{
1009 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { 929 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
1010 xfs_inode_log_format_32_t *in_f32; 930 xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
1011 931
1012 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr;
1013 in_f->ilf_type = in_f32->ilf_type; 932 in_f->ilf_type = in_f32->ilf_type;
1014 in_f->ilf_size = in_f32->ilf_size; 933 in_f->ilf_size = in_f32->ilf_size;
1015 in_f->ilf_fields = in_f32->ilf_fields; 934 in_f->ilf_fields = in_f32->ilf_fields;
@@ -1025,9 +944,8 @@ xfs_inode_item_format_convert(
1025 in_f->ilf_boffset = in_f32->ilf_boffset; 944 in_f->ilf_boffset = in_f32->ilf_boffset;
1026 return 0; 945 return 0;
1027 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ 946 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
1028 xfs_inode_log_format_64_t *in_f64; 947 xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
1029 948
1030 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr;
1031 in_f->ilf_type = in_f64->ilf_type; 949 in_f->ilf_type = in_f64->ilf_type;
1032 in_f->ilf_size = in_f64->ilf_size; 950 in_f->ilf_size = in_f64->ilf_size;
1033 in_f->ilf_fields = in_f64->ilf_fields; 951 in_f->ilf_fields = in_f64->ilf_fields;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 9a467958ecdd..d3dee61e6d91 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -103,12 +103,6 @@ typedef struct xfs_inode_log_format_64 {
103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
104 XFS_ILOG_ABROOT) 104 XFS_ILOG_ABROOT)
105 105
106#define XFS_ILI_HOLD 0x1
107#define XFS_ILI_IOLOCKED_EXCL 0x2
108#define XFS_ILI_IOLOCKED_SHARED 0x4
109
110#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
111
112static inline int xfs_ilog_fbroot(int w) 106static inline int xfs_ilog_fbroot(int w)
113{ 107{
114 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); 108 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
@@ -137,7 +131,7 @@ typedef struct xfs_inode_log_item {
137 struct xfs_inode *ili_inode; /* inode ptr */ 131 struct xfs_inode *ili_inode; /* inode ptr */
138 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 132 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
139 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ 133 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
140 unsigned short ili_flags; /* misc flags */ 134 unsigned short ili_lock_flags; /* lock flags */
141 unsigned short ili_logged; /* flushed logged data */ 135 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 136 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged 137 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
@@ -161,8 +155,8 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
161 155
162extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
163extern void xfs_inode_item_destroy(struct xfs_inode *); 157extern void xfs_inode_item_destroy(struct xfs_inode *);
164extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); 158extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
165extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *); 159extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
166extern void xfs_iflush_abort(struct xfs_inode *); 160extern void xfs_iflush_abort(struct xfs_inode *);
167extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, 161extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
168 xfs_inode_log_format_t *); 162 xfs_inode_log_format_t *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ef14943829da..20576146369f 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -23,19 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_btree.h" 34#include "xfs_btree.h"
40#include "xfs_bmap.h" 35#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
@@ -123,7 +118,7 @@ xfs_iomap(
123 error = xfs_bmapi(NULL, ip, offset_fsb, 118 error = xfs_bmapi(NULL, ip, offset_fsb,
124 (xfs_filblks_t)(end_fsb - offset_fsb), 119 (xfs_filblks_t)(end_fsb - offset_fsb),
125 bmapi_flags, NULL, 0, imap, 120 bmapi_flags, NULL, 0, imap,
126 nimaps, NULL, NULL); 121 nimaps, NULL);
127 122
128 if (error) 123 if (error)
129 goto out; 124 goto out;
@@ -138,7 +133,7 @@ xfs_iomap(
138 break; 133 break;
139 } 134 }
140 135
141 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { 136 if (flags & BMAPI_DIRECT) {
142 error = xfs_iomap_write_direct(ip, offset, count, flags, 137 error = xfs_iomap_write_direct(ip, offset, count, flags,
143 imap, nimaps); 138 imap, nimaps);
144 } else { 139 } else {
@@ -247,7 +242,7 @@ xfs_iomap_write_direct(
247 xfs_off_t offset, 242 xfs_off_t offset,
248 size_t count, 243 size_t count,
249 int flags, 244 int flags,
250 xfs_bmbt_irec_t *ret_imap, 245 xfs_bmbt_irec_t *imap,
251 int *nmaps) 246 int *nmaps)
252{ 247{
253 xfs_mount_t *mp = ip->i_mount; 248 xfs_mount_t *mp = ip->i_mount;
@@ -261,7 +256,6 @@ xfs_iomap_write_direct(
261 int quota_flag; 256 int quota_flag;
262 int rt; 257 int rt;
263 xfs_trans_t *tp; 258 xfs_trans_t *tp;
264 xfs_bmbt_irec_t imap;
265 xfs_bmap_free_t free_list; 259 xfs_bmap_free_t free_list;
266 uint qblocks, resblks, resrtextents; 260 uint qblocks, resblks, resrtextents;
267 int committed; 261 int committed;
@@ -285,10 +279,10 @@ xfs_iomap_write_direct(
285 if (error) 279 if (error)
286 goto error_out; 280 goto error_out;
287 } else { 281 } else {
288 if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK)) 282 if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK))
289 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 283 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
290 ret_imap->br_blockcount + 284 imap->br_blockcount +
291 ret_imap->br_startoff); 285 imap->br_startoff);
292 } 286 }
293 count_fsb = last_fsb - offset_fsb; 287 count_fsb = last_fsb - offset_fsb;
294 ASSERT(count_fsb > 0); 288 ASSERT(count_fsb > 0);
@@ -334,20 +328,22 @@ xfs_iomap_write_direct(
334 if (error) 328 if (error)
335 goto error1; 329 goto error1;
336 330
337 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 331 xfs_trans_ijoin(tp, ip);
338 xfs_trans_ihold(tp, ip);
339 332
340 bmapi_flag = XFS_BMAPI_WRITE; 333 bmapi_flag = XFS_BMAPI_WRITE;
341 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) 334 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
342 bmapi_flag |= XFS_BMAPI_PREALLOC; 335 bmapi_flag |= XFS_BMAPI_PREALLOC;
343 336
344 /* 337 /*
345 * Issue the xfs_bmapi() call to allocate the blocks 338 * Issue the xfs_bmapi() call to allocate the blocks.
339 *
340 * From this point onwards we overwrite the imap pointer that the
341 * caller gave to us.
346 */ 342 */
347 xfs_bmap_init(&free_list, &firstfsb); 343 xfs_bmap_init(&free_list, &firstfsb);
348 nimaps = 1; 344 nimaps = 1;
349 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, 345 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
350 &firstfsb, 0, &imap, &nimaps, &free_list, NULL); 346 &firstfsb, 0, imap, &nimaps, &free_list);
351 if (error) 347 if (error)
352 goto error0; 348 goto error0;
353 349
@@ -369,12 +365,11 @@ xfs_iomap_write_direct(
369 goto error_out; 365 goto error_out;
370 } 366 }
371 367
372 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { 368 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
373 error = xfs_cmn_err_fsblock_zero(ip, &imap); 369 error = xfs_cmn_err_fsblock_zero(ip, imap);
374 goto error_out; 370 goto error_out;
375 } 371 }
376 372
377 *ret_imap = imap;
378 *nmaps = 1; 373 *nmaps = 1;
379 return 0; 374 return 0;
380 375
@@ -425,7 +420,7 @@ xfs_iomap_eof_want_preallocate(
425 imaps = nimaps; 420 imaps = nimaps;
426 firstblock = NULLFSBLOCK; 421 firstblock = NULLFSBLOCK;
427 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, 422 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
428 &firstblock, 0, imap, &imaps, NULL, NULL); 423 &firstblock, 0, imap, &imaps, NULL);
429 if (error) 424 if (error)
430 return error; 425 return error;
431 for (n = 0; n < imaps; n++) { 426 for (n = 0; n < imaps; n++) {
@@ -500,7 +495,7 @@ retry:
500 (xfs_filblks_t)(last_fsb - offset_fsb), 495 (xfs_filblks_t)(last_fsb - offset_fsb),
501 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 496 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
502 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 497 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
503 &nimaps, NULL, NULL); 498 &nimaps, NULL);
504 if (error && (error != ENOSPC)) 499 if (error && (error != ENOSPC))
505 return XFS_ERROR(error); 500 return XFS_ERROR(error);
506 501
@@ -548,7 +543,7 @@ xfs_iomap_write_allocate(
548 xfs_inode_t *ip, 543 xfs_inode_t *ip,
549 xfs_off_t offset, 544 xfs_off_t offset,
550 size_t count, 545 size_t count,
551 xfs_bmbt_irec_t *map, 546 xfs_bmbt_irec_t *imap,
552 int *retmap) 547 int *retmap)
553{ 548{
554 xfs_mount_t *mp = ip->i_mount; 549 xfs_mount_t *mp = ip->i_mount;
@@ -557,7 +552,6 @@ xfs_iomap_write_allocate(
557 xfs_fsblock_t first_block; 552 xfs_fsblock_t first_block;
558 xfs_bmap_free_t free_list; 553 xfs_bmap_free_t free_list;
559 xfs_filblks_t count_fsb; 554 xfs_filblks_t count_fsb;
560 xfs_bmbt_irec_t imap;
561 xfs_trans_t *tp; 555 xfs_trans_t *tp;
562 int nimaps, committed; 556 int nimaps, committed;
563 int error = 0; 557 int error = 0;
@@ -573,8 +567,8 @@ xfs_iomap_write_allocate(
573 return XFS_ERROR(error); 567 return XFS_ERROR(error);
574 568
575 offset_fsb = XFS_B_TO_FSBT(mp, offset); 569 offset_fsb = XFS_B_TO_FSBT(mp, offset);
576 count_fsb = map->br_blockcount; 570 count_fsb = imap->br_blockcount;
577 map_start_fsb = map->br_startoff; 571 map_start_fsb = imap->br_startoff;
578 572
579 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 573 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
580 574
@@ -602,8 +596,7 @@ xfs_iomap_write_allocate(
602 return XFS_ERROR(error); 596 return XFS_ERROR(error);
603 } 597 }
604 xfs_ilock(ip, XFS_ILOCK_EXCL); 598 xfs_ilock(ip, XFS_ILOCK_EXCL);
605 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 599 xfs_trans_ijoin(tp, ip);
606 xfs_trans_ihold(tp, ip);
607 600
608 xfs_bmap_init(&free_list, &first_block); 601 xfs_bmap_init(&free_list, &first_block);
609 602
@@ -654,10 +647,15 @@ xfs_iomap_write_allocate(
654 } 647 }
655 } 648 }
656 649
657 /* Go get the actual blocks */ 650 /*
651 * Go get the actual blocks.
652 *
653 * From this point onwards we overwrite the imap
654 * pointer that the caller gave to us.
655 */
658 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, 656 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
659 XFS_BMAPI_WRITE, &first_block, 1, 657 XFS_BMAPI_WRITE, &first_block, 1,
660 &imap, &nimaps, &free_list, NULL); 658 imap, &nimaps, &free_list);
661 if (error) 659 if (error)
662 goto trans_cancel; 660 goto trans_cancel;
663 661
@@ -676,13 +674,12 @@ xfs_iomap_write_allocate(
676 * See if we were able to allocate an extent that 674 * See if we were able to allocate an extent that
677 * covers at least part of the callers request 675 * covers at least part of the callers request
678 */ 676 */
679 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) 677 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
680 return xfs_cmn_err_fsblock_zero(ip, &imap); 678 return xfs_cmn_err_fsblock_zero(ip, imap);
681 679
682 if ((offset_fsb >= imap.br_startoff) && 680 if ((offset_fsb >= imap->br_startoff) &&
683 (offset_fsb < (imap.br_startoff + 681 (offset_fsb < (imap->br_startoff +
684 imap.br_blockcount))) { 682 imap->br_blockcount))) {
685 *map = imap;
686 *retmap = 1; 683 *retmap = 1;
687 XFS_STATS_INC(xs_xstrat_quick); 684 XFS_STATS_INC(xs_xstrat_quick);
688 return 0; 685 return 0;
@@ -692,8 +689,8 @@ xfs_iomap_write_allocate(
692 * So far we have not mapped the requested part of the 689 * So far we have not mapped the requested part of the
693 * file, just surrounding data, try again. 690 * file, just surrounding data, try again.
694 */ 691 */
695 count_fsb -= imap.br_blockcount; 692 count_fsb -= imap->br_blockcount;
696 map_start_fsb = imap.br_startoff + imap.br_blockcount; 693 map_start_fsb = imap->br_startoff + imap->br_blockcount;
697 } 694 }
698 695
699trans_cancel: 696trans_cancel:
@@ -766,8 +763,7 @@ xfs_iomap_write_unwritten(
766 } 763 }
767 764
768 xfs_ilock(ip, XFS_ILOCK_EXCL); 765 xfs_ilock(ip, XFS_ILOCK_EXCL);
769 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 766 xfs_trans_ijoin(tp, ip);
770 xfs_trans_ihold(tp, ip);
771 767
772 /* 768 /*
773 * Modify the unwritten extent state of the buffer. 769 * Modify the unwritten extent state of the buffer.
@@ -776,7 +772,7 @@ xfs_iomap_write_unwritten(
776 nimaps = 1; 772 nimaps = 1;
777 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 773 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
778 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 774 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
779 1, &imap, &nimaps, &free_list, NULL); 775 1, &imap, &nimaps, &free_list);
780 if (error) 776 if (error)
781 goto error_on_bmapi_transaction; 777 goto error_on_bmapi_transaction;
782 778
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 81ac4afd45b3..7748a430f50d 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,17 +18,16 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21typedef enum { 21/* base extent manipulation calls */
22 /* base extent manipulation calls */ 22#define BMAPI_READ (1 << 0) /* read extents */
23 BMAPI_READ = (1 << 0), /* read extents */ 23#define BMAPI_WRITE (1 << 1) /* create extents */
24 BMAPI_WRITE = (1 << 1), /* create extents */ 24#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
25 BMAPI_ALLOCATE = (1 << 2), /* delayed allocate to real extents */ 25
26 /* modifiers */ 26/* modifiers */
27 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ 27#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
28 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ 28#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
29 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ 29#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
30 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ 30#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
31} bmapi_flags_t;
32 31
33#define BMAPI_FLAGS \ 32#define BMAPI_FLAGS \
34 { BMAPI_READ, "READ" }, \ 33 { BMAPI_READ, "READ" }, \
@@ -36,7 +35,6 @@ typedef enum {
36 { BMAPI_ALLOCATE, "ALLOCATE" }, \ 35 { BMAPI_ALLOCATE, "ALLOCATE" }, \
37 { BMAPI_IGNSTATE, "IGNSTATE" }, \ 36 { BMAPI_IGNSTATE, "IGNSTATE" }, \
38 { BMAPI_DIRECT, "DIRECT" }, \ 37 { BMAPI_DIRECT, "DIRECT" }, \
39 { BMAPI_MMAP, "MMAP" }, \
40 { BMAPI_TRYLOCK, "TRYLOCK" } 38 { BMAPI_TRYLOCK, "TRYLOCK" }
41 39
42struct xfs_inode; 40struct xfs_inode;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 2b86f8610512..dc1882adaf54 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -24,20 +24,17 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
38#include "xfs_itable.h" 34#include "xfs_itable.h"
39#include "xfs_error.h" 35#include "xfs_error.h"
40#include "xfs_btree.h" 36#include "xfs_btree.h"
37#include "xfs_trace.h"
41 38
42STATIC int 39STATIC int
43xfs_internal_inum( 40xfs_internal_inum(
@@ -95,7 +92,8 @@ xfs_bulkstat_one_int(
95 * further change. 92 * further change.
96 */ 93 */
97 buf->bs_nlink = dic->di_nlink; 94 buf->bs_nlink = dic->di_nlink;
98 buf->bs_projid = dic->di_projid; 95 buf->bs_projid_lo = dic->di_projid_lo;
96 buf->bs_projid_hi = dic->di_projid_hi;
99 buf->bs_ino = ino; 97 buf->bs_ino = ino;
100 buf->bs_mode = dic->di_mode; 98 buf->bs_mode = dic->di_mode;
101 buf->bs_uid = dic->di_uid; 99 buf->bs_uid = dic->di_uid;
@@ -143,7 +141,8 @@ xfs_bulkstat_one_int(
143 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 141 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
144 break; 142 break;
145 } 143 }
146 xfs_iput(ip, XFS_ILOCK_SHARED); 144 xfs_iunlock(ip, XFS_ILOCK_SHARED);
145 IRELE(ip);
147 146
148 error = formatter(buffer, ubsize, ubused, buf); 147 error = formatter(buffer, ubsize, ubused, buf);
149 148
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5215abc8023a..cee4ab9f8a9e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -24,8 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_error.h" 28#include "xfs_error.h"
31#include "xfs_log_priv.h" 29#include "xfs_log_priv.h"
@@ -35,8 +33,6 @@
35#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
36#include "xfs_log_recover.h" 34#include "xfs_log_recover.h"
37#include "xfs_trans_priv.h" 35#include "xfs_trans_priv.h"
38#include "xfs_dir2_sf.h"
39#include "xfs_attr_sf.h"
40#include "xfs_dinode.h" 36#include "xfs_dinode.h"
41#include "xfs_inode.h" 37#include "xfs_inode.h"
42#include "xfs_rw.h" 38#include "xfs_rw.h"
@@ -337,7 +333,6 @@ xfs_log_reserve(
337 int retval = 0; 333 int retval = 0;
338 334
339 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 335 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
340 ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
341 336
342 if (XLOG_FORCED_SHUTDOWN(log)) 337 if (XLOG_FORCED_SHUTDOWN(log))
343 return XFS_ERROR(EIO); 338 return XFS_ERROR(EIO);
@@ -552,7 +547,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
552 .magic = XLOG_UNMOUNT_TYPE, 547 .magic = XLOG_UNMOUNT_TYPE,
553 }; 548 };
554 struct xfs_log_iovec reg = { 549 struct xfs_log_iovec reg = {
555 .i_addr = (void *)&magic, 550 .i_addr = &magic,
556 .i_len = sizeof(magic), 551 .i_len = sizeof(magic),
557 .i_type = XLOG_REG_TYPE_UNMOUNT, 552 .i_type = XLOG_REG_TYPE_UNMOUNT,
558 }; 553 };
@@ -922,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp)
922 l = iclog->ic_log; 917 l = iclog->ic_log;
923 918
924 /* 919 /*
925 * If the _XFS_BARRIER_FAILED flag was set by a lower
926 * layer, it means the underlying device no longer supports
927 * barrier I/O. Warn loudly and turn off barriers.
928 */
929 if (bp->b_flags & _XFS_BARRIER_FAILED) {
930 bp->b_flags &= ~_XFS_BARRIER_FAILED;
931 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
932 xfs_fs_cmn_err(CE_WARN, l->l_mp,
933 "xlog_iodone: Barriers are no longer supported"
934 " by device. Disabling barriers\n");
935 }
936
937 /*
938 * Race to shutdown the filesystem if we see an error. 920 * Race to shutdown the filesystem if we see an error.
939 */ 921 */
940 if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, 922 if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
@@ -1047,7 +1029,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1047 xlog_in_core_t *iclog, *prev_iclog=NULL; 1029 xlog_in_core_t *iclog, *prev_iclog=NULL;
1048 xfs_buf_t *bp; 1030 xfs_buf_t *bp;
1049 int i; 1031 int i;
1050 int iclogsize;
1051 int error = ENOMEM; 1032 int error = ENOMEM;
1052 uint log2_size = 0; 1033 uint log2_size = 0;
1053 1034
@@ -1127,7 +1108,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1127 * with different amounts of memory. See the definition of 1108 * with different amounts of memory. See the definition of
1128 * xlog_in_core_t in xfs_log_priv.h for details. 1109 * xlog_in_core_t in xfs_log_priv.h for details.
1129 */ 1110 */
1130 iclogsize = log->l_iclog_size;
1131 ASSERT(log->l_iclog_size >= 4096); 1111 ASSERT(log->l_iclog_size >= 4096);
1132 for (i=0; i < log->l_iclog_bufs; i++) { 1112 for (i=0; i < log->l_iclog_bufs; i++) {
1133 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); 1113 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
@@ -1138,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1138 iclog->ic_prev = prev_iclog; 1118 iclog->ic_prev = prev_iclog;
1139 prev_iclog = iclog; 1119 prev_iclog = iclog;
1140 1120
1141 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); 1121 bp = xfs_buf_get_uncached(mp->m_logdev_targp,
1122 log->l_iclog_size, 0);
1142 if (!bp) 1123 if (!bp)
1143 goto out_free_iclog; 1124 goto out_free_iclog;
1144 if (!XFS_BUF_CPSEMA(bp)) 1125 if (!XFS_BUF_CPSEMA(bp))
@@ -1316,7 +1297,7 @@ xlog_bdstrat(
1316 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1297 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1317 XFS_BUF_ERROR(bp, EIO); 1298 XFS_BUF_ERROR(bp, EIO);
1318 XFS_BUF_STALE(bp); 1299 XFS_BUF_STALE(bp);
1319 xfs_biodone(bp); 1300 xfs_buf_ioend(bp, 0);
1320 /* 1301 /*
1321 * It would seem logical to return EIO here, but we rely on 1302 * It would seem logical to return EIO here, but we rely on
1322 * the log state machine to propagate I/O errors instead of 1303 * the log state machine to propagate I/O errors instead of
@@ -1428,11 +1409,8 @@ xlog_sync(xlog_t *log,
1428 XFS_BUF_BUSY(bp); 1409 XFS_BUF_BUSY(bp);
1429 XFS_BUF_ASYNC(bp); 1410 XFS_BUF_ASYNC(bp);
1430 bp->b_flags |= XBF_LOG_BUFFER; 1411 bp->b_flags |= XBF_LOG_BUFFER;
1431 /* 1412
1432 * Do an ordered write for the log block. 1413 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1433 * Its unnecessary to flush the first split block in the log wrap case.
1434 */
1435 if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
1436 XFS_BUF_ORDERED(bp); 1414 XFS_BUF_ORDERED(bp);
1437 1415
1438 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1416 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -3025,7 +3003,8 @@ _xfs_log_force(
3025 3003
3026 XFS_STATS_INC(xs_log_force); 3004 XFS_STATS_INC(xs_log_force);
3027 3005
3028 xlog_cil_push(log, 1); 3006 if (log->l_cilp)
3007 xlog_cil_force(log);
3029 3008
3030 spin_lock(&log->l_icloglock); 3009 spin_lock(&log->l_icloglock);
3031 3010
@@ -3177,7 +3156,7 @@ _xfs_log_force_lsn(
3177 XFS_STATS_INC(xs_log_force); 3156 XFS_STATS_INC(xs_log_force);
3178 3157
3179 if (log->l_cilp) { 3158 if (log->l_cilp) {
3180 lsn = xlog_cil_push_lsn(log, lsn); 3159 lsn = xlog_cil_force_lsn(log, lsn);
3181 if (lsn == NULLCOMMITLSN) 3160 if (lsn == NULLCOMMITLSN)
3182 return 0; 3161 return 0;
3183 } 3162 }
@@ -3734,7 +3713,7 @@ xfs_log_force_umount(
3734 * call below. 3713 * call below.
3735 */ 3714 */
3736 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) 3715 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
3737 xlog_cil_push(log, 1); 3716 xlog_cil_force(log);
3738 3717
3739 /* 3718 /*
3740 * We must hold both the GRANT lock and the LOG lock, 3719 * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 04c78e642cc8..916eb7db14d9 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -55,14 +55,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
55/* 55/*
56 * Flags to xfs_log_reserve() 56 * Flags to xfs_log_reserve()
57 * 57 *
58 * XFS_LOG_SLEEP: If space is not available, sleep (default)
59 * XFS_LOG_NOSLEEP: If space is not available, return error
60 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are 58 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are
61 * performed against this type of reservation, the reservation 59 * performed against this type of reservation, the reservation
62 * is not decreased. Long running transactions should use this. 60 * is not decreased. Long running transactions should use this.
63 */ 61 */
64#define XFS_LOG_SLEEP 0x0
65#define XFS_LOG_NOSLEEP 0x1
66#define XFS_LOG_PERM_RESERV 0x2 62#define XFS_LOG_PERM_RESERV 0x2
67 63
68/* 64/*
@@ -104,7 +100,7 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
104#define XLOG_REG_TYPE_MAX 19 100#define XLOG_REG_TYPE_MAX 19
105 101
106typedef struct xfs_log_iovec { 102typedef struct xfs_log_iovec {
107 xfs_caddr_t i_addr; /* beginning address of region */ 103 void *i_addr; /* beginning address of region */
108 int i_len; /* length in bytes of region */ 104 int i_len; /* length in bytes of region */
109 uint i_type; /* type of region */ 105 uint i_type; /* type of region */
110} xfs_log_iovec_t; 106} xfs_log_iovec_t;
@@ -201,9 +197,4 @@ int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
201bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
202 198
203#endif 199#endif
204
205
206extern int xlog_debug; /* set to 1 to enable real log */
207
208
209#endif /* __XFS_LOG_H__ */ 200#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index bb17cc044bf3..23d6ceb5e97b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -26,8 +26,6 @@
26#include "xfs_log_priv.h" 26#include "xfs_log_priv.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_dmapi.h"
31#include "xfs_mount.h" 29#include "xfs_mount.h"
32#include "xfs_error.h" 30#include "xfs_error.h"
33#include "xfs_alloc.h" 31#include "xfs_alloc.h"
@@ -70,6 +68,7 @@ xlog_cil_init(
70 ctx->sequence = 1; 68 ctx->sequence = 1;
71 ctx->cil = cil; 69 ctx->cil = cil;
72 cil->xc_ctx = ctx; 70 cil->xc_ctx = ctx;
71 cil->xc_current_sequence = ctx->sequence;
73 72
74 cil->xc_log = log; 73 cil->xc_log = log;
75 log->l_cilp = cil; 74 log->l_cilp = cil;
@@ -147,102 +146,6 @@ xlog_cil_init_post_recovery(
147} 146}
148 147
149/* 148/*
150 * Insert the log item into the CIL and calculate the difference in space
151 * consumed by the item. Add the space to the checkpoint ticket and calculate
152 * if the change requires additional log metadata. If it does, take that space
153 * as well. Remove the amount of space we addded to the checkpoint ticket from
154 * the current transaction ticket so that the accounting works out correctly.
155 *
156 * If this is the first time the item is being placed into the CIL in this
157 * context, pin it so it can't be written to disk until the CIL is flushed to
158 * the iclog and the iclog written to disk.
159 */
160static void
161xlog_cil_insert(
162 struct log *log,
163 struct xlog_ticket *ticket,
164 struct xfs_log_item *item,
165 struct xfs_log_vec *lv)
166{
167 struct xfs_cil *cil = log->l_cilp;
168 struct xfs_log_vec *old = lv->lv_item->li_lv;
169 struct xfs_cil_ctx *ctx = cil->xc_ctx;
170 int len;
171 int diff_iovecs;
172 int iclog_space;
173
174 if (old) {
175 /* existing lv on log item, space used is a delta */
176 ASSERT(!list_empty(&item->li_cil));
177 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
178
179 len = lv->lv_buf_len - old->lv_buf_len;
180 diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
181 kmem_free(old->lv_buf);
182 kmem_free(old);
183 } else {
184 /* new lv, must pin the log item */
185 ASSERT(!lv->lv_item->li_lv);
186 ASSERT(list_empty(&item->li_cil));
187
188 len = lv->lv_buf_len;
189 diff_iovecs = lv->lv_niovecs;
190 IOP_PIN(lv->lv_item);
191
192 }
193 len += diff_iovecs * sizeof(xlog_op_header_t);
194
195 /* attach new log vector to log item */
196 lv->lv_item->li_lv = lv;
197
198 spin_lock(&cil->xc_cil_lock);
199 list_move_tail(&item->li_cil, &cil->xc_cil);
200 ctx->nvecs += diff_iovecs;
201
202 /*
203 * If this is the first time the item is being committed to the CIL,
204 * store the sequence number on the log item so we can tell
205 * in future commits whether this is the first checkpoint the item is
206 * being committed into.
207 */
208 if (!item->li_seq)
209 item->li_seq = ctx->sequence;
210
211 /*
212 * Now transfer enough transaction reservation to the context ticket
213 * for the checkpoint. The context ticket is special - the unit
214 * reservation has to grow as well as the current reservation as we
215 * steal from tickets so we can correctly determine the space used
216 * during the transaction commit.
217 */
218 if (ctx->ticket->t_curr_res == 0) {
219 /* first commit in checkpoint, steal the header reservation */
220 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
221 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
222 ticket->t_curr_res -= ctx->ticket->t_unit_res;
223 }
224
225 /* do we need space for more log record headers? */
226 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
227 if (len > 0 && (ctx->space_used / iclog_space !=
228 (ctx->space_used + len) / iclog_space)) {
229 int hdrs;
230
231 hdrs = (len + iclog_space - 1) / iclog_space;
232 /* need to take into account split region headers, too */
233 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
234 ctx->ticket->t_unit_res += hdrs;
235 ctx->ticket->t_curr_res += hdrs;
236 ticket->t_curr_res -= hdrs;
237 ASSERT(ticket->t_curr_res >= len);
238 }
239 ticket->t_curr_res -= len;
240 ctx->space_used += len;
241
242 spin_unlock(&cil->xc_cil_lock);
243}
244
245/*
246 * Format log item into a flat buffers 149 * Format log item into a flat buffers
247 * 150 *
248 * For delayed logging, we need to hold a formatted buffer containing all the 151 * For delayed logging, we need to hold a formatted buffer containing all the
@@ -271,15 +174,10 @@ xlog_cil_insert(
271static void 174static void
272xlog_cil_format_items( 175xlog_cil_format_items(
273 struct log *log, 176 struct log *log,
274 struct xfs_log_vec *log_vector, 177 struct xfs_log_vec *log_vector)
275 struct xlog_ticket *ticket,
276 xfs_lsn_t *start_lsn)
277{ 178{
278 struct xfs_log_vec *lv; 179 struct xfs_log_vec *lv;
279 180
280 if (start_lsn)
281 *start_lsn = log->l_cilp->xc_ctx->sequence;
282
283 ASSERT(log_vector); 181 ASSERT(log_vector);
284 for (lv = log_vector; lv; lv = lv->lv_next) { 182 for (lv = log_vector; lv; lv = lv->lv_next) {
285 void *ptr; 183 void *ptr;
@@ -292,7 +190,7 @@ xlog_cil_format_items(
292 len += lv->lv_iovecp[index].i_len; 190 len += lv->lv_iovecp[index].i_len;
293 191
294 lv->lv_buf_len = len; 192 lv->lv_buf_len = len;
295 lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); 193 lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
296 ptr = lv->lv_buf; 194 ptr = lv->lv_buf;
297 195
298 for (index = 0; index < lv->lv_niovecs; index++) { 196 for (index = 0; index < lv->lv_niovecs; index++) {
@@ -303,97 +201,153 @@ xlog_cil_format_items(
303 ptr += vec->i_len; 201 ptr += vec->i_len;
304 } 202 }
305 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); 203 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
306
307 xlog_cil_insert(log, ticket, lv->lv_item, lv);
308 } 204 }
309} 205}
310 206
311static void 207/*
312xlog_cil_free_logvec( 208 * Prepare the log item for insertion into the CIL. Calculate the difference in
313 struct xfs_log_vec *log_vector) 209 * log space and vectors it will consume, and if it is a new item pin it as
210 * well.
211 */
212STATIC void
213xfs_cil_prepare_item(
214 struct log *log,
215 struct xfs_log_vec *lv,
216 int *len,
217 int *diff_iovecs)
314{ 218{
315 struct xfs_log_vec *lv; 219 struct xfs_log_vec *old = lv->lv_item->li_lv;
220
221 if (old) {
222 /* existing lv on log item, space used is a delta */
223 ASSERT(!list_empty(&lv->lv_item->li_cil));
224 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
225
226 *len += lv->lv_buf_len - old->lv_buf_len;
227 *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
228 kmem_free(old->lv_buf);
229 kmem_free(old);
230 } else {
231 /* new lv, must pin the log item */
232 ASSERT(!lv->lv_item->li_lv);
233 ASSERT(list_empty(&lv->lv_item->li_cil));
234
235 *len += lv->lv_buf_len;
236 *diff_iovecs += lv->lv_niovecs;
237 IOP_PIN(lv->lv_item);
316 238
317 for (lv = log_vector; lv; ) {
318 struct xfs_log_vec *next = lv->lv_next;
319 kmem_free(lv->lv_buf);
320 kmem_free(lv);
321 lv = next;
322 } 239 }
240
241 /* attach new log vector to log item */
242 lv->lv_item->li_lv = lv;
243
244 /*
245 * If this is the first time the item is being committed to the
246 * CIL, store the sequence number on the log item so we can
247 * tell in future commits whether this is the first checkpoint
248 * the item is being committed into.
249 */
250 if (!lv->lv_item->li_seq)
251 lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
323} 252}
324 253
325/* 254/*
326 * Commit a transaction with the given vector to the Committed Item List. 255 * Insert the log items into the CIL and calculate the difference in space
327 * 256 * consumed by the item. Add the space to the checkpoint ticket and calculate
328 * To do this, we need to format the item, pin it in memory if required and 257 * if the change requires additional log metadata. If it does, take that space
329 * account for the space used by the transaction. Once we have done that we 258 * as well. Remove the amount of space we addded to the checkpoint ticket from
330 * need to release the unused reservation for the transaction, attach the 259 * the current transaction ticket so that the accounting works out correctly.
331 * transaction to the checkpoint context so we carry the busy extents through
332 * to checkpoint completion, and then unlock all the items in the transaction.
333 *
334 * For more specific information about the order of operations in
335 * xfs_log_commit_cil() please refer to the comments in
336 * xfs_trans_commit_iclog().
337 *
338 * Called with the context lock already held in read mode to lock out
339 * background commit, returns without it held once background commits are
340 * allowed again.
341 */ 260 */
342int 261static void
343xfs_log_commit_cil( 262xlog_cil_insert_items(
344 struct xfs_mount *mp, 263 struct log *log,
345 struct xfs_trans *tp,
346 struct xfs_log_vec *log_vector, 264 struct xfs_log_vec *log_vector,
347 xfs_lsn_t *commit_lsn, 265 struct xlog_ticket *ticket)
348 int flags)
349{ 266{
350 struct log *log = mp->m_log; 267 struct xfs_cil *cil = log->l_cilp;
351 int log_flags = 0; 268 struct xfs_cil_ctx *ctx = cil->xc_ctx;
352 int push = 0; 269 struct xfs_log_vec *lv;
353 270 int len = 0;
354 if (flags & XFS_TRANS_RELEASE_LOG_RES) 271 int diff_iovecs = 0;
355 log_flags = XFS_LOG_REL_PERM_RESERV; 272 int iclog_space;
356 273
357 if (XLOG_FORCED_SHUTDOWN(log)) { 274 ASSERT(log_vector);
358 xlog_cil_free_logvec(log_vector);
359 return XFS_ERROR(EIO);
360 }
361 275
362 /* lock out background commit */ 276 /*
363 down_read(&log->l_cilp->xc_ctx_lock); 277 * Do all the accounting aggregation and switching of log vectors
364 xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); 278 * around in a separate loop to the insertion of items into the CIL.
279 * Then we can do a separate loop to update the CIL within a single
280 * lock/unlock pair. This reduces the number of round trips on the CIL
281 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
282 * hold time for the transaction commit.
283 *
284 * If this is the first time the item is being placed into the CIL in
285 * this context, pin it so it can't be written to disk until the CIL is
286 * flushed to the iclog and the iclog written to disk.
287 *
288 * We can do this safely because the context can't checkpoint until we
289 * are done so it doesn't matter exactly how we update the CIL.
290 */
291 for (lv = log_vector; lv; lv = lv->lv_next)
292 xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
365 293
366 /* check we didn't blow the reservation */ 294 /* account for space used by new iovec headers */
367 if (tp->t_ticket->t_curr_res < 0) 295 len += diff_iovecs * sizeof(xlog_op_header_t);
368 xlog_print_tic_res(log->l_mp, tp->t_ticket);
369 296
370 /* attach the transaction to the CIL if it has any busy extents */ 297 spin_lock(&cil->xc_cil_lock);
371 if (!list_empty(&tp->t_busy)) {
372 spin_lock(&log->l_cilp->xc_cil_lock);
373 list_splice_init(&tp->t_busy,
374 &log->l_cilp->xc_ctx->busy_extents);
375 spin_unlock(&log->l_cilp->xc_cil_lock);
376 }
377 298
378 tp->t_commit_lsn = *commit_lsn; 299 /* move the items to the tail of the CIL */
379 xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 300 for (lv = log_vector; lv; lv = lv->lv_next)
380 xfs_trans_unreserve_and_mod_sb(tp); 301 list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
381 302
382 /* check for background commit before unlock */ 303 ctx->nvecs += diff_iovecs;
383 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
384 push = 1;
385 up_read(&log->l_cilp->xc_ctx_lock);
386 304
387 /* 305 /*
388 * We need to push CIL every so often so we don't cache more than we 306 * Now transfer enough transaction reservation to the context ticket
389 * can fit in the log. The limit really is that a checkpoint can't be 307 * for the checkpoint. The context ticket is special - the unit
390 * more than half the log (the current checkpoint is not allowed to 308 * reservation has to grow as well as the current reservation as we
391 * overwrite the previous checkpoint), but commit latency and memory 309 * steal from tickets so we can correctly determine the space used
392 * usage limit this to a smaller size in most cases. 310 * during the transaction commit.
393 */ 311 */
394 if (push) 312 if (ctx->ticket->t_curr_res == 0) {
395 xlog_cil_push(log, 0); 313 /* first commit in checkpoint, steal the header reservation */
396 return 0; 314 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
315 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
316 ticket->t_curr_res -= ctx->ticket->t_unit_res;
317 }
318
319 /* do we need space for more log record headers? */
320 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
321 if (len > 0 && (ctx->space_used / iclog_space !=
322 (ctx->space_used + len) / iclog_space)) {
323 int hdrs;
324
325 hdrs = (len + iclog_space - 1) / iclog_space;
326 /* need to take into account split region headers, too */
327 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
328 ctx->ticket->t_unit_res += hdrs;
329 ctx->ticket->t_curr_res += hdrs;
330 ticket->t_curr_res -= hdrs;
331 ASSERT(ticket->t_curr_res >= len);
332 }
333 ticket->t_curr_res -= len;
334 ctx->space_used += len;
335
336 spin_unlock(&cil->xc_cil_lock);
337}
338
339static void
340xlog_cil_free_logvec(
341 struct xfs_log_vec *log_vector)
342{
343 struct xfs_log_vec *lv;
344
345 for (lv = log_vector; lv; ) {
346 struct xfs_log_vec *next = lv->lv_next;
347 kmem_free(lv->lv_buf);
348 kmem_free(lv);
349 lv = next;
350 }
397} 351}
398 352
399/* 353/*
@@ -429,13 +383,23 @@ xlog_cil_committed(
429} 383}
430 384
431/* 385/*
432 * Push the Committed Item List to the log. If the push_now flag is not set, 386 * Push the Committed Item List to the log. If @push_seq flag is zero, then it
433 * then it is a background flush and so we can chose to ignore it. 387 * is a background flush and so we can chose to ignore it. Otherwise, if the
388 * current sequence is the same as @push_seq we need to do a flush. If
389 * @push_seq is less than the current sequence, then it has already been
390 * flushed and we don't need to do anything - the caller will wait for it to
391 * complete if necessary.
392 *
393 * @push_seq is a value rather than a flag because that allows us to do an
394 * unlocked check of the sequence number for a match. Hence we can allows log
395 * forces to run racily and not issue pushes for the same sequence twice. If we
396 * get a race between multiple pushes for the same sequence they will block on
397 * the first one and then abort, hence avoiding needless pushes.
434 */ 398 */
435int 399STATIC int
436xlog_cil_push( 400xlog_cil_push(
437 struct log *log, 401 struct log *log,
438 int push_now) 402 xfs_lsn_t push_seq)
439{ 403{
440 struct xfs_cil *cil = log->l_cilp; 404 struct xfs_cil *cil = log->l_cilp;
441 struct xfs_log_vec *lv; 405 struct xfs_log_vec *lv;
@@ -455,12 +419,20 @@ xlog_cil_push(
455 if (!cil) 419 if (!cil)
456 return 0; 420 return 0;
457 421
422 ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
423
458 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 424 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
459 new_ctx->ticket = xlog_cil_ticket_alloc(log); 425 new_ctx->ticket = xlog_cil_ticket_alloc(log);
460 426
461 /* lock out transaction commit, but don't block on background push */ 427 /*
428 * Lock out transaction commit, but don't block for background pushes
429 * unless we are well over the CIL space limit. See the definition of
430 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
431 * used here.
432 */
462 if (!down_write_trylock(&cil->xc_ctx_lock)) { 433 if (!down_write_trylock(&cil->xc_ctx_lock)) {
463 if (!push_now) 434 if (!push_seq &&
435 cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
464 goto out_free_ticket; 436 goto out_free_ticket;
465 down_write(&cil->xc_ctx_lock); 437 down_write(&cil->xc_ctx_lock);
466 } 438 }
@@ -471,7 +443,11 @@ xlog_cil_push(
471 goto out_skip; 443 goto out_skip;
472 444
473 /* check for spurious background flush */ 445 /* check for spurious background flush */
474 if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) 446 if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
447 goto out_skip;
448
449 /* check for a previously pushed seqeunce */
450 if (push_seq && push_seq < cil->xc_ctx->sequence)
475 goto out_skip; 451 goto out_skip;
476 452
477 /* 453 /*
@@ -517,6 +493,13 @@ xlog_cil_push(
517 cil->xc_ctx = new_ctx; 493 cil->xc_ctx = new_ctx;
518 494
519 /* 495 /*
496 * mirror the new sequence into the cil structure so that we can do
497 * unlocked checks against the current sequence in log forces without
498 * risking deferencing a freed context pointer.
499 */
500 cil->xc_current_sequence = new_ctx->sequence;
501
502 /*
520 * The switch is now done, so we can drop the context lock and move out 503 * The switch is now done, so we can drop the context lock and move out
521 * of a shared context. We can't just go straight to the commit record, 504 * of a shared context. We can't just go straight to the commit record,
522 * though - we need to synchronise with previous and future commits so 505 * though - we need to synchronise with previous and future commits so
@@ -554,7 +537,7 @@ xlog_cil_push(
554 thdr.th_type = XFS_TRANS_CHECKPOINT; 537 thdr.th_type = XFS_TRANS_CHECKPOINT;
555 thdr.th_tid = tic->t_tid; 538 thdr.th_tid = tic->t_tid;
556 thdr.th_num_items = num_iovecs; 539 thdr.th_num_items = num_iovecs;
557 lhdr.i_addr = (xfs_caddr_t)&thdr; 540 lhdr.i_addr = &thdr;
558 lhdr.i_len = sizeof(xfs_trans_header_t); 541 lhdr.i_len = sizeof(xfs_trans_header_t);
559 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; 542 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
560 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); 543 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
@@ -628,6 +611,105 @@ out_abort:
628} 611}
629 612
630/* 613/*
614 * Commit a transaction with the given vector to the Committed Item List.
615 *
616 * To do this, we need to format the item, pin it in memory if required and
617 * account for the space used by the transaction. Once we have done that we
618 * need to release the unused reservation for the transaction, attach the
619 * transaction to the checkpoint context so we carry the busy extents through
620 * to checkpoint completion, and then unlock all the items in the transaction.
621 *
622 * For more specific information about the order of operations in
623 * xfs_log_commit_cil() please refer to the comments in
624 * xfs_trans_commit_iclog().
625 *
626 * Called with the context lock already held in read mode to lock out
627 * background commit, returns without it held once background commits are
628 * allowed again.
629 */
630int
631xfs_log_commit_cil(
632 struct xfs_mount *mp,
633 struct xfs_trans *tp,
634 struct xfs_log_vec *log_vector,
635 xfs_lsn_t *commit_lsn,
636 int flags)
637{
638 struct log *log = mp->m_log;
639 int log_flags = 0;
640 int push = 0;
641
642 if (flags & XFS_TRANS_RELEASE_LOG_RES)
643 log_flags = XFS_LOG_REL_PERM_RESERV;
644
645 if (XLOG_FORCED_SHUTDOWN(log)) {
646 xlog_cil_free_logvec(log_vector);
647 return XFS_ERROR(EIO);
648 }
649
650 /*
651 * do all the hard work of formatting items (including memory
652 * allocation) outside the CIL context lock. This prevents stalling CIL
653 * pushes when we are low on memory and a transaction commit spends a
654 * lot of time in memory reclaim.
655 */
656 xlog_cil_format_items(log, log_vector);
657
658 /* lock out background commit */
659 down_read(&log->l_cilp->xc_ctx_lock);
660 if (commit_lsn)
661 *commit_lsn = log->l_cilp->xc_ctx->sequence;
662
663 xlog_cil_insert_items(log, log_vector, tp->t_ticket);
664
665 /* check we didn't blow the reservation */
666 if (tp->t_ticket->t_curr_res < 0)
667 xlog_print_tic_res(log->l_mp, tp->t_ticket);
668
669 /* attach the transaction to the CIL if it has any busy extents */
670 if (!list_empty(&tp->t_busy)) {
671 spin_lock(&log->l_cilp->xc_cil_lock);
672 list_splice_init(&tp->t_busy,
673 &log->l_cilp->xc_ctx->busy_extents);
674 spin_unlock(&log->l_cilp->xc_cil_lock);
675 }
676
677 tp->t_commit_lsn = *commit_lsn;
678 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
679 xfs_trans_unreserve_and_mod_sb(tp);
680
681 /*
682 * Once all the items of the transaction have been copied to the CIL,
683 * the items can be unlocked and freed.
684 *
685 * This needs to be done before we drop the CIL context lock because we
686 * have to update state in the log items and unlock them before they go
687 * to disk. If we don't, then the CIL checkpoint can race with us and
688 * we can run checkpoint completion before we've updated and unlocked
689 * the log items. This affects (at least) processing of stale buffers,
690 * inodes and EFIs.
691 */
692 xfs_trans_free_items(tp, *commit_lsn, 0);
693
694 /* check for background commit before unlock */
695 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
696 push = 1;
697
698 up_read(&log->l_cilp->xc_ctx_lock);
699
700 /*
701 * We need to push CIL every so often so we don't cache more than we
702 * can fit in the log. The limit really is that a checkpoint can't be
703 * more than half the log (the current checkpoint is not allowed to
704 * overwrite the previous checkpoint), but commit latency and memory
705 * usage limit this to a smaller size in most cases.
706 */
707 if (push)
708 xlog_cil_push(log, 0);
709 return 0;
710}
711
712/*
631 * Conditionally push the CIL based on the sequence passed in. 713 * Conditionally push the CIL based on the sequence passed in.
632 * 714 *
633 * We only need to push if we haven't already pushed the sequence 715 * We only need to push if we haven't already pushed the sequence
@@ -641,39 +723,34 @@ out_abort:
641 * commit lsn is there. It'll be empty, so this is broken for now. 723 * commit lsn is there. It'll be empty, so this is broken for now.
642 */ 724 */
643xfs_lsn_t 725xfs_lsn_t
644xlog_cil_push_lsn( 726xlog_cil_force_lsn(
645 struct log *log, 727 struct log *log,
646 xfs_lsn_t push_seq) 728 xfs_lsn_t sequence)
647{ 729{
648 struct xfs_cil *cil = log->l_cilp; 730 struct xfs_cil *cil = log->l_cilp;
649 struct xfs_cil_ctx *ctx; 731 struct xfs_cil_ctx *ctx;
650 xfs_lsn_t commit_lsn = NULLCOMMITLSN; 732 xfs_lsn_t commit_lsn = NULLCOMMITLSN;
651 733
652restart: 734 ASSERT(sequence <= cil->xc_current_sequence);
653 down_write(&cil->xc_ctx_lock); 735
654 ASSERT(push_seq <= cil->xc_ctx->sequence); 736 /*
655 737 * check to see if we need to force out the current context.
656 /* check to see if we need to force out the current context */ 738 * xlog_cil_push() handles racing pushes for the same sequence,
657 if (push_seq == cil->xc_ctx->sequence) { 739 * so no need to deal with it here.
658 up_write(&cil->xc_ctx_lock); 740 */
659 xlog_cil_push(log, 1); 741 if (sequence == cil->xc_current_sequence)
660 goto restart; 742 xlog_cil_push(log, sequence);
661 }
662 743
663 /* 744 /*
664 * See if we can find a previous sequence still committing. 745 * See if we can find a previous sequence still committing.
665 * We can drop the flush lock as soon as we have the cil lock
666 * because we are now only comparing contexts protected by
667 * the cil lock.
668 *
669 * We need to wait for all previous sequence commits to complete 746 * We need to wait for all previous sequence commits to complete
670 * before allowing the force of push_seq to go ahead. Hence block 747 * before allowing the force of push_seq to go ahead. Hence block
671 * on commits for those as well. 748 * on commits for those as well.
672 */ 749 */
750restart:
673 spin_lock(&cil->xc_cil_lock); 751 spin_lock(&cil->xc_cil_lock);
674 up_write(&cil->xc_ctx_lock);
675 list_for_each_entry(ctx, &cil->xc_committing, committing) { 752 list_for_each_entry(ctx, &cil->xc_committing, committing) {
676 if (ctx->sequence > push_seq) 753 if (ctx->sequence > sequence)
677 continue; 754 continue;
678 if (!ctx->commit_lsn) { 755 if (!ctx->commit_lsn) {
679 /* 756 /*
@@ -683,7 +760,7 @@ restart:
683 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 760 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
684 goto restart; 761 goto restart;
685 } 762 }
686 if (ctx->sequence != push_seq) 763 if (ctx->sequence != sequence)
687 continue; 764 continue;
688 /* found it! */ 765 /* found it! */
689 commit_lsn = ctx->commit_lsn; 766 commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,16 +422,17 @@ struct xfs_cil {
422 struct rw_semaphore xc_ctx_lock; 422 struct rw_semaphore xc_ctx_lock;
423 struct list_head xc_committing; 423 struct list_head xc_committing;
424 sv_t xc_commit_wait; 424 sv_t xc_commit_wait;
425 xfs_lsn_t xc_current_sequence;
425}; 426};
426 427
427/* 428/*
428 * The amount of log space we should the CIL to aggregate is difficult to size. 429 * The amount of log space we allow the CIL to aggregate is difficult to size.
429 * Whatever we chose we have to make we can get a reservation for the log space 430 * Whatever we choose, we have to make sure we can get a reservation for the
430 * effectively, that it is large enough to capture sufficient relogging to 431 * log space effectively, that it is large enough to capture sufficient
431 * reduce log buffer IO significantly, but it is not too large for the log or 432 * relogging to reduce log buffer IO significantly, but it is not too large for
432 * induces too much latency when writing out through the iclogs. We track both 433 * the log or induces too much latency when writing out through the iclogs. We
433 * space consumed and the number of vectors in the checkpoint context, so we 434 * track both space consumed and the number of vectors in the checkpoint
434 * need to decide which to use for limiting. 435 * context, so we need to decide which to use for limiting.
435 * 436 *
436 * Every log buffer we write out during a push needs a header reserved, which 437 * Every log buffer we write out during a push needs a header reserved, which
437 * is at least one sector and more for v2 logs. Hence we need a reservation of 438 * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -458,16 +459,21 @@ struct xfs_cil {
458 * checkpoint transaction ticket is specific to the checkpoint context, rather 459 * checkpoint transaction ticket is specific to the checkpoint context, rather
459 * than the CIL itself. 460 * than the CIL itself.
460 * 461 *
461 * With dynamic reservations, we can basically make up arbitrary limits for the 462 * With dynamic reservations, we can effectively make up arbitrary limits for
462 * checkpoint size so long as they don't violate any other size rules. Hence 463 * the checkpoint size so long as they don't violate any other size rules.
463 * the initial maximum size for the checkpoint transaction will be set to a 464 * Recovery imposes a rule that no transaction exceed half the log, so we are
464 * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit 465 * limited by that. Furthermore, the log transaction reservation subsystem
465 * right now based on the latency of writing out a large amount of data through 466 * tries to keep 25% of the log free, so we need to keep below that limit or we
466 * the circular iclog buffers. 467 * risk running out of free log space to start any new transactions.
468 *
469 * In order to keep background CIL push efficient, we will set a lower
470 * threshold at which background pushing is attempted without blocking current
471 * transaction commits. A separate, higher bound defines when CIL pushes are
472 * enforced to ensure we stay within our maximum checkpoint size bounds.
473 * threshold, yet give us plenty of space for aggregation on large logs.
467 */ 474 */
468 475#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
469#define XLOG_CIL_SPACE_LIMIT(log) \ 476#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
470 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
471 477
472/* 478/*
473 * The reservation head lsn is not made up of a cycle number and block number. 479 * The reservation head lsn is not made up of a cycle number and block number.
@@ -562,8 +568,16 @@ int xlog_cil_init(struct log *log);
562void xlog_cil_init_post_recovery(struct log *log); 568void xlog_cil_init_post_recovery(struct log *log);
563void xlog_cil_destroy(struct log *log); 569void xlog_cil_destroy(struct log *log);
564 570
565int xlog_cil_push(struct log *log, int push_now); 571/*
566xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); 572 * CIL force routines
573 */
574xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
575
576static inline void
577xlog_cil_force(struct log *log)
578{
579 xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
580}
567 581
568/* 582/*
569 * Unmount record type is used as a pseudo transaction type for the ticket. 583 * Unmount record type is used as a pseudo transaction type for the ticket.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9ac5cfab27b9..966d3f97458c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -24,15 +24,11 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_error.h" 28#include "xfs_error.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
@@ -111,7 +107,8 @@ xlog_get_bp(
111 nbblks += log->l_sectBBsize; 107 nbblks += log->l_sectBBsize;
112 nbblks = round_up(nbblks, log->l_sectBBsize); 108 nbblks = round_up(nbblks, log->l_sectBBsize);
113 109
114 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 110 return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
111 BBTOB(nbblks), 0);
115} 112}
116 113
117STATIC void 114STATIC void
@@ -171,7 +168,7 @@ xlog_bread_noalign(
171 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 168 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
172 169
173 xfsbdstrat(log->l_mp, bp); 170 xfsbdstrat(log->l_mp, bp);
174 error = xfs_iowait(bp); 171 error = xfs_buf_iowait(bp);
175 if (error) 172 if (error)
176 xfs_ioerror_alert("xlog_bread", log->l_mp, 173 xfs_ioerror_alert("xlog_bread", log->l_mp,
177 bp, XFS_BUF_ADDR(bp)); 174 bp, XFS_BUF_ADDR(bp));
@@ -325,12 +322,13 @@ xlog_recover_iodone(
325 * this during recovery. One strike! 322 * this during recovery. One strike!
326 */ 323 */
327 xfs_ioerror_alert("xlog_recover_iodone", 324 xfs_ioerror_alert("xlog_recover_iodone",
328 bp->b_mount, bp, XFS_BUF_ADDR(bp)); 325 bp->b_target->bt_mount, bp,
329 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 326 XFS_BUF_ADDR(bp));
327 xfs_force_shutdown(bp->b_target->bt_mount,
328 SHUTDOWN_META_IO_ERROR);
330 } 329 }
331 bp->b_mount = NULL;
332 XFS_BUF_CLR_IODONE_FUNC(bp); 330 XFS_BUF_CLR_IODONE_FUNC(bp);
333 xfs_biodone(bp); 331 xfs_buf_ioend(bp, 0);
334} 332}
335 333
336/* 334/*
@@ -1565,9 +1563,7 @@ xlog_recover_reorder_trans(
1565 1563
1566 list_splice_init(&trans->r_itemq, &sort_list); 1564 list_splice_init(&trans->r_itemq, &sort_list);
1567 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1565 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1568 xfs_buf_log_format_t *buf_f; 1566 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1569
1570 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
1571 1567
1572 switch (ITEM_TYPE(item)) { 1568 switch (ITEM_TYPE(item)) {
1573 case XFS_LI_BUF: 1569 case XFS_LI_BUF:
@@ -1892,9 +1888,8 @@ xlog_recover_do_inode_buffer(
1892 * current di_next_unlinked field. Extract its value 1888 * current di_next_unlinked field. Extract its value
1893 * and copy it to the buffer copy. 1889 * and copy it to the buffer copy.
1894 */ 1890 */
1895 logged_nextp = (xfs_agino_t *) 1891 logged_nextp = item->ri_buf[item_index].i_addr +
1896 ((char *)(item->ri_buf[item_index].i_addr) + 1892 next_unlinked_offset - reg_buf_offset;
1897 (next_unlinked_offset - reg_buf_offset));
1898 if (unlikely(*logged_nextp == 0)) { 1893 if (unlikely(*logged_nextp == 0)) {
1899 xfs_fs_cmn_err(CE_ALERT, mp, 1894 xfs_fs_cmn_err(CE_ALERT, mp,
1900 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", 1895 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field",
@@ -1973,8 +1968,7 @@ xlog_recover_do_reg_buffer(
1973 item->ri_buf[i].i_len, __func__); 1968 item->ri_buf[i].i_len, __func__);
1974 goto next; 1969 goto next;
1975 } 1970 }
1976 error = xfs_qm_dqcheck((xfs_disk_dquot_t *) 1971 error = xfs_qm_dqcheck(item->ri_buf[i].i_addr,
1977 item->ri_buf[i].i_addr,
1978 -1, 0, XFS_QMOPT_DOWARN, 1972 -1, 0, XFS_QMOPT_DOWARN,
1979 "dquot_buf_recover"); 1973 "dquot_buf_recover");
1980 if (error) 1974 if (error)
@@ -2187,7 +2181,7 @@ xlog_recover_do_buffer_trans(
2187 xlog_recover_item_t *item, 2181 xlog_recover_item_t *item,
2188 int pass) 2182 int pass)
2189{ 2183{
2190 xfs_buf_log_format_t *buf_f; 2184 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2191 xfs_mount_t *mp; 2185 xfs_mount_t *mp;
2192 xfs_buf_t *bp; 2186 xfs_buf_t *bp;
2193 int error; 2187 int error;
@@ -2197,8 +2191,6 @@ xlog_recover_do_buffer_trans(
2197 ushort flags; 2191 ushort flags;
2198 uint buf_flags; 2192 uint buf_flags;
2199 2193
2200 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2201
2202 if (pass == XLOG_RECOVER_PASS1) { 2194 if (pass == XLOG_RECOVER_PASS1) {
2203 /* 2195 /*
2204 * In this pass we're only looking for buf items 2196 * In this pass we're only looking for buf items
@@ -2285,8 +2277,7 @@ xlog_recover_do_buffer_trans(
2285 XFS_BUF_STALE(bp); 2277 XFS_BUF_STALE(bp);
2286 error = xfs_bwrite(mp, bp); 2278 error = xfs_bwrite(mp, bp);
2287 } else { 2279 } else {
2288 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2280 ASSERT(bp->b_target->bt_mount == mp);
2289 bp->b_mount = mp;
2290 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2281 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2291 xfs_bdwrite(mp, bp); 2282 xfs_bdwrite(mp, bp);
2292 } 2283 }
@@ -2319,10 +2310,9 @@ xlog_recover_do_inode_trans(
2319 } 2310 }
2320 2311
2321 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2312 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2322 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; 2313 in_f = item->ri_buf[0].i_addr;
2323 } else { 2314 } else {
2324 in_f = (xfs_inode_log_format_t *)kmem_alloc( 2315 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
2325 sizeof(xfs_inode_log_format_t), KM_SLEEP);
2326 need_free = 1; 2316 need_free = 1;
2327 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 2317 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2328 if (error) 2318 if (error)
@@ -2370,7 +2360,7 @@ xlog_recover_do_inode_trans(
2370 error = EFSCORRUPTED; 2360 error = EFSCORRUPTED;
2371 goto error; 2361 goto error;
2372 } 2362 }
2373 dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr); 2363 dicp = item->ri_buf[1].i_addr;
2374 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2364 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2375 xfs_buf_relse(bp); 2365 xfs_buf_relse(bp);
2376 xfs_fs_cmn_err(CE_ALERT, mp, 2366 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2461,7 +2451,7 @@ xlog_recover_do_inode_trans(
2461 } 2451 }
2462 2452
2463 /* The core is in in-core format */ 2453 /* The core is in in-core format */
2464 xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); 2454 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
2465 2455
2466 /* the rest is in on-disk format */ 2456 /* the rest is in on-disk format */
2467 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2457 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
@@ -2551,8 +2541,7 @@ xlog_recover_do_inode_trans(
2551 } 2541 }
2552 2542
2553write_inode_buffer: 2543write_inode_buffer:
2554 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2544 ASSERT(bp->b_target->bt_mount == mp);
2555 bp->b_mount = mp;
2556 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2545 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2557 xfs_bdwrite(mp, bp); 2546 xfs_bdwrite(mp, bp);
2558error: 2547error:
@@ -2578,7 +2567,7 @@ xlog_recover_do_quotaoff_trans(
2578 return (0); 2567 return (0);
2579 } 2568 }
2580 2569
2581 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; 2570 qoff_f = item->ri_buf[0].i_addr;
2582 ASSERT(qoff_f); 2571 ASSERT(qoff_f);
2583 2572
2584 /* 2573 /*
@@ -2622,9 +2611,8 @@ xlog_recover_do_dquot_trans(
2622 if (mp->m_qflags == 0) 2611 if (mp->m_qflags == 0)
2623 return (0); 2612 return (0);
2624 2613
2625 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; 2614 recddq = item->ri_buf[1].i_addr;
2626 2615 if (recddq == NULL) {
2627 if (item->ri_buf[1].i_addr == NULL) {
2628 cmn_err(CE_ALERT, 2616 cmn_err(CE_ALERT,
2629 "XFS: NULL dquot in %s.", __func__); 2617 "XFS: NULL dquot in %s.", __func__);
2630 return XFS_ERROR(EIO); 2618 return XFS_ERROR(EIO);
@@ -2654,7 +2642,7 @@ xlog_recover_do_dquot_trans(
2654 * The other possibility, of course, is that the quota subsystem was 2642 * The other possibility, of course, is that the quota subsystem was
2655 * removed since the last mount - ENOSYS. 2643 * removed since the last mount - ENOSYS.
2656 */ 2644 */
2657 dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; 2645 dq_f = item->ri_buf[0].i_addr;
2658 ASSERT(dq_f); 2646 ASSERT(dq_f);
2659 if ((error = xfs_qm_dqcheck(recddq, 2647 if ((error = xfs_qm_dqcheck(recddq,
2660 dq_f->qlf_id, 2648 dq_f->qlf_id,
@@ -2690,8 +2678,7 @@ xlog_recover_do_dquot_trans(
2690 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2678 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2691 2679
2692 ASSERT(dq_f->qlf_size == 2); 2680 ASSERT(dq_f->qlf_size == 2);
2693 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2681 ASSERT(bp->b_target->bt_mount == mp);
2694 bp->b_mount = mp;
2695 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2682 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2696 xfs_bdwrite(mp, bp); 2683 xfs_bdwrite(mp, bp);
2697 2684
@@ -2721,7 +2708,7 @@ xlog_recover_do_efi_trans(
2721 return 0; 2708 return 0;
2722 } 2709 }
2723 2710
2724 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; 2711 efi_formatp = item->ri_buf[0].i_addr;
2725 2712
2726 mp = log->l_mp; 2713 mp = log->l_mp;
2727 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2714 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
@@ -2767,7 +2754,7 @@ xlog_recover_do_efd_trans(
2767 return; 2754 return;
2768 } 2755 }
2769 2756
2770 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; 2757 efd_formatp = item->ri_buf[0].i_addr;
2771 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2758 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2772 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2759 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
2773 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 2760 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
@@ -3829,7 +3816,7 @@ xlog_do_recover(
3829 XFS_BUF_READ(bp); 3816 XFS_BUF_READ(bp);
3830 XFS_BUF_UNASYNC(bp); 3817 XFS_BUF_UNASYNC(bp);
3831 xfsbdstrat(log->l_mp, bp); 3818 xfsbdstrat(log->l_mp, bp);
3832 error = xfs_iowait(bp); 3819 error = xfs_buf_iowait(bp);
3833 if (error) { 3820 if (error) {
3834 xfs_ioerror_alert("xlog_do_recover", 3821 xfs_ioerror_alert("xlog_do_recover",
3835 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3822 log->l_mp, bp, XFS_BUF_ADDR(bp));
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 69f62d8b2816..b1498ab5a399 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -25,13 +25,10 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 32#include "xfs_dinode.h"
36#include "xfs_inode.h" 33#include "xfs_inode.h"
37#include "xfs_btree.h" 34#include "xfs_btree.h"
@@ -55,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
55 int); 52 int);
56STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, 53STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
57 int); 54 int);
58STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
59 int64_t, int);
60STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 55STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
61
62#else 56#else
63 57
64#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 58#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
65#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) 59#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
66#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
67
68#endif 60#endif
69 61
70static const struct { 62static const struct {
@@ -202,6 +194,8 @@ xfs_uuid_unmount(
202 194
203/* 195/*
204 * Reference counting access wrappers to the perag structures. 196 * Reference counting access wrappers to the perag structures.
197 * Because we never free per-ag structures, the only thing we
198 * have to protect against changes is the tree structure itself.
205 */ 199 */
206struct xfs_perag * 200struct xfs_perag *
207xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) 201xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -209,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
209 struct xfs_perag *pag; 203 struct xfs_perag *pag;
210 int ref = 0; 204 int ref = 0;
211 205
212 spin_lock(&mp->m_perag_lock); 206 rcu_read_lock();
213 pag = radix_tree_lookup(&mp->m_perag_tree, agno); 207 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
214 if (pag) { 208 if (pag) {
215 ASSERT(atomic_read(&pag->pag_ref) >= 0); 209 ASSERT(atomic_read(&pag->pag_ref) >= 0);
216 /* catch leaks in the positive direction during testing */
217 ASSERT(atomic_read(&pag->pag_ref) < 1000);
218 ref = atomic_inc_return(&pag->pag_ref); 210 ref = atomic_inc_return(&pag->pag_ref);
219 } 211 }
220 spin_unlock(&mp->m_perag_lock); 212 rcu_read_unlock();
221 trace_xfs_perag_get(mp, agno, ref, _RET_IP_); 213 trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
222 return pag; 214 return pag;
223} 215}
224 216
217/*
218 * search from @first to find the next perag with the given tag set.
219 */
220struct xfs_perag *
221xfs_perag_get_tag(
222 struct xfs_mount *mp,
223 xfs_agnumber_t first,
224 int tag)
225{
226 struct xfs_perag *pag;
227 int found;
228 int ref;
229
230 rcu_read_lock();
231 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
232 (void **)&pag, first, 1, tag);
233 if (found <= 0) {
234 rcu_read_unlock();
235 return NULL;
236 }
237 ref = atomic_inc_return(&pag->pag_ref);
238 rcu_read_unlock();
239 trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
240 return pag;
241}
242
225void 243void
226xfs_perag_put(struct xfs_perag *pag) 244xfs_perag_put(struct xfs_perag *pag)
227{ 245{
@@ -232,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag)
232 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); 250 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
233} 251}
234 252
253STATIC void
254__xfs_free_perag(
255 struct rcu_head *head)
256{
257 struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
258
259 ASSERT(atomic_read(&pag->pag_ref) == 0);
260 kmem_free(pag);
261}
262
235/* 263/*
236 * Free up the resources associated with a mount structure. Assume that 264 * Free up the per-ag resources associated with the mount structure.
237 * the structure was initially zeroed, so we can tell which fields got
238 * initialized.
239 */ 265 */
240STATIC void 266STATIC void
241xfs_free_perag( 267xfs_free_perag(
@@ -247,10 +273,9 @@ xfs_free_perag(
247 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 273 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
248 spin_lock(&mp->m_perag_lock); 274 spin_lock(&mp->m_perag_lock);
249 pag = radix_tree_delete(&mp->m_perag_tree, agno); 275 pag = radix_tree_delete(&mp->m_perag_tree, agno);
250 ASSERT(pag);
251 ASSERT(atomic_read(&pag->pag_ref) == 0);
252 spin_unlock(&mp->m_perag_lock); 276 spin_unlock(&mp->m_perag_lock);
253 kmem_free(pag); 277 ASSERT(pag);
278 call_rcu(&pag->rcu_head, __xfs_free_perag);
254 } 279 }
255} 280}
256 281
@@ -447,7 +472,10 @@ xfs_initialize_perag(
447 pag->pag_agno = index; 472 pag->pag_agno = index;
448 pag->pag_mount = mp; 473 pag->pag_mount = mp;
449 rwlock_init(&pag->pag_ici_lock); 474 rwlock_init(&pag->pag_ici_lock);
475 mutex_init(&pag->pag_ici_reclaim_lock);
450 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 476 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
477 spin_lock_init(&pag->pag_buf_lock);
478 pag->pag_buf_tree = RB_ROOT;
451 479
452 if (radix_tree_preload(GFP_NOFS)) 480 if (radix_tree_preload(GFP_NOFS))
453 goto out_unwind; 481 goto out_unwind;
@@ -642,7 +670,6 @@ int
642xfs_readsb(xfs_mount_t *mp, int flags) 670xfs_readsb(xfs_mount_t *mp, int flags)
643{ 671{
644 unsigned int sector_size; 672 unsigned int sector_size;
645 unsigned int extra_flags;
646 xfs_buf_t *bp; 673 xfs_buf_t *bp;
647 int error; 674 int error;
648 675
@@ -655,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags)
655 * access to the superblock. 682 * access to the superblock.
656 */ 683 */
657 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 684 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
658 extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
659 685
660 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), 686reread:
661 extra_flags); 687 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
662 if (!bp || XFS_BUF_ISERROR(bp)) { 688 XFS_SB_DADDR, sector_size, 0);
663 xfs_fs_mount_cmn_err(flags, "SB read failed"); 689 if (!bp) {
664 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 690 xfs_fs_mount_cmn_err(flags, "SB buffer read failed");
665 goto fail; 691 return EIO;
666 } 692 }
667 ASSERT(XFS_BUF_ISBUSY(bp));
668 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
669 693
670 /* 694 /*
671 * Initialize the mount structure from the superblock. 695 * Initialize the mount structure from the superblock.
672 * But first do some basic consistency checking. 696 * But first do some basic consistency checking.
673 */ 697 */
674 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 698 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
675
676 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 699 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
677 if (error) { 700 if (error) {
678 xfs_fs_mount_cmn_err(flags, "SB validate failed"); 701 xfs_fs_mount_cmn_err(flags, "SB validate failed");
679 goto fail; 702 goto release_buf;
680 } 703 }
681 704
682 /* 705 /*
@@ -687,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
687 "device supports only %u byte sectors (not %u)", 710 "device supports only %u byte sectors (not %u)",
688 sector_size, mp->m_sb.sb_sectsize); 711 sector_size, mp->m_sb.sb_sectsize);
689 error = ENOSYS; 712 error = ENOSYS;
690 goto fail; 713 goto release_buf;
691 } 714 }
692 715
693 /* 716 /*
@@ -695,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags)
695 * re-read the superblock so the buffer is correctly sized. 718 * re-read the superblock so the buffer is correctly sized.
696 */ 719 */
697 if (sector_size < mp->m_sb.sb_sectsize) { 720 if (sector_size < mp->m_sb.sb_sectsize) {
698 XFS_BUF_UNMANAGE(bp);
699 xfs_buf_relse(bp); 721 xfs_buf_relse(bp);
700 sector_size = mp->m_sb.sb_sectsize; 722 sector_size = mp->m_sb.sb_sectsize;
701 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, 723 goto reread;
702 BTOBB(sector_size), extra_flags);
703 if (!bp || XFS_BUF_ISERROR(bp)) {
704 xfs_fs_mount_cmn_err(flags, "SB re-read failed");
705 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
706 goto fail;
707 }
708 ASSERT(XFS_BUF_ISBUSY(bp));
709 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
710 } 724 }
711 725
712 /* Initialize per-cpu counters */ 726 /* Initialize per-cpu counters */
713 xfs_icsb_reinit_counters(mp); 727 xfs_icsb_reinit_counters(mp);
714 728
715 mp->m_sb_bp = bp; 729 mp->m_sb_bp = bp;
716 xfs_buf_relse(bp); 730 xfs_buf_unlock(bp);
717 ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
718 return 0; 731 return 0;
719 732
720 fail: 733release_buf:
721 if (bp) { 734 xfs_buf_relse(bp);
722 XFS_BUF_UNMANAGE(bp);
723 xfs_buf_relse(bp);
724 }
725 return error; 735 return error;
726} 736}
727 737
@@ -994,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp)
994{ 1004{
995 xfs_buf_t *bp; 1005 xfs_buf_t *bp;
996 xfs_daddr_t d; 1006 xfs_daddr_t d;
997 int error;
998 1007
999 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 1008 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
1000 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 1009 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
1001 cmn_err(CE_WARN, "XFS: size check 1 failed"); 1010 cmn_err(CE_WARN, "XFS: filesystem size mismatch detected");
1002 return XFS_ERROR(EFBIG); 1011 return XFS_ERROR(EFBIG);
1003 } 1012 }
1004 error = xfs_read_buf(mp, mp->m_ddev_targp, 1013 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
1005 d - XFS_FSS_TO_BB(mp, 1), 1014 d - XFS_FSS_TO_BB(mp, 1),
1006 XFS_FSS_TO_BB(mp, 1), 0, &bp); 1015 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
1007 if (!error) { 1016 if (!bp) {
1008 xfs_buf_relse(bp); 1017 cmn_err(CE_WARN, "XFS: last sector read failed");
1009 } else { 1018 return EIO;
1010 cmn_err(CE_WARN, "XFS: size check 2 failed");
1011 if (error == ENOSPC)
1012 error = XFS_ERROR(EFBIG);
1013 return error;
1014 } 1019 }
1020 xfs_buf_relse(bp);
1015 1021
1016 if (mp->m_logdev_targp != mp->m_ddev_targp) { 1022 if (mp->m_logdev_targp != mp->m_ddev_targp) {
1017 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 1023 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
1018 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 1024 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
1019 cmn_err(CE_WARN, "XFS: size check 3 failed"); 1025 cmn_err(CE_WARN, "XFS: log size mismatch detected");
1020 return XFS_ERROR(EFBIG); 1026 return XFS_ERROR(EFBIG);
1021 } 1027 }
1022 error = xfs_read_buf(mp, mp->m_logdev_targp, 1028 bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
1023 d - XFS_FSB_TO_BB(mp, 1), 1029 d - XFS_FSB_TO_BB(mp, 1),
1024 XFS_FSB_TO_BB(mp, 1), 0, &bp); 1030 XFS_FSB_TO_B(mp, 1), 0);
1025 if (!error) { 1031 if (!bp) {
1026 xfs_buf_relse(bp); 1032 cmn_err(CE_WARN, "XFS: log device read failed");
1027 } else { 1033 return EIO;
1028 cmn_err(CE_WARN, "XFS: size check 3 failed");
1029 if (error == ENOSPC)
1030 error = XFS_ERROR(EFBIG);
1031 return error;
1032 } 1034 }
1035 xfs_buf_relse(bp);
1033 } 1036 }
1034 return 0; 1037 return 0;
1035} 1038}
@@ -1604,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1604 XFS_BUF_UNASYNC(sbp); 1607 XFS_BUF_UNASYNC(sbp);
1605 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1608 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
1606 xfsbdstrat(mp, sbp); 1609 xfsbdstrat(mp, sbp);
1607 error = xfs_iowait(sbp); 1610 error = xfs_buf_iowait(sbp);
1608 if (error) 1611 if (error)
1609 xfs_ioerror_alert("xfs_unmountfs_writesb", 1612 xfs_ioerror_alert("xfs_unmountfs_writesb",
1610 mp, sbp, XFS_BUF_ADDR(sbp)); 1613 mp, sbp, XFS_BUF_ADDR(sbp));
@@ -1835,135 +1838,72 @@ xfs_mod_incore_sb_unlocked(
1835 */ 1838 */
1836int 1839int
1837xfs_mod_incore_sb( 1840xfs_mod_incore_sb(
1838 xfs_mount_t *mp, 1841 struct xfs_mount *mp,
1839 xfs_sb_field_t field, 1842 xfs_sb_field_t field,
1840 int64_t delta, 1843 int64_t delta,
1841 int rsvd) 1844 int rsvd)
1842{ 1845{
1843 int status; 1846 int status;
1844 1847
1845 /* check for per-cpu counters */
1846 switch (field) {
1847#ifdef HAVE_PERCPU_SB 1848#ifdef HAVE_PERCPU_SB
1848 case XFS_SBS_ICOUNT: 1849 ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
1849 case XFS_SBS_IFREE:
1850 case XFS_SBS_FDBLOCKS:
1851 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1852 status = xfs_icsb_modify_counters(mp, field,
1853 delta, rsvd);
1854 break;
1855 }
1856 /* FALLTHROUGH */
1857#endif 1850#endif
1858 default: 1851 spin_lock(&mp->m_sb_lock);
1859 spin_lock(&mp->m_sb_lock); 1852 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1860 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1853 spin_unlock(&mp->m_sb_lock);
1861 spin_unlock(&mp->m_sb_lock);
1862 break;
1863 }
1864 1854
1865 return status; 1855 return status;
1866} 1856}
1867 1857
1868/* 1858/*
1869 * xfs_mod_incore_sb_batch() is used to change more than one field 1859 * Change more than one field in the in-core superblock structure at a time.
1870 * in the in-core superblock structure at a time. This modification
1871 * is protected by a lock internal to this module. The fields and
1872 * changes to those fields are specified in the array of xfs_mod_sb
1873 * structures passed in.
1874 * 1860 *
1875 * Either all of the specified deltas will be applied or none of 1861 * The fields and changes to those fields are specified in the array of
1876 * them will. If any modified field dips below 0, then all modifications 1862 * xfs_mod_sb structures passed in. Either all of the specified deltas
1877 * will be backed out and EINVAL will be returned. 1863 * will be applied or none of them will. If any modified field dips below 0,
1864 * then all modifications will be backed out and EINVAL will be returned.
1865 *
1866 * Note that this function may not be used for the superblock values that
1867 * are tracked with the in-memory per-cpu counters - a direct call to
1868 * xfs_icsb_modify_counters is required for these.
1878 */ 1869 */
1879int 1870int
1880xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) 1871xfs_mod_incore_sb_batch(
1872 struct xfs_mount *mp,
1873 xfs_mod_sb_t *msb,
1874 uint nmsb,
1875 int rsvd)
1881{ 1876{
1882 int status=0; 1877 xfs_mod_sb_t *msbp = &msb[0];
1883 xfs_mod_sb_t *msbp; 1878 int error = 0;
1884 1879
1885 /* 1880 /*
1886 * Loop through the array of mod structures and apply each 1881 * Loop through the array of mod structures and apply each individually.
1887 * individually. If any fail, then back out all those 1882 * If any fail, then back out all those which have already been applied.
1888 * which have already been applied. Do all of this within 1883 * Do all of this within the scope of the m_sb_lock so that all of the
1889 * the scope of the m_sb_lock so that all of the changes will 1884 * changes will be atomic.
1890 * be atomic.
1891 */ 1885 */
1892 spin_lock(&mp->m_sb_lock); 1886 spin_lock(&mp->m_sb_lock);
1893 msbp = &msb[0];
1894 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1887 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
1895 /* 1888 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
1896 * Apply the delta at index n. If it fails, break 1889 msbp->msb_field > XFS_SBS_FDBLOCKS);
1897 * from the loop so we'll fall into the undo loop
1898 * below.
1899 */
1900 switch (msbp->msb_field) {
1901#ifdef HAVE_PERCPU_SB
1902 case XFS_SBS_ICOUNT:
1903 case XFS_SBS_IFREE:
1904 case XFS_SBS_FDBLOCKS:
1905 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1906 spin_unlock(&mp->m_sb_lock);
1907 status = xfs_icsb_modify_counters(mp,
1908 msbp->msb_field,
1909 msbp->msb_delta, rsvd);
1910 spin_lock(&mp->m_sb_lock);
1911 break;
1912 }
1913 /* FALLTHROUGH */
1914#endif
1915 default:
1916 status = xfs_mod_incore_sb_unlocked(mp,
1917 msbp->msb_field,
1918 msbp->msb_delta, rsvd);
1919 break;
1920 }
1921 1890
1922 if (status != 0) { 1891 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
1923 break; 1892 msbp->msb_delta, rsvd);
1924 } 1893 if (error)
1894 goto unwind;
1925 } 1895 }
1896 spin_unlock(&mp->m_sb_lock);
1897 return 0;
1926 1898
1927 /* 1899unwind:
1928 * If we didn't complete the loop above, then back out 1900 while (--msbp >= msb) {
1929 * any changes made to the superblock. If you add code 1901 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
1930 * between the loop above and here, make sure that you 1902 -msbp->msb_delta, rsvd);
1931 * preserve the value of status. Loop back until 1903 ASSERT(error == 0);
1932 * we step below the beginning of the array. Make sure
1933 * we don't touch anything back there.
1934 */
1935 if (status != 0) {
1936 msbp--;
1937 while (msbp >= msb) {
1938 switch (msbp->msb_field) {
1939#ifdef HAVE_PERCPU_SB
1940 case XFS_SBS_ICOUNT:
1941 case XFS_SBS_IFREE:
1942 case XFS_SBS_FDBLOCKS:
1943 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1944 spin_unlock(&mp->m_sb_lock);
1945 status = xfs_icsb_modify_counters(mp,
1946 msbp->msb_field,
1947 -(msbp->msb_delta),
1948 rsvd);
1949 spin_lock(&mp->m_sb_lock);
1950 break;
1951 }
1952 /* FALLTHROUGH */
1953#endif
1954 default:
1955 status = xfs_mod_incore_sb_unlocked(mp,
1956 msbp->msb_field,
1957 -(msbp->msb_delta),
1958 rsvd);
1959 break;
1960 }
1961 ASSERT(status == 0);
1962 msbp--;
1963 }
1964 } 1904 }
1965 spin_unlock(&mp->m_sb_lock); 1905 spin_unlock(&mp->m_sb_lock);
1966 return status; 1906 return error;
1967} 1907}
1968 1908
1969/* 1909/*
@@ -2001,18 +1941,13 @@ xfs_getsb(
2001 */ 1941 */
2002void 1942void
2003xfs_freesb( 1943xfs_freesb(
2004 xfs_mount_t *mp) 1944 struct xfs_mount *mp)
2005{ 1945{
2006 xfs_buf_t *bp; 1946 struct xfs_buf *bp = mp->m_sb_bp;
2007 1947
2008 /* 1948 xfs_buf_lock(bp);
2009 * Use xfs_getsb() so that the buffer will be locked
2010 * when we call xfs_buf_relse().
2011 */
2012 bp = xfs_getsb(mp, 0);
2013 XFS_BUF_UNMANAGE(bp);
2014 xfs_buf_relse(bp);
2015 mp->m_sb_bp = NULL; 1949 mp->m_sb_bp = NULL;
1950 xfs_buf_relse(bp);
2016} 1951}
2017 1952
2018/* 1953/*
@@ -2499,7 +2434,7 @@ xfs_icsb_balance_counter(
2499 spin_unlock(&mp->m_sb_lock); 2434 spin_unlock(&mp->m_sb_lock);
2500} 2435}
2501 2436
2502STATIC int 2437int
2503xfs_icsb_modify_counters( 2438xfs_icsb_modify_counters(
2504 xfs_mount_t *mp, 2439 xfs_mount_t *mp,
2505 xfs_sb_field_t field, 2440 xfs_sb_field_t field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5761087ee8ea..5861b4980740 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations {
53 53
54#include "xfs_sync.h" 54#include "xfs_sync.h"
55 55
56struct cred;
57struct log; 56struct log;
58struct xfs_mount_args; 57struct xfs_mount_args;
59struct xfs_inode; 58struct xfs_inode;
@@ -66,65 +65,6 @@ struct xfs_nameops;
66struct xfs_ail; 65struct xfs_ail;
67struct xfs_quotainfo; 66struct xfs_quotainfo;
68 67
69
70/*
71 * Prototypes and functions for the Data Migration subsystem.
72 */
73
74typedef int (*xfs_send_data_t)(int, struct xfs_inode *,
75 xfs_off_t, size_t, int, int *);
76typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint);
77typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
79 struct xfs_inode *, dm_right_t,
80 struct xfs_inode *, dm_right_t,
81 const unsigned char *, const unsigned char *,
82 mode_t, int, int);
83typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
84 char *, char *);
85typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
86 dm_right_t, mode_t, int, int);
87
88typedef struct xfs_dmops {
89 xfs_send_data_t xfs_send_data;
90 xfs_send_mmap_t xfs_send_mmap;
91 xfs_send_destroy_t xfs_send_destroy;
92 xfs_send_namesp_t xfs_send_namesp;
93 xfs_send_mount_t xfs_send_mount;
94 xfs_send_unmount_t xfs_send_unmount;
95} xfs_dmops_t;
96
97#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
98 (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
99
100#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
101 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
102#define XFS_SEND_MMAP(mp, vma,fl) \
103 (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
104#define XFS_SEND_DESTROY(mp, ip,right) \
105 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
106#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
107 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
108#define XFS_SEND_MOUNT(mp,right,path,name) \
109 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
110#define XFS_SEND_PREUNMOUNT(mp) \
111do { \
112 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
113 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
114 (mp)->m_rootip, DM_RIGHT_NULL, \
115 (mp)->m_rootip, DM_RIGHT_NULL, \
116 NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
117 } \
118} while (0)
119#define XFS_SEND_UNMOUNT(mp) \
120do { \
121 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
122 (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
123 DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
124 } \
125} while (0)
126
127
128#ifdef HAVE_PERCPU_SB 68#ifdef HAVE_PERCPU_SB
129 69
130/* 70/*
@@ -150,6 +90,8 @@ extern void xfs_icsb_reinit_counters(struct xfs_mount *);
150extern void xfs_icsb_destroy_counters(struct xfs_mount *); 90extern void xfs_icsb_destroy_counters(struct xfs_mount *);
151extern void xfs_icsb_sync_counters(struct xfs_mount *, int); 91extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
152extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); 92extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
93extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
94 int64_t, int);
153 95
154#else 96#else
155#define xfs_icsb_init_counters(mp) (0) 97#define xfs_icsb_init_counters(mp) (0)
@@ -157,6 +99,8 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
157#define xfs_icsb_reinit_counters(mp) do { } while (0) 99#define xfs_icsb_reinit_counters(mp) do { } while (0)
158#define xfs_icsb_sync_counters(mp, flags) do { } while (0) 100#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
159#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 101#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
102#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
103 xfs_mod_incore_sb(mp, field, delta, rsvd)
160#endif 104#endif
161 105
162typedef struct xfs_mount { 106typedef struct xfs_mount {
@@ -241,8 +185,6 @@ typedef struct xfs_mount {
241 uint m_chsize; /* size of next field */ 185 uint m_chsize; /* size of next field */
242 struct xfs_chash *m_chash; /* fs private inode per-cluster 186 struct xfs_chash *m_chash; /* fs private inode per-cluster
243 * hash table */ 187 * hash table */
244 struct xfs_dmops *m_dm_ops; /* vector of DMI ops */
245 struct xfs_qmops *m_qm_ops; /* vector of XQM ops */
246 atomic_t m_active_trans; /* number trans frozen */ 188 atomic_t m_active_trans; /* number trans frozen */
247#ifdef HAVE_PERCPU_SB 189#ifdef HAVE_PERCPU_SB
248 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ 190 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
@@ -269,7 +211,6 @@ typedef struct xfs_mount {
269 must be synchronous except 211 must be synchronous except
270 for space allocations */ 212 for space allocations */
271#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ 213#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
272#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */
273#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 214#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
274#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 215#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
275 operations, typically for 216 operations, typically for
@@ -282,8 +223,6 @@ typedef struct xfs_mount {
282#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ 223#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
283#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 224#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
284#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ 225#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
285#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */
286 /* osyncisdsync is now default*/
287#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above 226#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above
288 * 32 bits in size */ 227 * 32 bits in size */
289#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */ 228#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */
@@ -296,8 +235,6 @@ typedef struct xfs_mount {
296#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ 235#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */
297#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred 236#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred
298 * I/O size in stat() */ 237 * I/O size in stat() */
299#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock
300 counters */
301#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams 238#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
302 allocator */ 239 allocator */
303#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ 240#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
@@ -391,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
391 * perag get/put wrappers for ref counting 328 * perag get/put wrappers for ref counting
392 */ 329 */
393struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); 330struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
331struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
332 int tag);
394void xfs_perag_put(struct xfs_perag *pag); 333void xfs_perag_put(struct xfs_perag *pag);
395 334
396/* 335/*
@@ -440,11 +379,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
440 379
441extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 380extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
442 381
443extern int xfs_dmops_get(struct xfs_mount *);
444extern void xfs_dmops_put(struct xfs_mount *);
445
446extern struct xfs_dmops xfs_dmcore_xfs;
447
448#endif /* __KERNEL__ */ 382#endif /* __KERNEL__ */
449 383
450extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 384extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h
deleted file mode 100644
index 2dec79edb510..000000000000
--- a/fs/xfs/xfs_refcache.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_REFCACHE_H__
19#define __XFS_REFCACHE_H__
20
21#ifdef HAVE_REFCACHE
22/*
23 * Maximum size (in inodes) for the NFS reference cache
24 */
25#define XFS_REFCACHE_SIZE_MAX 512
26
27struct xfs_inode;
28struct xfs_mount;
29
30extern void xfs_refcache_insert(struct xfs_inode *);
31extern void xfs_refcache_purge_ip(struct xfs_inode *);
32extern void xfs_refcache_purge_mp(struct xfs_mount *);
33extern void xfs_refcache_purge_some(struct xfs_mount *);
34extern void xfs_refcache_resize(int);
35extern void xfs_refcache_destroy(void);
36
37extern void xfs_refcache_iunlock(struct xfs_inode *, uint);
38
39#else
40
41#define xfs_refcache_insert(ip) do { } while (0)
42#define xfs_refcache_purge_ip(ip) do { } while (0)
43#define xfs_refcache_purge_mp(mp) do { } while (0)
44#define xfs_refcache_purge_some(mp) do { } while (0)
45#define xfs_refcache_resize(size) do { } while (0)
46#define xfs_refcache_destroy() do { } while (0)
47
48#define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags)
49
50#endif
51
52#endif /* __XFS_REFCACHE_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index fc1cda23b817..d2af0a8381a6 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -24,12 +24,9 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 30#include "xfs_dinode.h"
34#include "xfs_inode.h" 31#include "xfs_inode.h"
35#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
@@ -116,20 +113,7 @@ xfs_rename(
116 int spaceres; 113 int spaceres;
117 int num_inodes; 114 int num_inodes;
118 115
119 xfs_itrace_entry(src_dp); 116 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
120 xfs_itrace_entry(target_dp);
121
122 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
123 DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
124 error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
125 src_dp, DM_RIGHT_NULL,
126 target_dp, DM_RIGHT_NULL,
127 src_name->name, target_name->name,
128 0, 0, 0);
129 if (error)
130 return error;
131 }
132 /* Return through std_return after this point. */
133 117
134 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
135 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); 119 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
@@ -184,26 +168,14 @@ xfs_rename(
184 /* 168 /*
185 * Join all the inodes to the transaction. From this point on, 169 * Join all the inodes to the transaction. From this point on,
186 * we can rely on either trans_commit or trans_cancel to unlock 170 * we can rely on either trans_commit or trans_cancel to unlock
187 * them. Note that we need to add a vnode reference to the 171 * them.
188 * directories since trans_commit & trans_cancel will decrement
189 * them when they unlock the inodes. Also, we need to be careful
190 * not to add an inode to the transaction more than once.
191 */ 172 */
192 IHOLD(src_dp); 173 xfs_trans_ijoin_ref(tp, src_dp, XFS_ILOCK_EXCL);
193 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 174 if (new_parent)
194 175 xfs_trans_ijoin_ref(tp, target_dp, XFS_ILOCK_EXCL);
195 if (new_parent) { 176 xfs_trans_ijoin_ref(tp, src_ip, XFS_ILOCK_EXCL);
196 IHOLD(target_dp); 177 if (target_ip)
197 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 178 xfs_trans_ijoin_ref(tp, target_ip, XFS_ILOCK_EXCL);
198 }
199
200 IHOLD(src_ip);
201 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
202
203 if (target_ip) {
204 IHOLD(target_ip);
205 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
206 }
207 179
208 /* 180 /*
209 * If we are using project inheritance, we only allow renames 181 * If we are using project inheritance, we only allow renames
@@ -211,7 +183,7 @@ xfs_rename(
211 * tree quota mechanism would be circumvented. 183 * tree quota mechanism would be circumvented.
212 */ 184 */
213 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 185 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
214 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { 186 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
215 error = XFS_ERROR(EXDEV); 187 error = XFS_ERROR(EXDEV);
216 goto error_return; 188 goto error_return;
217 } 189 }
@@ -239,7 +211,9 @@ xfs_rename(
239 goto error_return; 211 goto error_return;
240 if (error) 212 if (error)
241 goto abort_return; 213 goto abort_return;
242 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 214
215 xfs_trans_ichgtime(tp, target_dp,
216 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
243 217
244 if (new_parent && src_is_directory) { 218 if (new_parent && src_is_directory) {
245 error = xfs_bumplink(tp, target_dp); 219 error = xfs_bumplink(tp, target_dp);
@@ -277,7 +251,9 @@ xfs_rename(
277 &first_block, &free_list, spaceres); 251 &first_block, &free_list, spaceres);
278 if (error) 252 if (error)
279 goto abort_return; 253 goto abort_return;
280 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 254
255 xfs_trans_ichgtime(tp, target_dp,
256 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
281 257
282 /* 258 /*
283 * Decrement the link count on the target since the target 259 * Decrement the link count on the target since the target
@@ -320,7 +296,7 @@ xfs_rename(
320 * inode isn't really being changed, but old unix file systems did 296 * inode isn't really being changed, but old unix file systems did
321 * it and some incremental backup programs won't work without it. 297 * it and some incremental backup programs won't work without it.
322 */ 298 */
323 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
324 300
325 /* 301 /*
326 * Adjust the link count on src_dp. This is necessary when 302 * Adjust the link count on src_dp. This is necessary when
@@ -343,7 +319,7 @@ xfs_rename(
343 if (error) 319 if (error)
344 goto abort_return; 320 goto abort_return;
345 321
346 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 322 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
347 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 323 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
348 if (new_parent) 324 if (new_parent)
349 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 325 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
@@ -369,26 +345,13 @@ xfs_rename(
369 * trans_commit will unlock src_ip, target_ip & decrement 345 * trans_commit will unlock src_ip, target_ip & decrement
370 * the vnode references. 346 * the vnode references.
371 */ 347 */
372 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 348 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
373
374 /* Fall through to std_return with error = 0 or errno from
375 * xfs_trans_commit */
376std_return:
377 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
378 DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
379 (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
380 src_dp, DM_RIGHT_NULL,
381 target_dp, DM_RIGHT_NULL,
382 src_name->name, target_name->name,
383 0, error, 0);
384 }
385 return error;
386 349
387 abort_return: 350 abort_return:
388 cancel_flags |= XFS_TRANS_ABORT; 351 cancel_flags |= XFS_TRANS_ABORT;
389 /* FALLTHROUGH */
390 error_return: 352 error_return:
391 xfs_bmap_cancel(&free_list); 353 xfs_bmap_cancel(&free_list);
392 xfs_trans_cancel(tp, cancel_flags); 354 xfs_trans_cancel(tp, cancel_flags);
393 goto std_return; 355 std_return:
356 return error;
394} 357}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a2d32ce335aa..12a191385310 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -25,17 +25,10 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 30#include "xfs_dinode.h"
36#include "xfs_inode.h" 31#include "xfs_inode.h"
37#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 32#include "xfs_alloc.h"
40#include "xfs_bmap.h" 33#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 34#include "xfs_rtalloc.h"
@@ -46,6 +39,7 @@
46#include "xfs_trans_space.h" 39#include "xfs_trans_space.h"
47#include "xfs_utils.h" 40#include "xfs_utils.h"
48#include "xfs_trace.h" 41#include "xfs_trace.h"
42#include "xfs_buf.h"
49 43
50 44
51/* 45/*
@@ -129,7 +123,7 @@ xfs_growfs_rt_alloc(
129 cancelflags |= XFS_TRANS_ABORT; 123 cancelflags |= XFS_TRANS_ABORT;
130 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, 124 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks,
131 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, 125 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock,
132 resblks, &map, &nmap, &flist, NULL); 126 resblks, &map, &nmap, &flist);
133 if (!error && nmap < 1) 127 if (!error && nmap < 1)
134 error = XFS_ERROR(ENOSPC); 128 error = XFS_ERROR(ENOSPC);
135 if (error) 129 if (error)
@@ -1890,13 +1884,13 @@ xfs_growfs_rt(
1890 /* 1884 /*
1891 * Read in the last block of the device, make sure it exists. 1885 * Read in the last block of the device, make sure it exists.
1892 */ 1886 */
1893 error = xfs_read_buf(mp, mp->m_rtdev_targp, 1887 bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
1894 XFS_FSB_TO_BB(mp, nrblocks - 1), 1888 XFS_FSB_TO_BB(mp, nrblocks - 1),
1895 XFS_FSB_TO_BB(mp, 1), 0, &bp); 1889 XFS_FSB_TO_B(mp, 1), 0);
1896 if (error) 1890 if (!bp)
1897 return error; 1891 return EIO;
1898 ASSERT(bp);
1899 xfs_buf_relse(bp); 1892 xfs_buf_relse(bp);
1893
1900 /* 1894 /*
1901 * Calculate new parameters. These are the final values to be reached. 1895 * Calculate new parameters. These are the final values to be reached.
1902 */ 1896 */
@@ -2222,7 +2216,6 @@ xfs_rtmount_init(
2222{ 2216{
2223 xfs_buf_t *bp; /* buffer for last block of subvolume */ 2217 xfs_buf_t *bp; /* buffer for last block of subvolume */
2224 xfs_daddr_t d; /* address of last block of subvolume */ 2218 xfs_daddr_t d; /* address of last block of subvolume */
2225 int error; /* error return value */
2226 xfs_sb_t *sbp; /* filesystem superblock copy in mount */ 2219 xfs_sb_t *sbp; /* filesystem superblock copy in mount */
2227 2220
2228 sbp = &mp->m_sb; 2221 sbp = &mp->m_sb;
@@ -2249,15 +2242,12 @@ xfs_rtmount_init(
2249 (unsigned long long) mp->m_sb.sb_rblocks); 2242 (unsigned long long) mp->m_sb.sb_rblocks);
2250 return XFS_ERROR(EFBIG); 2243 return XFS_ERROR(EFBIG);
2251 } 2244 }
2252 error = xfs_read_buf(mp, mp->m_rtdev_targp, 2245 bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
2253 d - XFS_FSB_TO_BB(mp, 1), 2246 d - XFS_FSB_TO_BB(mp, 1),
2254 XFS_FSB_TO_BB(mp, 1), 0, &bp); 2247 XFS_FSB_TO_B(mp, 1), 0);
2255 if (error) { 2248 if (!bp) {
2256 cmn_err(CE_WARN, 2249 cmn_err(CE_WARN, "XFS: realtime device size check failed");
2257 "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); 2250 return EIO;
2258 if (error == ENOSPC)
2259 return XFS_ERROR(EFBIG);
2260 return error;
2261 } 2251 }
2262 xfs_buf_relse(bp); 2252 xfs_buf_relse(bp);
2263 return 0; 2253 return 0;
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index e336742a58a4..56861d5daaef 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -24,27 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_inode_item.h"
38#include "xfs_itable.h"
39#include "xfs_btree.h"
40#include "xfs_alloc.h"
41#include "xfs_ialloc.h"
42#include "xfs_attr.h"
43#include "xfs_bmap.h"
44#include "xfs_error.h" 31#include "xfs_error.h"
45#include "xfs_buf_item.h"
46#include "xfs_rw.h" 32#include "xfs_rw.h"
47#include "xfs_trace.h"
48 33
49/* 34/*
50 * Force a shutdown of the filesystem instantly while keeping 35 * Force a shutdown of the filesystem instantly while keeping
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 1b017c657494..1eb2ba586814 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -80,10 +80,12 @@ struct xfs_mount;
80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
82#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ 82#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */
83#define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */
83 84
84#define XFS_SB_VERSION2_OKREALFBITS \ 85#define XFS_SB_VERSION2_OKREALFBITS \
85 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ 86 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
86 XFS_SB_VERSION2_ATTR2BIT) 87 XFS_SB_VERSION2_ATTR2BIT | \
88 XFS_SB_VERSION2_PROJID32BIT)
87#define XFS_SB_VERSION2_OKSASHFBITS \ 89#define XFS_SB_VERSION2_OKSASHFBITS \
88 (0) 90 (0)
89#define XFS_SB_VERSION2_OKREALBITS \ 91#define XFS_SB_VERSION2_OKREALBITS \
@@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
495 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; 497 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
496} 498}
497 499
500static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
501{
502 return xfs_sb_version_hasmorebits(sbp) &&
503 (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT);
504}
505
498/* 506/*
499 * end of superblock version macros 507 * end of superblock version macros
500 */ 508 */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28547dfce037..f6d956b7711e 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (C) 2010 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -24,16 +25,12 @@
24#include "xfs_trans.h" 25#include "xfs_trans.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 34#include "xfs_dinode.h"
38#include "xfs_inode.h" 35#include "xfs_inode.h"
39#include "xfs_btree.h" 36#include "xfs_btree.h"
@@ -47,6 +44,7 @@
47#include "xfs_trace.h" 44#include "xfs_trace.h"
48 45
49kmem_zone_t *xfs_trans_zone; 46kmem_zone_t *xfs_trans_zone;
47kmem_zone_t *xfs_log_item_desc_zone;
50 48
51 49
52/* 50/*
@@ -597,8 +595,7 @@ _xfs_trans_alloc(
597 tp->t_magic = XFS_TRANS_MAGIC; 595 tp->t_magic = XFS_TRANS_MAGIC;
598 tp->t_type = type; 596 tp->t_type = type;
599 tp->t_mountp = mp; 597 tp->t_mountp = mp;
600 tp->t_items_free = XFS_LIC_NUM_SLOTS; 598 INIT_LIST_HEAD(&tp->t_items);
601 xfs_lic_init(&(tp->t_items));
602 INIT_LIST_HEAD(&tp->t_busy); 599 INIT_LIST_HEAD(&tp->t_busy);
603 return tp; 600 return tp;
604} 601}
@@ -643,8 +640,7 @@ xfs_trans_dup(
643 ntp->t_magic = XFS_TRANS_MAGIC; 640 ntp->t_magic = XFS_TRANS_MAGIC;
644 ntp->t_type = tp->t_type; 641 ntp->t_type = tp->t_type;
645 ntp->t_mountp = tp->t_mountp; 642 ntp->t_mountp = tp->t_mountp;
646 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 643 INIT_LIST_HEAD(&ntp->t_items);
647 xfs_lic_init(&(ntp->t_items));
648 INIT_LIST_HEAD(&ntp->t_busy); 644 INIT_LIST_HEAD(&ntp->t_busy);
649 645
650 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 646 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -700,7 +696,7 @@ xfs_trans_reserve(
700 * fail if the count would go below zero. 696 * fail if the count would go below zero.
701 */ 697 */
702 if (blocks > 0) { 698 if (blocks > 0) {
703 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 699 error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
704 -((int64_t)blocks), rsvd); 700 -((int64_t)blocks), rsvd);
705 if (error != 0) { 701 if (error != 0) {
706 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 702 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -771,7 +767,7 @@ undo_log:
771 767
772undo_blocks: 768undo_blocks:
773 if (blocks > 0) { 769 if (blocks > 0) {
774 (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 770 xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
775 (int64_t)blocks, rsvd); 771 (int64_t)blocks, rsvd);
776 tp->t_blk_res = 0; 772 tp->t_blk_res = 0;
777 } 773 }
@@ -1013,7 +1009,7 @@ void
1013xfs_trans_unreserve_and_mod_sb( 1009xfs_trans_unreserve_and_mod_sb(
1014 xfs_trans_t *tp) 1010 xfs_trans_t *tp)
1015{ 1011{
1016 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ 1012 xfs_mod_sb_t msb[9]; /* If you add cases, add entries */
1017 xfs_mod_sb_t *msbp; 1013 xfs_mod_sb_t *msbp;
1018 xfs_mount_t *mp = tp->t_mountp; 1014 xfs_mount_t *mp = tp->t_mountp;
1019 /* REFERENCED */ 1015 /* REFERENCED */
@@ -1021,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb(
1021 int rsvd; 1017 int rsvd;
1022 int64_t blkdelta = 0; 1018 int64_t blkdelta = 0;
1023 int64_t rtxdelta = 0; 1019 int64_t rtxdelta = 0;
1020 int64_t idelta = 0;
1021 int64_t ifreedelta = 0;
1024 1022
1025 msbp = msb; 1023 msbp = msb;
1026 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 1024 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
1027 1025
1028 /* calculate free blocks delta */ 1026 /* calculate deltas */
1029 if (tp->t_blk_res > 0) 1027 if (tp->t_blk_res > 0)
1030 blkdelta = tp->t_blk_res; 1028 blkdelta = tp->t_blk_res;
1031
1032 if ((tp->t_fdblocks_delta != 0) && 1029 if ((tp->t_fdblocks_delta != 0) &&
1033 (xfs_sb_version_haslazysbcount(&mp->m_sb) || 1030 (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
1034 (tp->t_flags & XFS_TRANS_SB_DIRTY))) 1031 (tp->t_flags & XFS_TRANS_SB_DIRTY)))
1035 blkdelta += tp->t_fdblocks_delta; 1032 blkdelta += tp->t_fdblocks_delta;
1036 1033
1037 if (blkdelta != 0) {
1038 msbp->msb_field = XFS_SBS_FDBLOCKS;
1039 msbp->msb_delta = blkdelta;
1040 msbp++;
1041 }
1042
1043 /* calculate free realtime extents delta */
1044 if (tp->t_rtx_res > 0) 1034 if (tp->t_rtx_res > 0)
1045 rtxdelta = tp->t_rtx_res; 1035 rtxdelta = tp->t_rtx_res;
1046
1047 if ((tp->t_frextents_delta != 0) && 1036 if ((tp->t_frextents_delta != 0) &&
1048 (tp->t_flags & XFS_TRANS_SB_DIRTY)) 1037 (tp->t_flags & XFS_TRANS_SB_DIRTY))
1049 rtxdelta += tp->t_frextents_delta; 1038 rtxdelta += tp->t_frextents_delta;
1050 1039
1040 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
1041 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
1042 idelta = tp->t_icount_delta;
1043 ifreedelta = tp->t_ifree_delta;
1044 }
1045
1046 /* apply the per-cpu counters */
1047 if (blkdelta) {
1048 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
1049 blkdelta, rsvd);
1050 if (error)
1051 goto out;
1052 }
1053
1054 if (idelta) {
1055 error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
1056 idelta, rsvd);
1057 if (error)
1058 goto out_undo_fdblocks;
1059 }
1060
1061 if (ifreedelta) {
1062 error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
1063 ifreedelta, rsvd);
1064 if (error)
1065 goto out_undo_icount;
1066 }
1067
1068 /* apply remaining deltas */
1051 if (rtxdelta != 0) { 1069 if (rtxdelta != 0) {
1052 msbp->msb_field = XFS_SBS_FREXTENTS; 1070 msbp->msb_field = XFS_SBS_FREXTENTS;
1053 msbp->msb_delta = rtxdelta; 1071 msbp->msb_delta = rtxdelta;
1054 msbp++; 1072 msbp++;
1055 } 1073 }
1056 1074
1057 /* apply remaining deltas */
1058
1059 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
1060 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
1061 if (tp->t_icount_delta != 0) {
1062 msbp->msb_field = XFS_SBS_ICOUNT;
1063 msbp->msb_delta = tp->t_icount_delta;
1064 msbp++;
1065 }
1066 if (tp->t_ifree_delta != 0) {
1067 msbp->msb_field = XFS_SBS_IFREE;
1068 msbp->msb_delta = tp->t_ifree_delta;
1069 msbp++;
1070 }
1071 }
1072
1073 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 1075 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
1074 if (tp->t_dblocks_delta != 0) { 1076 if (tp->t_dblocks_delta != 0) {
1075 msbp->msb_field = XFS_SBS_DBLOCKS; 1077 msbp->msb_field = XFS_SBS_DBLOCKS;
@@ -1119,7 +1121,125 @@ xfs_trans_unreserve_and_mod_sb(
1119 if (msbp > msb) { 1121 if (msbp > msb) {
1120 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, 1122 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
1121 (uint)(msbp - msb), rsvd); 1123 (uint)(msbp - msb), rsvd);
1122 ASSERT(error == 0); 1124 if (error)
1125 goto out_undo_ifreecount;
1126 }
1127
1128 return;
1129
1130out_undo_ifreecount:
1131 if (ifreedelta)
1132 xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
1133out_undo_icount:
1134 if (idelta)
1135 xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
1136out_undo_fdblocks:
1137 if (blkdelta)
1138 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
1139out:
1140 ASSERT(error == 0);
1141 return;
1142}
1143
1144/*
1145 * Add the given log item to the transaction's list of log items.
1146 *
1147 * The log item will now point to its new descriptor with its li_desc field.
1148 */
1149void
1150xfs_trans_add_item(
1151 struct xfs_trans *tp,
1152 struct xfs_log_item *lip)
1153{
1154 struct xfs_log_item_desc *lidp;
1155
1156 ASSERT(lip->li_mountp == tp->t_mountp);
1157 ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
1158
1159 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
1160
1161 lidp->lid_item = lip;
1162 lidp->lid_flags = 0;
1163 lidp->lid_size = 0;
1164 list_add_tail(&lidp->lid_trans, &tp->t_items);
1165
1166 lip->li_desc = lidp;
1167}
1168
1169STATIC void
1170xfs_trans_free_item_desc(
1171 struct xfs_log_item_desc *lidp)
1172{
1173 list_del_init(&lidp->lid_trans);
1174 kmem_zone_free(xfs_log_item_desc_zone, lidp);
1175}
1176
1177/*
1178 * Unlink and free the given descriptor.
1179 */
1180void
1181xfs_trans_del_item(
1182 struct xfs_log_item *lip)
1183{
1184 xfs_trans_free_item_desc(lip->li_desc);
1185 lip->li_desc = NULL;
1186}
1187
1188/*
1189 * Unlock all of the items of a transaction and free all the descriptors
1190 * of that transaction.
1191 */
1192void
1193xfs_trans_free_items(
1194 struct xfs_trans *tp,
1195 xfs_lsn_t commit_lsn,
1196 int flags)
1197{
1198 struct xfs_log_item_desc *lidp, *next;
1199
1200 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1201 struct xfs_log_item *lip = lidp->lid_item;
1202
1203 lip->li_desc = NULL;
1204
1205 if (commit_lsn != NULLCOMMITLSN)
1206 IOP_COMMITTING(lip, commit_lsn);
1207 if (flags & XFS_TRANS_ABORT)
1208 lip->li_flags |= XFS_LI_ABORTED;
1209 IOP_UNLOCK(lip);
1210
1211 xfs_trans_free_item_desc(lidp);
1212 }
1213}
1214
1215/*
1216 * Unlock the items associated with a transaction.
1217 *
1218 * Items which were not logged should be freed. Those which were logged must
1219 * still be tracked so they can be unpinned when the transaction commits.
1220 */
1221STATIC void
1222xfs_trans_unlock_items(
1223 struct xfs_trans *tp,
1224 xfs_lsn_t commit_lsn)
1225{
1226 struct xfs_log_item_desc *lidp, *next;
1227
1228 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1229 struct xfs_log_item *lip = lidp->lid_item;
1230
1231 lip->li_desc = NULL;
1232
1233 if (commit_lsn != NULLCOMMITLSN)
1234 IOP_COMMITTING(lip, commit_lsn);
1235 IOP_UNLOCK(lip);
1236
1237 /*
1238 * Free the descriptor if the item is not dirty
1239 * within this transaction.
1240 */
1241 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1242 xfs_trans_free_item_desc(lidp);
1123 } 1243 }
1124} 1244}
1125 1245
@@ -1134,30 +1254,27 @@ xfs_trans_count_vecs(
1134 struct xfs_trans *tp) 1254 struct xfs_trans *tp)
1135{ 1255{
1136 int nvecs; 1256 int nvecs;
1137 xfs_log_item_desc_t *lidp; 1257 struct xfs_log_item_desc *lidp;
1138 1258
1139 nvecs = 1; 1259 nvecs = 1;
1140 lidp = xfs_trans_first_item(tp);
1141 ASSERT(lidp != NULL);
1142 1260
1143 /* In the non-debug case we need to start bailing out if we 1261 /* In the non-debug case we need to start bailing out if we
1144 * didn't find a log_item here, return zero and let trans_commit 1262 * didn't find a log_item here, return zero and let trans_commit
1145 * deal with it. 1263 * deal with it.
1146 */ 1264 */
1147 if (lidp == NULL) 1265 if (list_empty(&tp->t_items)) {
1266 ASSERT(0);
1148 return 0; 1267 return 0;
1268 }
1149 1269
1150 while (lidp != NULL) { 1270 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1151 /* 1271 /*
1152 * Skip items which aren't dirty in this transaction. 1272 * Skip items which aren't dirty in this transaction.
1153 */ 1273 */
1154 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1274 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1155 lidp = xfs_trans_next_item(tp, lidp);
1156 continue; 1275 continue;
1157 }
1158 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1276 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1159 nvecs += lidp->lid_size; 1277 nvecs += lidp->lid_size;
1160 lidp = xfs_trans_next_item(tp, lidp);
1161 } 1278 }
1162 1279
1163 return nvecs; 1280 return nvecs;
@@ -1177,7 +1294,7 @@ xfs_trans_fill_vecs(
1177 struct xfs_trans *tp, 1294 struct xfs_trans *tp,
1178 struct xfs_log_iovec *log_vector) 1295 struct xfs_log_iovec *log_vector)
1179{ 1296{
1180 xfs_log_item_desc_t *lidp; 1297 struct xfs_log_item_desc *lidp;
1181 struct xfs_log_iovec *vecp; 1298 struct xfs_log_iovec *vecp;
1182 uint nitems; 1299 uint nitems;
1183 1300
@@ -1188,14 +1305,11 @@ xfs_trans_fill_vecs(
1188 vecp = log_vector + 1; 1305 vecp = log_vector + 1;
1189 1306
1190 nitems = 0; 1307 nitems = 0;
1191 lidp = xfs_trans_first_item(tp); 1308 ASSERT(!list_empty(&tp->t_items));
1192 ASSERT(lidp); 1309 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1193 while (lidp) {
1194 /* Skip items which aren't dirty in this transaction. */ 1310 /* Skip items which aren't dirty in this transaction. */
1195 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1311 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1196 lidp = xfs_trans_next_item(tp, lidp);
1197 continue; 1312 continue;
1198 }
1199 1313
1200 /* 1314 /*
1201 * The item may be marked dirty but not log anything. This can 1315 * The item may be marked dirty but not log anything. This can
@@ -1206,7 +1320,6 @@ xfs_trans_fill_vecs(
1206 IOP_FORMAT(lidp->lid_item, vecp); 1320 IOP_FORMAT(lidp->lid_item, vecp);
1207 vecp += lidp->lid_size; 1321 vecp += lidp->lid_size;
1208 IOP_PIN(lidp->lid_item); 1322 IOP_PIN(lidp->lid_item);
1209 lidp = xfs_trans_next_item(tp, lidp);
1210 } 1323 }
1211 1324
1212 /* 1325 /*
@@ -1284,7 +1397,7 @@ xfs_trans_item_committed(
1284 * log item flags, if anyone else stales the buffer we do not want to 1397 * log item flags, if anyone else stales the buffer we do not want to
1285 * pay any attention to it. 1398 * pay any attention to it.
1286 */ 1399 */
1287 IOP_UNPIN(lip); 1400 IOP_UNPIN(lip, 0);
1288} 1401}
1289 1402
1290/* 1403/*
@@ -1298,27 +1411,15 @@ xfs_trans_item_committed(
1298 */ 1411 */
1299STATIC void 1412STATIC void
1300xfs_trans_committed( 1413xfs_trans_committed(
1301 struct xfs_trans *tp, 1414 void *arg,
1302 int abortflag) 1415 int abortflag)
1303{ 1416{
1304 xfs_log_item_desc_t *lidp; 1417 struct xfs_trans *tp = arg;
1305 xfs_log_item_chunk_t *licp; 1418 struct xfs_log_item_desc *lidp, *next;
1306 xfs_log_item_chunk_t *next_licp;
1307
1308 /* Call the transaction's completion callback if there is one. */
1309 if (tp->t_callback != NULL)
1310 tp->t_callback(tp, tp->t_callarg);
1311 1419
1312 for (lidp = xfs_trans_first_item(tp); 1420 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1313 lidp != NULL;
1314 lidp = xfs_trans_next_item(tp, lidp)) {
1315 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); 1421 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1316 } 1422 xfs_trans_free_item_desc(lidp);
1317
1318 /* free the item chunks, ignoring the embedded chunk */
1319 for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) {
1320 next_licp = licp->lic_next;
1321 kmem_free(licp);
1322 } 1423 }
1323 1424
1324 xfs_trans_free(tp); 1425 xfs_trans_free(tp);
@@ -1333,16 +1434,14 @@ xfs_trans_uncommit(
1333 struct xfs_trans *tp, 1434 struct xfs_trans *tp,
1334 uint flags) 1435 uint flags)
1335{ 1436{
1336 xfs_log_item_desc_t *lidp; 1437 struct xfs_log_item_desc *lidp;
1337 1438
1338 for (lidp = xfs_trans_first_item(tp); 1439 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1339 lidp != NULL;
1340 lidp = xfs_trans_next_item(tp, lidp)) {
1341 /* 1440 /*
1342 * Unpin all but those that aren't dirty. 1441 * Unpin all but those that aren't dirty.
1343 */ 1442 */
1344 if (lidp->lid_flags & XFS_LID_DIRTY) 1443 if (lidp->lid_flags & XFS_LID_DIRTY)
1345 IOP_UNPIN_REMOVE(lidp->lid_item, tp); 1444 IOP_UNPIN(lidp->lid_item, 1);
1346 } 1445 }
1347 1446
1348 xfs_trans_unreserve_and_mod_sb(tp); 1447 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1445,7 +1544,7 @@ xfs_trans_commit_iclog(
1445 * running in simulation mode (the log is explicitly turned 1544 * running in simulation mode (the log is explicitly turned
1446 * off). 1545 * off).
1447 */ 1546 */
1448 tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; 1547 tp->t_logcb.cb_func = xfs_trans_committed;
1449 tp->t_logcb.cb_arg = tp; 1548 tp->t_logcb.cb_arg = tp;
1450 1549
1451 /* 1550 /*
@@ -1508,33 +1607,28 @@ STATIC struct xfs_log_vec *
1508xfs_trans_alloc_log_vecs( 1607xfs_trans_alloc_log_vecs(
1509 xfs_trans_t *tp) 1608 xfs_trans_t *tp)
1510{ 1609{
1511 xfs_log_item_desc_t *lidp; 1610 struct xfs_log_item_desc *lidp;
1512 struct xfs_log_vec *lv = NULL; 1611 struct xfs_log_vec *lv = NULL;
1513 struct xfs_log_vec *ret_lv = NULL; 1612 struct xfs_log_vec *ret_lv = NULL;
1514 1613
1515 lidp = xfs_trans_first_item(tp);
1516 1614
1517 /* Bail out if we didn't find a log item. */ 1615 /* Bail out if we didn't find a log item. */
1518 if (!lidp) { 1616 if (list_empty(&tp->t_items)) {
1519 ASSERT(0); 1617 ASSERT(0);
1520 return NULL; 1618 return NULL;
1521 } 1619 }
1522 1620
1523 while (lidp != NULL) { 1621 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1524 struct xfs_log_vec *new_lv; 1622 struct xfs_log_vec *new_lv;
1525 1623
1526 /* Skip items which aren't dirty in this transaction. */ 1624 /* Skip items which aren't dirty in this transaction. */
1527 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1625 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1528 lidp = xfs_trans_next_item(tp, lidp);
1529 continue; 1626 continue;
1530 }
1531 1627
1532 /* Skip items that do not have any vectors for writing */ 1628 /* Skip items that do not have any vectors for writing */
1533 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1629 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1534 if (!lidp->lid_size) { 1630 if (!lidp->lid_size)
1535 lidp = xfs_trans_next_item(tp, lidp);
1536 continue; 1631 continue;
1537 }
1538 1632
1539 new_lv = kmem_zalloc(sizeof(*new_lv) + 1633 new_lv = kmem_zalloc(sizeof(*new_lv) +
1540 lidp->lid_size * sizeof(struct xfs_log_iovec), 1634 lidp->lid_size * sizeof(struct xfs_log_iovec),
@@ -1549,7 +1643,6 @@ xfs_trans_alloc_log_vecs(
1549 else 1643 else
1550 lv->lv_next = new_lv; 1644 lv->lv_next = new_lv;
1551 lv = new_lv; 1645 lv = new_lv;
1552 lidp = xfs_trans_next_item(tp, lidp);
1553 } 1646 }
1554 1647
1555 return ret_lv; 1648 return ret_lv;
@@ -1579,9 +1672,6 @@ xfs_trans_commit_cil(
1579 return error; 1672 return error;
1580 1673
1581 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1674 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1582
1583 /* xfs_trans_free_items() unlocks them first */
1584 xfs_trans_free_items(tp, *commit_lsn, 0);
1585 xfs_trans_free(tp); 1675 xfs_trans_free(tp);
1586 return 0; 1676 return 0;
1587} 1677}
@@ -1708,12 +1798,6 @@ xfs_trans_cancel(
1708 int flags) 1798 int flags)
1709{ 1799{
1710 int log_flags; 1800 int log_flags;
1711#ifdef DEBUG
1712 xfs_log_item_chunk_t *licp;
1713 xfs_log_item_desc_t *lidp;
1714 xfs_log_item_t *lip;
1715 int i;
1716#endif
1717 xfs_mount_t *mp = tp->t_mountp; 1801 xfs_mount_t *mp = tp->t_mountp;
1718 1802
1719 /* 1803 /*
@@ -1732,21 +1816,11 @@ xfs_trans_cancel(
1732 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1816 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1733 } 1817 }
1734#ifdef DEBUG 1818#ifdef DEBUG
1735 if (!(flags & XFS_TRANS_ABORT)) { 1819 if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
1736 licp = &(tp->t_items); 1820 struct xfs_log_item_desc *lidp;
1737 while (licp != NULL) { 1821
1738 lidp = licp->lic_descs; 1822 list_for_each_entry(lidp, &tp->t_items, lid_trans)
1739 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1823 ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD));
1740 if (xfs_lic_isfree(licp, i)) {
1741 continue;
1742 }
1743
1744 lip = lidp->lid_item;
1745 if (!XFS_FORCED_SHUTDOWN(mp))
1746 ASSERT(!(lip->li_type == XFS_LI_EFD));
1747 }
1748 licp = licp->lic_next;
1749 }
1750 } 1824 }
1751#endif 1825#endif
1752 xfs_trans_unreserve_and_mod_sb(tp); 1826 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1834,7 +1908,6 @@ xfs_trans_roll(
1834 if (error) 1908 if (error)
1835 return error; 1909 return error;
1836 1910
1837 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 1911 xfs_trans_ijoin(trans, dp);
1838 xfs_trans_ihold(trans, dp);
1839 return 0; 1912 return 0;
1840} 1913}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e639e8e9a2a9..246286b77a86 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -161,105 +161,14 @@ typedef struct xfs_trans_header {
161 * the amount of space needed to log the item it describes 161 * the amount of space needed to log the item it describes
162 * once we get to commit processing (see xfs_trans_commit()). 162 * once we get to commit processing (see xfs_trans_commit()).
163 */ 163 */
164typedef struct xfs_log_item_desc { 164struct xfs_log_item_desc {
165 struct xfs_log_item *lid_item; 165 struct xfs_log_item *lid_item;
166 ushort lid_size; 166 ushort lid_size;
167 unsigned char lid_flags; 167 unsigned char lid_flags;
168 unsigned char lid_index; 168 struct list_head lid_trans;
169} xfs_log_item_desc_t; 169};
170 170
171#define XFS_LID_DIRTY 0x1 171#define XFS_LID_DIRTY 0x1
172#define XFS_LID_PINNED 0x2
173
174/*
175 * This structure is used to maintain a chunk list of log_item_desc
176 * structures. The free field is a bitmask indicating which descriptors
177 * in this chunk's array are free. The unused field is the first value
178 * not used since this chunk was allocated.
179 */
180#define XFS_LIC_NUM_SLOTS 15
181typedef struct xfs_log_item_chunk {
182 struct xfs_log_item_chunk *lic_next;
183 ushort lic_free;
184 ushort lic_unused;
185 xfs_log_item_desc_t lic_descs[XFS_LIC_NUM_SLOTS];
186} xfs_log_item_chunk_t;
187
188#define XFS_LIC_MAX_SLOT (XFS_LIC_NUM_SLOTS - 1)
189#define XFS_LIC_FREEMASK ((1 << XFS_LIC_NUM_SLOTS) - 1)
190
191
192/*
193 * Initialize the given chunk. Set the chunk's free descriptor mask
194 * to indicate that all descriptors are free. The caller gets to set
195 * lic_unused to the right value (0 matches all free). The
196 * lic_descs.lid_index values are set up as each desc is allocated.
197 */
198static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
199{
200 cp->lic_free = XFS_LIC_FREEMASK;
201}
202
203static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
204{
205 cp->lic_descs[slot].lid_index = (unsigned char)(slot);
206}
207
208static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
209{
210 return cp->lic_free & XFS_LIC_FREEMASK;
211}
212
213static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
214{
215 cp->lic_free = XFS_LIC_FREEMASK;
216}
217
218static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
219{
220 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
221}
222
223static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
224{
225 return (cp->lic_free & (1 << slot));
226}
227
228static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
229{
230 cp->lic_free &= ~(1 << slot);
231}
232
233static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
234{
235 cp->lic_free |= 1 << slot;
236}
237
238static inline xfs_log_item_desc_t *
239xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
240{
241 return &(cp->lic_descs[slot]);
242}
243
244static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
245{
246 return (uint)dp->lid_index;
247}
248
249/*
250 * Calculate the address of a chunk given a descriptor pointer:
251 * dp - dp->lid_index give the address of the start of the lic_descs array.
252 * From this we subtract the offset of the lic_descs field in a chunk.
253 * All of this yields the address of the chunk, which is
254 * cast to a chunk pointer.
255 */
256static inline xfs_log_item_chunk_t *
257xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
258{
259 return (xfs_log_item_chunk_t*) \
260 (((xfs_caddr_t)((dp) - (dp)->lid_index)) - \
261 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
262}
263 172
264#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ 173#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
265/* 174/*
@@ -275,8 +184,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
275/* 184/*
276 * Values for call flags parameter. 185 * Values for call flags parameter.
277 */ 186 */
278#define XFS_TRANS_NOSLEEP 0x1
279#define XFS_TRANS_WAIT 0x2
280#define XFS_TRANS_RELEASE_LOG_RES 0x4 187#define XFS_TRANS_RELEASE_LOG_RES 0x4
281#define XFS_TRANS_ABORT 0x8 188#define XFS_TRANS_ABORT 0x8
282 189
@@ -438,8 +345,7 @@ typedef struct xfs_item_ops {
438 uint (*iop_size)(xfs_log_item_t *); 345 uint (*iop_size)(xfs_log_item_t *);
439 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 346 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
440 void (*iop_pin)(xfs_log_item_t *); 347 void (*iop_pin)(xfs_log_item_t *);
441 void (*iop_unpin)(xfs_log_item_t *); 348 void (*iop_unpin)(xfs_log_item_t *, int remove);
442 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
443 uint (*iop_trylock)(xfs_log_item_t *); 349 uint (*iop_trylock)(xfs_log_item_t *);
444 void (*iop_unlock)(xfs_log_item_t *); 350 void (*iop_unlock)(xfs_log_item_t *);
445 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
@@ -451,8 +357,7 @@ typedef struct xfs_item_ops {
451#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) 357#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
452#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) 358#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
453#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) 359#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
454#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip) 360#define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove)
455#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
456#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) 361#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
457#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) 362#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
458#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) 363#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
@@ -494,8 +399,6 @@ typedef struct xfs_trans {
494 * transaction. */ 399 * transaction. */
495 struct xfs_mount *t_mountp; /* ptr to fs mount struct */ 400 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
496 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ 401 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
497 xfs_trans_callback_t t_callback; /* transaction callback */
498 void *t_callarg; /* callback arg */
499 unsigned int t_flags; /* misc flags */ 402 unsigned int t_flags; /* misc flags */
500 int64_t t_icount_delta; /* superblock icount change */ 403 int64_t t_icount_delta; /* superblock icount change */
501 int64_t t_ifree_delta; /* superblock ifree change */ 404 int64_t t_ifree_delta; /* superblock ifree change */
@@ -516,8 +419,7 @@ typedef struct xfs_trans {
516 int64_t t_rblocks_delta;/* superblock rblocks change */ 419 int64_t t_rblocks_delta;/* superblock rblocks change */
517 int64_t t_rextents_delta;/* superblocks rextents chg */ 420 int64_t t_rextents_delta;/* superblocks rextents chg */
518 int64_t t_rextslog_delta;/* superblocks rextslog chg */ 421 int64_t t_rextslog_delta;/* superblocks rextslog chg */
519 unsigned int t_items_free; /* log item descs free */ 422 struct list_head t_items; /* log item descriptors */
520 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
521 xfs_trans_header_t t_header; /* header for in-log trans */ 423 xfs_trans_header_t t_header; /* header for in-log trans */
522 struct list_head t_busy; /* list of busy extents */ 424 struct list_head t_busy; /* list of busy extents */
523 unsigned long t_pflags; /* saved process flags state */ 425 unsigned long t_pflags; /* saved process flags state */
@@ -569,8 +471,9 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
569void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
570int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, 472int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
571 xfs_ino_t , uint, uint, struct xfs_inode **); 473 xfs_ino_t , uint, uint, struct xfs_inode **);
572void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint); 474void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
573void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *); 475void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
476void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
574void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); 477void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
575void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 478void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
576struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); 479struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint);
@@ -595,6 +498,7 @@ int xfs_trans_ail_init(struct xfs_mount *);
595void xfs_trans_ail_destroy(struct xfs_mount *); 498void xfs_trans_ail_destroy(struct xfs_mount *);
596 499
597extern kmem_zone_t *xfs_trans_zone; 500extern kmem_zone_t *xfs_trans_zone;
501extern kmem_zone_t *xfs_log_item_desc_zone;
598 502
599#endif /* __KERNEL__ */ 503#endif /* __KERNEL__ */
600 504
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index e799824f7245..dc9069568ff7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 63d81a22f4fd..c47918c302a5 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_buf_item.h" 33#include "xfs_buf_item.h"
@@ -51,36 +47,17 @@ xfs_trans_buf_item_match(
51 xfs_daddr_t blkno, 47 xfs_daddr_t blkno,
52 int len) 48 int len)
53{ 49{
54 xfs_log_item_chunk_t *licp; 50 struct xfs_log_item_desc *lidp;
55 xfs_log_item_desc_t *lidp; 51 struct xfs_buf_log_item *blip;
56 xfs_buf_log_item_t *blip;
57 int i;
58 52
59 len = BBTOB(len); 53 len = BBTOB(len);
60 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { 54 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
61 if (xfs_lic_are_all_free(licp)) { 55 blip = (struct xfs_buf_log_item *)lidp->lid_item;
62 ASSERT(licp == &tp->t_items); 56 if (blip->bli_item.li_type == XFS_LI_BUF &&
63 ASSERT(licp->lic_next == NULL); 57 XFS_BUF_TARGET(blip->bli_buf) == target &&
64 return NULL; 58 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
65 } 59 XFS_BUF_COUNT(blip->bli_buf) == len)
66 60 return blip->bli_buf;
67 for (i = 0; i < licp->lic_unused; i++) {
68 /*
69 * Skip unoccupied slots.
70 */
71 if (xfs_lic_isfree(licp, i))
72 continue;
73
74 lidp = xfs_lic_slot(licp, i);
75 blip = (xfs_buf_log_item_t *)lidp->lid_item;
76 if (blip->bli_item.li_type != XFS_LI_BUF)
77 continue;
78
79 if (XFS_BUF_TARGET(blip->bli_buf) == target &&
80 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
81 XFS_BUF_COUNT(blip->bli_buf) == len)
82 return blip->bli_buf;
83 }
84 } 61 }
85 62
86 return NULL; 63 return NULL;
@@ -127,7 +104,7 @@ _xfs_trans_bjoin(
127 /* 104 /*
128 * Get a log_item_desc to point at the new item. 105 * Get a log_item_desc to point at the new item.
129 */ 106 */
130 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); 107 xfs_trans_add_item(tp, &bip->bli_item);
131 108
132 /* 109 /*
133 * Initialize b_fsprivate2 so we can find it with incore_match() 110 * Initialize b_fsprivate2 so we can find it with incore_match()
@@ -359,7 +336,7 @@ xfs_trans_read_buf(
359 ASSERT(!XFS_BUF_ISASYNC(bp)); 336 ASSERT(!XFS_BUF_ISASYNC(bp));
360 XFS_BUF_READ(bp); 337 XFS_BUF_READ(bp);
361 xfsbdstrat(tp->t_mountp, bp); 338 xfsbdstrat(tp->t_mountp, bp);
362 error = xfs_iowait(bp); 339 error = xfs_buf_iowait(bp);
363 if (error) { 340 if (error) {
364 xfs_ioerror_alert("xfs_trans_read_buf", mp, 341 xfs_ioerror_alert("xfs_trans_read_buf", mp,
365 bp, blkno); 342 bp, blkno);
@@ -483,7 +460,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
483{ 460{
484 xfs_buf_log_item_t *bip; 461 xfs_buf_log_item_t *bip;
485 xfs_log_item_t *lip; 462 xfs_log_item_t *lip;
486 xfs_log_item_desc_t *lidp;
487 463
488 /* 464 /*
489 * Default to a normal brelse() call if the tp is NULL. 465 * Default to a normal brelse() call if the tp is NULL.
@@ -514,13 +490,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
514 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 490 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
515 ASSERT(atomic_read(&bip->bli_refcount) > 0); 491 ASSERT(atomic_read(&bip->bli_refcount) > 0);
516 492
517 /*
518 * Find the item descriptor pointing to this buffer's
519 * log item. It must be there.
520 */
521 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
522 ASSERT(lidp != NULL);
523
524 trace_xfs_trans_brelse(bip); 493 trace_xfs_trans_brelse(bip);
525 494
526 /* 495 /*
@@ -536,7 +505,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
536 * If the buffer is dirty within this transaction, we can't 505 * If the buffer is dirty within this transaction, we can't
537 * release it until we commit. 506 * release it until we commit.
538 */ 507 */
539 if (lidp->lid_flags & XFS_LID_DIRTY) 508 if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
540 return; 509 return;
541 510
542 /* 511 /*
@@ -553,7 +522,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
553 /* 522 /*
554 * Free up the log item descriptor tracking the released item. 523 * Free up the log item descriptor tracking the released item.
555 */ 524 */
556 xfs_trans_free_item(tp, lidp); 525 xfs_trans_del_item(&bip->bli_item);
557 526
558 /* 527 /*
559 * Clear the hold flag in the buf log item if it is set. 528 * Clear the hold flag in the buf log item if it is set.
@@ -665,7 +634,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
665 uint last) 634 uint last)
666{ 635{
667 xfs_buf_log_item_t *bip; 636 xfs_buf_log_item_t *bip;
668 xfs_log_item_desc_t *lidp;
669 637
670 ASSERT(XFS_BUF_ISBUSY(bp)); 638 ASSERT(XFS_BUF_ISBUSY(bp));
671 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 639 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
@@ -690,7 +658,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
690 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 658 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
691 ASSERT(atomic_read(&bip->bli_refcount) > 0); 659 ASSERT(atomic_read(&bip->bli_refcount) > 0);
692 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 660 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
693 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 661 bip->bli_item.li_cb = xfs_buf_iodone;
694 662
695 trace_xfs_trans_log_buf(bip); 663 trace_xfs_trans_log_buf(bip);
696 664
@@ -707,11 +675,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
707 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; 675 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL;
708 } 676 }
709 677
710 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
711 ASSERT(lidp != NULL);
712
713 tp->t_flags |= XFS_TRANS_DIRTY; 678 tp->t_flags |= XFS_TRANS_DIRTY;
714 lidp->lid_flags |= XFS_LID_DIRTY; 679 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
715 bip->bli_flags |= XFS_BLI_LOGGED; 680 bip->bli_flags |= XFS_BLI_LOGGED;
716 xfs_buf_item_log(bip, first, last); 681 xfs_buf_item_log(bip, first, last);
717} 682}
@@ -740,7 +705,6 @@ xfs_trans_binval(
740 xfs_trans_t *tp, 705 xfs_trans_t *tp,
741 xfs_buf_t *bp) 706 xfs_buf_t *bp)
742{ 707{
743 xfs_log_item_desc_t *lidp;
744 xfs_buf_log_item_t *bip; 708 xfs_buf_log_item_t *bip;
745 709
746 ASSERT(XFS_BUF_ISBUSY(bp)); 710 ASSERT(XFS_BUF_ISBUSY(bp));
@@ -748,8 +712,6 @@ xfs_trans_binval(
748 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 712 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
749 713
750 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 714 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
751 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
752 ASSERT(lidp != NULL);
753 ASSERT(atomic_read(&bip->bli_refcount) > 0); 715 ASSERT(atomic_read(&bip->bli_refcount) > 0);
754 716
755 trace_xfs_trans_binval(bip); 717 trace_xfs_trans_binval(bip);
@@ -764,7 +726,7 @@ xfs_trans_binval(
764 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 726 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
765 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); 727 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
766 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 728 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
767 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 729 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
768 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 730 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
769 return; 731 return;
770 } 732 }
@@ -797,7 +759,7 @@ xfs_trans_binval(
797 bip->bli_format.blf_flags |= XFS_BLF_CANCEL; 759 bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
798 memset((char *)(bip->bli_format.blf_data_map), 0, 760 memset((char *)(bip->bli_format.blf_data_map), 0,
799 (bip->bli_format.blf_map_size * sizeof(uint))); 761 (bip->bli_format.blf_map_size * sizeof(uint)));
800 lidp->lid_flags |= XFS_LID_DIRTY; 762 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
801 tp->t_flags |= XFS_TRANS_DIRTY; 763 tp->t_flags |= XFS_TRANS_DIRTY;
802} 764}
803 765
@@ -853,12 +815,9 @@ xfs_trans_stale_inode_buf(
853 ASSERT(atomic_read(&bip->bli_refcount) > 0); 815 ASSERT(atomic_read(&bip->bli_refcount) > 0);
854 816
855 bip->bli_flags |= XFS_BLI_STALE_INODE; 817 bip->bli_flags |= XFS_BLI_STALE_INODE;
856 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) 818 bip->bli_item.li_cb = xfs_buf_iodone;
857 xfs_buf_iodone;
858} 819}
859 820
860
861
862/* 821/*
863 * Mark the buffer as being one which contains newly allocated 822 * Mark the buffer as being one which contains newly allocated
864 * inodes. We need to make sure that even if this buffer is 823 * inodes. We need to make sure that even if this buffer is
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 27cce2a9c7e9..f783d5e9fa70 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
29#include "xfs_extfree_item.h" 28#include "xfs_extfree_item.h"
@@ -49,9 +48,8 @@ xfs_trans_get_efi(xfs_trans_t *tp,
49 /* 48 /*
50 * Get a log_item_desc to point at the new item. 49 * Get a log_item_desc to point at the new item.
51 */ 50 */
52 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efip); 51 xfs_trans_add_item(tp, &efip->efi_item);
53 52 return efip;
54 return (efip);
55} 53}
56 54
57/* 55/*
@@ -65,15 +63,11 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
65 xfs_fsblock_t start_block, 63 xfs_fsblock_t start_block,
66 xfs_extlen_t ext_len) 64 xfs_extlen_t ext_len)
67{ 65{
68 xfs_log_item_desc_t *lidp;
69 uint next_extent; 66 uint next_extent;
70 xfs_extent_t *extp; 67 xfs_extent_t *extp;
71 68
72 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efip);
73 ASSERT(lidp != NULL);
74
75 tp->t_flags |= XFS_TRANS_DIRTY; 69 tp->t_flags |= XFS_TRANS_DIRTY;
76 lidp->lid_flags |= XFS_LID_DIRTY; 70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
77 71
78 next_extent = efip->efi_next_extent; 72 next_extent = efip->efi_next_extent;
79 ASSERT(next_extent < efip->efi_format.efi_nextents); 73 ASSERT(next_extent < efip->efi_format.efi_nextents);
@@ -106,9 +100,8 @@ xfs_trans_get_efd(xfs_trans_t *tp,
106 /* 100 /*
107 * Get a log_item_desc to point at the new item. 101 * Get a log_item_desc to point at the new item.
108 */ 102 */
109 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efdp); 103 xfs_trans_add_item(tp, &efdp->efd_item);
110 104 return efdp;
111 return (efdp);
112} 105}
113 106
114/* 107/*
@@ -122,15 +115,11 @@ xfs_trans_log_efd_extent(xfs_trans_t *tp,
122 xfs_fsblock_t start_block, 115 xfs_fsblock_t start_block,
123 xfs_extlen_t ext_len) 116 xfs_extlen_t ext_len)
124{ 117{
125 xfs_log_item_desc_t *lidp;
126 uint next_extent; 118 uint next_extent;
127 xfs_extent_t *extp; 119 xfs_extent_t *extp;
128 120
129 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efdp);
130 ASSERT(lidp != NULL);
131
132 tp->t_flags |= XFS_TRANS_DIRTY; 121 tp->t_flags |= XFS_TRANS_DIRTY;
133 lidp->lid_flags |= XFS_LID_DIRTY; 122 efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;
134 123
135 next_extent = efdp->efd_next_extent; 124 next_extent = efdp->efd_next_extent;
136 ASSERT(next_extent < efdp->efd_format.efd_nextents); 125 ASSERT(next_extent < efdp->efd_format.efd_nextents);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 2559dfec946b..ccb34532768b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -24,20 +24,16 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_trans_priv.h" 34#include "xfs_trans_priv.h"
40#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
36#include "xfs_trace.h"
41 37
42#ifdef XFS_TRANS_DEBUG 38#ifdef XFS_TRANS_DEBUG
43STATIC void 39STATIC void
@@ -47,7 +43,6 @@ xfs_trans_inode_broot_debug(
47#define xfs_trans_inode_broot_debug(ip) 43#define xfs_trans_inode_broot_debug(ip)
48#endif 44#endif
49 45
50
51/* 46/*
52 * Get an inode and join it to the transaction. 47 * Get an inode and join it to the transaction.
53 */ 48 */
@@ -63,76 +58,94 @@ xfs_trans_iget(
63 int error; 58 int error;
64 59
65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); 60 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
66 if (!error && tp) 61 if (!error && tp) {
67 xfs_trans_ijoin(tp, *ipp, lock_flags); 62 xfs_trans_ijoin(tp, *ipp);
63 (*ipp)->i_itemp->ili_lock_flags = lock_flags;
64 }
68 return error; 65 return error;
69} 66}
70 67
71/* 68/*
72 * Add the locked inode to the transaction. 69 * Add a locked inode to the transaction.
73 * The inode must be locked, and it cannot be associated with any 70 *
74 * transaction. The caller must specify the locks already held 71 * The inode must be locked, and it cannot be associated with any transaction.
75 * on the inode.
76 */ 72 */
77void 73void
78xfs_trans_ijoin( 74xfs_trans_ijoin(
79 xfs_trans_t *tp, 75 struct xfs_trans *tp,
80 xfs_inode_t *ip, 76 struct xfs_inode *ip)
81 uint lock_flags)
82{ 77{
83 xfs_inode_log_item_t *iip; 78 xfs_inode_log_item_t *iip;
84 79
85 ASSERT(ip->i_transp == NULL); 80 ASSERT(ip->i_transp == NULL);
86 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 81 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
87 ASSERT(lock_flags & XFS_ILOCK_EXCL);
88 if (ip->i_itemp == NULL) 82 if (ip->i_itemp == NULL)
89 xfs_inode_item_init(ip, ip->i_mount); 83 xfs_inode_item_init(ip, ip->i_mount);
90 iip = ip->i_itemp; 84 iip = ip->i_itemp;
91 ASSERT(iip->ili_flags == 0); 85 ASSERT(iip->ili_lock_flags == 0);
92 86
93 /* 87 /*
94 * Get a log_item_desc to point at the new item. 88 * Get a log_item_desc to point at the new item.
95 */ 89 */
96 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(iip)); 90 xfs_trans_add_item(tp, &iip->ili_item);
97 91
98 xfs_trans_inode_broot_debug(ip); 92 xfs_trans_inode_broot_debug(ip);
99 93
100 /* 94 /*
101 * If the IO lock is already held, mark that in the inode log item.
102 */
103 if (lock_flags & XFS_IOLOCK_EXCL) {
104 iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
105 } else if (lock_flags & XFS_IOLOCK_SHARED) {
106 iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
107 }
108
109 /*
110 * Initialize i_transp so we can find it with xfs_inode_incore() 95 * Initialize i_transp so we can find it with xfs_inode_incore()
111 * in xfs_trans_iget() above. 96 * in xfs_trans_iget() above.
112 */ 97 */
113 ip->i_transp = tp; 98 ip->i_transp = tp;
114} 99}
115 100
116 101/*
102 * Add a locked inode to the transaction.
103 *
104 *
105 * Grabs a reference to the inode which will be dropped when the transaction
106 * is commited. The inode will also be unlocked at that point. The inode
107 * must be locked, and it cannot be associated with any transaction.
108 */
109void
110xfs_trans_ijoin_ref(
111 struct xfs_trans *tp,
112 struct xfs_inode *ip,
113 uint lock_flags)
114{
115 xfs_trans_ijoin(tp, ip);
116 IHOLD(ip);
117 ip->i_itemp->ili_lock_flags = lock_flags;
118}
117 119
118/* 120/*
119 * Mark the inode as not needing to be unlocked when the inode item's 121 * Transactional inode timestamp update. Requires the inode to be locked and
120 * IOP_UNLOCK() routine is called. The inode must already be locked 122 * joined to the transaction supplied. Relies on the transaction subsystem to
121 * and associated with the given transaction. 123 * track dirty state and update/writeback the inode accordingly.
122 */ 124 */
123/*ARGSUSED*/
124void 125void
125xfs_trans_ihold( 126xfs_trans_ichgtime(
126 xfs_trans_t *tp, 127 struct xfs_trans *tp,
127 xfs_inode_t *ip) 128 struct xfs_inode *ip,
129 int flags)
128{ 130{
129 ASSERT(ip->i_transp == tp); 131 struct inode *inode = VFS_I(ip);
130 ASSERT(ip->i_itemp != NULL); 132 timespec_t tv;
133
134 ASSERT(tp);
131 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 135 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
136 ASSERT(ip->i_transp == tp);
132 137
133 ip->i_itemp->ili_flags |= XFS_ILI_HOLD; 138 tv = current_fs_time(inode->i_sb);
134}
135 139
140 if ((flags & XFS_ICHGTIME_MOD) &&
141 !timespec_equal(&inode->i_mtime, &tv)) {
142 inode->i_mtime = tv;
143 }
144 if ((flags & XFS_ICHGTIME_CHG) &&
145 !timespec_equal(&inode->i_ctime, &tv)) {
146 inode->i_ctime = tv;
147 }
148}
136 149
137/* 150/*
138 * This is called to mark the fields indicated in fieldmask as needing 151 * This is called to mark the fields indicated in fieldmask as needing
@@ -149,17 +162,12 @@ xfs_trans_log_inode(
149 xfs_inode_t *ip, 162 xfs_inode_t *ip,
150 uint flags) 163 uint flags)
151{ 164{
152 xfs_log_item_desc_t *lidp;
153
154 ASSERT(ip->i_transp == tp); 165 ASSERT(ip->i_transp == tp);
155 ASSERT(ip->i_itemp != NULL); 166 ASSERT(ip->i_itemp != NULL);
156 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 167 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
157 168
158 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
159 ASSERT(lidp != NULL);
160
161 tp->t_flags |= XFS_TRANS_DIRTY; 169 tp->t_flags |= XFS_TRANS_DIRTY;
162 lidp->lid_flags |= XFS_LID_DIRTY; 170 ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY;
163 171
164 /* 172 /*
165 * Always OR in the bits from the ili_last_fields field. 173 * Always OR in the bits from the ili_last_fields field.
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
deleted file mode 100644
index f11d37d06dcc..000000000000
--- a/fs/xfs/xfs_trans_item.c
+++ /dev/null
@@ -1,441 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has its own ailp */
26#include "xfs_bit.h"
27#include "xfs_buf_item.h"
28#include "xfs_sb.h"
29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h"
33
34STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
35 int, int, xfs_lsn_t);
36
37/*
38 * This is called to add the given log item to the transaction's
39 * list of log items. It must find a free log item descriptor
40 * or allocate a new one and add the item to that descriptor.
41 * The function returns a pointer to item descriptor used to point
42 * to the new item. The log item will now point to its new descriptor
43 * with its li_desc field.
44 */
45xfs_log_item_desc_t *
46xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
47{
48 xfs_log_item_desc_t *lidp;
49 xfs_log_item_chunk_t *licp;
50 int i=0;
51
52 /*
53 * If there are no free descriptors, allocate a new chunk
54 * of them and put it at the front of the chunk list.
55 */
56 if (tp->t_items_free == 0) {
57 licp = (xfs_log_item_chunk_t*)
58 kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
59 ASSERT(licp != NULL);
60 /*
61 * Initialize the chunk, and then
62 * claim the first slot in the newly allocated chunk.
63 */
64 xfs_lic_init(licp);
65 xfs_lic_claim(licp, 0);
66 licp->lic_unused = 1;
67 xfs_lic_init_slot(licp, 0);
68 lidp = xfs_lic_slot(licp, 0);
69
70 /*
71 * Link in the new chunk and update the free count.
72 */
73 licp->lic_next = tp->t_items.lic_next;
74 tp->t_items.lic_next = licp;
75 tp->t_items_free = XFS_LIC_NUM_SLOTS - 1;
76
77 /*
78 * Initialize the descriptor and the generic portion
79 * of the log item.
80 *
81 * Point the new slot at this item and return it.
82 * Also point the log item at its currently active
83 * descriptor and set the item's mount pointer.
84 */
85 lidp->lid_item = lip;
86 lidp->lid_flags = 0;
87 lidp->lid_size = 0;
88 lip->li_desc = lidp;
89 lip->li_mountp = tp->t_mountp;
90 lip->li_ailp = tp->t_mountp->m_ail;
91 return lidp;
92 }
93
94 /*
95 * Find the free descriptor. It is somewhere in the chunklist
96 * of descriptors.
97 */
98 licp = &tp->t_items;
99 while (licp != NULL) {
100 if (xfs_lic_vacancy(licp)) {
101 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
102 i = licp->lic_unused;
103 ASSERT(xfs_lic_isfree(licp, i));
104 break;
105 }
106 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
107 if (xfs_lic_isfree(licp, i))
108 break;
109 }
110 ASSERT(i <= XFS_LIC_MAX_SLOT);
111 break;
112 }
113 licp = licp->lic_next;
114 }
115 ASSERT(licp != NULL);
116 /*
117 * If we find a free descriptor, claim it,
118 * initialize it, and return it.
119 */
120 xfs_lic_claim(licp, i);
121 if (licp->lic_unused <= i) {
122 licp->lic_unused = i + 1;
123 xfs_lic_init_slot(licp, i);
124 }
125 lidp = xfs_lic_slot(licp, i);
126 tp->t_items_free--;
127 lidp->lid_item = lip;
128 lidp->lid_flags = 0;
129 lidp->lid_size = 0;
130 lip->li_desc = lidp;
131 lip->li_mountp = tp->t_mountp;
132 lip->li_ailp = tp->t_mountp->m_ail;
133 return lidp;
134}
135
136/*
137 * Free the given descriptor.
138 *
139 * This requires setting the bit in the chunk's free mask corresponding
140 * to the given slot.
141 */
142void
143xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
144{
145 uint slot;
146 xfs_log_item_chunk_t *licp;
147 xfs_log_item_chunk_t **licpp;
148
149 slot = xfs_lic_desc_to_slot(lidp);
150 licp = xfs_lic_desc_to_chunk(lidp);
151 xfs_lic_relse(licp, slot);
152 lidp->lid_item->li_desc = NULL;
153 tp->t_items_free++;
154
155 /*
156 * If there are no more used items in the chunk and this is not
157 * the chunk embedded in the transaction structure, then free
158 * the chunk. First pull it from the chunk list and then
159 * free it back to the heap. We didn't bother with a doubly
160 * linked list here because the lists should be very short
161 * and this is not a performance path. It's better to save
162 * the memory of the extra pointer.
163 *
164 * Also decrement the transaction structure's count of free items
165 * by the number in a chunk since we are freeing an empty chunk.
166 */
167 if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
168 licpp = &(tp->t_items.lic_next);
169 while (*licpp != licp) {
170 ASSERT(*licpp != NULL);
171 licpp = &((*licpp)->lic_next);
172 }
173 *licpp = licp->lic_next;
174 kmem_free(licp);
175 tp->t_items_free -= XFS_LIC_NUM_SLOTS;
176 }
177}
178
179/*
180 * This is called to find the descriptor corresponding to the given
181 * log item. It returns a pointer to the descriptor.
182 * The log item MUST have a corresponding descriptor in the given
183 * transaction. This routine does not return NULL, it panics.
184 *
185 * The descriptor pointer is kept in the log item's li_desc field.
186 * Just return it.
187 */
188/*ARGSUSED*/
189xfs_log_item_desc_t *
190xfs_trans_find_item(xfs_trans_t *tp, xfs_log_item_t *lip)
191{
192 ASSERT(lip->li_desc != NULL);
193
194 return lip->li_desc;
195}
196
197
198/*
199 * Return a pointer to the first descriptor in the chunk list.
200 * This does not return NULL if there are none, it panics.
201 *
202 * The first descriptor must be in either the first or second chunk.
203 * This is because the only chunk allowed to be empty is the first.
204 * All others are freed when they become empty.
205 *
206 * At some point this and xfs_trans_next_item() should be optimized
207 * to quickly look at the mask to determine if there is anything to
208 * look at.
209 */
210xfs_log_item_desc_t *
211xfs_trans_first_item(xfs_trans_t *tp)
212{
213 xfs_log_item_chunk_t *licp;
214 int i;
215
216 licp = &tp->t_items;
217 /*
218 * If it's not in the first chunk, skip to the second.
219 */
220 if (xfs_lic_are_all_free(licp)) {
221 licp = licp->lic_next;
222 }
223
224 /*
225 * Return the first non-free descriptor in the chunk.
226 */
227 ASSERT(!xfs_lic_are_all_free(licp));
228 for (i = 0; i < licp->lic_unused; i++) {
229 if (xfs_lic_isfree(licp, i)) {
230 continue;
231 }
232
233 return xfs_lic_slot(licp, i);
234 }
235 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
236 return NULL;
237}
238
239
240/*
241 * Given a descriptor, return the next descriptor in the chunk list.
242 * This returns NULL if there are no more used descriptors in the list.
243 *
244 * We do this by first locating the chunk in which the descriptor resides,
245 * and then scanning forward in the chunk and the list for the next
246 * used descriptor.
247 */
248/*ARGSUSED*/
249xfs_log_item_desc_t *
250xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
251{
252 xfs_log_item_chunk_t *licp;
253 int i;
254
255 licp = xfs_lic_desc_to_chunk(lidp);
256
257 /*
258 * First search the rest of the chunk. The for loop keeps us
259 * from referencing things beyond the end of the chunk.
260 */
261 for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) {
262 if (xfs_lic_isfree(licp, i)) {
263 continue;
264 }
265
266 return xfs_lic_slot(licp, i);
267 }
268
269 /*
270 * Now search the next chunk. It must be there, because the
271 * next chunk would have been freed if it were empty.
272 * If there is no next chunk, return NULL.
273 */
274 if (licp->lic_next == NULL) {
275 return NULL;
276 }
277
278 licp = licp->lic_next;
279 ASSERT(!xfs_lic_are_all_free(licp));
280 for (i = 0; i < licp->lic_unused; i++) {
281 if (xfs_lic_isfree(licp, i)) {
282 continue;
283 }
284
285 return xfs_lic_slot(licp, i);
286 }
287 ASSERT(0);
288 /* NOTREACHED */
289 return NULL; /* keep gcc quite */
290}
291
292/*
293 * This is called to unlock all of the items of a transaction and to free
294 * all the descriptors of that transaction.
295 *
296 * It walks the list of descriptors and unlocks each item. It frees
297 * each chunk except that embedded in the transaction as it goes along.
298 */
299void
300xfs_trans_free_items(
301 xfs_trans_t *tp,
302 xfs_lsn_t commit_lsn,
303 int flags)
304{
305 xfs_log_item_chunk_t *licp;
306 xfs_log_item_chunk_t *next_licp;
307 int abort;
308
309 abort = flags & XFS_TRANS_ABORT;
310 licp = &tp->t_items;
311 /*
312 * Special case the embedded chunk so we don't free it below.
313 */
314 if (!xfs_lic_are_all_free(licp)) {
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
316 xfs_lic_all_free(licp);
317 licp->lic_unused = 0;
318 }
319 licp = licp->lic_next;
320
321 /*
322 * Unlock each item in each chunk and free the chunks.
323 */
324 while (licp != NULL) {
325 ASSERT(!xfs_lic_are_all_free(licp));
326 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
327 next_licp = licp->lic_next;
328 kmem_free(licp);
329 licp = next_licp;
330 }
331
332 /*
333 * Reset the transaction structure's free item count.
334 */
335 tp->t_items_free = XFS_LIC_NUM_SLOTS;
336 tp->t_items.lic_next = NULL;
337}
338
339
340
341/*
342 * This is called to unlock the items associated with a transaction.
343 * Items which were not logged should be freed.
344 * Those which were logged must still be tracked so they can be unpinned
345 * when the transaction commits.
346 */
347void
348xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
349{
350 xfs_log_item_chunk_t *licp;
351 xfs_log_item_chunk_t *next_licp;
352 xfs_log_item_chunk_t **licpp;
353 int freed;
354
355 freed = 0;
356 licp = &tp->t_items;
357
358 /*
359 * Special case the embedded chunk so we don't free.
360 */
361 if (!xfs_lic_are_all_free(licp)) {
362 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
363 }
364 licpp = &(tp->t_items.lic_next);
365 licp = licp->lic_next;
366
367 /*
368 * Unlock each item in each chunk, free non-dirty descriptors,
369 * and free empty chunks.
370 */
371 while (licp != NULL) {
372 ASSERT(!xfs_lic_are_all_free(licp));
373 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
374 next_licp = licp->lic_next;
375 if (xfs_lic_are_all_free(licp)) {
376 *licpp = next_licp;
377 kmem_free(licp);
378 freed -= XFS_LIC_NUM_SLOTS;
379 } else {
380 licpp = &(licp->lic_next);
381 }
382 ASSERT(*licpp == next_licp);
383 licp = next_licp;
384 }
385
386 /*
387 * Fix the free descriptor count in the transaction.
388 */
389 tp->t_items_free += freed;
390}
391
392/*
393 * Unlock each item pointed to by a descriptor in the given chunk.
394 * Stamp the commit lsn into each item if necessary.
395 * Free descriptors pointing to items which are not dirty if freeing_chunk
396 * is zero. If freeing_chunk is non-zero, then we need to unlock all
397 * items in the chunk.
398 *
399 * Return the number of descriptors freed.
400 */
401STATIC int
402xfs_trans_unlock_chunk(
403 xfs_log_item_chunk_t *licp,
404 int freeing_chunk,
405 int abort,
406 xfs_lsn_t commit_lsn)
407{
408 xfs_log_item_desc_t *lidp;
409 xfs_log_item_t *lip;
410 int i;
411 int freed;
412
413 freed = 0;
414 lidp = licp->lic_descs;
415 for (i = 0; i < licp->lic_unused; i++, lidp++) {
416 if (xfs_lic_isfree(licp, i)) {
417 continue;
418 }
419 lip = lidp->lid_item;
420 lip->li_desc = NULL;
421
422 if (commit_lsn != NULLCOMMITLSN)
423 IOP_COMMITTING(lip, commit_lsn);
424 if (abort)
425 lip->li_flags |= XFS_LI_ABORTED;
426 IOP_UNLOCK(lip);
427
428 /*
429 * Free the descriptor if the item is not dirty
430 * within this transaction and the caller is not
431 * going to just free the entire thing regardless.
432 */
433 if (!(freeing_chunk) &&
434 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
435 xfs_lic_relse(licp, i);
436 freed++;
437 }
438 }
439
440 return freed;
441}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index c6e4f2c8de6e..62da86c90de5 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -23,23 +23,10 @@ struct xfs_log_item_desc;
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25 25
26/* 26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
27 * From xfs_trans_item.c 27void xfs_trans_del_item(struct xfs_log_item *);
28 */
29struct xfs_log_item_desc *xfs_trans_add_item(struct xfs_trans *,
30 struct xfs_log_item *);
31void xfs_trans_free_item(struct xfs_trans *,
32 struct xfs_log_item_desc *);
33struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *,
34 struct xfs_log_item *);
35struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *);
36struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *,
37 struct xfs_log_item_desc *);
38
39void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn);
40void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 28void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
41 int flags); 29 int flags);
42
43void xfs_trans_item_committed(struct xfs_log_item *lip, 30void xfs_trans_item_committed(struct xfs_log_item *lip,
44 xfs_lsn_t commit_lsn, int aborted); 31 xfs_lsn_t commit_lsn, int aborted);
45void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 320775295e32..26d1867d8156 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
75 75
76typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */
77
78typedef __uint32_t xlog_tid_t; /* transaction ID type */ 76typedef __uint32_t xlog_tid_t; /* transaction ID type */
79 77
80/* 78/*
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 4d88616bde91..8b32d1a4c5a1 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -25,18 +25,14 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 30#include "xfs_dinode.h"
34#include "xfs_inode.h" 31#include "xfs_inode.h"
35#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
36#include "xfs_bmap.h" 33#include "xfs_bmap.h"
37#include "xfs_error.h" 34#include "xfs_error.h"
38#include "xfs_quota.h" 35#include "xfs_quota.h"
39#include "xfs_rw.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
41#include "xfs_utils.h" 37#include "xfs_utils.h"
42 38
@@ -60,7 +56,6 @@ xfs_dir_ialloc(
60 mode_t mode, 56 mode_t mode,
61 xfs_nlink_t nlink, 57 xfs_nlink_t nlink,
62 xfs_dev_t rdev, 58 xfs_dev_t rdev,
63 cred_t *credp,
64 prid_t prid, /* project id */ 59 prid_t prid, /* project id */
65 int okalloc, /* ok to allocate new space */ 60 int okalloc, /* ok to allocate new space */
66 xfs_inode_t **ipp, /* pointer to inode; it will be 61 xfs_inode_t **ipp, /* pointer to inode; it will be
@@ -97,7 +92,7 @@ xfs_dir_ialloc(
97 * transaction commit so that no other process can steal 92 * transaction commit so that no other process can steal
98 * the inode(s) that we've just allocated. 93 * the inode(s) that we've just allocated.
99 */ 94 */
100 code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, 95 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
101 &ialloc_context, &call_again, &ip); 96 &ialloc_context, &call_again, &ip);
102 97
103 /* 98 /*
@@ -201,7 +196,7 @@ xfs_dir_ialloc(
201 * other allocations in this allocation group, 196 * other allocations in this allocation group,
202 * this call should always succeed. 197 * this call should always succeed.
203 */ 198 */
204 code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, 199 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
205 okalloc, &ialloc_context, &call_again, &ip); 200 okalloc, &ialloc_context, &call_again, &ip);
206 201
207 /* 202 /*
@@ -239,7 +234,7 @@ xfs_droplink(
239{ 234{
240 int error; 235 int error;
241 236
242 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 237 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
243 238
244 ASSERT (ip->i_d.di_nlink > 0); 239 ASSERT (ip->i_d.di_nlink > 0);
245 ip->i_d.di_nlink--; 240 ip->i_d.di_nlink--;
@@ -303,7 +298,7 @@ xfs_bumplink(
303{ 298{
304 if (ip->i_d.di_nlink >= XFS_MAXLINK) 299 if (ip->i_d.di_nlink >= XFS_MAXLINK)
305 return XFS_ERROR(EMLINK); 300 return XFS_ERROR(EMLINK);
306 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 301 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
307 302
308 ASSERT(ip->i_d.di_nlink > 0); 303 ASSERT(ip->i_d.di_nlink > 0);
309 ip->i_d.di_nlink++; 304 ip->i_d.di_nlink++;
@@ -324,86 +319,3 @@ xfs_bumplink(
324 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 319 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
325 return 0; 320 return 0;
326} 321}
327
328/*
329 * Try to truncate the given file to 0 length. Currently called
330 * only out of xfs_remove when it has to truncate a file to free
331 * up space for the remove to proceed.
332 */
333int
334xfs_truncate_file(
335 xfs_mount_t *mp,
336 xfs_inode_t *ip)
337{
338 xfs_trans_t *tp;
339 int error;
340
341#ifdef QUOTADEBUG
342 /*
343 * This is called to truncate the quotainodes too.
344 */
345 if (XFS_IS_UQUOTA_ON(mp)) {
346 if (ip->i_ino != mp->m_sb.sb_uquotino)
347 ASSERT(ip->i_udquot);
348 }
349 if (XFS_IS_OQUOTA_ON(mp)) {
350 if (ip->i_ino != mp->m_sb.sb_gquotino)
351 ASSERT(ip->i_gdquot);
352 }
353#endif
354 /*
355 * Make the call to xfs_itruncate_start before starting the
356 * transaction, because we cannot make the call while we're
357 * in a transaction.
358 */
359 xfs_ilock(ip, XFS_IOLOCK_EXCL);
360 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0);
361 if (error) {
362 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
363 return error;
364 }
365
366 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
367 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
368 XFS_TRANS_PERM_LOG_RES,
369 XFS_ITRUNCATE_LOG_COUNT))) {
370 xfs_trans_cancel(tp, 0);
371 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
372 return error;
373 }
374
375 /*
376 * Follow the normal truncate locking protocol. Since we
377 * hold the inode in the transaction, we know that its number
378 * of references will stay constant.
379 */
380 xfs_ilock(ip, XFS_ILOCK_EXCL);
381 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
382 xfs_trans_ihold(tp, ip);
383 /*
384 * Signal a sync xaction. The only case where that isn't
385 * the case is if we're truncating an already unlinked file
386 * on a wsync fs. In that case, we know the blocks can't
387 * reappear in the file because the links to file are
388 * permanently toast. Currently, we're always going to
389 * want a sync transaction because this code is being
390 * called from places where nlink is guaranteed to be 1
391 * but I'm leaving the tests in to protect against future
392 * changes -- rcc.
393 */
394 error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0,
395 XFS_DATA_FORK,
396 ((ip->i_d.di_nlink != 0 ||
397 !(mp->m_flags & XFS_MOUNT_WSYNC))
398 ? 1 : 0));
399 if (error) {
400 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
401 XFS_TRANS_ABORT);
402 } else {
403 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
404 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
405 }
406 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
407
408 return error;
409}
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index ef321225d269..456fca314933 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,10 +18,8 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
22extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
23 xfs_dev_t, cred_t *, prid_t, int, 22 xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
24 xfs_inode_t **, int *);
25extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); 23extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
26extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); 24extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
27extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); 25extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c1646838898f..8e4a63c4151a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -26,19 +26,14 @@
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h" 28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 33#include "xfs_dinode.h"
38#include "xfs_inode.h" 34#include "xfs_inode.h"
39#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
41#include "xfs_btree.h"
42#include "xfs_ialloc.h" 37#include "xfs_ialloc.h"
43#include "xfs_alloc.h" 38#include "xfs_alloc.h"
44#include "xfs_bmap.h" 39#include "xfs_bmap.h"
@@ -73,7 +68,7 @@ xfs_setattr(
73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 68 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
74 int need_iolock = 1; 69 int need_iolock = 1;
75 70
76 xfs_itrace_entry(ip); 71 trace_xfs_setattr(ip);
77 72
78 if (mp->m_flags & XFS_MOUNT_RDONLY) 73 if (mp->m_flags & XFS_MOUNT_RDONLY)
79 return XFS_ERROR(EROFS); 74 return XFS_ERROR(EROFS);
@@ -119,7 +114,7 @@ xfs_setattr(
119 */ 114 */
120 ASSERT(udqp == NULL); 115 ASSERT(udqp == NULL);
121 ASSERT(gdqp == NULL); 116 ASSERT(gdqp == NULL);
122 code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, 117 code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
123 qflags, &udqp, &gdqp); 118 qflags, &udqp, &gdqp);
124 if (code) 119 if (code)
125 return code; 120 return code;
@@ -143,16 +138,6 @@ xfs_setattr(
143 goto error_return; 138 goto error_return;
144 } 139 }
145 } else { 140 } else {
146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
147 !(flags & XFS_ATTR_DMI)) {
148 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
149 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
150 iattr->ia_size, 0, dmflags, NULL);
151 if (code) {
152 lock_flags = 0;
153 goto error_return;
154 }
155 }
156 if (need_iolock) 141 if (need_iolock)
157 lock_flags |= XFS_IOLOCK_EXCL; 142 lock_flags |= XFS_IOLOCK_EXCL;
158 } 143 }
@@ -199,8 +184,11 @@ xfs_setattr(
199 ip->i_size == 0 && ip->i_d.di_nextents == 0) { 184 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
200 xfs_iunlock(ip, XFS_ILOCK_EXCL); 185 xfs_iunlock(ip, XFS_ILOCK_EXCL);
201 lock_flags &= ~XFS_ILOCK_EXCL; 186 lock_flags &= ~XFS_ILOCK_EXCL;
202 if (mask & ATTR_CTIME) 187 if (mask & ATTR_CTIME) {
203 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 188 inode->i_mtime = inode->i_ctime =
189 current_fs_time(inode->i_sb);
190 xfs_mark_inode_dirty_sync(ip);
191 }
204 code = 0; 192 code = 0;
205 goto error_return; 193 goto error_return;
206 } 194 }
@@ -236,8 +224,11 @@ xfs_setattr(
236 * transaction to modify the i_size. 224 * transaction to modify the i_size.
237 */ 225 */
238 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); 226 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
227 if (code)
228 goto error_return;
239 } 229 }
240 xfs_iunlock(ip, XFS_ILOCK_EXCL); 230 xfs_iunlock(ip, XFS_ILOCK_EXCL);
231 lock_flags &= ~XFS_ILOCK_EXCL;
241 232
242 /* 233 /*
243 * We are going to log the inode size change in this 234 * We are going to log the inode size change in this
@@ -251,40 +242,38 @@ xfs_setattr(
251 * really care about here and prevents waiting for other data 242 * really care about here and prevents waiting for other data
252 * not within the range we care about here. 243 * not within the range we care about here.
253 */ 244 */
254 if (!code && 245 if (ip->i_size != ip->i_d.di_size &&
255 ip->i_size != ip->i_d.di_size &&
256 iattr->ia_size > ip->i_d.di_size) { 246 iattr->ia_size > ip->i_d.di_size) {
257 code = xfs_flush_pages(ip, 247 code = xfs_flush_pages(ip,
258 ip->i_d.di_size, iattr->ia_size, 248 ip->i_d.di_size, iattr->ia_size,
259 XBF_ASYNC, FI_NONE); 249 XBF_ASYNC, FI_NONE);
250 if (code)
251 goto error_return;
260 } 252 }
261 253
262 /* wait for all I/O to complete */ 254 /* wait for all I/O to complete */
263 xfs_ioend_wait(ip); 255 xfs_ioend_wait(ip);
264 256
265 if (!code) 257 code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
266 code = xfs_itruncate_data(ip, iattr->ia_size); 258 xfs_get_blocks);
267 if (code) { 259 if (code)
268 ASSERT(tp == NULL);
269 lock_flags &= ~XFS_ILOCK_EXCL;
270 ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
271 goto error_return; 260 goto error_return;
272 } 261
273 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 262 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
274 if ((code = xfs_trans_reserve(tp, 0, 263 code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
275 XFS_ITRUNCATE_LOG_RES(mp), 0, 264 XFS_TRANS_PERM_LOG_RES,
276 XFS_TRANS_PERM_LOG_RES, 265 XFS_ITRUNCATE_LOG_COUNT);
277 XFS_ITRUNCATE_LOG_COUNT))) { 266 if (code)
278 xfs_trans_cancel(tp, 0); 267 goto error_return;
279 if (need_iolock) 268
280 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 269 truncate_setsize(inode, iattr->ia_size);
281 return code; 270
282 }
283 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 271 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
272 lock_flags |= XFS_ILOCK_EXCL;
273
284 xfs_ilock(ip, XFS_ILOCK_EXCL); 274 xfs_ilock(ip, XFS_ILOCK_EXCL);
285 275
286 xfs_trans_ijoin(tp, ip, lock_flags); 276 xfs_trans_ijoin(tp, ip);
287 xfs_trans_ihold(tp, ip);
288 277
289 /* 278 /*
290 * Only change the c/mtime if we are changing the size 279 * Only change the c/mtime if we are changing the size
@@ -334,8 +323,7 @@ xfs_setattr(
334 xfs_iflags_set(ip, XFS_ITRUNCATED); 323 xfs_iflags_set(ip, XFS_ITRUNCATED);
335 } 324 }
336 } else if (tp) { 325 } else if (tp) {
337 xfs_trans_ijoin(tp, ip, lock_flags); 326 xfs_trans_ijoin(tp, ip);
338 xfs_trans_ihold(tp, ip);
339 } 327 }
340 328
341 /* 329 /*
@@ -470,17 +458,10 @@ xfs_setattr(
470 return XFS_ERROR(code); 458 return XFS_ERROR(code);
471 } 459 }
472 460
473 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
474 !(flags & XFS_ATTR_DMI)) {
475 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
476 NULL, DM_RIGHT_NULL, NULL, NULL,
477 0, 0, AT_DELAY_FLAG(flags));
478 }
479 return 0; 461 return 0;
480 462
481 abort_return: 463 abort_return:
482 commit_flags |= XFS_TRANS_ABORT; 464 commit_flags |= XFS_TRANS_ABORT;
483 /* FALLTHROUGH */
484 error_return: 465 error_return:
485 xfs_qm_dqrele(udqp); 466 xfs_qm_dqrele(udqp);
486 xfs_qm_dqrele(gdqp); 467 xfs_qm_dqrele(gdqp);
@@ -516,7 +497,7 @@ xfs_readlink_bmap(
516 int error = 0; 497 int error = 0;
517 498
518 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, 499 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
519 mval, &nmaps, NULL, NULL); 500 mval, &nmaps, NULL);
520 if (error) 501 if (error)
521 goto out; 502 goto out;
522 503
@@ -557,7 +538,7 @@ xfs_readlink(
557 int pathlen; 538 int pathlen;
558 int error = 0; 539 int error = 0;
559 540
560 xfs_itrace_entry(ip); 541 trace_xfs_readlink(ip);
561 542
562 if (XFS_FORCED_SHUTDOWN(mp)) 543 if (XFS_FORCED_SHUTDOWN(mp))
563 return XFS_ERROR(EIO); 544 return XFS_ERROR(EIO);
@@ -613,14 +594,14 @@ xfs_free_eofblocks(
613 */ 594 */
614 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); 595 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
615 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 596 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
616 map_len = last_fsb - end_fsb; 597 if (last_fsb <= end_fsb)
617 if (map_len <= 0)
618 return 0; 598 return 0;
599 map_len = last_fsb - end_fsb;
619 600
620 nimaps = 1; 601 nimaps = 1;
621 xfs_ilock(ip, XFS_ILOCK_SHARED); 602 xfs_ilock(ip, XFS_ILOCK_SHARED);
622 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, 603 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
623 NULL, 0, &imap, &nimaps, NULL, NULL); 604 NULL, 0, &imap, &nimaps, NULL);
624 xfs_iunlock(ip, XFS_ILOCK_SHARED); 605 xfs_iunlock(ip, XFS_ILOCK_SHARED);
625 606
626 if (!error && (nimaps != 0) && 607 if (!error && (nimaps != 0) &&
@@ -675,10 +656,7 @@ xfs_free_eofblocks(
675 } 656 }
676 657
677 xfs_ilock(ip, XFS_ILOCK_EXCL); 658 xfs_ilock(ip, XFS_ILOCK_EXCL);
678 xfs_trans_ijoin(tp, ip, 659 xfs_trans_ijoin(tp, ip);
679 XFS_IOLOCK_EXCL |
680 XFS_ILOCK_EXCL);
681 xfs_trans_ihold(tp, ip);
682 660
683 error = xfs_itruncate_finish(&tp, ip, 661 error = xfs_itruncate_finish(&tp, ip,
684 ip->i_size, 662 ip->i_size,
@@ -750,8 +728,7 @@ xfs_inactive_symlink_rmt(
750 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 728 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
751 size = (int)ip->i_d.di_size; 729 size = (int)ip->i_d.di_size;
752 ip->i_d.di_size = 0; 730 ip->i_d.di_size = 0;
753 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 731 xfs_trans_ijoin(tp, ip);
754 xfs_trans_ihold(tp, ip);
755 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 732 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
756 /* 733 /*
757 * Find the block(s) so we can inval and unmap them. 734 * Find the block(s) so we can inval and unmap them.
@@ -761,7 +738,7 @@ xfs_inactive_symlink_rmt(
761 nmaps = ARRAY_SIZE(mval); 738 nmaps = ARRAY_SIZE(mval);
762 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 739 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
763 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 740 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
764 &free_list, NULL))) 741 &free_list)))
765 goto error0; 742 goto error0;
766 /* 743 /*
767 * Invalidate the block(s). 744 * Invalidate the block(s).
@@ -776,7 +753,7 @@ xfs_inactive_symlink_rmt(
776 * Unmap the dead block(s) to the free_list. 753 * Unmap the dead block(s) to the free_list.
777 */ 754 */
778 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 755 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
779 &first_block, &free_list, NULL, &done))) 756 &first_block, &free_list, &done)))
780 goto error1; 757 goto error1;
781 ASSERT(done); 758 ASSERT(done);
782 /* 759 /*
@@ -795,8 +772,7 @@ xfs_inactive_symlink_rmt(
795 * Mark it dirty so it will be logged and moved forward in the log as 772 * Mark it dirty so it will be logged and moved forward in the log as
796 * part of every commit. 773 * part of every commit.
797 */ 774 */
798 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 775 xfs_trans_ijoin(tp, ip);
799 xfs_trans_ihold(tp, ip);
800 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 776 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
801 /* 777 /*
802 * Get a new, empty transaction to return to our caller. 778 * Get a new, empty transaction to return to our caller.
@@ -929,8 +905,7 @@ xfs_inactive_attrs(
929 goto error_cancel; 905 goto error_cancel;
930 906
931 xfs_ilock(ip, XFS_ILOCK_EXCL); 907 xfs_ilock(ip, XFS_ILOCK_EXCL);
932 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 908 xfs_trans_ijoin(tp, ip);
933 xfs_trans_ihold(tp, ip);
934 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 909 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
935 910
936 ASSERT(ip->i_d.di_anextents == 0); 911 ASSERT(ip->i_d.di_anextents == 0);
@@ -1035,8 +1010,6 @@ xfs_inactive(
1035 int error; 1010 int error;
1036 int truncate; 1011 int truncate;
1037 1012
1038 xfs_itrace_entry(ip);
1039
1040 /* 1013 /*
1041 * If the inode is already free, then there can be nothing 1014 * If the inode is already free, then there can be nothing
1042 * to clean up here. 1015 * to clean up here.
@@ -1060,9 +1033,6 @@ xfs_inactive(
1060 1033
1061 mp = ip->i_mount; 1034 mp = ip->i_mount;
1062 1035
1063 if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
1064 XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
1065
1066 error = 0; 1036 error = 0;
1067 1037
1068 /* If this is a read-only mount, don't do this (would generate I/O) */ 1038 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1120,8 +1090,7 @@ xfs_inactive(
1120 } 1090 }
1121 1091
1122 xfs_ilock(ip, XFS_ILOCK_EXCL); 1092 xfs_ilock(ip, XFS_ILOCK_EXCL);
1123 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1093 xfs_trans_ijoin(tp, ip);
1124 xfs_trans_ihold(tp, ip);
1125 1094
1126 /* 1095 /*
1127 * normally, we have to run xfs_itruncate_finish sync. 1096 * normally, we have to run xfs_itruncate_finish sync.
@@ -1154,8 +1123,7 @@ xfs_inactive(
1154 return VN_INACTIVE_CACHE; 1123 return VN_INACTIVE_CACHE;
1155 } 1124 }
1156 1125
1157 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1126 xfs_trans_ijoin(tp, ip);
1158 xfs_trans_ihold(tp, ip);
1159 } else { 1127 } else {
1160 error = xfs_trans_reserve(tp, 0, 1128 error = xfs_trans_reserve(tp, 0,
1161 XFS_IFREE_LOG_RES(mp), 1129 XFS_IFREE_LOG_RES(mp),
@@ -1168,8 +1136,7 @@ xfs_inactive(
1168 } 1136 }
1169 1137
1170 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1138 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1171 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1139 xfs_trans_ijoin(tp, ip);
1172 xfs_trans_ihold(tp, ip);
1173 } 1140 }
1174 1141
1175 /* 1142 /*
@@ -1257,7 +1224,7 @@ xfs_lookup(
1257 int error; 1224 int error;
1258 uint lock_mode; 1225 uint lock_mode;
1259 1226
1260 xfs_itrace_entry(dp); 1227 trace_xfs_lookup(dp, name);
1261 1228
1262 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 1229 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
1263 return XFS_ERROR(EIO); 1230 return XFS_ERROR(EIO);
@@ -1289,8 +1256,7 @@ xfs_create(
1289 struct xfs_name *name, 1256 struct xfs_name *name,
1290 mode_t mode, 1257 mode_t mode,
1291 xfs_dev_t rdev, 1258 xfs_dev_t rdev,
1292 xfs_inode_t **ipp, 1259 xfs_inode_t **ipp)
1293 cred_t *credp)
1294{ 1260{
1295 int is_dir = S_ISDIR(mode); 1261 int is_dir = S_ISDIR(mode);
1296 struct xfs_mount *mp = dp->i_mount; 1262 struct xfs_mount *mp = dp->i_mount;
@@ -1302,32 +1268,22 @@ xfs_create(
1302 boolean_t unlock_dp_on_error = B_FALSE; 1268 boolean_t unlock_dp_on_error = B_FALSE;
1303 uint cancel_flags; 1269 uint cancel_flags;
1304 int committed; 1270 int committed;
1305 xfs_prid_t prid; 1271 prid_t prid;
1306 struct xfs_dquot *udqp = NULL; 1272 struct xfs_dquot *udqp = NULL;
1307 struct xfs_dquot *gdqp = NULL; 1273 struct xfs_dquot *gdqp = NULL;
1308 uint resblks; 1274 uint resblks;
1309 uint log_res; 1275 uint log_res;
1310 uint log_count; 1276 uint log_count;
1311 1277
1312 xfs_itrace_entry(dp); 1278 trace_xfs_create(dp, name);
1313 1279
1314 if (XFS_FORCED_SHUTDOWN(mp)) 1280 if (XFS_FORCED_SHUTDOWN(mp))
1315 return XFS_ERROR(EIO); 1281 return XFS_ERROR(EIO);
1316 1282
1317 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
1318 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1319 dp, DM_RIGHT_NULL, NULL,
1320 DM_RIGHT_NULL, name->name, NULL,
1321 mode, 0, 0);
1322
1323 if (error)
1324 return error;
1325 }
1326
1327 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1283 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1328 prid = dp->i_d.di_projid; 1284 prid = xfs_get_projid(dp);
1329 else 1285 else
1330 prid = dfltprid; 1286 prid = XFS_PROJID_DEFAULT;
1331 1287
1332 /* 1288 /*
1333 * Make sure that we have allocated dquot(s) on disk. 1289 * Make sure that we have allocated dquot(s) on disk.
@@ -1406,7 +1362,7 @@ xfs_create(
1406 * entry pointing to them, but a directory also the "." entry 1362 * entry pointing to them, but a directory also the "." entry
1407 * pointing to itself. 1363 * pointing to itself.
1408 */ 1364 */
1409 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, 1365 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
1410 prid, resblks > 0, &ip, &committed); 1366 prid, resblks > 0, &ip, &committed);
1411 if (error) { 1367 if (error) {
1412 if (error == ENOSPC) 1368 if (error == ENOSPC)
@@ -1427,8 +1383,7 @@ xfs_create(
1427 * the transaction cancel unlocking dp so don't do it explicitly in the 1383 * the transaction cancel unlocking dp so don't do it explicitly in the
1428 * error path. 1384 * error path.
1429 */ 1385 */
1430 IHOLD(dp); 1386 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1431 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1432 unlock_dp_on_error = B_FALSE; 1387 unlock_dp_on_error = B_FALSE;
1433 1388
1434 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1389 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1438,7 +1393,7 @@ xfs_create(
1438 ASSERT(error != ENOSPC); 1393 ASSERT(error != ENOSPC);
1439 goto out_trans_abort; 1394 goto out_trans_abort;
1440 } 1395 }
1441 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1396 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1442 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1397 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1443 1398
1444 if (is_dir) { 1399 if (is_dir) {
@@ -1487,16 +1442,7 @@ xfs_create(
1487 xfs_qm_dqrele(gdqp); 1442 xfs_qm_dqrele(gdqp);
1488 1443
1489 *ipp = ip; 1444 *ipp = ip;
1490 1445 return 0;
1491 /* Fallthrough to std_return with error = 0 */
1492 std_return:
1493 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
1494 XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL,
1495 ip, DM_RIGHT_NULL, name->name, NULL, mode,
1496 error, 0);
1497 }
1498
1499 return error;
1500 1446
1501 out_bmap_cancel: 1447 out_bmap_cancel:
1502 xfs_bmap_cancel(&free_list); 1448 xfs_bmap_cancel(&free_list);
@@ -1510,8 +1456,8 @@ xfs_create(
1510 1456
1511 if (unlock_dp_on_error) 1457 if (unlock_dp_on_error)
1512 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1458 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1513 1459 std_return:
1514 goto std_return; 1460 return error;
1515 1461
1516 out_abort_rele: 1462 out_abort_rele:
1517 /* 1463 /*
@@ -1726,20 +1672,11 @@ xfs_remove(
1726 uint resblks; 1672 uint resblks;
1727 uint log_count; 1673 uint log_count;
1728 1674
1729 xfs_itrace_entry(dp); 1675 trace_xfs_remove(dp, name);
1730 xfs_itrace_entry(ip);
1731 1676
1732 if (XFS_FORCED_SHUTDOWN(mp)) 1677 if (XFS_FORCED_SHUTDOWN(mp))
1733 return XFS_ERROR(EIO); 1678 return XFS_ERROR(EIO);
1734 1679
1735 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
1736 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL,
1737 NULL, DM_RIGHT_NULL, name->name, NULL,
1738 ip->i_d.di_mode, 0, 0);
1739 if (error)
1740 return error;
1741 }
1742
1743 error = xfs_qm_dqattach(dp, 0); 1680 error = xfs_qm_dqattach(dp, 0);
1744 if (error) 1681 if (error)
1745 goto std_return; 1682 goto std_return;
@@ -1782,15 +1719,8 @@ xfs_remove(
1782 1719
1783 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 1720 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
1784 1721
1785 /* 1722 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1786 * At this point, we've gotten both the directory and the entry 1723 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1787 * inodes locked.
1788 */
1789 IHOLD(ip);
1790 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1791
1792 IHOLD(dp);
1793 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1794 1724
1795 /* 1725 /*
1796 * If we're removing a directory perform some additional validation. 1726 * If we're removing a directory perform some additional validation.
@@ -1814,7 +1744,7 @@ xfs_remove(
1814 ASSERT(error != ENOENT); 1744 ASSERT(error != ENOENT);
1815 goto out_bmap_cancel; 1745 goto out_bmap_cancel;
1816 } 1746 }
1817 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1747 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1818 1748
1819 if (is_dir) { 1749 if (is_dir) {
1820 /* 1750 /*
@@ -1877,21 +1807,15 @@ xfs_remove(
1877 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1807 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1878 xfs_filestream_deassociate(ip); 1808 xfs_filestream_deassociate(ip);
1879 1809
1880 std_return: 1810 return 0;
1881 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
1882 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
1883 NULL, DM_RIGHT_NULL, name->name, NULL,
1884 ip->i_d.di_mode, error, 0);
1885 }
1886
1887 return error;
1888 1811
1889 out_bmap_cancel: 1812 out_bmap_cancel:
1890 xfs_bmap_cancel(&free_list); 1813 xfs_bmap_cancel(&free_list);
1891 cancel_flags |= XFS_TRANS_ABORT; 1814 cancel_flags |= XFS_TRANS_ABORT;
1892 out_trans_cancel: 1815 out_trans_cancel:
1893 xfs_trans_cancel(tp, cancel_flags); 1816 xfs_trans_cancel(tp, cancel_flags);
1894 goto std_return; 1817 std_return:
1818 return error;
1895} 1819}
1896 1820
1897int 1821int
@@ -1909,25 +1833,13 @@ xfs_link(
1909 int committed; 1833 int committed;
1910 int resblks; 1834 int resblks;
1911 1835
1912 xfs_itrace_entry(tdp); 1836 trace_xfs_link(tdp, target_name);
1913 xfs_itrace_entry(sip);
1914 1837
1915 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 1838 ASSERT(!S_ISDIR(sip->i_d.di_mode));
1916 1839
1917 if (XFS_FORCED_SHUTDOWN(mp)) 1840 if (XFS_FORCED_SHUTDOWN(mp))
1918 return XFS_ERROR(EIO); 1841 return XFS_ERROR(EIO);
1919 1842
1920 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
1921 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
1922 tdp, DM_RIGHT_NULL,
1923 sip, DM_RIGHT_NULL,
1924 target_name->name, NULL, 0, 0, 0);
1925 if (error)
1926 return error;
1927 }
1928
1929 /* Return through std_return after this point. */
1930
1931 error = xfs_qm_dqattach(sip, 0); 1843 error = xfs_qm_dqattach(sip, 0);
1932 if (error) 1844 if (error)
1933 goto std_return; 1845 goto std_return;
@@ -1953,15 +1865,8 @@ xfs_link(
1953 1865
1954 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1866 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1955 1867
1956 /* 1868 xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
1957 * Increment vnode ref counts since xfs_trans_commit & 1869 xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);
1958 * xfs_trans_cancel will both unlock the inodes and
1959 * decrement the associated ref counts.
1960 */
1961 IHOLD(sip);
1962 IHOLD(tdp);
1963 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1964 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1965 1870
1966 /* 1871 /*
1967 * If the source has too many links, we can't make any more to it. 1872 * If the source has too many links, we can't make any more to it.
@@ -1977,7 +1882,7 @@ xfs_link(
1977 * the tree quota mechanism could be circumvented. 1882 * the tree quota mechanism could be circumvented.
1978 */ 1883 */
1979 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1884 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1980 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 1885 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
1981 error = XFS_ERROR(EXDEV); 1886 error = XFS_ERROR(EXDEV);
1982 goto error_return; 1887 goto error_return;
1983 } 1888 }
@@ -1992,7 +1897,7 @@ xfs_link(
1992 &first_block, &free_list, resblks); 1897 &first_block, &free_list, resblks);
1993 if (error) 1898 if (error)
1994 goto abort_return; 1899 goto abort_return;
1995 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1900 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1996 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 1901 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1997 1902
1998 error = xfs_bumplink(tp, sip); 1903 error = xfs_bumplink(tp, sip);
@@ -2014,27 +1919,14 @@ xfs_link(
2014 goto abort_return; 1919 goto abort_return;
2015 } 1920 }
2016 1921
2017 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1922 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2018 if (error)
2019 goto std_return;
2020
2021 /* Fall through to std_return with error = 0. */
2022std_return:
2023 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
2024 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
2025 tdp, DM_RIGHT_NULL,
2026 sip, DM_RIGHT_NULL,
2027 target_name->name, NULL, 0, error, 0);
2028 }
2029 return error;
2030 1923
2031 abort_return: 1924 abort_return:
2032 cancel_flags |= XFS_TRANS_ABORT; 1925 cancel_flags |= XFS_TRANS_ABORT;
2033 /* FALLTHROUGH */
2034
2035 error_return: 1926 error_return:
2036 xfs_trans_cancel(tp, cancel_flags); 1927 xfs_trans_cancel(tp, cancel_flags);
2037 goto std_return; 1928 std_return:
1929 return error;
2038} 1930}
2039 1931
2040int 1932int
@@ -2043,8 +1935,7 @@ xfs_symlink(
2043 struct xfs_name *link_name, 1935 struct xfs_name *link_name,
2044 const char *target_path, 1936 const char *target_path,
2045 mode_t mode, 1937 mode_t mode,
2046 xfs_inode_t **ipp, 1938 xfs_inode_t **ipp)
2047 cred_t *credp)
2048{ 1939{
2049 xfs_mount_t *mp = dp->i_mount; 1940 xfs_mount_t *mp = dp->i_mount;
2050 xfs_trans_t *tp; 1941 xfs_trans_t *tp;
@@ -2065,7 +1956,7 @@ xfs_symlink(
2065 int byte_cnt; 1956 int byte_cnt;
2066 int n; 1957 int n;
2067 xfs_buf_t *bp; 1958 xfs_buf_t *bp;
2068 xfs_prid_t prid; 1959 prid_t prid;
2069 struct xfs_dquot *udqp, *gdqp; 1960 struct xfs_dquot *udqp, *gdqp;
2070 uint resblks; 1961 uint resblks;
2071 1962
@@ -2074,7 +1965,7 @@ xfs_symlink(
2074 ip = NULL; 1965 ip = NULL;
2075 tp = NULL; 1966 tp = NULL;
2076 1967
2077 xfs_itrace_entry(dp); 1968 trace_xfs_symlink(dp, link_name);
2078 1969
2079 if (XFS_FORCED_SHUTDOWN(mp)) 1970 if (XFS_FORCED_SHUTDOWN(mp))
2080 return XFS_ERROR(EIO); 1971 return XFS_ERROR(EIO);
@@ -2086,22 +1977,11 @@ xfs_symlink(
2086 if (pathlen >= MAXPATHLEN) /* total string too long */ 1977 if (pathlen >= MAXPATHLEN) /* total string too long */
2087 return XFS_ERROR(ENAMETOOLONG); 1978 return XFS_ERROR(ENAMETOOLONG);
2088 1979
2089 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
2090 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
2091 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2092 link_name->name,
2093 (unsigned char *)target_path, 0, 0, 0);
2094 if (error)
2095 return error;
2096 }
2097
2098 /* Return through std_return after this point. */
2099
2100 udqp = gdqp = NULL; 1980 udqp = gdqp = NULL;
2101 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1981 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
2102 prid = dp->i_d.di_projid; 1982 prid = xfs_get_projid(dp);
2103 else 1983 else
2104 prid = (xfs_prid_t)dfltprid; 1984 prid = XFS_PROJID_DEFAULT;
2105 1985
2106 /* 1986 /*
2107 * Make sure that we have allocated dquot(s) on disk. 1987 * Make sure that we have allocated dquot(s) on disk.
@@ -2167,8 +2047,8 @@ xfs_symlink(
2167 /* 2047 /*
2168 * Allocate an inode for the symlink. 2048 * Allocate an inode for the symlink.
2169 */ 2049 */
2170 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 2050 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
2171 1, 0, credp, prid, resblks > 0, &ip, NULL); 2051 prid, resblks > 0, &ip, NULL);
2172 if (error) { 2052 if (error) {
2173 if (error == ENOSPC) 2053 if (error == ENOSPC)
2174 goto error_return; 2054 goto error_return;
@@ -2180,8 +2060,7 @@ xfs_symlink(
2180 * transaction cancel unlocking dp so don't do it explicitly in the 2060 * transaction cancel unlocking dp so don't do it explicitly in the
2181 * error path. 2061 * error path.
2182 */ 2062 */
2183 IHOLD(dp); 2063 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
2184 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2185 unlock_dp_on_error = B_FALSE; 2064 unlock_dp_on_error = B_FALSE;
2186 2065
2187 /* 2066 /*
@@ -2215,7 +2094,7 @@ xfs_symlink(
2215 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 2094 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
2216 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2095 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2217 &first_block, resblks, mval, &nmaps, 2096 &first_block, resblks, mval, &nmaps,
2218 &free_list, NULL); 2097 &free_list);
2219 if (error) { 2098 if (error) {
2220 goto error1; 2099 goto error1;
2221 } 2100 }
@@ -2251,7 +2130,7 @@ xfs_symlink(
2251 &first_block, &free_list, resblks); 2130 &first_block, &free_list, resblks);
2252 if (error) 2131 if (error)
2253 goto error1; 2132 goto error1;
2254 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2133 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2255 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2134 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2256 2135
2257 /* 2136 /*
@@ -2278,21 +2157,8 @@ xfs_symlink(
2278 xfs_qm_dqrele(udqp); 2157 xfs_qm_dqrele(udqp);
2279 xfs_qm_dqrele(gdqp); 2158 xfs_qm_dqrele(gdqp);
2280 2159
2281 /* Fall through to std_return with error = 0 or errno from 2160 *ipp = ip;
2282 * xfs_trans_commit */ 2161 return 0;
2283std_return:
2284 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
2285 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
2286 dp, DM_RIGHT_NULL,
2287 error ? NULL : ip,
2288 DM_RIGHT_NULL, link_name->name,
2289 (unsigned char *)target_path,
2290 0, error, 0);
2291 }
2292
2293 if (!error)
2294 *ipp = ip;
2295 return error;
2296 2162
2297 error2: 2163 error2:
2298 IRELE(ip); 2164 IRELE(ip);
@@ -2306,8 +2172,8 @@ std_return:
2306 2172
2307 if (unlock_dp_on_error) 2173 if (unlock_dp_on_error)
2308 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2174 xfs_iunlock(dp, XFS_ILOCK_EXCL);
2309 2175 std_return:
2310 goto std_return; 2176 return error;
2311} 2177}
2312 2178
2313int 2179int
@@ -2333,13 +2199,12 @@ xfs_set_dmattrs(
2333 return error; 2199 return error;
2334 } 2200 }
2335 xfs_ilock(ip, XFS_ILOCK_EXCL); 2201 xfs_ilock(ip, XFS_ILOCK_EXCL);
2336 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2202 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
2337 2203
2338 ip->i_d.di_dmevmask = evmask; 2204 ip->i_d.di_dmevmask = evmask;
2339 ip->i_d.di_dmstate = state; 2205 ip->i_d.di_dmstate = state;
2340 2206
2341 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2207 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2342 IHOLD(ip);
2343 error = xfs_trans_commit(tp, 0); 2208 error = xfs_trans_commit(tp, 0);
2344 2209
2345 return error; 2210 return error;
@@ -2390,7 +2255,7 @@ xfs_alloc_file_space(
2390 int committed; 2255 int committed;
2391 int error; 2256 int error;
2392 2257
2393 xfs_itrace_entry(ip); 2258 trace_xfs_alloc_file_space(ip);
2394 2259
2395 if (XFS_FORCED_SHUTDOWN(mp)) 2260 if (XFS_FORCED_SHUTDOWN(mp))
2396 return XFS_ERROR(EIO); 2261 return XFS_ERROR(EIO);
@@ -2408,29 +2273,13 @@ xfs_alloc_file_space(
2408 count = len; 2273 count = len;
2409 imapp = &imaps[0]; 2274 imapp = &imaps[0];
2410 nimaps = 1; 2275 nimaps = 1;
2411 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 2276 bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
2412 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 2277 startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
2413 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 2278 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
2414 2279
2415 /* Generate a DMAPI event if needed. */
2416 if (alloc_type != 0 && offset < ip->i_size &&
2417 (attr_flags & XFS_ATTR_DMI) == 0 &&
2418 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2419 xfs_off_t end_dmi_offset;
2420
2421 end_dmi_offset = offset+len;
2422 if (end_dmi_offset > ip->i_size)
2423 end_dmi_offset = ip->i_size;
2424 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
2425 end_dmi_offset - offset, 0, NULL);
2426 if (error)
2427 return error;
2428 }
2429
2430 /* 2280 /*
2431 * Allocate file space until done or until there is an error 2281 * Allocate file space until done or until there is an error
2432 */ 2282 */
2433retry:
2434 while (allocatesize_fsb && !error) { 2283 while (allocatesize_fsb && !error) {
2435 xfs_fileoff_t s, e; 2284 xfs_fileoff_t s, e;
2436 2285
@@ -2451,15 +2300,22 @@ retry:
2451 e = allocatesize_fsb; 2300 e = allocatesize_fsb;
2452 } 2301 }
2453 2302
2303 /*
2304 * The transaction reservation is limited to a 32-bit block
2305 * count, hence we need to limit the number of blocks we are
2306 * trying to reserve to avoid an overflow. We can't allocate
2307 * more than @nimaps extents, and an extent is limited on disk
2308 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
2309 */
2310 resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
2454 if (unlikely(rt)) { 2311 if (unlikely(rt)) {
2455 resrtextents = qblocks = (uint)(e - s); 2312 resrtextents = qblocks = resblks;
2456 resrtextents /= mp->m_sb.sb_rextsize; 2313 resrtextents /= mp->m_sb.sb_rextsize;
2457 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 2314 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
2458 quota_flag = XFS_QMOPT_RES_RTBLKS; 2315 quota_flag = XFS_QMOPT_RES_RTBLKS;
2459 } else { 2316 } else {
2460 resrtextents = 0; 2317 resrtextents = 0;
2461 resblks = qblocks = \ 2318 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
2462 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
2463 quota_flag = XFS_QMOPT_RES_REGBLKS; 2319 quota_flag = XFS_QMOPT_RES_REGBLKS;
2464 } 2320 }
2465 2321
@@ -2488,8 +2344,7 @@ retry:
2488 if (error) 2344 if (error)
2489 goto error1; 2345 goto error1;
2490 2346
2491 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2347 xfs_trans_ijoin(tp, ip);
2492 xfs_trans_ihold(tp, ip);
2493 2348
2494 /* 2349 /*
2495 * Issue the xfs_bmapi() call to allocate the blocks 2350 * Issue the xfs_bmapi() call to allocate the blocks
@@ -2498,7 +2353,7 @@ retry:
2498 error = xfs_bmapi(tp, ip, startoffset_fsb, 2353 error = xfs_bmapi(tp, ip, startoffset_fsb,
2499 allocatesize_fsb, bmapi_flag, 2354 allocatesize_fsb, bmapi_flag,
2500 &firstfsb, 0, imapp, &nimaps, 2355 &firstfsb, 0, imapp, &nimaps,
2501 &free_list, NULL); 2356 &free_list);
2502 if (error) { 2357 if (error) {
2503 goto error0; 2358 goto error0;
2504 } 2359 }
@@ -2527,17 +2382,6 @@ retry:
2527 startoffset_fsb += allocated_fsb; 2382 startoffset_fsb += allocated_fsb;
2528 allocatesize_fsb -= allocated_fsb; 2383 allocatesize_fsb -= allocated_fsb;
2529 } 2384 }
2530dmapi_enospc_check:
2531 if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
2532 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
2533 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
2534 ip, DM_RIGHT_NULL,
2535 ip, DM_RIGHT_NULL,
2536 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
2537 if (error == 0)
2538 goto retry; /* Maybe DMAPI app. has made space */
2539 /* else fall through with error from XFS_SEND_DATA */
2540 }
2541 2385
2542 return error; 2386 return error;
2543 2387
@@ -2548,7 +2392,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
2548error1: /* Just cancel transaction */ 2392error1: /* Just cancel transaction */
2549 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 2393 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
2550 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2394 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2551 goto dmapi_enospc_check; 2395 return error;
2552} 2396}
2553 2397
2554/* 2398/*
@@ -2588,9 +2432,9 @@ xfs_zero_remaining_bytes(
2588 if (endoff > ip->i_size) 2432 if (endoff > ip->i_size)
2589 endoff = ip->i_size; 2433 endoff = ip->i_size;
2590 2434
2591 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 2435 bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
2592 XFS_IS_REALTIME_INODE(ip) ? 2436 mp->m_rtdev_targp : mp->m_ddev_targp,
2593 mp->m_rtdev_targp : mp->m_ddev_targp); 2437 mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
2594 if (!bp) 2438 if (!bp)
2595 return XFS_ERROR(ENOMEM); 2439 return XFS_ERROR(ENOMEM);
2596 2440
@@ -2598,7 +2442,7 @@ xfs_zero_remaining_bytes(
2598 offset_fsb = XFS_B_TO_FSBT(mp, offset); 2442 offset_fsb = XFS_B_TO_FSBT(mp, offset);
2599 nimap = 1; 2443 nimap = 1;
2600 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, 2444 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
2601 NULL, 0, &imap, &nimap, NULL, NULL); 2445 NULL, 0, &imap, &nimap, NULL);
2602 if (error || nimap < 1) 2446 if (error || nimap < 1)
2603 break; 2447 break;
2604 ASSERT(imap.br_blockcount >= 1); 2448 ASSERT(imap.br_blockcount >= 1);
@@ -2616,7 +2460,7 @@ xfs_zero_remaining_bytes(
2616 XFS_BUF_READ(bp); 2460 XFS_BUF_READ(bp);
2617 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 2461 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
2618 xfsbdstrat(mp, bp); 2462 xfsbdstrat(mp, bp);
2619 error = xfs_iowait(bp); 2463 error = xfs_buf_iowait(bp);
2620 if (error) { 2464 if (error) {
2621 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 2465 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
2622 mp, bp, XFS_BUF_ADDR(bp)); 2466 mp, bp, XFS_BUF_ADDR(bp));
@@ -2629,7 +2473,7 @@ xfs_zero_remaining_bytes(
2629 XFS_BUF_UNREAD(bp); 2473 XFS_BUF_UNREAD(bp);
2630 XFS_BUF_WRITE(bp); 2474 XFS_BUF_WRITE(bp);
2631 xfsbdstrat(mp, bp); 2475 xfsbdstrat(mp, bp);
2632 error = xfs_iowait(bp); 2476 error = xfs_buf_iowait(bp);
2633 if (error) { 2477 if (error) {
2634 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 2478 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
2635 mp, bp, XFS_BUF_ADDR(bp)); 2479 mp, bp, XFS_BUF_ADDR(bp));
@@ -2661,7 +2505,6 @@ xfs_free_file_space(
2661{ 2505{
2662 int committed; 2506 int committed;
2663 int done; 2507 int done;
2664 xfs_off_t end_dmi_offset;
2665 xfs_fileoff_t endoffset_fsb; 2508 xfs_fileoff_t endoffset_fsb;
2666 int error; 2509 int error;
2667 xfs_fsblock_t firstfsb; 2510 xfs_fsblock_t firstfsb;
@@ -2680,7 +2523,7 @@ xfs_free_file_space(
2680 2523
2681 mp = ip->i_mount; 2524 mp = ip->i_mount;
2682 2525
2683 xfs_itrace_entry(ip); 2526 trace_xfs_free_file_space(ip);
2684 2527
2685 error = xfs_qm_dqattach(ip, 0); 2528 error = xfs_qm_dqattach(ip, 0);
2686 if (error) 2529 if (error)
@@ -2691,19 +2534,7 @@ xfs_free_file_space(
2691 return error; 2534 return error;
2692 rt = XFS_IS_REALTIME_INODE(ip); 2535 rt = XFS_IS_REALTIME_INODE(ip);
2693 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 2536 startoffset_fsb = XFS_B_TO_FSB(mp, offset);
2694 end_dmi_offset = offset + len; 2537 endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
2695 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
2696
2697 if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
2698 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2699 if (end_dmi_offset > ip->i_size)
2700 end_dmi_offset = ip->i_size;
2701 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
2702 offset, end_dmi_offset - offset,
2703 AT_DELAY_FLAG(attr_flags), NULL);
2704 if (error)
2705 return error;
2706 }
2707 2538
2708 if (attr_flags & XFS_ATTR_NOLOCK) 2539 if (attr_flags & XFS_ATTR_NOLOCK)
2709 need_iolock = 0; 2540 need_iolock = 0;
@@ -2731,7 +2562,7 @@ xfs_free_file_space(
2731 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 2562 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
2732 nimap = 1; 2563 nimap = 1;
2733 error = xfs_bmapi(NULL, ip, startoffset_fsb, 2564 error = xfs_bmapi(NULL, ip, startoffset_fsb,
2734 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 2565 1, 0, NULL, 0, &imap, &nimap, NULL);
2735 if (error) 2566 if (error)
2736 goto out_unlock_iolock; 2567 goto out_unlock_iolock;
2737 ASSERT(nimap == 0 || nimap == 1); 2568 ASSERT(nimap == 0 || nimap == 1);
@@ -2746,7 +2577,7 @@ xfs_free_file_space(
2746 } 2577 }
2747 nimap = 1; 2578 nimap = 1;
2748 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 2579 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
2749 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 2580 1, 0, NULL, 0, &imap, &nimap, NULL);
2750 if (error) 2581 if (error)
2751 goto out_unlock_iolock; 2582 goto out_unlock_iolock;
2752 ASSERT(nimap == 0 || nimap == 1); 2583 ASSERT(nimap == 0 || nimap == 1);
@@ -2814,8 +2645,7 @@ xfs_free_file_space(
2814 if (error) 2645 if (error)
2815 goto error1; 2646 goto error1;
2816 2647
2817 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2648 xfs_trans_ijoin(tp, ip);
2818 xfs_trans_ihold(tp, ip);
2819 2649
2820 /* 2650 /*
2821 * issue the bunmapi() call to free the blocks 2651 * issue the bunmapi() call to free the blocks
@@ -2823,7 +2653,7 @@ xfs_free_file_space(
2823 xfs_bmap_init(&free_list, &firstfsb); 2653 xfs_bmap_init(&free_list, &firstfsb);
2824 error = xfs_bunmapi(tp, ip, startoffset_fsb, 2654 error = xfs_bunmapi(tp, ip, startoffset_fsb,
2825 endoffset_fsb - startoffset_fsb, 2655 endoffset_fsb - startoffset_fsb,
2826 0, 2, &firstfsb, &free_list, NULL, &done); 2656 0, 2, &firstfsb, &free_list, &done);
2827 if (error) { 2657 if (error) {
2828 goto error0; 2658 goto error0;
2829 } 2659 }
@@ -2882,8 +2712,7 @@ xfs_change_file_space(
2882 xfs_off_t llen; 2712 xfs_off_t llen;
2883 xfs_trans_t *tp; 2713 xfs_trans_t *tp;
2884 struct iattr iattr; 2714 struct iattr iattr;
2885 2715 int prealloc_type;
2886 xfs_itrace_entry(ip);
2887 2716
2888 if (!S_ISREG(ip->i_d.di_mode)) 2717 if (!S_ISREG(ip->i_d.di_mode))
2889 return XFS_ERROR(EINVAL); 2718 return XFS_ERROR(EINVAL);
@@ -2926,12 +2755,17 @@ xfs_change_file_space(
2926 * size to be changed. 2755 * size to be changed.
2927 */ 2756 */
2928 setprealloc = clrprealloc = 0; 2757 setprealloc = clrprealloc = 0;
2758 prealloc_type = XFS_BMAPI_PREALLOC;
2929 2759
2930 switch (cmd) { 2760 switch (cmd) {
2761 case XFS_IOC_ZERO_RANGE:
2762 prealloc_type |= XFS_BMAPI_CONVERT;
2763 xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
2764 /* FALLTHRU */
2931 case XFS_IOC_RESVSP: 2765 case XFS_IOC_RESVSP:
2932 case XFS_IOC_RESVSP64: 2766 case XFS_IOC_RESVSP64:
2933 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 2767 error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
2934 1, attr_flags); 2768 prealloc_type, attr_flags);
2935 if (error) 2769 if (error)
2936 return error; 2770 return error;
2937 setprealloc = 1; 2771 setprealloc = 1;
@@ -2985,8 +2819,7 @@ xfs_change_file_space(
2985 2819
2986 xfs_ilock(ip, XFS_ILOCK_EXCL); 2820 xfs_ilock(ip, XFS_ILOCK_EXCL);
2987 2821
2988 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2822 xfs_trans_ijoin(tp, ip);
2989 xfs_trans_ihold(tp, ip);
2990 2823
2991 if ((attr_flags & XFS_ATTR_DMI) == 0) { 2824 if ((attr_flags & XFS_ATTR_DMI) == 0) {
2992 ip->i_d.di_mode &= ~S_ISUID; 2825 ip->i_d.di_mode &= ~S_ISUID;
@@ -3001,7 +2834,7 @@ xfs_change_file_space(
3001 if (ip->i_d.di_mode & S_IXGRP) 2834 if (ip->i_d.di_mode & S_IXGRP)
3002 ip->i_d.di_mode &= ~S_ISGID; 2835 ip->i_d.di_mode &= ~S_ISGID;
3003 2836
3004 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2837 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3005 } 2838 }
3006 if (setprealloc) 2839 if (setprealloc)
3007 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 2840 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index d8dfa8d0dadd..f6702927eee4 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,7 +2,6 @@
2#define _XFS_VNODEOPS_H 1 2#define _XFS_VNODEOPS_H 1
3 3
4struct attrlist_cursor_kern; 4struct attrlist_cursor_kern;
5struct cred;
6struct file; 5struct file;
7struct iattr; 6struct iattr;
8struct inode; 7struct inode;
@@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip);
26int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 25int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
27 struct xfs_inode **ipp, struct xfs_name *ci_name); 26 struct xfs_inode **ipp, struct xfs_name *ci_name);
28int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 27int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
29 xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); 28 xfs_dev_t rdev, struct xfs_inode **ipp);
30int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 29int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
31 struct xfs_inode *ip); 30 struct xfs_inode *ip);
32int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 31int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
@@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
34int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 33int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
35 xfs_off_t *offset, filldir_t filldir); 34 xfs_off_t *offset, filldir_t filldir);
36int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 35int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
37 const char *target_path, mode_t mode, struct xfs_inode **ipp, 36 const char *target_path, mode_t mode, struct xfs_inode **ipp);
38 cred_t *credp);
39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 37int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
40int xfs_change_file_space(struct xfs_inode *ip, int cmd, 38int xfs_change_file_space(struct xfs_inode *ip, int cmd,
41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 39 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);