aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Elder <aelder@sgi.com>2010-06-04 14:22:30 -0400
committerAlex Elder <aelder@sgi.com>2010-06-04 14:22:30 -0400
commit1bf7dbfde8fe7ddaa8e2e1b4e0fc41a9fc6aa7a5 (patch)
treefdb99e686fa40e79cc53f80dfed58e9b548ed4eb
parentad8456361fa19068cf49b50a4f98e41b73c08e76 (diff)
parentf9369729496a0f4c607a4cc1ea4dfeddbbfc505a (diff)
Merge branch 'master' into for-linus
-rw-r--r--Documentation/filesystems/xfs-delayed-logging-design.txt5
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h356
-rw-r--r--fs/xfs/quota/xfs_qm.c4
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c144
-rw-r--r--fs/xfs/xfs_log_recover.c11
-rw-r--r--fs/xfs/xfs_mount.c68
-rw-r--r--fs/xfs/xfs_rtalloc.c4
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_trans.c446
-rw-r--r--fs/xfs/xfs_trans.h411
-rw-r--r--fs/xfs/xfs_vnodeops.c2
18 files changed, 736 insertions, 798 deletions
diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt
index d8119e9d2d60..96d0df28bed3 100644
--- a/Documentation/filesystems/xfs-delayed-logging-design.txt
+++ b/Documentation/filesystems/xfs-delayed-logging-design.txt
@@ -794,11 +794,6 @@ designed.
794 794
795Roadmap: 795Roadmap:
796 796
7972.6.35 Inclusion in mainline as an experimental mount option
798 => approximately 2-3 months to merge window
799 => needs to be in xfs-dev tree in 4-6 weeks
800 => code is nearing readiness for review
801
8022.6.37 Remove experimental tag from mount option 7972.6.37 Remove experimental tag from mount option
803 => should be roughly 6 months after initial merge 798 => should be roughly 6 months after initial merge
804 => enough time to: 799 => enough time to:
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 089eaca860b4..a0fa3bf0d1bb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1333,6 +1333,21 @@ xfs_vm_writepage(
1333 trace_xfs_writepage(inode, page, 0); 1333 trace_xfs_writepage(inode, page, 0);
1334 1334
1335 /* 1335 /*
1336 * Refuse to write the page out if we are called from reclaim context.
1337 *
1338 * This is primarily to avoid stack overflows when called from deep
1339 * used stacks in random callers for direct reclaim, but disabling
1340 * reclaim for kswap is a nice side-effect as kswapd causes rather
1341 * suboptimal I/O patters, too.
1342 *
1343 * This should really be done by the core VM, but until that happens
1344 * filesystems like XFS, btrfs and ext4 have to take care of this
1345 * by themselves.
1346 */
1347 if (current->flags & PF_MEMALLOC)
1348 goto out_fail;
1349
1350 /*
1336 * We need a transaction if: 1351 * We need a transaction if:
1337 * 1. There are delalloc buffers on the page 1352 * 1. There are delalloc buffers on the page
1338 * 2. The page is uptodate and we have unmapped buffers 1353 * 2. The page is uptodate and we have unmapped buffers
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9c8019c78c92..44f0b2de153e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
585 bf.l_len = len; 585 bf.l_len = len;
586 586
587 xfs_ilock(ip, XFS_IOLOCK_EXCL); 587 xfs_ilock(ip, XFS_IOLOCK_EXCL);
588
589 /* check the new inode size is valid before allocating */
590 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
591 offset + len > i_size_read(inode)) {
592 new_size = offset + len;
593 error = inode_newsize_ok(inode, new_size);
594 if (error)
595 goto out_unlock;
596 }
597
588 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 598 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
589 0, XFS_ATTR_NOLOCK); 599 0, XFS_ATTR_NOLOCK);
590 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 600 if (error)
591 offset + len > i_size_read(inode)) 601 goto out_unlock;
592 new_size = offset + len;
593 602
594 /* Change file size if needed */ 603 /* Change file size if needed */
595 if (new_size) { 604 if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
600 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 609 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
601 } 610 }
602 611
612out_unlock:
603 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 613 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
604out_error: 614out_error:
605 return error; 615 return error;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 9ac8aea91529..067cafbfc635 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -23,7 +23,6 @@
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_quota.h" 25#include "xfs_quota.h"
26#include "xfs_log.h"
27#include "xfs_trans.h" 26#include "xfs_trans.h"
28#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
29#include "xfs_inode.h" 28#include "xfs_inode.h"
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3884e20bc14e..ef7f0218bccb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -164,10 +164,6 @@ xfs_inode_ag_iterator(
164 struct xfs_perag *pag; 164 struct xfs_perag *pag;
165 165
166 pag = xfs_perag_get(mp, ag); 166 pag = xfs_perag_get(mp, ag);
167 if (!pag->pag_ici_init) {
168 xfs_perag_put(pag);
169 continue;
170 }
171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
172 exclusive, &nr); 168 exclusive, &nr);
173 xfs_perag_put(pag); 169 xfs_perag_put(pag);
@@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink(
867 down_read(&xfs_mount_list_lock); 863 down_read(&xfs_mount_list_lock);
868 list_for_each_entry(mp, &xfs_mount_list, m_mplist) { 864 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
869 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 865 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
870
871 pag = xfs_perag_get(mp, ag); 866 pag = xfs_perag_get(mp, ag);
872 if (!pag->pag_ici_init) {
873 xfs_perag_put(pag);
874 continue;
875 }
876 reclaimable += pag->pag_ici_reclaimable; 867 reclaimable += pag->pag_ici_reclaimable;
877 xfs_perag_put(pag); 868 xfs_perag_put(pag);
878 } 869 }
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 207fa77f63ae..d12be8470cba 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,7 +50,6 @@
50#include "quota/xfs_dquot_item.h" 50#include "quota/xfs_dquot_item.h"
51#include "quota/xfs_dquot.h" 51#include "quota/xfs_dquot.h"
52#include "xfs_log_recover.h" 52#include "xfs_log_recover.h"
53#include "xfs_buf_item.h"
54#include "xfs_inode_item.h" 53#include "xfs_inode_item.h"
55 54
56/* 55/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ff6bc797baf2..73d5aa117384 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
82 ) 82 )
83) 83)
84 84
85#define DEFINE_PERAG_REF_EVENT(name) \
86TRACE_EVENT(name, \
87 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
88 unsigned long caller_ip), \
89 TP_ARGS(mp, agno, refcount, caller_ip), \
90 TP_STRUCT__entry( \
91 __field(dev_t, dev) \
92 __field(xfs_agnumber_t, agno) \
93 __field(int, refcount) \
94 __field(unsigned long, caller_ip) \
95 ), \
96 TP_fast_assign( \
97 __entry->dev = mp->m_super->s_dev; \
98 __entry->agno = agno; \
99 __entry->refcount = refcount; \
100 __entry->caller_ip = caller_ip; \
101 ), \
102 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
103 MAJOR(__entry->dev), MINOR(__entry->dev), \
104 __entry->agno, \
105 __entry->refcount, \
106 (char *)__entry->caller_ip) \
107);
108
109DEFINE_PERAG_REF_EVENT(xfs_perag_get)
110DEFINE_PERAG_REF_EVENT(xfs_perag_put)
111
112#define DEFINE_ATTR_LIST_EVENT(name) \ 85#define DEFINE_ATTR_LIST_EVENT(name) \
113DEFINE_EVENT(xfs_attr_list_class, name, \ 86DEFINE_EVENT(xfs_attr_list_class, name, \
114 TP_PROTO(struct xfs_attr_list_context *ctx), \ 87 TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
122DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); 95DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
123DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); 96DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
124 97
98DECLARE_EVENT_CLASS(xfs_perag_class,
99 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
100 unsigned long caller_ip),
101 TP_ARGS(mp, agno, refcount, caller_ip),
102 TP_STRUCT__entry(
103 __field(dev_t, dev)
104 __field(xfs_agnumber_t, agno)
105 __field(int, refcount)
106 __field(unsigned long, caller_ip)
107 ),
108 TP_fast_assign(
109 __entry->dev = mp->m_super->s_dev;
110 __entry->agno = agno;
111 __entry->refcount = refcount;
112 __entry->caller_ip = caller_ip;
113 ),
114 TP_printk("dev %d:%d agno %u refcount %d caller %pf",
115 MAJOR(__entry->dev), MINOR(__entry->dev),
116 __entry->agno,
117 __entry->refcount,
118 (char *)__entry->caller_ip)
119);
120
121#define DEFINE_PERAG_REF_EVENT(name) \
122DEFINE_EVENT(xfs_perag_class, name, \
123 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_put);
128
125TRACE_EVENT(xfs_attr_list_node_descend, 129TRACE_EVENT(xfs_attr_list_node_descend,
126 TP_PROTO(struct xfs_attr_list_context *ctx, 130 TP_PROTO(struct xfs_attr_list_context *ctx,
127 struct xfs_da_node_entry *btree), 131 struct xfs_da_node_entry *btree),
@@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
775DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); 779DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
776DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); 780DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
777 781
778#define DEFINE_RW_EVENT(name) \ 782DECLARE_EVENT_CLASS(xfs_file_class,
779TRACE_EVENT(name, \ 783 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
780 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ 784 TP_ARGS(ip, count, offset, flags),
781 TP_ARGS(ip, count, offset, flags), \ 785 TP_STRUCT__entry(
782 TP_STRUCT__entry( \ 786 __field(dev_t, dev)
783 __field(dev_t, dev) \ 787 __field(xfs_ino_t, ino)
784 __field(xfs_ino_t, ino) \ 788 __field(xfs_fsize_t, size)
785 __field(xfs_fsize_t, size) \ 789 __field(xfs_fsize_t, new_size)
786 __field(xfs_fsize_t, new_size) \ 790 __field(loff_t, offset)
787 __field(loff_t, offset) \ 791 __field(size_t, count)
788 __field(size_t, count) \ 792 __field(int, flags)
789 __field(int, flags) \ 793 ),
790 ), \ 794 TP_fast_assign(
791 TP_fast_assign( \ 795 __entry->dev = VFS_I(ip)->i_sb->s_dev;
792 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 796 __entry->ino = ip->i_ino;
793 __entry->ino = ip->i_ino; \ 797 __entry->size = ip->i_d.di_size;
794 __entry->size = ip->i_d.di_size; \ 798 __entry->new_size = ip->i_new_size;
795 __entry->new_size = ip->i_new_size; \ 799 __entry->offset = offset;
796 __entry->offset = offset; \ 800 __entry->count = count;
797 __entry->count = count; \ 801 __entry->flags = flags;
798 __entry->flags = flags; \ 802 ),
799 ), \ 803 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
800 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 804 "offset 0x%llx count 0x%zx ioflags %s",
801 "offset 0x%llx count 0x%zx ioflags %s", \ 805 MAJOR(__entry->dev), MINOR(__entry->dev),
802 MAJOR(__entry->dev), MINOR(__entry->dev), \ 806 __entry->ino,
803 __entry->ino, \ 807 __entry->size,
804 __entry->size, \ 808 __entry->new_size,
805 __entry->new_size, \ 809 __entry->offset,
806 __entry->offset, \ 810 __entry->count,
807 __entry->count, \ 811 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
808 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
809) 812)
813
814#define DEFINE_RW_EVENT(name) \
815DEFINE_EVENT(xfs_file_class, name, \
816 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
817 TP_ARGS(ip, count, offset, flags))
810DEFINE_RW_EVENT(xfs_file_read); 818DEFINE_RW_EVENT(xfs_file_read);
811DEFINE_RW_EVENT(xfs_file_buffered_write); 819DEFINE_RW_EVENT(xfs_file_buffered_write);
812DEFINE_RW_EVENT(xfs_file_direct_write); 820DEFINE_RW_EVENT(xfs_file_direct_write);
813DEFINE_RW_EVENT(xfs_file_splice_read); 821DEFINE_RW_EVENT(xfs_file_splice_read);
814DEFINE_RW_EVENT(xfs_file_splice_write); 822DEFINE_RW_EVENT(xfs_file_splice_write);
815 823
816 824DECLARE_EVENT_CLASS(xfs_page_class,
817#define DEFINE_PAGE_EVENT(name) \ 825 TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
818TRACE_EVENT(name, \ 826 TP_ARGS(inode, page, off),
819 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ 827 TP_STRUCT__entry(
820 TP_ARGS(inode, page, off), \ 828 __field(dev_t, dev)
821 TP_STRUCT__entry( \ 829 __field(xfs_ino_t, ino)
822 __field(dev_t, dev) \ 830 __field(pgoff_t, pgoff)
823 __field(xfs_ino_t, ino) \ 831 __field(loff_t, size)
824 __field(pgoff_t, pgoff) \ 832 __field(unsigned long, offset)
825 __field(loff_t, size) \ 833 __field(int, delalloc)
826 __field(unsigned long, offset) \ 834 __field(int, unmapped)
827 __field(int, delalloc) \ 835 __field(int, unwritten)
828 __field(int, unmapped) \ 836 ),
829 __field(int, unwritten) \ 837 TP_fast_assign(
830 ), \ 838 int delalloc = -1, unmapped = -1, unwritten = -1;
831 TP_fast_assign( \ 839
832 int delalloc = -1, unmapped = -1, unwritten = -1; \ 840 if (page_has_buffers(page))
833 \ 841 xfs_count_page_state(page, &delalloc,
834 if (page_has_buffers(page)) \ 842 &unmapped, &unwritten);
835 xfs_count_page_state(page, &delalloc, \ 843 __entry->dev = inode->i_sb->s_dev;
836 &unmapped, &unwritten); \ 844 __entry->ino = XFS_I(inode)->i_ino;
837 __entry->dev = inode->i_sb->s_dev; \ 845 __entry->pgoff = page_offset(page);
838 __entry->ino = XFS_I(inode)->i_ino; \ 846 __entry->size = i_size_read(inode);
839 __entry->pgoff = page_offset(page); \ 847 __entry->offset = off;
840 __entry->size = i_size_read(inode); \ 848 __entry->delalloc = delalloc;
841 __entry->offset = off; \ 849 __entry->unmapped = unmapped;
842 __entry->delalloc = delalloc; \ 850 __entry->unwritten = unwritten;
843 __entry->unmapped = unmapped; \ 851 ),
844 __entry->unwritten = unwritten; \ 852 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
845 ), \ 853 "delalloc %d unmapped %d unwritten %d",
846 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \ 854 MAJOR(__entry->dev), MINOR(__entry->dev),
847 "delalloc %d unmapped %d unwritten %d", \ 855 __entry->ino,
848 MAJOR(__entry->dev), MINOR(__entry->dev), \ 856 __entry->pgoff,
849 __entry->ino, \ 857 __entry->size,
850 __entry->pgoff, \ 858 __entry->offset,
851 __entry->size, \ 859 __entry->delalloc,
852 __entry->offset, \ 860 __entry->unmapped,
853 __entry->delalloc, \ 861 __entry->unwritten)
854 __entry->unmapped, \
855 __entry->unwritten) \
856) 862)
863
864#define DEFINE_PAGE_EVENT(name) \
865DEFINE_EVENT(xfs_page_class, name, \
866 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
867 TP_ARGS(inode, page, off))
857DEFINE_PAGE_EVENT(xfs_writepage); 868DEFINE_PAGE_EVENT(xfs_writepage);
858DEFINE_PAGE_EVENT(xfs_releasepage); 869DEFINE_PAGE_EVENT(xfs_releasepage);
859DEFINE_PAGE_EVENT(xfs_invalidatepage); 870DEFINE_PAGE_EVENT(xfs_invalidatepage);
860 871
861#define DEFINE_IOMAP_EVENT(name) \ 872DECLARE_EVENT_CLASS(xfs_iomap_class,
862TRACE_EVENT(name, \ 873 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
863 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ 874 int flags, struct xfs_bmbt_irec *irec),
864 int flags, struct xfs_bmbt_irec *irec), \ 875 TP_ARGS(ip, offset, count, flags, irec),
865 TP_ARGS(ip, offset, count, flags, irec), \ 876 TP_STRUCT__entry(
866 TP_STRUCT__entry( \ 877 __field(dev_t, dev)
867 __field(dev_t, dev) \ 878 __field(xfs_ino_t, ino)
868 __field(xfs_ino_t, ino) \ 879 __field(loff_t, size)
869 __field(loff_t, size) \ 880 __field(loff_t, new_size)
870 __field(loff_t, new_size) \ 881 __field(loff_t, offset)
871 __field(loff_t, offset) \ 882 __field(size_t, count)
872 __field(size_t, count) \ 883 __field(int, flags)
873 __field(int, flags) \ 884 __field(xfs_fileoff_t, startoff)
874 __field(xfs_fileoff_t, startoff) \ 885 __field(xfs_fsblock_t, startblock)
875 __field(xfs_fsblock_t, startblock) \ 886 __field(xfs_filblks_t, blockcount)
876 __field(xfs_filblks_t, blockcount) \ 887 ),
877 ), \ 888 TP_fast_assign(
878 TP_fast_assign( \ 889 __entry->dev = VFS_I(ip)->i_sb->s_dev;
879 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 890 __entry->ino = ip->i_ino;
880 __entry->ino = ip->i_ino; \ 891 __entry->size = ip->i_d.di_size;
881 __entry->size = ip->i_d.di_size; \ 892 __entry->new_size = ip->i_new_size;
882 __entry->new_size = ip->i_new_size; \ 893 __entry->offset = offset;
883 __entry->offset = offset; \ 894 __entry->count = count;
884 __entry->count = count; \ 895 __entry->flags = flags;
885 __entry->flags = flags; \ 896 __entry->startoff = irec ? irec->br_startoff : 0;
886 __entry->startoff = irec ? irec->br_startoff : 0; \ 897 __entry->startblock = irec ? irec->br_startblock : 0;
887 __entry->startblock = irec ? irec->br_startblock : 0; \ 898 __entry->blockcount = irec ? irec->br_blockcount : 0;
888 __entry->blockcount = irec ? irec->br_blockcount : 0; \ 899 ),
889 ), \ 900 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
890 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 901 "offset 0x%llx count %zd flags %s "
891 "offset 0x%llx count %zd flags %s " \ 902 "startoff 0x%llx startblock %lld blockcount 0x%llx",
892 "startoff 0x%llx startblock %lld blockcount 0x%llx", \ 903 MAJOR(__entry->dev), MINOR(__entry->dev),
893 MAJOR(__entry->dev), MINOR(__entry->dev), \ 904 __entry->ino,
894 __entry->ino, \ 905 __entry->size,
895 __entry->size, \ 906 __entry->new_size,
896 __entry->new_size, \ 907 __entry->offset,
897 __entry->offset, \ 908 __entry->count,
898 __entry->count, \ 909 __print_flags(__entry->flags, "|", BMAPI_FLAGS),
899 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 910 __entry->startoff,
900 __entry->startoff, \ 911 (__int64_t)__entry->startblock,
901 (__int64_t)__entry->startblock, \ 912 __entry->blockcount)
902 __entry->blockcount) \
903) 913)
914
915#define DEFINE_IOMAP_EVENT(name) \
916DEFINE_EVENT(xfs_iomap_class, name, \
917 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
918 int flags, struct xfs_bmbt_irec *irec), \
919 TP_ARGS(ip, offset, count, flags, irec))
904DEFINE_IOMAP_EVENT(xfs_iomap_enter); 920DEFINE_IOMAP_EVENT(xfs_iomap_enter);
905DEFINE_IOMAP_EVENT(xfs_iomap_found); 921DEFINE_IOMAP_EVENT(xfs_iomap_found);
906DEFINE_IOMAP_EVENT(xfs_iomap_alloc); 922DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
907 923
908#define DEFINE_SIMPLE_IO_EVENT(name) \ 924DECLARE_EVENT_CLASS(xfs_simple_io_class,
909TRACE_EVENT(name, \ 925 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
910 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ 926 TP_ARGS(ip, offset, count),
911 TP_ARGS(ip, offset, count), \ 927 TP_STRUCT__entry(
912 TP_STRUCT__entry( \ 928 __field(dev_t, dev)
913 __field(dev_t, dev) \ 929 __field(xfs_ino_t, ino)
914 __field(xfs_ino_t, ino) \ 930 __field(loff_t, size)
915 __field(loff_t, size) \ 931 __field(loff_t, new_size)
916 __field(loff_t, new_size) \ 932 __field(loff_t, offset)
917 __field(loff_t, offset) \ 933 __field(size_t, count)
918 __field(size_t, count) \ 934 ),
919 ), \ 935 TP_fast_assign(
920 TP_fast_assign( \ 936 __entry->dev = VFS_I(ip)->i_sb->s_dev;
921 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 937 __entry->ino = ip->i_ino;
922 __entry->ino = ip->i_ino; \ 938 __entry->size = ip->i_d.di_size;
923 __entry->size = ip->i_d.di_size; \ 939 __entry->new_size = ip->i_new_size;
924 __entry->new_size = ip->i_new_size; \ 940 __entry->offset = offset;
925 __entry->offset = offset; \ 941 __entry->count = count;
926 __entry->count = count; \ 942 ),
927 ), \ 943 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
928 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 944 "offset 0x%llx count %zd",
929 "offset 0x%llx count %zd", \ 945 MAJOR(__entry->dev), MINOR(__entry->dev),
930 MAJOR(__entry->dev), MINOR(__entry->dev), \ 946 __entry->ino,
931 __entry->ino, \ 947 __entry->size,
932 __entry->size, \ 948 __entry->new_size,
933 __entry->new_size, \ 949 __entry->offset,
934 __entry->offset, \ 950 __entry->count)
935 __entry->count) \
936); 951);
952
953#define DEFINE_SIMPLE_IO_EVENT(name) \
954DEFINE_EVENT(xfs_simple_io_class, name, \
955 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
956 TP_ARGS(ip, offset, count))
937DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 957DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
938DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 958DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
939 959
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 38e764146644..2d8b7bc792c9 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
249 249
250 if (!xfs_Gqm) { 250 if (!xfs_Gqm) {
251 xfs_Gqm = xfs_Gqm_init(); 251 xfs_Gqm = xfs_Gqm_init();
252 if (!xfs_Gqm) 252 if (!xfs_Gqm) {
253 mutex_unlock(&xfs_Gqm_lock);
253 return ENOMEM; 254 return ENOMEM;
255 }
254 } 256 }
255 257
256 /* 258 /*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 401f364ad36c..4917d4eed4ed 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
227 227
228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
229 229
230 int pag_ici_init; /* incore inode cache initialised */
231 rwlock_t pag_ici_lock; /* incore inode lock */ 230 rwlock_t pag_ici_lock; /* incore inode lock */
232 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
233 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 6845db90818f..75df75f43d48 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -382,9 +382,6 @@ xfs_iget(
382 382
383 /* get the perag structure and ensure that it's inode capable */ 383 /* get the perag structure and ensure that it's inode capable */
384 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 384 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
385 if (!pag->pagi_inodeok)
386 return EINVAL;
387 ASSERT(pag->pag_ici_init);
388 agino = XFS_INO_TO_AGINO(mp, ino); 385 agino = XFS_INO_TO_AGINO(mp, ino);
389 386
390again: 387again:
@@ -744,30 +741,24 @@ xfs_ilock_demote(
744} 741}
745 742
746#ifdef DEBUG 743#ifdef DEBUG
747/*
748 * Debug-only routine, without additional rw_semaphore APIs, we can
749 * now only answer requests regarding whether we hold the lock for write
750 * (reader state is outside our visibility, we only track writer state).
751 *
752 * Note: this means !xfs_isilocked would give false positives, so don't do that.
753 */
754int 744int
755xfs_isilocked( 745xfs_isilocked(
756 xfs_inode_t *ip, 746 xfs_inode_t *ip,
757 uint lock_flags) 747 uint lock_flags)
758{ 748{
759 if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) == 749 if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
760 XFS_ILOCK_EXCL) { 750 if (!(lock_flags & XFS_ILOCK_SHARED))
761 if (!ip->i_lock.mr_writer) 751 return !!ip->i_lock.mr_writer;
762 return 0; 752 return rwsem_is_locked(&ip->i_lock.mr_lock);
763 } 753 }
764 754
765 if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) == 755 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
766 XFS_IOLOCK_EXCL) { 756 if (!(lock_flags & XFS_IOLOCK_SHARED))
767 if (!ip->i_iolock.mr_writer) 757 return !!ip->i_iolock.mr_writer;
768 return 0; 758 return rwsem_is_locked(&ip->i_iolock.mr_lock);
769 } 759 }
770 760
771 return 1; 761 ASSERT(0);
762 return 0;
772} 763}
773#endif 764#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d8fe9c..d53c39de7d05 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1940,10 +1940,10 @@ xfs_ifree_cluster(
1940 int blks_per_cluster; 1940 int blks_per_cluster;
1941 int nbufs; 1941 int nbufs;
1942 int ninodes; 1942 int ninodes;
1943 int i, j, found, pre_flushed; 1943 int i, j;
1944 xfs_daddr_t blkno; 1944 xfs_daddr_t blkno;
1945 xfs_buf_t *bp; 1945 xfs_buf_t *bp;
1946 xfs_inode_t *ip, **ip_found; 1946 xfs_inode_t *ip;
1947 xfs_inode_log_item_t *iip; 1947 xfs_inode_log_item_t *iip;
1948 xfs_log_item_t *lip; 1948 xfs_log_item_t *lip;
1949 struct xfs_perag *pag; 1949 struct xfs_perag *pag;
@@ -1960,114 +1960,97 @@ xfs_ifree_cluster(
1960 nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 1960 nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
1961 } 1961 }
1962 1962
1963 ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
1964
1965 for (j = 0; j < nbufs; j++, inum += ninodes) { 1963 for (j = 0; j < nbufs; j++, inum += ninodes) {
1964 int found = 0;
1965
1966 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 1966 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1967 XFS_INO_TO_AGBNO(mp, inum)); 1967 XFS_INO_TO_AGBNO(mp, inum));
1968 1968
1969 /*
1970 * We obtain and lock the backing buffer first in the process
1971 * here, as we have to ensure that any dirty inode that we
1972 * can't get the flush lock on is attached to the buffer.
1973 * If we scan the in-memory inodes first, then buffer IO can
1974 * complete before we get a lock on it, and hence we may fail
1975 * to mark all the active inodes on the buffer stale.
1976 */
1977 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1978 mp->m_bsize * blks_per_cluster,
1979 XBF_LOCK);
1980
1981 /*
1982 * Walk the inodes already attached to the buffer and mark them
1983 * stale. These will all have the flush locks held, so an
1984 * in-memory inode walk can't lock them.
1985 */
1986 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1987 while (lip) {
1988 if (lip->li_type == XFS_LI_INODE) {
1989 iip = (xfs_inode_log_item_t *)lip;
1990 ASSERT(iip->ili_logged == 1);
1991 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
1992 xfs_trans_ail_copy_lsn(mp->m_ail,
1993 &iip->ili_flush_lsn,
1994 &iip->ili_item.li_lsn);
1995 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
1996 found++;
1997 }
1998 lip = lip->li_bio_list;
1999 }
1969 2000
1970 /* 2001 /*
1971 * Look for each inode in memory and attempt to lock it, 2002 * For each inode in memory attempt to add it to the inode
1972 * we can be racing with flush and tail pushing here. 2003 * buffer and set it up for being staled on buffer IO
1973 * any inode we get the locks on, add to an array of 2004 * completion. This is safe as we've locked out tail pushing
1974 * inode items to process later. 2005 * and flushing by locking the buffer.
1975 * 2006 *
1976 * The get the buffer lock, we could beat a flush 2007 * We have already marked every inode that was part of a
1977 * or tail pushing thread to the lock here, in which 2008 * transaction stale above, which means there is no point in
1978 * case they will go looking for the inode buffer 2009 * even trying to lock them.
1979 * and fail, we need some other form of interlock
1980 * here.
1981 */ 2010 */
1982 found = 0;
1983 for (i = 0; i < ninodes; i++) { 2011 for (i = 0; i < ninodes; i++) {
1984 read_lock(&pag->pag_ici_lock); 2012 read_lock(&pag->pag_ici_lock);
1985 ip = radix_tree_lookup(&pag->pag_ici_root, 2013 ip = radix_tree_lookup(&pag->pag_ici_root,
1986 XFS_INO_TO_AGINO(mp, (inum + i))); 2014 XFS_INO_TO_AGINO(mp, (inum + i)));
1987 2015
1988 /* Inode not in memory or we found it already, 2016 /* Inode not in memory or stale, nothing to do */
1989 * nothing to do
1990 */
1991 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2017 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
1992 read_unlock(&pag->pag_ici_lock); 2018 read_unlock(&pag->pag_ici_lock);
1993 continue; 2019 continue;
1994 } 2020 }
1995 2021
1996 if (xfs_inode_clean(ip)) { 2022 /* don't try to lock/unlock the current inode */
1997 read_unlock(&pag->pag_ici_lock); 2023 if (ip != free_ip &&
1998 continue; 2024 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
1999 }
2000
2001 /* If we can get the locks then add it to the
2002 * list, otherwise by the time we get the bp lock
2003 * below it will already be attached to the
2004 * inode buffer.
2005 */
2006
2007 /* This inode will already be locked - by us, lets
2008 * keep it that way.
2009 */
2010
2011 if (ip == free_ip) {
2012 if (xfs_iflock_nowait(ip)) {
2013 xfs_iflags_set(ip, XFS_ISTALE);
2014 if (xfs_inode_clean(ip)) {
2015 xfs_ifunlock(ip);
2016 } else {
2017 ip_found[found++] = ip;
2018 }
2019 }
2020 read_unlock(&pag->pag_ici_lock); 2025 read_unlock(&pag->pag_ici_lock);
2021 continue; 2026 continue;
2022 } 2027 }
2028 read_unlock(&pag->pag_ici_lock);
2023 2029
2024 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2030 if (!xfs_iflock_nowait(ip)) {
2025 if (xfs_iflock_nowait(ip)) { 2031 if (ip != free_ip)
2026 xfs_iflags_set(ip, XFS_ISTALE);
2027
2028 if (xfs_inode_clean(ip)) {
2029 xfs_ifunlock(ip);
2030 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2031 } else {
2032 ip_found[found++] = ip;
2033 }
2034 } else {
2035 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2032 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2036 } 2033 continue;
2037 } 2034 }
2038 read_unlock(&pag->pag_ici_lock);
2039 }
2040 2035
2041 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2036 xfs_iflags_set(ip, XFS_ISTALE);
2042 mp->m_bsize * blks_per_cluster, 2037 if (xfs_inode_clean(ip)) {
2043 XBF_LOCK); 2038 ASSERT(ip != free_ip);
2044 2039 xfs_ifunlock(ip);
2045 pre_flushed = 0; 2040 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2046 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 2041 continue;
2047 while (lip) {
2048 if (lip->li_type == XFS_LI_INODE) {
2049 iip = (xfs_inode_log_item_t *)lip;
2050 ASSERT(iip->ili_logged == 1);
2051 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
2052 xfs_trans_ail_copy_lsn(mp->m_ail,
2053 &iip->ili_flush_lsn,
2054 &iip->ili_item.li_lsn);
2055 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
2056 pre_flushed++;
2057 } 2042 }
2058 lip = lip->li_bio_list;
2059 }
2060 2043
2061 for (i = 0; i < found; i++) {
2062 ip = ip_found[i];
2063 iip = ip->i_itemp; 2044 iip = ip->i_itemp;
2064
2065 if (!iip) { 2045 if (!iip) {
2046 /* inode with unlogged changes only */
2047 ASSERT(ip != free_ip);
2066 ip->i_update_core = 0; 2048 ip->i_update_core = 0;
2067 xfs_ifunlock(ip); 2049 xfs_ifunlock(ip);
2068 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2050 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2069 continue; 2051 continue;
2070 } 2052 }
2053 found++;
2071 2054
2072 iip->ili_last_fields = iip->ili_format.ilf_fields; 2055 iip->ili_last_fields = iip->ili_format.ilf_fields;
2073 iip->ili_format.ilf_fields = 0; 2056 iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2061,16 @@ xfs_ifree_cluster(
2078 xfs_buf_attach_iodone(bp, 2061 xfs_buf_attach_iodone(bp,
2079 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2062 (void(*)(xfs_buf_t*,xfs_log_item_t*))
2080 xfs_istale_done, (xfs_log_item_t *)iip); 2063 xfs_istale_done, (xfs_log_item_t *)iip);
2081 if (ip != free_ip) { 2064
2065 if (ip != free_ip)
2082 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2066 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2083 }
2084 } 2067 }
2085 2068
2086 if (found || pre_flushed) 2069 if (found)
2087 xfs_trans_stale_inode_buf(tp, bp); 2070 xfs_trans_stale_inode_buf(tp, bp);
2088 xfs_trans_binval(tp, bp); 2071 xfs_trans_binval(tp, bp);
2089 } 2072 }
2090 2073
2091 kmem_free(ip_found);
2092 xfs_perag_put(pag); 2074 xfs_perag_put(pag);
2093} 2075}
2094 2076
@@ -2649,8 +2631,6 @@ xfs_iflush_cluster(
2649 int i; 2631 int i;
2650 2632
2651 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2633 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2652 ASSERT(pag->pagi_inodeok);
2653 ASSERT(pag->pag_ici_init);
2654 2634
2655 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2635 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2656 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 2636 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 14a69aec2c0b..ed0684cc50ee 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -132,15 +132,10 @@ xlog_align(
132 int nbblks, 132 int nbblks,
133 xfs_buf_t *bp) 133 xfs_buf_t *bp)
134{ 134{
135 xfs_daddr_t offset; 135 xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
136 xfs_caddr_t ptr;
137 136
138 offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1); 137 ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
139 ptr = XFS_BUF_PTR(bp) + BBTOB(offset); 138 return XFS_BUF_PTR(bp) + BBTOB(offset);
140
141 ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
142
143 return ptr;
144} 139}
145 140
146 141
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d7bf38c8cd1c..d59f4e8bedcf 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
268 268
269#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ 269#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
270 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) 270 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
271 return E2BIG; 271 return EFBIG;
272#else /* Limited by UINT_MAX of sectors */ 272#else /* Limited by UINT_MAX of sectors */
273 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) 273 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
274 return E2BIG; 274 return EFBIG;
275#endif 275#endif
276 return 0; 276 return 0;
277} 277}
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
393 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 393 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
394 xfs_fs_mount_cmn_err(flags, 394 xfs_fs_mount_cmn_err(flags,
395 "file system too large to be mounted on this system."); 395 "file system too large to be mounted on this system.");
396 return XFS_ERROR(E2BIG); 396 return XFS_ERROR(EFBIG);
397 } 397 }
398 398
399 if (unlikely(sbp->sb_inprogress)) { 399 if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
413 return 0; 413 return 0;
414} 414}
415 415
416STATIC void
417xfs_initialize_perag_icache(
418 xfs_perag_t *pag)
419{
420 if (!pag->pag_ici_init) {
421 rwlock_init(&pag->pag_ici_lock);
422 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
423 pag->pag_ici_init = 1;
424 }
425}
426
427int 416int
428xfs_initialize_perag( 417xfs_initialize_perag(
429 xfs_mount_t *mp, 418 xfs_mount_t *mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
436 xfs_agino_t agino; 425 xfs_agino_t agino;
437 xfs_ino_t ino; 426 xfs_ino_t ino;
438 xfs_sb_t *sbp = &mp->m_sb; 427 xfs_sb_t *sbp = &mp->m_sb;
439 xfs_ino_t max_inum = XFS_MAXINUMBER_32;
440 int error = -ENOMEM; 428 int error = -ENOMEM;
441 429
442 /* Check to see if the filesystem can overflow 32 bit inodes */
443 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
444 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
445
446 /* 430 /*
447 * Walk the current per-ag tree so we don't try to initialise AGs 431 * Walk the current per-ag tree so we don't try to initialise AGs
448 * that already exist (growfs case). Allocate and insert all the 432 * that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
456 } 440 }
457 if (!first_initialised) 441 if (!first_initialised)
458 first_initialised = index; 442 first_initialised = index;
443
459 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); 444 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
460 if (!pag) 445 if (!pag)
461 goto out_unwind; 446 goto out_unwind;
447 pag->pag_agno = index;
448 pag->pag_mount = mp;
449 rwlock_init(&pag->pag_ici_lock);
450 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
451
462 if (radix_tree_preload(GFP_NOFS)) 452 if (radix_tree_preload(GFP_NOFS))
463 goto out_unwind; 453 goto out_unwind;
454
464 spin_lock(&mp->m_perag_lock); 455 spin_lock(&mp->m_perag_lock);
465 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { 456 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
466 BUG(); 457 BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
469 error = -EEXIST; 460 error = -EEXIST;
470 goto out_unwind; 461 goto out_unwind;
471 } 462 }
472 pag->pag_agno = index;
473 pag->pag_mount = mp;
474 spin_unlock(&mp->m_perag_lock); 463 spin_unlock(&mp->m_perag_lock);
475 radix_tree_preload_end(); 464 radix_tree_preload_end();
476 } 465 }
477 466
478 /* Clear the mount flag if no inode can overflow 32 bits 467 /*
479 * on this filesystem, or if specifically requested.. 468 * If we mount with the inode64 option, or no inode overflows
469 * the legacy 32-bit address space clear the inode32 option.
480 */ 470 */
481 if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) { 471 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
472 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
473
474 if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
482 mp->m_flags |= XFS_MOUNT_32BITINODES; 475 mp->m_flags |= XFS_MOUNT_32BITINODES;
483 } else { 476 else
484 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 477 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
485 }
486 478
487 /* If we can overflow then setup the ag headers accordingly */
488 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 479 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
489 /* Calculate how much should be reserved for inodes to 480 /*
490 * meet the max inode percentage. 481 * Calculate how much should be reserved for inodes to meet
482 * the max inode percentage.
491 */ 483 */
492 if (mp->m_maxicount) { 484 if (mp->m_maxicount) {
493 __uint64_t icount; 485 __uint64_t icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
500 } else { 492 } else {
501 max_metadata = agcount; 493 max_metadata = agcount;
502 } 494 }
495
503 for (index = 0; index < agcount; index++) { 496 for (index = 0; index < agcount; index++) {
504 ino = XFS_AGINO_TO_INO(mp, index, agino); 497 ino = XFS_AGINO_TO_INO(mp, index, agino);
505 if (ino > max_inum) { 498 if (ino > XFS_MAXINUMBER_32) {
506 index++; 499 index++;
507 break; 500 break;
508 } 501 }
509 502
510 /* This ag is preferred for inodes */
511 pag = xfs_perag_get(mp, index); 503 pag = xfs_perag_get(mp, index);
512 pag->pagi_inodeok = 1; 504 pag->pagi_inodeok = 1;
513 if (index < max_metadata) 505 if (index < max_metadata)
514 pag->pagf_metadata = 1; 506 pag->pagf_metadata = 1;
515 xfs_initialize_perag_icache(pag);
516 xfs_perag_put(pag); 507 xfs_perag_put(pag);
517 } 508 }
518 } else { 509 } else {
519 /* Setup default behavior for smaller filesystems */
520 for (index = 0; index < agcount; index++) { 510 for (index = 0; index < agcount; index++) {
521 pag = xfs_perag_get(mp, index); 511 pag = xfs_perag_get(mp, index);
522 pag->pagi_inodeok = 1; 512 pag->pagi_inodeok = 1;
523 xfs_initialize_perag_icache(pag);
524 xfs_perag_put(pag); 513 xfs_perag_put(pag);
525 } 514 }
526 } 515 }
516
527 if (maxagi) 517 if (maxagi)
528 *maxagi = index; 518 *maxagi = index;
529 return 0; 519 return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1009 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 999 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
1010 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 1000 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
1011 cmn_err(CE_WARN, "XFS: size check 1 failed"); 1001 cmn_err(CE_WARN, "XFS: size check 1 failed");
1012 return XFS_ERROR(E2BIG); 1002 return XFS_ERROR(EFBIG);
1013 } 1003 }
1014 error = xfs_read_buf(mp, mp->m_ddev_targp, 1004 error = xfs_read_buf(mp, mp->m_ddev_targp,
1015 d - XFS_FSS_TO_BB(mp, 1), 1005 d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1019 } else { 1009 } else {
1020 cmn_err(CE_WARN, "XFS: size check 2 failed"); 1010 cmn_err(CE_WARN, "XFS: size check 2 failed");
1021 if (error == ENOSPC) 1011 if (error == ENOSPC)
1022 error = XFS_ERROR(E2BIG); 1012 error = XFS_ERROR(EFBIG);
1023 return error; 1013 return error;
1024 } 1014 }
1025 1015
@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1027 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 1017 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
1028 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 1018 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
1029 cmn_err(CE_WARN, "XFS: size check 3 failed"); 1019 cmn_err(CE_WARN, "XFS: size check 3 failed");
1030 return XFS_ERROR(E2BIG); 1020 return XFS_ERROR(EFBIG);
1031 } 1021 }
1032 error = xfs_read_buf(mp, mp->m_logdev_targp, 1022 error = xfs_read_buf(mp, mp->m_logdev_targp,
1033 d - XFS_FSB_TO_BB(mp, 1), 1023 d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1037 } else { 1027 } else {
1038 cmn_err(CE_WARN, "XFS: size check 3 failed"); 1028 cmn_err(CE_WARN, "XFS: size check 3 failed");
1039 if (error == ENOSPC) 1029 if (error == ENOSPC)
1040 error = XFS_ERROR(E2BIG); 1030 error = XFS_ERROR(EFBIG);
1041 return error; 1031 return error;
1042 } 1032 }
1043 } 1033 }
@@ -1254,7 +1244,7 @@ xfs_mountfs(
1254 * Allocate and initialize the per-ag data. 1244 * Allocate and initialize the per-ag data.
1255 */ 1245 */
1256 spin_lock_init(&mp->m_perag_lock); 1246 spin_lock_init(&mp->m_perag_lock);
1257 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS); 1247 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1258 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); 1248 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1259 if (error) { 1249 if (error) {
1260 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); 1250 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6be05f756d59..16445518506d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
2247 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", 2247 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
2248 (unsigned long long) XFS_BB_TO_FSB(mp, d), 2248 (unsigned long long) XFS_BB_TO_FSB(mp, d),
2249 (unsigned long long) mp->m_sb.sb_rblocks); 2249 (unsigned long long) mp->m_sb.sb_rblocks);
2250 return XFS_ERROR(E2BIG); 2250 return XFS_ERROR(EFBIG);
2251 } 2251 }
2252 error = xfs_read_buf(mp, mp->m_rtdev_targp, 2252 error = xfs_read_buf(mp, mp->m_rtdev_targp,
2253 d - XFS_FSB_TO_BB(mp, 1), 2253 d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
2256 cmn_err(CE_WARN, 2256 cmn_err(CE_WARN,
2257 "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); 2257 "XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
2258 if (error == ENOSPC) 2258 if (error == ENOSPC)
2259 return XFS_ERROR(E2BIG); 2259 return XFS_ERROR(EFBIG);
2260 return error; 2260 return error;
2261 } 2261 }
2262 xfs_buf_relse(bp); 2262 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index b2d67adb6a08..ff614c29b441 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -147,7 +147,16 @@ xfs_growfs_rt(
147# define xfs_rtfree_extent(t,b,l) (ENOSYS) 147# define xfs_rtfree_extent(t,b,l) (ENOSYS)
148# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) 148# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
149# define xfs_growfs_rt(mp,in) (ENOSYS) 149# define xfs_growfs_rt(mp,in) (ENOSYS)
150# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 150static inline int /* error */
151xfs_rtmount_init(
152 xfs_mount_t *mp) /* file system mount structure */
153{
154 if (mp->m_sb.sb_rblocks == 0)
155 return 0;
156
157 cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
158 return ENOSYS;
159}
151# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
152# define xfs_rtunmount_inodes(m) 161# define xfs_rtunmount_inodes(m)
153#endif /* CONFIG_XFS_RT */ 162#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ce558efa2ea0..28547dfce037 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -48,134 +48,489 @@
48 48
49kmem_zone_t *xfs_trans_zone; 49kmem_zone_t *xfs_trans_zone;
50 50
51
51/* 52/*
52 * Reservation functions here avoid a huge stack in xfs_trans_init 53 * Various log reservation values.
53 * due to register overflow from temporaries in the calculations. 54 *
55 * These are based on the size of the file system block because that is what
56 * most transactions manipulate. Each adds in an additional 128 bytes per
57 * item logged to try to account for the overhead of the transaction mechanism.
58 *
59 * Note: Most of the reservations underestimate the number of allocation
60 * groups into which they could free extents in the xfs_bmap_finish() call.
61 * This is because the number in the worst case is quite high and quite
62 * unusual. In order to fix this we need to change xfs_bmap_finish() to free
63 * extents in only a single AG at a time. This will require changes to the
64 * EFI code as well, however, so that the EFI for the extents not freed is
65 * logged again in each transaction. See SGI PV #261917.
66 *
67 * Reservation functions here avoid a huge stack in xfs_trans_init due to
68 * register overflow from temporaries in the calculations.
69 */
70
71
72/*
73 * In a write transaction we can allocate a maximum of 2
74 * extents. This gives:
75 * the inode getting the new extents: inode size
76 * the inode's bmap btree: max depth * block size
77 * the agfs of the ags from which the extents are allocated: 2 * sector
78 * the superblock free block counter: sector size
79 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
80 * And the bmap_finish transaction can free bmap blocks in a join:
81 * the agfs of the ags containing the blocks: 2 * sector size
82 * the agfls of the ags containing the blocks: 2 * sector size
83 * the super block free block counter: sector size
84 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
54 */ 85 */
55STATIC uint 86STATIC uint
56xfs_calc_write_reservation(xfs_mount_t *mp) 87xfs_calc_write_reservation(
88 struct xfs_mount *mp)
57{ 89{
58 return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 90 return XFS_DQUOT_LOGRES(mp) +
91 MAX((mp->m_sb.sb_inodesize +
92 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
93 2 * mp->m_sb.sb_sectsize +
94 mp->m_sb.sb_sectsize +
95 XFS_ALLOCFREE_LOG_RES(mp, 2) +
96 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
97 XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
98 (2 * mp->m_sb.sb_sectsize +
99 2 * mp->m_sb.sb_sectsize +
100 mp->m_sb.sb_sectsize +
101 XFS_ALLOCFREE_LOG_RES(mp, 2) +
102 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
59} 103}
60 104
105/*
106 * In truncating a file we free up to two extents at once. We can modify:
107 * the inode being truncated: inode size
108 * the inode's bmap btree: (max depth + 1) * block size
109 * And the bmap_finish transaction can free the blocks and bmap blocks:
110 * the agf for each of the ags: 4 * sector size
111 * the agfl for each of the ags: 4 * sector size
112 * the super block to reflect the freed blocks: sector size
113 * worst case split in allocation btrees per extent assuming 4 extents:
114 * 4 exts * 2 trees * (2 * max depth - 1) * block size
115 * the inode btree: max depth * blocksize
116 * the allocation btrees: 2 trees * (max depth - 1) * block size
117 */
61STATIC uint 118STATIC uint
62xfs_calc_itruncate_reservation(xfs_mount_t *mp) 119xfs_calc_itruncate_reservation(
120 struct xfs_mount *mp)
63{ 121{
64 return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 122 return XFS_DQUOT_LOGRES(mp) +
123 MAX((mp->m_sb.sb_inodesize +
124 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
125 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
126 (4 * mp->m_sb.sb_sectsize +
127 4 * mp->m_sb.sb_sectsize +
128 mp->m_sb.sb_sectsize +
129 XFS_ALLOCFREE_LOG_RES(mp, 4) +
130 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
131 128 * 5 +
132 XFS_ALLOCFREE_LOG_RES(mp, 1) +
133 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
134 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
65} 135}
66 136
137/*
138 * In renaming a files we can modify:
139 * the four inodes involved: 4 * inode size
140 * the two directory btrees: 2 * (max depth + v2) * dir block size
141 * the two directory bmap btrees: 2 * max depth * block size
142 * And the bmap_finish transaction can free dir and bmap blocks (two sets
143 * of bmap blocks) giving:
144 * the agf for the ags in which the blocks live: 3 * sector size
145 * the agfl for the ags in which the blocks live: 3 * sector size
146 * the superblock for the free block count: sector size
147 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
148 */
67STATIC uint 149STATIC uint
68xfs_calc_rename_reservation(xfs_mount_t *mp) 150xfs_calc_rename_reservation(
151 struct xfs_mount *mp)
69{ 152{
70 return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 153 return XFS_DQUOT_LOGRES(mp) +
154 MAX((4 * mp->m_sb.sb_inodesize +
155 2 * XFS_DIROP_LOG_RES(mp) +
156 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
157 (3 * mp->m_sb.sb_sectsize +
158 3 * mp->m_sb.sb_sectsize +
159 mp->m_sb.sb_sectsize +
160 XFS_ALLOCFREE_LOG_RES(mp, 3) +
161 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
71} 162}
72 163
164/*
165 * For creating a link to an inode:
166 * the parent directory inode: inode size
167 * the linked inode: inode size
168 * the directory btree could split: (max depth + v2) * dir block size
169 * the directory bmap btree could join or split: (max depth + v2) * blocksize
170 * And the bmap_finish transaction can free some bmap blocks giving:
171 * the agf for the ag in which the blocks live: sector size
172 * the agfl for the ag in which the blocks live: sector size
173 * the superblock for the free block count: sector size
174 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
175 */
73STATIC uint 176STATIC uint
74xfs_calc_link_reservation(xfs_mount_t *mp) 177xfs_calc_link_reservation(
178 struct xfs_mount *mp)
75{ 179{
76 return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 180 return XFS_DQUOT_LOGRES(mp) +
181 MAX((mp->m_sb.sb_inodesize +
182 mp->m_sb.sb_inodesize +
183 XFS_DIROP_LOG_RES(mp) +
184 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
185 (mp->m_sb.sb_sectsize +
186 mp->m_sb.sb_sectsize +
187 mp->m_sb.sb_sectsize +
188 XFS_ALLOCFREE_LOG_RES(mp, 1) +
189 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
77} 190}
78 191
192/*
193 * For removing a directory entry we can modify:
194 * the parent directory inode: inode size
195 * the removed inode: inode size
196 * the directory btree could join: (max depth + v2) * dir block size
197 * the directory bmap btree could join or split: (max depth + v2) * blocksize
198 * And the bmap_finish transaction can free the dir and bmap blocks giving:
199 * the agf for the ag in which the blocks live: 2 * sector size
200 * the agfl for the ag in which the blocks live: 2 * sector size
201 * the superblock for the free block count: sector size
202 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
203 */
79STATIC uint 204STATIC uint
80xfs_calc_remove_reservation(xfs_mount_t *mp) 205xfs_calc_remove_reservation(
206 struct xfs_mount *mp)
81{ 207{
82 return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 208 return XFS_DQUOT_LOGRES(mp) +
209 MAX((mp->m_sb.sb_inodesize +
210 mp->m_sb.sb_inodesize +
211 XFS_DIROP_LOG_RES(mp) +
212 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
213 (2 * mp->m_sb.sb_sectsize +
214 2 * mp->m_sb.sb_sectsize +
215 mp->m_sb.sb_sectsize +
216 XFS_ALLOCFREE_LOG_RES(mp, 2) +
217 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
83} 218}
84 219
220/*
221 * For symlink we can modify:
222 * the parent directory inode: inode size
223 * the new inode: inode size
224 * the inode btree entry: 1 block
225 * the directory btree: (max depth + v2) * dir block size
226 * the directory inode's bmap btree: (max depth + v2) * block size
227 * the blocks for the symlink: 1 kB
228 * Or in the first xact we allocate some inodes giving:
229 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
230 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
231 * the inode btree: max depth * blocksize
232 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
233 */
85STATIC uint 234STATIC uint
86xfs_calc_symlink_reservation(xfs_mount_t *mp) 235xfs_calc_symlink_reservation(
236 struct xfs_mount *mp)
87{ 237{
88 return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 238 return XFS_DQUOT_LOGRES(mp) +
239 MAX((mp->m_sb.sb_inodesize +
240 mp->m_sb.sb_inodesize +
241 XFS_FSB_TO_B(mp, 1) +
242 XFS_DIROP_LOG_RES(mp) +
243 1024 +
244 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
245 (2 * mp->m_sb.sb_sectsize +
246 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
247 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
248 XFS_ALLOCFREE_LOG_RES(mp, 1) +
249 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
250 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
89} 251}
90 252
253/*
254 * For create we can modify:
255 * the parent directory inode: inode size
256 * the new inode: inode size
257 * the inode btree entry: block size
258 * the superblock for the nlink flag: sector size
259 * the directory btree: (max depth + v2) * dir block size
260 * the directory inode's bmap btree: (max depth + v2) * block size
261 * Or in the first xact we allocate some inodes giving:
262 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
263 * the superblock for the nlink flag: sector size
264 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
265 * the inode btree: max depth * blocksize
266 * the allocation btrees: 2 trees * (max depth - 1) * block size
267 */
91STATIC uint 268STATIC uint
92xfs_calc_create_reservation(xfs_mount_t *mp) 269xfs_calc_create_reservation(
270 struct xfs_mount *mp)
93{ 271{
94 return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 272 return XFS_DQUOT_LOGRES(mp) +
273 MAX((mp->m_sb.sb_inodesize +
274 mp->m_sb.sb_inodesize +
275 mp->m_sb.sb_sectsize +
276 XFS_FSB_TO_B(mp, 1) +
277 XFS_DIROP_LOG_RES(mp) +
278 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
279 (3 * mp->m_sb.sb_sectsize +
280 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
281 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
282 XFS_ALLOCFREE_LOG_RES(mp, 1) +
283 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
284 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
95} 285}
96 286
287/*
288 * Making a new directory is the same as creating a new file.
289 */
97STATIC uint 290STATIC uint
98xfs_calc_mkdir_reservation(xfs_mount_t *mp) 291xfs_calc_mkdir_reservation(
292 struct xfs_mount *mp)
99{ 293{
100 return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 294 return xfs_calc_create_reservation(mp);
101} 295}
102 296
297/*
298 * In freeing an inode we can modify:
299 * the inode being freed: inode size
300 * the super block free inode counter: sector size
301 * the agi hash list and counters: sector size
302 * the inode btree entry: block size
303 * the on disk inode before ours in the agi hash list: inode cluster size
304 * the inode btree: max depth * blocksize
305 * the allocation btrees: 2 trees * (max depth - 1) * block size
306 */
103STATIC uint 307STATIC uint
104xfs_calc_ifree_reservation(xfs_mount_t *mp) 308xfs_calc_ifree_reservation(
309 struct xfs_mount *mp)
105{ 310{
106 return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 311 return XFS_DQUOT_LOGRES(mp) +
312 mp->m_sb.sb_inodesize +
313 mp->m_sb.sb_sectsize +
314 mp->m_sb.sb_sectsize +
315 XFS_FSB_TO_B(mp, 1) +
316 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
317 XFS_INODE_CLUSTER_SIZE(mp)) +
318 128 * 5 +
319 XFS_ALLOCFREE_LOG_RES(mp, 1) +
320 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
321 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
107} 322}
108 323
324/*
325 * When only changing the inode we log the inode and possibly the superblock
326 * We also add a bit of slop for the transaction stuff.
327 */
109STATIC uint 328STATIC uint
110xfs_calc_ichange_reservation(xfs_mount_t *mp) 329xfs_calc_ichange_reservation(
330 struct xfs_mount *mp)
111{ 331{
112 return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 332 return XFS_DQUOT_LOGRES(mp) +
333 mp->m_sb.sb_inodesize +
334 mp->m_sb.sb_sectsize +
335 512;
336
113} 337}
114 338
339/*
340 * Growing the data section of the filesystem.
341 * superblock
342 * agi and agf
343 * allocation btrees
344 */
115STATIC uint 345STATIC uint
116xfs_calc_growdata_reservation(xfs_mount_t *mp) 346xfs_calc_growdata_reservation(
347 struct xfs_mount *mp)
117{ 348{
118 return XFS_CALC_GROWDATA_LOG_RES(mp); 349 return mp->m_sb.sb_sectsize * 3 +
350 XFS_ALLOCFREE_LOG_RES(mp, 1) +
351 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
119} 352}
120 353
354/*
355 * Growing the rt section of the filesystem.
356 * In the first set of transactions (ALLOC) we allocate space to the
357 * bitmap or summary files.
358 * superblock: sector size
359 * agf of the ag from which the extent is allocated: sector size
360 * bmap btree for bitmap/summary inode: max depth * blocksize
361 * bitmap/summary inode: inode size
362 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
363 */
121STATIC uint 364STATIC uint
122xfs_calc_growrtalloc_reservation(xfs_mount_t *mp) 365xfs_calc_growrtalloc_reservation(
366 struct xfs_mount *mp)
123{ 367{
124 return XFS_CALC_GROWRTALLOC_LOG_RES(mp); 368 return 2 * mp->m_sb.sb_sectsize +
369 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
370 mp->m_sb.sb_inodesize +
371 XFS_ALLOCFREE_LOG_RES(mp, 1) +
372 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
373 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
125} 374}
126 375
376/*
377 * Growing the rt section of the filesystem.
378 * In the second set of transactions (ZERO) we zero the new metadata blocks.
379 * one bitmap/summary block: blocksize
380 */
127STATIC uint 381STATIC uint
128xfs_calc_growrtzero_reservation(xfs_mount_t *mp) 382xfs_calc_growrtzero_reservation(
383 struct xfs_mount *mp)
129{ 384{
130 return XFS_CALC_GROWRTZERO_LOG_RES(mp); 385 return mp->m_sb.sb_blocksize + 128;
131} 386}
132 387
388/*
389 * Growing the rt section of the filesystem.
390 * In the third set of transactions (FREE) we update metadata without
391 * allocating any new blocks.
392 * superblock: sector size
393 * bitmap inode: inode size
394 * summary inode: inode size
395 * one bitmap block: blocksize
396 * summary blocks: new summary size
397 */
133STATIC uint 398STATIC uint
134xfs_calc_growrtfree_reservation(xfs_mount_t *mp) 399xfs_calc_growrtfree_reservation(
400 struct xfs_mount *mp)
135{ 401{
136 return XFS_CALC_GROWRTFREE_LOG_RES(mp); 402 return mp->m_sb.sb_sectsize +
403 2 * mp->m_sb.sb_inodesize +
404 mp->m_sb.sb_blocksize +
405 mp->m_rsumsize +
406 128 * 5;
137} 407}
138 408
409/*
410 * Logging the inode modification timestamp on a synchronous write.
411 * inode
412 */
139STATIC uint 413STATIC uint
140xfs_calc_swrite_reservation(xfs_mount_t *mp) 414xfs_calc_swrite_reservation(
415 struct xfs_mount *mp)
141{ 416{
142 return XFS_CALC_SWRITE_LOG_RES(mp); 417 return mp->m_sb.sb_inodesize + 128;
143} 418}
144 419
420/*
421 * Logging the inode mode bits when writing a setuid/setgid file
422 * inode
423 */
145STATIC uint 424STATIC uint
146xfs_calc_writeid_reservation(xfs_mount_t *mp) 425xfs_calc_writeid_reservation(xfs_mount_t *mp)
147{ 426{
148 return XFS_CALC_WRITEID_LOG_RES(mp); 427 return mp->m_sb.sb_inodesize + 128;
149} 428}
150 429
430/*
431 * Converting the inode from non-attributed to attributed.
432 * the inode being converted: inode size
433 * agf block and superblock (for block allocation)
434 * the new block (directory sized)
435 * bmap blocks for the new directory block
436 * allocation btrees
437 */
151STATIC uint 438STATIC uint
152xfs_calc_addafork_reservation(xfs_mount_t *mp) 439xfs_calc_addafork_reservation(
440 struct xfs_mount *mp)
153{ 441{
154 return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 442 return XFS_DQUOT_LOGRES(mp) +
443 mp->m_sb.sb_inodesize +
444 mp->m_sb.sb_sectsize * 2 +
445 mp->m_dirblksize +
446 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
447 XFS_ALLOCFREE_LOG_RES(mp, 1) +
448 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
449 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
155} 450}
156 451
452/*
453 * Removing the attribute fork of a file
454 * the inode being truncated: inode size
455 * the inode's bmap btree: max depth * block size
456 * And the bmap_finish transaction can free the blocks and bmap blocks:
457 * the agf for each of the ags: 4 * sector size
458 * the agfl for each of the ags: 4 * sector size
459 * the super block to reflect the freed blocks: sector size
460 * worst case split in allocation btrees per extent assuming 4 extents:
461 * 4 exts * 2 trees * (2 * max depth - 1) * block size
462 */
157STATIC uint 463STATIC uint
158xfs_calc_attrinval_reservation(xfs_mount_t *mp) 464xfs_calc_attrinval_reservation(
465 struct xfs_mount *mp)
159{ 466{
160 return XFS_CALC_ATTRINVAL_LOG_RES(mp); 467 return MAX((mp->m_sb.sb_inodesize +
468 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
469 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
470 (4 * mp->m_sb.sb_sectsize +
471 4 * mp->m_sb.sb_sectsize +
472 mp->m_sb.sb_sectsize +
473 XFS_ALLOCFREE_LOG_RES(mp, 4) +
474 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
161} 475}
162 476
477/*
478 * Setting an attribute.
479 * the inode getting the attribute
480 * the superblock for allocations
481 * the agfs extents are allocated from
482 * the attribute btree * max depth
483 * the inode allocation btree
484 * Since attribute transaction space is dependent on the size of the attribute,
485 * the calculation is done partially at mount time and partially at runtime.
486 */
163STATIC uint 487STATIC uint
164xfs_calc_attrset_reservation(xfs_mount_t *mp) 488xfs_calc_attrset_reservation(
489 struct xfs_mount *mp)
165{ 490{
166 return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 491 return XFS_DQUOT_LOGRES(mp) +
492 mp->m_sb.sb_inodesize +
493 mp->m_sb.sb_sectsize +
494 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
495 128 * (2 + XFS_DA_NODE_MAXDEPTH);
167} 496}
168 497
498/*
499 * Removing an attribute.
500 * the inode: inode size
501 * the attribute btree could join: max depth * block size
502 * the inode bmap btree could join or split: max depth * block size
503 * And the bmap_finish transaction can free the attr blocks freed giving:
504 * the agf for the ag in which the blocks live: 2 * sector size
505 * the agfl for the ag in which the blocks live: 2 * sector size
506 * the superblock for the free block count: sector size
507 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
508 */
169STATIC uint 509STATIC uint
170xfs_calc_attrrm_reservation(xfs_mount_t *mp) 510xfs_calc_attrrm_reservation(
511 struct xfs_mount *mp)
171{ 512{
172 return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 513 return XFS_DQUOT_LOGRES(mp) +
514 MAX((mp->m_sb.sb_inodesize +
515 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
516 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
517 128 * (1 + XFS_DA_NODE_MAXDEPTH +
518 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
519 (2 * mp->m_sb.sb_sectsize +
520 2 * mp->m_sb.sb_sectsize +
521 mp->m_sb.sb_sectsize +
522 XFS_ALLOCFREE_LOG_RES(mp, 2) +
523 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
173} 524}
174 525
526/*
527 * Clearing a bad agino number in an agi hash bucket.
528 */
175STATIC uint 529STATIC uint
176xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) 530xfs_calc_clear_agi_bucket_reservation(
531 struct xfs_mount *mp)
177{ 532{
178 return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); 533 return mp->m_sb.sb_sectsize + 128;
179} 534}
180 535
181/* 536/*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
184 */ 539 */
185void 540void
186xfs_trans_init( 541xfs_trans_init(
187 xfs_mount_t *mp) 542 struct xfs_mount *mp)
188{ 543{
189 xfs_trans_reservations_t *resp; 544 struct xfs_trans_reservations *resp = &mp->m_reservations;
190 545
191 resp = &(mp->m_reservations);
192 resp->tr_write = xfs_calc_write_reservation(mp); 546 resp->tr_write = xfs_calc_write_reservation(mp);
193 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); 547 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
194 resp->tr_rename = xfs_calc_rename_reservation(mp); 548 resp->tr_rename = xfs_calc_rename_reservation(mp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8c69e7824f68..e639e8e9a2a9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
300 300
301 301
302/* 302/*
303 * Various log reservation values.
304 * These are based on the size of the file system block
305 * because that is what most transactions manipulate.
306 * Each adds in an additional 128 bytes per item logged to
307 * try to account for the overhead of the transaction mechanism.
308 *
309 * Note:
310 * Most of the reservations underestimate the number of allocation
311 * groups into which they could free extents in the xfs_bmap_finish()
312 * call. This is because the number in the worst case is quite high
313 * and quite unusual. In order to fix this we need to change
314 * xfs_bmap_finish() to free extents in only a single AG at a time.
315 * This will require changes to the EFI code as well, however, so that
316 * the EFI for the extents not freed is logged again in each transaction.
317 * See bug 261917.
318 */
319
320/*
321 * Per-extent log reservation for the allocation btree changes 303 * Per-extent log reservation for the allocation btree changes
322 * involved in freeing or allocating an extent. 304 * involved in freeing or allocating an extent.
323 * 2 trees * (2 blocks/level * max depth - 1) * block size 305 * 2 trees * (2 blocks/level * max depth - 1) * block size
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
341 (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ 323 (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
342 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) 324 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
343 325
344/*
345 * In a write transaction we can allocate a maximum of 2
346 * extents. This gives:
347 * the inode getting the new extents: inode size
348 * the inode's bmap btree: max depth * block size
349 * the agfs of the ags from which the extents are allocated: 2 * sector
350 * the superblock free block counter: sector size
351 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
352 * And the bmap_finish transaction can free bmap blocks in a join:
353 * the agfs of the ags containing the blocks: 2 * sector size
354 * the agfls of the ags containing the blocks: 2 * sector size
355 * the super block free block counter: sector size
356 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
357 */
358#define XFS_CALC_WRITE_LOG_RES(mp) \
359 (MAX( \
360 ((mp)->m_sb.sb_inodesize + \
361 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
362 (2 * (mp)->m_sb.sb_sectsize) + \
363 (mp)->m_sb.sb_sectsize + \
364 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
365 (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
366 ((2 * (mp)->m_sb.sb_sectsize) + \
367 (2 * (mp)->m_sb.sb_sectsize) + \
368 (mp)->m_sb.sb_sectsize + \
369 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
370 (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
371 326
372#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write) 327#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write)
373
374/*
375 * In truncating a file we free up to two extents at once. We can modify:
376 * the inode being truncated: inode size
377 * the inode's bmap btree: (max depth + 1) * block size
378 * And the bmap_finish transaction can free the blocks and bmap blocks:
379 * the agf for each of the ags: 4 * sector size
380 * the agfl for each of the ags: 4 * sector size
381 * the super block to reflect the freed blocks: sector size
382 * worst case split in allocation btrees per extent assuming 4 extents:
383 * 4 exts * 2 trees * (2 * max depth - 1) * block size
384 * the inode btree: max depth * blocksize
385 * the allocation btrees: 2 trees * (max depth - 1) * block size
386 */
387#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \
388 (MAX( \
389 ((mp)->m_sb.sb_inodesize + \
390 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
391 (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
392 ((4 * (mp)->m_sb.sb_sectsize) + \
393 (4 * (mp)->m_sb.sb_sectsize) + \
394 (mp)->m_sb.sb_sectsize + \
395 XFS_ALLOCFREE_LOG_RES(mp, 4) + \
396 (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
397 (128 * 5) + \
398 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
399 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
400 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
401
402#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) 328#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
403
404/*
405 * In renaming a files we can modify:
406 * the four inodes involved: 4 * inode size
407 * the two directory btrees: 2 * (max depth + v2) * dir block size
408 * the two directory bmap btrees: 2 * max depth * block size
409 * And the bmap_finish transaction can free dir and bmap blocks (two sets
410 * of bmap blocks) giving:
411 * the agf for the ags in which the blocks live: 3 * sector size
412 * the agfl for the ags in which the blocks live: 3 * sector size
413 * the superblock for the free block count: sector size
414 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
415 */
416#define XFS_CALC_RENAME_LOG_RES(mp) \
417 (MAX( \
418 ((4 * (mp)->m_sb.sb_inodesize) + \
419 (2 * XFS_DIROP_LOG_RES(mp)) + \
420 (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
421 ((3 * (mp)->m_sb.sb_sectsize) + \
422 (3 * (mp)->m_sb.sb_sectsize) + \
423 (mp)->m_sb.sb_sectsize + \
424 XFS_ALLOCFREE_LOG_RES(mp, 3) + \
425 (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
426
427#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename) 329#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename)
428
429/*
430 * For creating a link to an inode:
431 * the parent directory inode: inode size
432 * the linked inode: inode size
433 * the directory btree could split: (max depth + v2) * dir block size
434 * the directory bmap btree could join or split: (max depth + v2) * blocksize
435 * And the bmap_finish transaction can free some bmap blocks giving:
436 * the agf for the ag in which the blocks live: sector size
437 * the agfl for the ag in which the blocks live: sector size
438 * the superblock for the free block count: sector size
439 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
440 */
441#define XFS_CALC_LINK_LOG_RES(mp) \
442 (MAX( \
443 ((mp)->m_sb.sb_inodesize + \
444 (mp)->m_sb.sb_inodesize + \
445 XFS_DIROP_LOG_RES(mp) + \
446 (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
447 ((mp)->m_sb.sb_sectsize + \
448 (mp)->m_sb.sb_sectsize + \
449 (mp)->m_sb.sb_sectsize + \
450 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
451 (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
452
453#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link) 330#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link)
454
455/*
456 * For removing a directory entry we can modify:
457 * the parent directory inode: inode size
458 * the removed inode: inode size
459 * the directory btree could join: (max depth + v2) * dir block size
460 * the directory bmap btree could join or split: (max depth + v2) * blocksize
461 * And the bmap_finish transaction can free the dir and bmap blocks giving:
462 * the agf for the ag in which the blocks live: 2 * sector size
463 * the agfl for the ag in which the blocks live: 2 * sector size
464 * the superblock for the free block count: sector size
465 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
466 */
467#define XFS_CALC_REMOVE_LOG_RES(mp) \
468 (MAX( \
469 ((mp)->m_sb.sb_inodesize + \
470 (mp)->m_sb.sb_inodesize + \
471 XFS_DIROP_LOG_RES(mp) + \
472 (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
473 ((2 * (mp)->m_sb.sb_sectsize) + \
474 (2 * (mp)->m_sb.sb_sectsize) + \
475 (mp)->m_sb.sb_sectsize + \
476 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
477 (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
478
479#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove) 331#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove)
480
481/*
482 * For symlink we can modify:
483 * the parent directory inode: inode size
484 * the new inode: inode size
485 * the inode btree entry: 1 block
486 * the directory btree: (max depth + v2) * dir block size
487 * the directory inode's bmap btree: (max depth + v2) * block size
488 * the blocks for the symlink: 1 kB
489 * Or in the first xact we allocate some inodes giving:
490 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
491 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
492 * the inode btree: max depth * blocksize
493 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
494 */
495#define XFS_CALC_SYMLINK_LOG_RES(mp) \
496 (MAX( \
497 ((mp)->m_sb.sb_inodesize + \
498 (mp)->m_sb.sb_inodesize + \
499 XFS_FSB_TO_B(mp, 1) + \
500 XFS_DIROP_LOG_RES(mp) + \
501 1024 + \
502 (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
503 (2 * (mp)->m_sb.sb_sectsize + \
504 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
505 XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
506 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
507 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
508 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
509
510#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) 332#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
511
512/*
513 * For create we can modify:
514 * the parent directory inode: inode size
515 * the new inode: inode size
516 * the inode btree entry: block size
517 * the superblock for the nlink flag: sector size
518 * the directory btree: (max depth + v2) * dir block size
519 * the directory inode's bmap btree: (max depth + v2) * block size
520 * Or in the first xact we allocate some inodes giving:
521 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
522 * the superblock for the nlink flag: sector size
523 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
524 * the inode btree: max depth * blocksize
525 * the allocation btrees: 2 trees * (max depth - 1) * block size
526 */
527#define XFS_CALC_CREATE_LOG_RES(mp) \
528 (MAX( \
529 ((mp)->m_sb.sb_inodesize + \
530 (mp)->m_sb.sb_inodesize + \
531 (mp)->m_sb.sb_sectsize + \
532 XFS_FSB_TO_B(mp, 1) + \
533 XFS_DIROP_LOG_RES(mp) + \
534 (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
535 (3 * (mp)->m_sb.sb_sectsize + \
536 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
537 XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
538 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
539 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
540 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
541
542#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) 333#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
543
544/*
545 * Making a new directory is the same as creating a new file.
546 */
547#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp)
548
549#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir) 334#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir)
550
551/*
552 * In freeing an inode we can modify:
553 * the inode being freed: inode size
554 * the super block free inode counter: sector size
555 * the agi hash list and counters: sector size
556 * the inode btree entry: block size
557 * the on disk inode before ours in the agi hash list: inode cluster size
558 * the inode btree: max depth * blocksize
559 * the allocation btrees: 2 trees * (max depth - 1) * block size
560 */
561#define XFS_CALC_IFREE_LOG_RES(mp) \
562 ((mp)->m_sb.sb_inodesize + \
563 (mp)->m_sb.sb_sectsize + \
564 (mp)->m_sb.sb_sectsize + \
565 XFS_FSB_TO_B((mp), 1) + \
566 MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
567 (128 * 5) + \
568 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
569 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
570 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
571
572
573#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree) 335#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
574
575/*
576 * When only changing the inode we log the inode and possibly the superblock
577 * We also add a bit of slop for the transaction stuff.
578 */
579#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \
580 (mp)->m_sb.sb_sectsize + 512)
581
582#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange) 336#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
583
584/*
585 * Growing the data section of the filesystem.
586 * superblock
587 * agi and agf
588 * allocation btrees
589 */
590#define XFS_CALC_GROWDATA_LOG_RES(mp) \
591 ((mp)->m_sb.sb_sectsize * 3 + \
592 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
593 (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
594
595#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata) 337#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata)
596
597/*
598 * Growing the rt section of the filesystem.
599 * In the first set of transactions (ALLOC) we allocate space to the
600 * bitmap or summary files.
601 * superblock: sector size
602 * agf of the ag from which the extent is allocated: sector size
603 * bmap btree for bitmap/summary inode: max depth * blocksize
604 * bitmap/summary inode: inode size
605 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
606 */
607#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
608 (2 * (mp)->m_sb.sb_sectsize + \
609 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
610 (mp)->m_sb.sb_inodesize + \
611 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
612 (128 * \
613 (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
614 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
615
616#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc) 338#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc)
617
618/*
619 * Growing the rt section of the filesystem.
620 * In the second set of transactions (ZERO) we zero the new metadata blocks.
621 * one bitmap/summary block: blocksize
622 */
623#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \
624 ((mp)->m_sb.sb_blocksize + 128)
625
626#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero) 339#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero)
627
628/*
629 * Growing the rt section of the filesystem.
630 * In the third set of transactions (FREE) we update metadata without
631 * allocating any new blocks.
632 * superblock: sector size
633 * bitmap inode: inode size
634 * summary inode: inode size
635 * one bitmap block: blocksize
636 * summary blocks: new summary size
637 */
638#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \
639 ((mp)->m_sb.sb_sectsize + \
640 2 * (mp)->m_sb.sb_inodesize + \
641 (mp)->m_sb.sb_blocksize + \
642 (mp)->m_rsumsize + \
643 (128 * 5))
644
645#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree) 340#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree)
646
647/*
648 * Logging the inode modification timestamp on a synchronous write.
649 * inode
650 */
651#define XFS_CALC_SWRITE_LOG_RES(mp) \
652 ((mp)->m_sb.sb_inodesize + 128)
653
654#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 341#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
655
656/* 342/*
657 * Logging the inode timestamps on an fsync -- same as SWRITE 343 * Logging the inode timestamps on an fsync -- same as SWRITE
658 * as long as SWRITE logs the entire inode core 344 * as long as SWRITE logs the entire inode core
659 */ 345 */
660#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 346#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
661
662/*
663 * Logging the inode mode bits when writing a setuid/setgid file
664 * inode
665 */
666#define XFS_CALC_WRITEID_LOG_RES(mp) \
667 ((mp)->m_sb.sb_inodesize + 128)
668
669#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 347#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
670
671/*
672 * Converting the inode from non-attributed to attributed.
673 * the inode being converted: inode size
674 * agf block and superblock (for block allocation)
675 * the new block (directory sized)
676 * bmap blocks for the new directory block
677 * allocation btrees
678 */
679#define XFS_CALC_ADDAFORK_LOG_RES(mp) \
680 ((mp)->m_sb.sb_inodesize + \
681 (mp)->m_sb.sb_sectsize * 2 + \
682 (mp)->m_dirblksize + \
683 XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
684 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
685 (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
686 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
687
688#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 348#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
689
690/*
691 * Removing the attribute fork of a file
692 * the inode being truncated: inode size
693 * the inode's bmap btree: max depth * block size
694 * And the bmap_finish transaction can free the blocks and bmap blocks:
695 * the agf for each of the ags: 4 * sector size
696 * the agfl for each of the ags: 4 * sector size
697 * the super block to reflect the freed blocks: sector size
698 * worst case split in allocation btrees per extent assuming 4 extents:
699 * 4 exts * 2 trees * (2 * max depth - 1) * block size
700 */
701#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \
702 (MAX( \
703 ((mp)->m_sb.sb_inodesize + \
704 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
705 (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
706 ((4 * (mp)->m_sb.sb_sectsize) + \
707 (4 * (mp)->m_sb.sb_sectsize) + \
708 (mp)->m_sb.sb_sectsize + \
709 XFS_ALLOCFREE_LOG_RES(mp, 4) + \
710 (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
711
712#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) 349#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
713
714/*
715 * Setting an attribute.
716 * the inode getting the attribute
717 * the superblock for allocations
718 * the agfs extents are allocated from
719 * the attribute btree * max depth
720 * the inode allocation btree
721 * Since attribute transaction space is dependent on the size of the attribute,
722 * the calculation is done partially at mount time and partially at runtime.
723 */
724#define XFS_CALC_ATTRSET_LOG_RES(mp) \
725 ((mp)->m_sb.sb_inodesize + \
726 (mp)->m_sb.sb_sectsize + \
727 XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
728 (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
729
730#define XFS_ATTRSET_LOG_RES(mp, ext) \ 350#define XFS_ATTRSET_LOG_RES(mp, ext) \
731 ((mp)->m_reservations.tr_attrset + \ 351 ((mp)->m_reservations.tr_attrset + \
732 (ext * (mp)->m_sb.sb_sectsize) + \ 352 (ext * (mp)->m_sb.sb_sectsize) + \
733 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \ 353 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
734 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))))) 354 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
735
736/*
737 * Removing an attribute.
738 * the inode: inode size
739 * the attribute btree could join: max depth * block size
740 * the inode bmap btree could join or split: max depth * block size
741 * And the bmap_finish transaction can free the attr blocks freed giving:
742 * the agf for the ag in which the blocks live: 2 * sector size
743 * the agfl for the ag in which the blocks live: 2 * sector size
744 * the superblock for the free block count: sector size
745 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
746 */
747#define XFS_CALC_ATTRRM_LOG_RES(mp) \
748 (MAX( \
749 ((mp)->m_sb.sb_inodesize + \
750 XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
751 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
752 (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
753 ((2 * (mp)->m_sb.sb_sectsize) + \
754 (2 * (mp)->m_sb.sb_sectsize) + \
755 (mp)->m_sb.sb_sectsize + \
756 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
757 (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
758
759#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) 355#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
760
761/*
762 * Clearing a bad agino number in an agi hash bucket.
763 */
764#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
765 ((mp)->m_sb.sb_sectsize + 128)
766
767#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) 356#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
768 357
769 358
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9d376be0ea38..a06bd62504fc 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -267,7 +267,7 @@ xfs_setattr(
267 if (code) { 267 if (code) {
268 ASSERT(tp == NULL); 268 ASSERT(tp == NULL);
269 lock_flags &= ~XFS_ILOCK_EXCL; 269 lock_flags &= ~XFS_ILOCK_EXCL;
270 ASSERT(lock_flags == XFS_IOLOCK_EXCL); 270 ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
271 goto error_return; 271 goto error_return;
272 } 272 }
273 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 273 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);