aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2007-05-24 01:26:31 -0400
committerTim Shimmin <tes@chook.melbourne.sgi.com>2007-07-14 01:28:50 -0400
commit92821e2ba4ae26887223326fb0b95cdab963b768 (patch)
treea40a2ef10e5b0791df3e522f3139193d39bf2454
parent3260f78ad6d5b788e78ea709d377f58e569bee41 (diff)
[XFS] Lazy Superblock Counters
When we have a couple of hundred transactions on the fly at once, they all typically modify the on disk superblock in some way. create/unclink/mkdir/rmdir modify inode counts, allocation/freeing modify free block counts. When these counts are modified in a transaction, they must eventually lock the superblock buffer and apply the mods. The buffer then remains locked until the transaction is committed into the incore log buffer. The result of this is that with enough transactions on the fly the incore superblock buffer becomes a bottleneck. The result of contention on the incore superblock buffer is that transaction rates fall - the more pressure that is put on the superblock buffer, the slower things go. The key to removing the contention is to not require the superblock fields in question to be locked. We do that by not marking the superblock dirty in the transaction. IOWs, we modify the incore superblock but do not modify the cached superblock buffer. In short, we do not log superblock modifications to critical fields in the superblock on every transaction. In fact we only do it just before we write the superblock to disk every sync period or just before unmount. This creates an interesting problem - if we don't log or write out the fields in every transaction, then how do the values get recovered after a crash? the answer is simple - we keep enough duplicate, logged information in other structures that we can reconstruct the correct count after log recovery has been performed. It is the AGF and AGI structures that contain the duplicate information; after recovery, we walk every AGI and AGF and sum their individual counters to get the correct value, and we do a transaction into the log to correct them. An optimisation of this is that if we have a clean unmount record, we know the value in the superblock is correct, so we can avoid the summation walk under normal conditions and so mount/recovery times do not change under normal operation. One wrinkle that was discovered during development was that the blocks used in the freespace btrees are never accounted for in the AGF counters. This was once a valid optimisation to make; when the filesystem is full, the free space btrees are empty and consume no space. Hence when it matters, the "accounting" is correct. But that means the when we do the AGF summations, we would not have a correct count and xfs_check would complain. Hence a new counter was added to track the number of blocks used by the free space btrees. This is an *on-disk format change*. As a result of this, lazy superblock counters are a mkfs option and at the moment on linux there is no way to convert an old filesystem. This is possible - xfs_db can be used to twiddle the right bits and then xfs_repair will do the format conversion for you. Similarly, you can convert backwards as well. At some point we'll add functionality to xfs_admin to do the bit twiddling easily.... SGI-PV: 964999 SGI-Modid: xfs-linux-melb:xfs-kern:28652a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h1
-rw-r--r--fs/xfs/xfs_ag.h8
-rw-r--r--fs/xfs/xfs_alloc.c48
-rw-r--r--fs/xfs/xfs_alloc.h6
-rw-r--r--fs/xfs/xfs_alloc_btree.c20
-rw-r--r--fs/xfs/xfs_fs.h1
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_ialloc.c28
-rw-r--r--fs/xfs/xfs_ialloc.h10
-rw-r--r--fs/xfs/xfs_log.c4
-rw-r--r--fs/xfs/xfs_log_recover.c8
-rw-r--r--fs/xfs/xfs_mount.c154
-rw-r--r--fs/xfs/xfs_mount.h10
-rw-r--r--fs/xfs/xfs_sb.h16
-rw-r--r--fs/xfs/xfs_trans.c58
-rw-r--r--fs/xfs/xfs_trans.h3
-rw-r--r--fs/xfs/xfs_vfsops.c11
18 files changed, 337 insertions, 54 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bf9a9d5909be..05f188ed1206 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -547,7 +547,8 @@ vfs_sync_worker(
547 547
548 if (!(vfsp->vfs_flag & VFS_RDONLY)) 548 if (!(vfsp->vfs_flag & VFS_RDONLY))
549 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \ 549 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
550 SYNC_ATTR | SYNC_REFCACHE, NULL); 550 SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
551 NULL);
551 vfsp->vfs_sync_seq++; 552 vfsp->vfs_sync_seq++;
552 wake_up(&vfsp->vfs_wait_single_sync_task); 553 wake_up(&vfsp->vfs_wait_single_sync_task);
553} 554}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index e2c2ce98ab5b..cb7b0d62fb96 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,6 +92,7 @@ typedef enum {
92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ 94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
95#define SYNC_SUPER 0x0200 /* flush superblock to disk */
95 96
96#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ 97#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
97#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ 98#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 9ece7f87ec5b..b1dd0029c60e 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -68,6 +68,7 @@ typedef struct xfs_agf {
68 __be32 agf_flcount; /* count of blocks in freelist */ 68 __be32 agf_flcount; /* count of blocks in freelist */
69 __be32 agf_freeblks; /* total free blocks */ 69 __be32 agf_freeblks; /* total free blocks */
70 __be32 agf_longest; /* longest free space */ 70 __be32 agf_longest; /* longest free space */
71 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
71} xfs_agf_t; 72} xfs_agf_t;
72 73
73#define XFS_AGF_MAGICNUM 0x00000001 74#define XFS_AGF_MAGICNUM 0x00000001
@@ -81,7 +82,8 @@ typedef struct xfs_agf {
81#define XFS_AGF_FLCOUNT 0x00000100 82#define XFS_AGF_FLCOUNT 0x00000100
82#define XFS_AGF_FREEBLKS 0x00000200 83#define XFS_AGF_FREEBLKS 0x00000200
83#define XFS_AGF_LONGEST 0x00000400 84#define XFS_AGF_LONGEST 0x00000400
84#define XFS_AGF_NUM_BITS 11 85#define XFS_AGF_BTREEBLKS 0x00000800
86#define XFS_AGF_NUM_BITS 12
85#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
86 88
87/* disk block (xfs_daddr_t) in the AG */ 89/* disk block (xfs_daddr_t) in the AG */
@@ -186,11 +188,13 @@ typedef struct xfs_perag
186 __uint32_t pagf_flcount; /* count of blocks in freelist */ 188 __uint32_t pagf_flcount; /* count of blocks in freelist */
187 xfs_extlen_t pagf_freeblks; /* total free blocks */ 189 xfs_extlen_t pagf_freeblks; /* total free blocks */
188 xfs_extlen_t pagf_longest; /* longest free space */ 190 xfs_extlen_t pagf_longest; /* longest free space */
191 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
189 xfs_agino_t pagi_freecount; /* number of free inodes */ 192 xfs_agino_t pagi_freecount; /* number of free inodes */
193 xfs_agino_t pagi_count; /* number of allocated inodes */
194 int pagb_count; /* pagb slots in use */
190#ifdef __KERNEL__ 195#ifdef __KERNEL__
191 lock_t pagb_lock; /* lock for pagb_list */ 196 lock_t pagb_lock; /* lock for pagb_list */
192#endif 197#endif
193 int pagb_count; /* pagb slots in use */
194 xfs_perag_busy_t *pagb_list; /* unstable blocks */ 198 xfs_perag_busy_t *pagb_list; /* unstable blocks */
195} xfs_perag_t; 199} xfs_perag_t;
196 200
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8e9a40aa0cd3..98f95d4c4bcc 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1447,7 +1447,8 @@ xfs_alloc_ag_vextent_small(
1447 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && 1447 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1448 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1448 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1449 > args->minleft)) { 1449 > args->minleft)) {
1450 if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))) 1450 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1451 if (error)
1451 goto error0; 1452 goto error0;
1452 if (fbno != NULLAGBLOCK) { 1453 if (fbno != NULLAGBLOCK) {
1453 if (args->userdata) { 1454 if (args->userdata) {
@@ -1923,7 +1924,8 @@ xfs_alloc_fix_freelist(
1923 while (be32_to_cpu(agf->agf_flcount) > need) { 1924 while (be32_to_cpu(agf->agf_flcount) > need) {
1924 xfs_buf_t *bp; 1925 xfs_buf_t *bp;
1925 1926
1926 if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) 1927 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1928 if (error)
1927 return error; 1929 return error;
1928 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) 1930 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1929 return error; 1931 return error;
@@ -1973,8 +1975,9 @@ xfs_alloc_fix_freelist(
1973 * Put each allocated block on the list. 1975 * Put each allocated block on the list.
1974 */ 1976 */
1975 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 1977 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1976 if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp, 1978 error = xfs_alloc_put_freelist(tp, agbp,
1977 bno))) 1979 agflbp, bno, 0);
1980 if (error)
1978 return error; 1981 return error;
1979 } 1982 }
1980 } 1983 }
@@ -1991,13 +1994,15 @@ int /* error */
1991xfs_alloc_get_freelist( 1994xfs_alloc_get_freelist(
1992 xfs_trans_t *tp, /* transaction pointer */ 1995 xfs_trans_t *tp, /* transaction pointer */
1993 xfs_buf_t *agbp, /* buffer containing the agf structure */ 1996 xfs_buf_t *agbp, /* buffer containing the agf structure */
1994 xfs_agblock_t *bnop) /* block address retrieved from freelist */ 1997 xfs_agblock_t *bnop, /* block address retrieved from freelist */
1998 int btreeblk) /* destination is a AGF btree */
1995{ 1999{
1996 xfs_agf_t *agf; /* a.g. freespace structure */ 2000 xfs_agf_t *agf; /* a.g. freespace structure */
1997 xfs_agfl_t *agfl; /* a.g. freelist structure */ 2001 xfs_agfl_t *agfl; /* a.g. freelist structure */
1998 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2002 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1999 xfs_agblock_t bno; /* block number returned */ 2003 xfs_agblock_t bno; /* block number returned */
2000 int error; 2004 int error;
2005 int logflags;
2001#ifdef XFS_ALLOC_TRACE 2006#ifdef XFS_ALLOC_TRACE
2002 static char fname[] = "xfs_alloc_get_freelist"; 2007 static char fname[] = "xfs_alloc_get_freelist";
2003#endif 2008#endif
@@ -2032,8 +2037,16 @@ xfs_alloc_get_freelist(
2032 be32_add(&agf->agf_flcount, -1); 2037 be32_add(&agf->agf_flcount, -1);
2033 xfs_trans_agflist_delta(tp, -1); 2038 xfs_trans_agflist_delta(tp, -1);
2034 pag->pagf_flcount--; 2039 pag->pagf_flcount--;
2035 TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); 2040
2036 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); 2041 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2042 if (btreeblk) {
2043 be32_add(&agf->agf_btreeblks, 1);
2044 pag->pagf_btreeblks++;
2045 logflags |= XFS_AGF_BTREEBLKS;
2046 }
2047
2048 TRACE_MODAGF(NULL, agf, logflags);
2049 xfs_alloc_log_agf(tp, agbp, logflags);
2037 *bnop = bno; 2050 *bnop = bno;
2038 2051
2039 /* 2052 /*
@@ -2071,6 +2084,7 @@ xfs_alloc_log_agf(
2071 offsetof(xfs_agf_t, agf_flcount), 2084 offsetof(xfs_agf_t, agf_flcount),
2072 offsetof(xfs_agf_t, agf_freeblks), 2085 offsetof(xfs_agf_t, agf_freeblks),
2073 offsetof(xfs_agf_t, agf_longest), 2086 offsetof(xfs_agf_t, agf_longest),
2087 offsetof(xfs_agf_t, agf_btreeblks),
2074 sizeof(xfs_agf_t) 2088 sizeof(xfs_agf_t)
2075 }; 2089 };
2076 2090
@@ -2106,12 +2120,14 @@ xfs_alloc_put_freelist(
2106 xfs_trans_t *tp, /* transaction pointer */ 2120 xfs_trans_t *tp, /* transaction pointer */
2107 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2121 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2108 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2122 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
2109 xfs_agblock_t bno) /* block being freed */ 2123 xfs_agblock_t bno, /* block being freed */
2124 int btreeblk) /* block came from a AGF btree */
2110{ 2125{
2111 xfs_agf_t *agf; /* a.g. freespace structure */ 2126 xfs_agf_t *agf; /* a.g. freespace structure */
2112 xfs_agfl_t *agfl; /* a.g. free block array */ 2127 xfs_agfl_t *agfl; /* a.g. free block array */
2113 __be32 *blockp;/* pointer to array entry */ 2128 __be32 *blockp;/* pointer to array entry */
2114 int error; 2129 int error;
2130 int logflags;
2115#ifdef XFS_ALLOC_TRACE 2131#ifdef XFS_ALLOC_TRACE
2116 static char fname[] = "xfs_alloc_put_freelist"; 2132 static char fname[] = "xfs_alloc_put_freelist";
2117#endif 2133#endif
@@ -2132,11 +2148,22 @@ xfs_alloc_put_freelist(
2132 be32_add(&agf->agf_flcount, 1); 2148 be32_add(&agf->agf_flcount, 1);
2133 xfs_trans_agflist_delta(tp, 1); 2149 xfs_trans_agflist_delta(tp, 1);
2134 pag->pagf_flcount++; 2150 pag->pagf_flcount++;
2151
2152 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2153 if (btreeblk) {
2154 be32_add(&agf->agf_btreeblks, -1);
2155 pag->pagf_btreeblks--;
2156 logflags |= XFS_AGF_BTREEBLKS;
2157 }
2158
2159 TRACE_MODAGF(NULL, agf, logflags);
2160 xfs_alloc_log_agf(tp, agbp, logflags);
2161
2135 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2162 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2136 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2163 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2137 *blockp = cpu_to_be32(bno); 2164 *blockp = cpu_to_be32(bno);
2138 TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2165 TRACE_MODAGF(NULL, agf, logflags);
2139 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2166 xfs_alloc_log_agf(tp, agbp, logflags);
2140 xfs_trans_log_buf(tp, agflbp, 2167 xfs_trans_log_buf(tp, agflbp,
2141 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2168 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2142 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + 2169 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
@@ -2196,6 +2223,7 @@ xfs_alloc_read_agf(
2196 pag = &mp->m_perag[agno]; 2223 pag = &mp->m_perag[agno];
2197 if (!pag->pagf_init) { 2224 if (!pag->pagf_init) {
2198 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2225 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2226 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2199 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2227 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2200 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2228 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2201 pag->pagf_levels[XFS_BTNUM_BNOi] = 2229 pag->pagf_levels[XFS_BTNUM_BNOi] =
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5a4256120ccc..5aec15d0651e 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -136,7 +136,8 @@ int /* error */
136xfs_alloc_get_freelist( 136xfs_alloc_get_freelist(
137 struct xfs_trans *tp, /* transaction pointer */ 137 struct xfs_trans *tp, /* transaction pointer */
138 struct xfs_buf *agbp, /* buffer containing the agf structure */ 138 struct xfs_buf *agbp, /* buffer containing the agf structure */
139 xfs_agblock_t *bnop); /* block address retrieved from freelist */ 139 xfs_agblock_t *bnop, /* block address retrieved from freelist */
140 int btreeblk); /* destination is a AGF btree */
140 141
141/* 142/*
142 * Log the given fields from the agf structure. 143 * Log the given fields from the agf structure.
@@ -165,7 +166,8 @@ xfs_alloc_put_freelist(
165 struct xfs_trans *tp, /* transaction pointer */ 166 struct xfs_trans *tp, /* transaction pointer */
166 struct xfs_buf *agbp, /* buffer for a.g. freelist header */ 167 struct xfs_buf *agbp, /* buffer for a.g. freelist header */
167 struct xfs_buf *agflbp,/* buffer for a.g. free block array */ 168 struct xfs_buf *agflbp,/* buffer for a.g. free block array */
168 xfs_agblock_t bno); /* block being freed */ 169 xfs_agblock_t bno, /* block being freed */
170 int btreeblk); /* owner was a AGF btree */
169 171
170/* 172/*
171 * Read in the allocation group header (free/alloc section). 173 * Read in the allocation group header (free/alloc section).
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 74cadf95d4e8..1603ce595853 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -226,8 +226,9 @@ xfs_alloc_delrec(
226 /* 226 /*
227 * Put this buffer/block on the ag's freelist. 227 * Put this buffer/block on the ag's freelist.
228 */ 228 */
229 if ((error = xfs_alloc_put_freelist(cur->bc_tp, 229 error = xfs_alloc_put_freelist(cur->bc_tp,
230 cur->bc_private.a.agbp, NULL, bno))) 230 cur->bc_private.a.agbp, NULL, bno, 1);
231 if (error)
231 return error; 232 return error;
232 /* 233 /*
233 * Since blocks move to the free list without the 234 * Since blocks move to the free list without the
@@ -549,8 +550,9 @@ xfs_alloc_delrec(
549 /* 550 /*
550 * Free the deleting block by putting it on the freelist. 551 * Free the deleting block by putting it on the freelist.
551 */ 552 */
552 if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp, 553 error = xfs_alloc_put_freelist(cur->bc_tp,
553 NULL, rbno))) 554 cur->bc_private.a.agbp, NULL, rbno, 1);
555 if (error)
554 return error; 556 return error;
555 /* 557 /*
556 * Since blocks move to the free list without the coordination 558 * Since blocks move to the free list without the coordination
@@ -1320,8 +1322,9 @@ xfs_alloc_newroot(
1320 /* 1322 /*
1321 * Get a buffer from the freelist blocks, for the new root. 1323 * Get a buffer from the freelist blocks, for the new root.
1322 */ 1324 */
1323 if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1325 error = xfs_alloc_get_freelist(cur->bc_tp,
1324 &nbno))) 1326 cur->bc_private.a.agbp, &nbno, 1);
1327 if (error)
1325 return error; 1328 return error;
1326 /* 1329 /*
1327 * None available, we fail. 1330 * None available, we fail.
@@ -1604,8 +1607,9 @@ xfs_alloc_split(
1604 * Allocate the new block from the freelist. 1607 * Allocate the new block from the freelist.
1605 * If we can't do it, we're toast. Give up. 1608 * If we can't do it, we're toast. Give up.
1606 */ 1609 */
1607 if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1610 error = xfs_alloc_get_freelist(cur->bc_tp,
1608 &rbno))) 1611 cur->bc_private.a.agbp, &rbno, 1);
1612 if (error)
1609 return error; 1613 return error;
1610 if (rbno == NULLAGBLOCK) { 1614 if (rbno == NULLAGBLOCK) {
1611 *stat = 0; 1615 *stat = 0;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1335449841cd..1b60cfc28be5 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks {
238#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ 238#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
239#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ 239#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
240#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ 240#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
241#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
241 242
242 243
243/* 244/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 25e5eae8a976..27d01afe8465 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -94,6 +94,8 @@ xfs_fs_geometry(
94 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 94 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
95 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 95 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
96 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 96 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
97 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
98 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
97 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ? 99 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
98 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0); 100 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
99 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 101 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index b5feb3e77116..f943368c9b93 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc(
123 int blks_per_cluster; /* fs blocks per inode cluster */ 123 int blks_per_cluster; /* fs blocks per inode cluster */
124 xfs_btree_cur_t *cur; /* inode btree cursor */ 124 xfs_btree_cur_t *cur; /* inode btree cursor */
125 xfs_daddr_t d; /* disk addr of buffer */ 125 xfs_daddr_t d; /* disk addr of buffer */
126 xfs_agnumber_t agno;
126 int error; 127 int error;
127 xfs_buf_t *fbuf; /* new free inodes' buffer */ 128 xfs_buf_t *fbuf; /* new free inodes' buffer */
128 xfs_dinode_t *free; /* new free inode structure */ 129 xfs_dinode_t *free; /* new free inode structure */
@@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc(
302 } 303 }
303 be32_add(&agi->agi_count, newlen); 304 be32_add(&agi->agi_count, newlen);
304 be32_add(&agi->agi_freecount, newlen); 305 be32_add(&agi->agi_freecount, newlen);
306 agno = be32_to_cpu(agi->agi_seqno);
305 down_read(&args.mp->m_peraglock); 307 down_read(&args.mp->m_peraglock);
306 args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen; 308 args.mp->m_perag[agno].pagi_freecount += newlen;
307 up_read(&args.mp->m_peraglock); 309 up_read(&args.mp->m_peraglock);
308 agi->agi_newino = cpu_to_be32(newino); 310 agi->agi_newino = cpu_to_be32(newino);
309 /* 311 /*
310 * Insert records describing the new inode chunk into the btree. 312 * Insert records describing the new inode chunk into the btree.
311 */ 313 */
312 cur = xfs_btree_init_cursor(args.mp, tp, agbp, 314 cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
313 be32_to_cpu(agi->agi_seqno),
314 XFS_BTNUM_INO, (xfs_inode_t *)0, 0); 315 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
315 for (thisino = newino; 316 for (thisino = newino;
316 thisino < newino + newlen; 317 thisino < newino + newlen;
@@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi(
1387 pag = &mp->m_perag[agno]; 1388 pag = &mp->m_perag[agno];
1388 if (!pag->pagi_init) { 1389 if (!pag->pagi_init) {
1389 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1390 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1391 pag->pagi_count = be32_to_cpu(agi->agi_count);
1390 pag->pagi_init = 1; 1392 pag->pagi_init = 1;
1391 } else { 1393 } else {
1392 /* 1394 /*
@@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi(
1410 *bpp = bp; 1412 *bpp = bp;
1411 return 0; 1413 return 0;
1412} 1414}
1415
1416/*
1417 * Read in the agi to initialise the per-ag data in the mount structure
1418 */
1419int
1420xfs_ialloc_pagi_init(
1421 xfs_mount_t *mp, /* file system mount structure */
1422 xfs_trans_t *tp, /* transaction pointer */
1423 xfs_agnumber_t agno) /* allocation group number */
1424{
1425 xfs_buf_t *bp = NULL;
1426 int error;
1427
1428 error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1429 if (error)
1430 return error;
1431 if (bp)
1432 xfs_trans_brelse(tp, bp);
1433 return 0;
1434}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 7f5debe1acb6..97f4040931ca 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -149,6 +149,16 @@ xfs_ialloc_read_agi(
149 xfs_agnumber_t agno, /* allocation group number */ 149 xfs_agnumber_t agno, /* allocation group number */
150 struct xfs_buf **bpp); /* allocation group hdr buf */ 150 struct xfs_buf **bpp); /* allocation group hdr buf */
151 151
152/*
153 * Read in the allocation group header to initialise the per-ag data
154 * in the mount structure
155 */
156int
157xfs_ialloc_pagi_init(
158 struct xfs_mount *mp, /* file system mount structure */
159 struct xfs_trans *tp, /* transaction pointer */
160 xfs_agnumber_t agno); /* allocation group number */
161
152#endif /* __KERNEL__ */ 162#endif /* __KERNEL__ */
153 163
154#endif /* __XFS_IALLOC_H__ */ 164#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index fb50fd400e53..9d4c4fbeb3ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp)
817 SPLDECL(s); 817 SPLDECL(s);
818 int needed = 0, gen; 818 int needed = 0, gen;
819 xlog_t *log = mp->m_log; 819 xlog_t *log = mp->m_log;
820 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
821 820
822 if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) || 821 if (!xfs_fs_writable(mp))
823 (vfsp->vfs_flag & VFS_RDONLY))
824 return 0; 822 return 0;
825 823
826 s = LOG_LOCK(log); 824 s = LOG_LOCK(log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 080fabf61c92..fddbb091a86f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,6 +927,14 @@ xlog_find_tail(
927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, 927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
928 after_umount_blk); 928 after_umount_blk);
929 *tail_blk = after_umount_blk; 929 *tail_blk = after_umount_blk;
930
931 /*
932 * Note that the unmount was clean. If the unmount
933 * was not clean, we need to know this to rebuild the
934 * superblock counters from the perag headers if we
935 * have a filesystem using non-persistent counters.
936 */
937 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
930 } 938 }
931 } 939 }
932 940
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5de1f392e632..f6fe47d8c4dc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -643,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
643 sbp->sb_inopblock); 643 sbp->sb_inopblock);
644 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 644 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
645} 645}
646
647/*
648 * xfs_initialize_perag_data
649 *
650 * Read in each per-ag structure so we can count up the number of
651 * allocated inodes, free inodes and used filesystem blocks as this
652 * information is no longer persistent in the superblock. Once we have
653 * this information, write it into the in-core superblock structure.
654 */
655STATIC int
656xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
657{
658 xfs_agnumber_t index;
659 xfs_perag_t *pag;
660 xfs_sb_t *sbp = &mp->m_sb;
661 uint64_t ifree = 0;
662 uint64_t ialloc = 0;
663 uint64_t bfree = 0;
664 uint64_t bfreelst = 0;
665 uint64_t btree = 0;
666 int error;
667 int s;
668
669 for (index = 0; index < agcount; index++) {
670 /*
671 * read the agf, then the agi. This gets us
672 * all the inforamtion we need and populates the
673 * per-ag structures for us.
674 */
675 error = xfs_alloc_pagf_init(mp, NULL, index, 0);
676 if (error)
677 return error;
678
679 error = xfs_ialloc_pagi_init(mp, NULL, index);
680 if (error)
681 return error;
682 pag = &mp->m_perag[index];
683 ifree += pag->pagi_freecount;
684 ialloc += pag->pagi_count;
685 bfree += pag->pagf_freeblks;
686 bfreelst += pag->pagf_flcount;
687 btree += pag->pagf_btreeblks;
688 }
689 /*
690 * Overwrite incore superblock counters with just-read data
691 */
692 s = XFS_SB_LOCK(mp);
693 sbp->sb_ifree = ifree;
694 sbp->sb_icount = ialloc;
695 sbp->sb_fdblocks = bfree + bfreelst + btree;
696 XFS_SB_UNLOCK(mp, s);
697
698 /* Fixup the per-cpu counters as well. */
699 xfs_icsb_reinit_counters(mp);
700
701 return 0;
702}
703
646/* 704/*
647 * xfs_mountfs 705 * xfs_mountfs
648 * 706 *
@@ -987,6 +1045,34 @@ xfs_mountfs(
987 } 1045 }
988 1046
989 /* 1047 /*
1048 * Now the log is mounted, we know if it was an unclean shutdown or
1049 * not. If it was, with the first phase of recovery has completed, we
1050 * have consistent AG blocks on disk. We have not recovered EFIs yet,
1051 * but they are recovered transactionally in the second recovery phase
1052 * later.
1053 *
1054 * Hence we can safely re-initialise incore superblock counters from
1055 * the per-ag data. These may not be correct if the filesystem was not
1056 * cleanly unmounted, so we need to wait for recovery to finish before
1057 * doing this.
1058 *
1059 * If the filesystem was cleanly unmounted, then we can trust the
1060 * values in the superblock to be correct and we don't need to do
1061 * anything here.
1062 *
1063 * If we are currently making the filesystem, the initialisation will
1064 * fail as the perag data is in an undefined state.
1065 */
1066
1067 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
1068 !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
1069 !mp->m_sb.sb_inprogress) {
1070 error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
1071 if (error) {
1072 goto error2;
1073 }
1074 }
1075 /*
990 * Get and sanity-check the root inode. 1076 * Get and sanity-check the root inode.
991 * Save the pointer to it in the mount structure. 1077 * Save the pointer to it in the mount structure.
992 */ 1078 */
@@ -1049,6 +1135,7 @@ xfs_mountfs(
1049 goto error4; 1135 goto error4;
1050 } 1136 }
1051 1137
1138
1052 /* 1139 /*
1053 * Complete the quota initialisation, post-log-replay component. 1140 * Complete the quota initialisation, post-log-replay component.
1054 */ 1141 */
@@ -1111,10 +1198,9 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1111 xfs_binval(mp->m_rtdev_targp); 1198 xfs_binval(mp->m_rtdev_targp);
1112 } 1199 }
1113 1200
1201 xfs_log_sbcount(mp, 1);
1114 xfs_unmountfs_writesb(mp); 1202 xfs_unmountfs_writesb(mp);
1115
1116 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1203 xfs_unmountfs_wait(mp); /* wait for async bufs */
1117
1118 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1204 xfs_log_unmount(mp); /* Done! No more fs ops. */
1119 1205
1120 xfs_freesb(mp); 1206 xfs_freesb(mp);
@@ -1161,6 +1247,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp)
1161} 1247}
1162 1248
1163int 1249int
1250xfs_fs_writable(xfs_mount_t *mp)
1251{
1252 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
1253
1254 return !(vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
1255 (vfsp->vfs_flag & VFS_RDONLY));
1256}
1257
1258/*
1259 * xfs_log_sbcount
1260 *
1261 * Called either periodically to keep the on disk superblock values
1262 * roughly up to date or from unmount to make sure the values are
1263 * correct on a clean unmount.
1264 *
1265 * Note this code can be called during the process of freezing, so
1266 * we may need to use the transaction allocator which does not not
1267 * block when the transaction subsystem is in its frozen state.
1268 */
1269int
1270xfs_log_sbcount(
1271 xfs_mount_t *mp,
1272 uint sync)
1273{
1274 xfs_trans_t *tp;
1275 int error;
1276
1277 if (!xfs_fs_writable(mp))
1278 return 0;
1279
1280 xfs_icsb_sync_counters(mp);
1281
1282 /*
1283 * we don't need to do this if we are updating the superblock
1284 * counters on every modification.
1285 */
1286 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1287 return 0;
1288
1289 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
1290 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1291 XFS_DEFAULT_LOG_COUNT);
1292 if (error) {
1293 xfs_trans_cancel(tp, 0);
1294 return error;
1295 }
1296
1297 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1298 if (sync)
1299 xfs_trans_set_sync(tp);
1300 xfs_trans_commit(tp, 0);
1301
1302 return 0;
1303}
1304
1305int
1164xfs_unmountfs_writesb(xfs_mount_t *mp) 1306xfs_unmountfs_writesb(xfs_mount_t *mp)
1165{ 1307{
1166 xfs_buf_t *sbp; 1308 xfs_buf_t *sbp;
@@ -1171,16 +1313,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1171 * skip superblock write if fs is read-only, or 1313 * skip superblock write if fs is read-only, or
1172 * if we are doing a forced umount. 1314 * if we are doing a forced umount.
1173 */ 1315 */
1174 sbp = xfs_getsb(mp, 0);
1175 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || 1316 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
1176 XFS_FORCED_SHUTDOWN(mp))) { 1317 XFS_FORCED_SHUTDOWN(mp))) {
1177 1318
1178 xfs_icsb_sync_counters(mp); 1319 sbp = xfs_getsb(mp, 0);
1320 sb = XFS_BUF_TO_SBP(sbp);
1179 1321
1180 /* 1322 /*
1181 * mark shared-readonly if desired 1323 * mark shared-readonly if desired
1182 */ 1324 */
1183 sb = XFS_BUF_TO_SBP(sbp);
1184 if (mp->m_mk_sharedro) { 1325 if (mp->m_mk_sharedro) {
1185 if (!(sb->sb_flags & XFS_SBF_READONLY)) 1326 if (!(sb->sb_flags & XFS_SBF_READONLY))
1186 sb->sb_flags |= XFS_SBF_READONLY; 1327 sb->sb_flags |= XFS_SBF_READONLY;
@@ -1189,6 +1330,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1189 xfs_fs_cmn_err(CE_NOTE, mp, 1330 xfs_fs_cmn_err(CE_NOTE, mp,
1190 "Unmounting, marking shared read-only"); 1331 "Unmounting, marking shared read-only");
1191 } 1332 }
1333
1192 XFS_BUF_UNDONE(sbp); 1334 XFS_BUF_UNDONE(sbp);
1193 XFS_BUF_UNREAD(sbp); 1335 XFS_BUF_UNREAD(sbp);
1194 XFS_BUF_UNDELAYWRITE(sbp); 1336 XFS_BUF_UNDELAYWRITE(sbp);
@@ -1203,8 +1345,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1203 mp, sbp, XFS_BUF_ADDR(sbp)); 1345 mp, sbp, XFS_BUF_ADDR(sbp));
1204 if (error && mp->m_mk_sharedro) 1346 if (error && mp->m_mk_sharedro)
1205 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1347 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
1348 xfs_buf_relse(sbp);
1206 } 1349 }
1207 xfs_buf_relse(sbp);
1208 return error; 1350 return error;
1209} 1351}
1210 1352
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 871a5bfd8617..0bca2d422719 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -429,12 +429,12 @@ typedef struct xfs_mount {
429/* 429/*
430 * Flags for m_flags. 430 * Flags for m_flags.
431 */ 431 */
432#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 432#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
433 must be synchronous except 433 must be synchronous except
434 for space allocations */ 434 for space allocations */
435#define XFS_MOUNT_INO64 (1ULL << 1) 435#define XFS_MOUNT_INO64 (1ULL << 1)
436 /* (1ULL << 2) -- currently unused */ 436 /* (1ULL << 2) -- currently unused */
437 /* (1ULL << 3) -- currently unused */ 437#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
438#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 438#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
439 operations, typically for 439 operations, typically for
440 disk errors in metadata */ 440 disk errors in metadata */
@@ -511,6 +511,8 @@ xfs_preferred_iosize(xfs_mount_t *mp)
511 511
512#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) 512#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
513 513
514#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
515 ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
514#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) 516#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
515#define xfs_force_shutdown(m,f) \ 517#define xfs_force_shutdown(m,f) \
516 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__) 518 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
@@ -602,6 +604,7 @@ typedef struct xfs_mod_sb {
602 604
603extern xfs_mount_t *xfs_mount_init(void); 605extern xfs_mount_t *xfs_mount_init(void);
604extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 606extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
607extern int xfs_log_sbcount(xfs_mount_t *, uint);
605extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); 608extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
606extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int); 609extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
607extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 610extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -618,6 +621,7 @@ extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
618extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 621extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
619extern int xfs_readsb(xfs_mount_t *, int); 622extern int xfs_readsb(xfs_mount_t *, int);
620extern void xfs_freesb(xfs_mount_t *); 623extern void xfs_freesb(xfs_mount_t *);
624extern int xfs_fs_writable(xfs_mount_t *);
621extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); 625extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
622extern int xfs_syncsub(xfs_mount_t *, int, int *); 626extern int xfs_syncsub(xfs_mount_t *, int, int *);
623extern int xfs_sync_inodes(xfs_mount_t *, int, int *); 627extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 467854b45c8f..ef42537a607a 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -74,12 +74,13 @@ struct xfs_mount;
74 */ 74 */
75#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */ 75#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */
76#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 76#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001
77#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002 77#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
80 80
81#define XFS_SB_VERSION2_OKREALFBITS \ 81#define XFS_SB_VERSION2_OKREALFBITS \
82 (XFS_SB_VERSION2_ATTR2BIT) 82 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
83 XFS_SB_VERSION2_ATTR2BIT)
83#define XFS_SB_VERSION2_OKSASHFBITS \ 84#define XFS_SB_VERSION2_OKSASHFBITS \
84 (0) 85 (0)
85#define XFS_SB_VERSION2_OKREALBITS \ 86#define XFS_SB_VERSION2_OKREALBITS \
@@ -181,6 +182,9 @@ typedef enum {
181#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) 182#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
182#define XFS_SB_UNIT XFS_SB_MVAL(UNIT) 183#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
183#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) 184#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
185#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
186#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
187#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
184#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 188#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
185#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) 189#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
186#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) 190#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
@@ -188,7 +192,7 @@ typedef enum {
188 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ 192 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
189 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 193 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
190 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 194 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
191 XFS_SB_FEATURES2) 195 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
192 196
193 197
194/* 198/*
@@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
414 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) 418 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
415 */ 419 */
416 420
421static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
422{
423 return (XFS_SB_VERSION_HASMOREBITS(sbp) && \
424 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
425}
426
417#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp) 427#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp)
418static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 428static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
419{ 429{
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cc2d60951e21..7133fd9ab868 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -427,6 +427,14 @@ undo_blocks:
427 * 427 *
428 * Mark the transaction structure to indicate that the superblock 428 * Mark the transaction structure to indicate that the superblock
429 * needs to be updated before committing. 429 * needs to be updated before committing.
430 *
431 * Because we may not be keeping track of allocated/free inodes and
432 * used filesystem blocks in the superblock, we do not mark the
433 * superblock dirty in this transaction if we modify these fields.
434 * We still need to update the transaction deltas so that they get
435 * applied to the incore superblock, but we don't want them to
436 * cause the superblock to get locked and logged if these are the
437 * only fields in the superblock that the transaction modifies.
430 */ 438 */
431void 439void
432xfs_trans_mod_sb( 440xfs_trans_mod_sb(
@@ -434,13 +442,19 @@ xfs_trans_mod_sb(
434 uint field, 442 uint field,
435 int64_t delta) 443 int64_t delta)
436{ 444{
445 uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
446 xfs_mount_t *mp = tp->t_mountp;
437 447
438 switch (field) { 448 switch (field) {
439 case XFS_TRANS_SB_ICOUNT: 449 case XFS_TRANS_SB_ICOUNT:
440 tp->t_icount_delta += delta; 450 tp->t_icount_delta += delta;
451 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
452 flags &= ~XFS_TRANS_SB_DIRTY;
441 break; 453 break;
442 case XFS_TRANS_SB_IFREE: 454 case XFS_TRANS_SB_IFREE:
443 tp->t_ifree_delta += delta; 455 tp->t_ifree_delta += delta;
456 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
457 flags &= ~XFS_TRANS_SB_DIRTY;
444 break; 458 break;
445 case XFS_TRANS_SB_FDBLOCKS: 459 case XFS_TRANS_SB_FDBLOCKS:
446 /* 460 /*
@@ -453,6 +467,8 @@ xfs_trans_mod_sb(
453 ASSERT(tp->t_blk_res_used <= tp->t_blk_res); 467 ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
454 } 468 }
455 tp->t_fdblocks_delta += delta; 469 tp->t_fdblocks_delta += delta;
470 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
471 flags &= ~XFS_TRANS_SB_DIRTY;
456 break; 472 break;
457 case XFS_TRANS_SB_RES_FDBLOCKS: 473 case XFS_TRANS_SB_RES_FDBLOCKS:
458 /* 474 /*
@@ -462,6 +478,8 @@ xfs_trans_mod_sb(
462 */ 478 */
463 ASSERT(delta < 0); 479 ASSERT(delta < 0);
464 tp->t_res_fdblocks_delta += delta; 480 tp->t_res_fdblocks_delta += delta;
481 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
482 flags &= ~XFS_TRANS_SB_DIRTY;
465 break; 483 break;
466 case XFS_TRANS_SB_FREXTENTS: 484 case XFS_TRANS_SB_FREXTENTS:
467 /* 485 /*
@@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas(
544 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta + 562 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
545 tp->t_ag_btree_delta)); 563 tp->t_ag_btree_delta));
546 564
547 if (tp->t_icount_delta != 0) { 565 /*
548 INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta); 566 * Only update the superblock counters if we are logging them
549 } 567 */
550 if (tp->t_ifree_delta != 0) { 568 if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
551 INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta); 569 if (tp->t_icount_delta != 0) {
552 } 570 INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
571 }
572 if (tp->t_ifree_delta != 0) {
573 INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
574 }
553 575
554 if (tp->t_fdblocks_delta != 0) { 576 if (tp->t_fdblocks_delta != 0) {
555 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta); 577 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
556 } 578 }
557 if (tp->t_res_fdblocks_delta != 0) { 579 if (tp->t_res_fdblocks_delta != 0) {
558 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta); 580 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
581 }
559 } 582 }
560 583
561 if (tp->t_frextents_delta != 0) { 584 if (tp->t_frextents_delta != 0) {
@@ -627,6 +650,7 @@ xfs_trans_unreserve_and_mod_sb(
627{ 650{
628 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ 651 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */
629 xfs_mod_sb_t *msbp; 652 xfs_mod_sb_t *msbp;
653 xfs_mount_t *mp = tp->t_mountp;
630 /* REFERENCED */ 654 /* REFERENCED */
631 int error; 655 int error;
632 int rsvd; 656 int rsvd;
@@ -659,8 +683,15 @@ xfs_trans_unreserve_and_mod_sb(
659 * The t_res_fdblocks_delta and t_res_frextents_delta fields are 683 * The t_res_fdblocks_delta and t_res_frextents_delta fields are
660 * explicitly NOT applied to the in-core superblock. 684 * explicitly NOT applied to the in-core superblock.
661 * The idea is that that has already been done. 685 * The idea is that that has already been done.
686 *
687 * If we are not logging superblock counters, then the inode
688 * allocated/free and used block counts are not updated in the
689 * on disk superblock. In this case, XFS_TRANS_SB_DIRTY will
690 * not be set when the transaction is updated but we still need
691 * to update the incore superblock with the changes.
662 */ 692 */
663 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 693 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
694 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
664 if (tp->t_icount_delta != 0) { 695 if (tp->t_icount_delta != 0) {
665 msbp->msb_field = XFS_SBS_ICOUNT; 696 msbp->msb_field = XFS_SBS_ICOUNT;
666 msbp->msb_delta = tp->t_icount_delta; 697 msbp->msb_delta = tp->t_icount_delta;
@@ -676,6 +707,9 @@ xfs_trans_unreserve_and_mod_sb(
676 msbp->msb_delta = tp->t_fdblocks_delta; 707 msbp->msb_delta = tp->t_fdblocks_delta;
677 msbp++; 708 msbp++;
678 } 709 }
710 }
711
712 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
679 if (tp->t_frextents_delta != 0) { 713 if (tp->t_frextents_delta != 0) {
680 msbp->msb_field = XFS_SBS_FREXTENTS; 714 msbp->msb_field = XFS_SBS_FREXTENTS;
681 msbp->msb_delta = tp->t_frextents_delta; 715 msbp->msb_delta = tp->t_frextents_delta;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7dfcc450366f..0e26e729023e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -94,7 +94,8 @@ typedef struct xfs_trans_header {
94#define XFS_TRANS_GROWFSRT_ZERO 38 94#define XFS_TRANS_GROWFSRT_ZERO 38
95#define XFS_TRANS_GROWFSRT_FREE 39 95#define XFS_TRANS_GROWFSRT_FREE 39
96#define XFS_TRANS_SWAPEXT 40 96#define XFS_TRANS_SWAPEXT 40
97#define XFS_TRANS_TYPE_MAX 40 97#define XFS_TRANS_SB_COUNT 41
98#define XFS_TRANS_TYPE_MAX 41
98/* new transaction types need to be reflected in xfs_logprint(8) */ 99/* new transaction types need to be reflected in xfs_logprint(8) */
99 100
100 101
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 92c1425d06ce..3a647339f40e 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -672,6 +672,7 @@ xfs_mntupdate(
672 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ 672 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
673 bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL); 673 bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
674 xfs_quiesce_fs(mp); 674 xfs_quiesce_fs(mp);
675 xfs_log_sbcount(mp, 1);
675 xfs_log_unmount_write(mp); 676 xfs_log_unmount_write(mp);
676 xfs_unmountfs_writesb(mp); 677 xfs_unmountfs_writesb(mp);
677 vfsp->vfs_flag |= VFS_RDONLY; 678 vfsp->vfs_flag |= VFS_RDONLY;
@@ -1497,6 +1498,15 @@ xfs_syncsub(
1497 } 1498 }
1498 1499
1499 /* 1500 /*
1501 * If asked, update the disk superblock with incore counter values if we
1502 * are using non-persistent counters so that they don't get too far out
1503 * of sync if we crash or get a forced shutdown. We don't want to force
1504 * this to disk, just get a transaction into the iclogs....
1505 */
1506 if (flags & SYNC_SUPER)
1507 xfs_log_sbcount(mp, 0);
1508
1509 /*
1500 * Now check to see if the log needs a "dummy" transaction. 1510 * Now check to see if the log needs a "dummy" transaction.
1501 */ 1511 */
1502 1512
@@ -1962,6 +1972,7 @@ xfs_freeze(
1962 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); 1972 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
1963 1973
1964 /* Push the superblock and write an unmount record */ 1974 /* Push the superblock and write an unmount record */
1975 xfs_log_sbcount(mp, 1);
1965 xfs_log_unmount_write(mp); 1976 xfs_log_unmount_write(mp);
1966 xfs_unmountfs_writesb(mp); 1977 xfs_unmountfs_writesb(mp);
1967 xfs_fs_log_dummy(mp); 1978 xfs_fs_log_dummy(mp);