Diffstat (limited to 'fs')
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c      |  72
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c  |   8
-rw-r--r--  fs/xfs/libxfs/xfs_format.h    |  62
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c    |   6
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c        |  12
-rw-r--r--  fs/xfs/xfs_bmap_util.c        |  31
-rw-r--r--  fs/xfs/xfs_error.c            |   2
-rw-r--r--  fs/xfs/xfs_file.c             |  84
-rw-r--r--  fs/xfs/xfs_filestream.c       |   2
-rw-r--r--  fs/xfs/xfs_fsops.c            |  20
-rw-r--r--  fs/xfs/xfs_inode.c            | 532
-rw-r--r--  fs/xfs/xfs_inode.h            |  36
-rw-r--r--  fs/xfs/xfs_ioctl.c            |   5
-rw-r--r--  fs/xfs/xfs_iomap.c            |   3
-rw-r--r--  fs/xfs/xfs_iops.c             | 101
-rw-r--r--  fs/xfs/xfs_linux.h            |   9
-rw-r--r--  fs/xfs/xfs_log_recover.c      |   4
-rw-r--r--  fs/xfs/xfs_mount.c            | 918
-rw-r--r--  fs/xfs/xfs_mount.h            |  95
-rw-r--r--  fs/xfs/xfs_mru_cache.c        |   2
-rw-r--r--  fs/xfs/xfs_pnfs.c             |   4
-rw-r--r--  fs/xfs/xfs_qm.c               |   5
-rw-r--r--  fs/xfs/xfs_super.c            | 107
-rw-r--r--  fs/xfs/xfs_super.h            |   2
-rw-r--r--  fs/xfs/xfs_trace.h            |  23
-rw-r--r--  fs/xfs/xfs_trans.c            | 234
26 files changed, 890 insertions(+), 1489 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index e8696f5a8041..aeffeaaac0ec 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
244 } 244 }
245} 245}
246 246
247/*
248 * Debug/sanity checking code
249 */
250
251STATIC int
252xfs_bmap_sanity_check(
253 struct xfs_mount *mp,
254 struct xfs_buf *bp,
255 int level)
256{
257 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
258
259 if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
260 block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
261 return 0;
262
263 if (be16_to_cpu(block->bb_level) != level ||
264 be16_to_cpu(block->bb_numrecs) == 0 ||
265 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
266 return 0;
267
268 return 1;
269}
270
271#ifdef DEBUG 247#ifdef DEBUG
272STATIC struct xfs_buf * 248STATIC struct xfs_buf *
273xfs_bmap_get_bp( 249xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
410 goto error_norelse; 386 goto error_norelse;
411 } 387 }
412 block = XFS_BUF_TO_BLOCK(bp); 388 block = XFS_BUF_TO_BLOCK(bp);
413 XFS_WANT_CORRUPTED_GOTO(mp,
414 xfs_bmap_sanity_check(mp, bp, level),
415 error0);
416 if (level == 0) 389 if (level == 0)
417 break; 390 break;
418 391
@@ -1312,8 +1285,6 @@ xfs_bmap_read_extents(
1312 if (error) 1285 if (error)
1313 return error; 1286 return error;
1314 block = XFS_BUF_TO_BLOCK(bp); 1287 block = XFS_BUF_TO_BLOCK(bp);
1315 XFS_WANT_CORRUPTED_GOTO(mp,
1316 xfs_bmap_sanity_check(mp, bp, level), error0);
1317 if (level == 0) 1288 if (level == 0)
1318 break; 1289 break;
1319 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 1290 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
@@ -1346,9 +1317,6 @@ xfs_bmap_read_extents(
1346 XFS_ERRLEVEL_LOW, ip->i_mount, block); 1317 XFS_ERRLEVEL_LOW, ip->i_mount, block);
1347 goto error0; 1318 goto error0;
1348 } 1319 }
1349 XFS_WANT_CORRUPTED_GOTO(mp,
1350 xfs_bmap_sanity_check(mp, bp, 0),
1351 error0);
1352 /* 1320 /*
1353 * Read-ahead the next leaf block, if any. 1321 * Read-ahead the next leaf block, if any.
1354 */ 1322 */
@@ -2215,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
2215 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - 2183 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2216 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 2184 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2217 if (diff > 0) { 2185 if (diff > 0) {
2218 error = xfs_icsb_modify_counters(bma->ip->i_mount, 2186 error = xfs_mod_fdblocks(bma->ip->i_mount,
2219 XFS_SBS_FDBLOCKS, 2187 -((int64_t)diff), false);
2220 -((int64_t)diff), 0);
2221 ASSERT(!error); 2188 ASSERT(!error);
2222 if (error) 2189 if (error)
2223 goto done; 2190 goto done;
@@ -2268,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
2268 temp += bma->cur->bc_private.b.allocated; 2235 temp += bma->cur->bc_private.b.allocated;
2269 ASSERT(temp <= da_old); 2236 ASSERT(temp <= da_old);
2270 if (temp < da_old) 2237 if (temp < da_old)
2271 xfs_icsb_modify_counters(bma->ip->i_mount, 2238 xfs_mod_fdblocks(bma->ip->i_mount,
2272 XFS_SBS_FDBLOCKS, 2239 (int64_t)(da_old - temp), false);
2273 (int64_t)(da_old - temp), 0);
2274 } 2240 }
2275 2241
2276 /* clear out the allocated field, done with it now in any case. */ 2242 /* clear out the allocated field, done with it now in any case. */
@@ -2948,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
2948 } 2914 }
2949 if (oldlen != newlen) { 2915 if (oldlen != newlen) {
2950 ASSERT(oldlen > newlen); 2916 ASSERT(oldlen > newlen);
2951 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, 2917 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2952 (int64_t)(oldlen - newlen), 0); 2918 false);
2953 /* 2919 /*
2954 * Nothing to do for disk quota accounting here. 2920 * Nothing to do for disk quota accounting here.
2955 */ 2921 */
@@ -4166,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
4166 ASSERT(indlen > 0); 4132 ASSERT(indlen > 0);
4167 4133
4168 if (rt) { 4134 if (rt) {
4169 error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 4135 error = xfs_mod_frextents(mp, -((int64_t)extsz));
4170 -((int64_t)extsz), 0);
4171 } else { 4136 } else {
4172 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4137 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4173 -((int64_t)alen), 0);
4174 } 4138 }
4175 4139
4176 if (error) 4140 if (error)
4177 goto out_unreserve_quota; 4141 goto out_unreserve_quota;
4178 4142
4179 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4143 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4180 -((int64_t)indlen), 0);
4181 if (error) 4144 if (error)
4182 goto out_unreserve_blocks; 4145 goto out_unreserve_blocks;
4183 4146
@@ -4204,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
4204 4167
4205out_unreserve_blocks: 4168out_unreserve_blocks:
4206 if (rt) 4169 if (rt)
4207 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); 4170 xfs_mod_frextents(mp, extsz);
4208 else 4171 else
4209 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); 4172 xfs_mod_fdblocks(mp, alen, false);
4210out_unreserve_quota: 4173out_unreserve_quota:
4211 if (XFS_IS_QUOTA_ON(mp)) 4174 if (XFS_IS_QUOTA_ON(mp))
4212 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? 4175 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -5019,10 +4982,8 @@ xfs_bmap_del_extent(
5019 * Nothing to do for disk quota accounting here. 4982 * Nothing to do for disk quota accounting here.
5020 */ 4983 */
5021 ASSERT(da_old >= da_new); 4984 ASSERT(da_old >= da_new);
5022 if (da_old > da_new) { 4985 if (da_old > da_new)
5023 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4986 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5024 (int64_t)(da_old - da_new), 0);
5025 }
5026done: 4987done:
5027 *logflagsp = flags; 4988 *logflagsp = flags;
5028 return error; 4989 return error;
@@ -5291,14 +5252,13 @@ xfs_bunmapi(
5291 5252
5292 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); 5253 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5293 do_div(rtexts, mp->m_sb.sb_rextsize); 5254 do_div(rtexts, mp->m_sb.sb_rextsize);
5294 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5255 xfs_mod_frextents(mp, (int64_t)rtexts);
5295 (int64_t)rtexts, 0);
5296 (void)xfs_trans_reserve_quota_nblks(NULL, 5256 (void)xfs_trans_reserve_quota_nblks(NULL,
5297 ip, -((long)del.br_blockcount), 0, 5257 ip, -((long)del.br_blockcount), 0,
5298 XFS_QMOPT_RES_RTBLKS); 5258 XFS_QMOPT_RES_RTBLKS);
5299 } else { 5259 } else {
5300 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 5260 xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
5301 (int64_t)del.br_blockcount, 0); 5261 false);
5302 (void)xfs_trans_reserve_quota_nblks(NULL, 5262 (void)xfs_trans_reserve_quota_nblks(NULL,
5303 ip, -((long)del.br_blockcount), 0, 5263 ip, -((long)del.br_blockcount), 0,
5304 XFS_QMOPT_RES_REGBLKS); 5264 XFS_QMOPT_RES_REGBLKS);
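
All of the free-space accounting sites in xfs_bmap.c now funnel through the single xfs_mod_fdblocks() helper instead of the per-field xfs_icsb_modify_counters()/XFS_SBS_FDBLOCKS pair. A minimal user-space model of the contract such a helper has to keep (hypothetical names; the real kernel helper sits on a per-cpu counter and hides the reserve-pool fallback behind the new boolean argument):

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

static _Atomic int64_t fdblocks;	/* stand-in for mp->m_fdblocks */

/* Apply a signed delta, refusing to let the counter go negative. */
static int mod_fdblocks(int64_t delta)
{
	int64_t old = atomic_load(&fdblocks);
	int64_t new;

	do {
		new = old + delta;
		if (new < 0)
			return -ENOSPC;	/* caller unwinds its reservation */
	} while (!atomic_compare_exchange_weak(&fdblocks, &old, new));

	return 0;
}

The call sites above keep the same shape either way: a negative delta reserves blocks and can fail, a positive delta returns them on error paths and cannot.
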
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 9cb0115c6bd1..2385f8cd08ab 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -538,12 +538,12 @@ xfs_da3_root_split(
538 oldroot = blk1->bp->b_addr; 538 oldroot = blk1->bp->b_addr;
539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { 540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
541 struct xfs_da3_icnode_hdr nodehdr; 541 struct xfs_da3_icnode_hdr icnodehdr;
542 542
543 dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); 543 dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
544 btree = dp->d_ops->node_tree_p(oldroot); 544 btree = dp->d_ops->node_tree_p(oldroot);
545 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); 545 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
546 level = nodehdr.level; 546 level = icnodehdr.level;
547 547
548 /* 548 /*
549 * we are about to copy oldroot to bp, so set up the type 549 * we are about to copy oldroot to bp, so set up the type
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8eb718979383..4daaa662337b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
264 /* must be padded to 64 bit alignment */ 264 /* must be padded to 64 bit alignment */
265} xfs_dsb_t; 265} xfs_dsb_t;
266 266
267/*
268 * Sequence number values for the fields.
269 */
270typedef enum {
271 XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
272 XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
273 XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
274 XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
275 XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
276 XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
277 XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
278 XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
279 XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
280 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
281 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
282 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
283 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
284 XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
285 XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
286 XFS_SBS_PQUOTINO, XFS_SBS_LSN,
287 XFS_SBS_FIELDCOUNT
288} xfs_sb_field_t;
289
290/*
291 * Mask values, defined based on the xfs_sb_field_t values.
292 * Only define the ones we're using.
293 */
294#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
295#define XFS_SB_UUID XFS_SB_MVAL(UUID)
296#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
297#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
298#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
299#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
300#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
301#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
302#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
303#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
304#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
305#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
306#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
307#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
308#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
309#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
310#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
311 XFS_SB_MVAL(BAD_FEATURES2))
312#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
313#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
314#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
315#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
316#define XFS_SB_CRC XFS_SB_MVAL(CRC)
317#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
318#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
319#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
320#define XFS_SB_MOD_BITS \
321 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
322 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
323 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
324 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
325 XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
326 XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
327 XFS_SB_PQUOTINO)
328
329 267
330/* 268/*
331 * Misc. Flags - warning - these will be cleared by xfs_repair unless 269 * Misc. Flags - warning - these will be cleared by xfs_repair unless
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index db0444893e96..07349a183a11 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
376 */ 376 */
377 newlen = args.mp->m_ialloc_inos; 377 newlen = args.mp->m_ialloc_inos;
378 if (args.mp->m_maxicount && 378 if (args.mp->m_maxicount &&
379 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 379 percpu_counter_read(&args.mp->m_icount) + newlen >
380 args.mp->m_maxicount)
380 return -ENOSPC; 381 return -ENOSPC;
381 args.minlen = args.maxlen = args.mp->m_ialloc_blks; 382 args.minlen = args.maxlen = args.mp->m_ialloc_blks;
382 /* 383 /*
@@ -1340,7 +1341,8 @@ xfs_dialloc(
1340 * inode. 1341 * inode.
1341 */ 1342 */
1342 if (mp->m_maxicount && 1343 if (mp->m_maxicount &&
1343 mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { 1344 percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
1345 mp->m_maxicount) {
1344 noroom = 1; 1346 noroom = 1;
1345 okalloc = 0; 1347 okalloc = 0;
1346 } 1348 }
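
percpu_counter_read() in both hunks is deliberately the cheap, approximate read: it returns only the already-folded central count and ignores deltas still batched on other CPUs, which is fine for the advisory m_maxicount ceiling. A compilable sketch of why such reads are fuzzy (hypothetical names; the kernel's struct percpu_counter batches per CPU rather than per thread):

#include <stdatomic.h>
#include <stdint.h>

#define BATCH 32

static _Atomic int64_t central;			/* flushed totals */
static _Thread_local int64_t pending;		/* this thread's unflushed delta */

static void counter_add(int64_t n)
{
	pending += n;
	if (pending >= BATCH || pending <= -BATCH) {
		atomic_fetch_add(&central, pending);
		pending = 0;
	}
}

/* Fast but fuzzy: may be off by (nr_threads * BATCH) in either direction. */
static int64_t counter_read(void)
{
	return atomic_load(&central);
}

When the exact value matters - folding the counters back into the superblock in xfs_sb.c below, or sizing the reserve pool in xfs_fsops.c - the code pays for percpu_counter_sum() instead.
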
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index f3ea02bf893e..dc4bfc5d88fc 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -735,17 +735,15 @@ xfs_initialize_perag_data(
735 btree += pag->pagf_btreeblks; 735 btree += pag->pagf_btreeblks;
736 xfs_perag_put(pag); 736 xfs_perag_put(pag);
737 } 737 }
738 /* 738
739 * Overwrite incore superblock counters with just-read data 739 /* Overwrite incore superblock counters with just-read data */
740 */
741 spin_lock(&mp->m_sb_lock); 740 spin_lock(&mp->m_sb_lock);
742 sbp->sb_ifree = ifree; 741 sbp->sb_ifree = ifree;
743 sbp->sb_icount = ialloc; 742 sbp->sb_icount = ialloc;
744 sbp->sb_fdblocks = bfree + bfreelst + btree; 743 sbp->sb_fdblocks = bfree + bfreelst + btree;
745 spin_unlock(&mp->m_sb_lock); 744 spin_unlock(&mp->m_sb_lock);
746 745
747 /* Fixup the per-cpu counters as well. */ 746 xfs_reinit_percpu_counters(mp);
748 xfs_icsb_reinit_counters(mp);
749 747
750 return 0; 748 return 0;
751} 749}
@@ -763,6 +761,10 @@ xfs_log_sb(
763 struct xfs_mount *mp = tp->t_mountp; 761 struct xfs_mount *mp = tp->t_mountp;
764 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 762 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
765 763
764 mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
765 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
766 mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
767
766 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); 768 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
767 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); 769 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
768 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); 770 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index e86757358d5b..a52bbd3abc7d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1660,13 +1660,6 @@ xfs_swap_extent_flush(
1660 /* Verify O_DIRECT for ftmp */ 1660 /* Verify O_DIRECT for ftmp */
1661 if (VFS_I(ip)->i_mapping->nrpages) 1661 if (VFS_I(ip)->i_mapping->nrpages)
1662 return -EINVAL; 1662 return -EINVAL;
1663
1664 /*
1665 * Don't try to swap extents on mmap()d files because we can't lock
1666 * out races against page faults safely.
1667 */
1668 if (mapping_mapped(VFS_I(ip)->i_mapping))
1669 return -EBUSY;
1670 return 0; 1663 return 0;
1671} 1664}
1672 1665
@@ -1694,13 +1687,14 @@ xfs_swap_extents(
1694 } 1687 }
1695 1688
1696 /* 1689 /*
1697 * Lock up the inodes against other IO and truncate to begin with. 1690 * Lock the inodes against other IO, page faults and truncate to
1698 * Then we can ensure the inodes are flushed and have no page cache 1691 * begin with. Then we can ensure the inodes are flushed and have no
1699 * safely. Once we have done this we can take the ilocks and do the rest 1692 * page cache safely. Once we have done this we can take the ilocks and
1700 * of the checks. 1693 * do the rest of the checks.
1701 */ 1694 */
1702 lock_flags = XFS_IOLOCK_EXCL; 1695 lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1703 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1696 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1697 xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
1704 1698
1705 /* Verify that both files have the same format */ 1699 /* Verify that both files have the same format */
1706 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 1700 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1727,8 +1721,16 @@ xfs_swap_extents(
1727 xfs_trans_cancel(tp, 0); 1721 xfs_trans_cancel(tp, 0);
1728 goto out_unlock; 1722 goto out_unlock;
1729 } 1723 }
1724
1725 /*
1726 * Lock and join the inodes to the transaction so that transaction commit
1727 * or cancel will unlock the inodes from this point onwards.
1728 */
1730 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1729 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1731 lock_flags |= XFS_ILOCK_EXCL; 1730 lock_flags |= XFS_ILOCK_EXCL;
1731 xfs_trans_ijoin(tp, ip, lock_flags);
1732 xfs_trans_ijoin(tp, tip, lock_flags);
1733
1732 1734
1733 /* Verify all data are being swapped */ 1735 /* Verify all data are being swapped */
1734 if (sxp->sx_offset != 0 || 1736 if (sxp->sx_offset != 0 ||
@@ -1781,9 +1783,6 @@ xfs_swap_extents(
1781 goto out_trans_cancel; 1783 goto out_trans_cancel;
1782 } 1784 }
1783 1785
1784 xfs_trans_ijoin(tp, ip, lock_flags);
1785 xfs_trans_ijoin(tp, tip, lock_flags);
1786
1787 /* 1786 /*
1788 * Before we've swapped the forks, lets set the owners of the forks 1787 * Before we've swapped the forks, lets set the owners of the forks
1789 * appropriately. We have to do this as we are demand paging the btree 1788 * appropriately. We have to do this as we are demand paging the btree
@@ -1917,5 +1916,5 @@ out_unlock:
1917 1916
1918out_trans_cancel: 1917out_trans_cancel:
1919 xfs_trans_cancel(tp, 0); 1918 xfs_trans_cancel(tp, 0);
1920 goto out_unlock; 1919 goto out;
1921} 1920}
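
Two things interlock in this file: the new XFS_MMAPLOCK_EXCL pair replaces the old mapping_mapped() -EBUSY bailout, so mmap()d files can now be swapped safely, and the xfs_trans_ijoin() calls move up so that transaction commit or cancel releases the inode locks. The latter is why the final hunk's error path must no longer jump to the manual unlock label. The ownership-transfer pattern, modelled in user-space C (hypothetical names, mutexes standing in for inode locks):

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static int validate(void) { return 0; }		/* stand-in for format/offset checks */
static int do_swap(void) { return 0; }		/* stand-in for the fork swap */

static void trans_cancel(void)
{
	/* the transaction owns the locks once "joined" */
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
}

static int trans_commit(void)
{
	trans_cancel();		/* commit has the same unlock side effect */
	return 0;
}

static int swap_extents_model(void)
{
	int error;

	pthread_mutex_lock(&lock_a);
	pthread_mutex_lock(&lock_b);

	error = validate();
	if (error)
		goto out_unlock;	/* not joined yet: unlock by hand */

	/* "xfs_trans_ijoin": the transaction now owns both locks */

	error = do_swap();
	if (error) {
		trans_cancel();		/* unlocks a and b ... */
		return error;		/* ... so do NOT goto out_unlock */
	}
	return trans_commit();

out_unlock:
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return error;
}
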
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 3ee186ac1093..338e50bbfd1e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -131,7 +131,7 @@ xfs_error_report(
131{ 131{
132 if (level <= xfs_error_level) { 132 if (level <= xfs_error_level) {
133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
134 "Internal error %s at line %d of file %s. Caller %pF", 134 "Internal error %s at line %d of file %s. Caller %pS",
135 tag, linenum, filename, ra); 135 tag, linenum, filename, ra);
136 136
137 xfs_stack_trace(); 137 xfs_stack_trace();
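
The format-specifier change is subtle but load-bearing on architectures with function descriptors (ia64, ppc64 ELFv1): %pF expects a function-descriptor pointer and dereferences it before symbol lookup, while %pS symbolises a plain text address. The saved caller address passed in here is a raw return address, so %pS is the right specifier - illustration only, not buildable standalone:

/* symbolise the direct return address of the current function */
pr_alert("Internal error, called from %pS", __builtin_return_address(0));
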
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f63aeddd31d5..c203839cd5be 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -397,7 +397,8 @@ STATIC int /* error (positive) */
397xfs_zero_last_block( 397xfs_zero_last_block(
398 struct xfs_inode *ip, 398 struct xfs_inode *ip,
399 xfs_fsize_t offset, 399 xfs_fsize_t offset,
400 xfs_fsize_t isize) 400 xfs_fsize_t isize,
401 bool *did_zeroing)
401{ 402{
402 struct xfs_mount *mp = ip->i_mount; 403 struct xfs_mount *mp = ip->i_mount;
403 xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); 404 xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +426,7 @@ xfs_zero_last_block(
425 zero_len = mp->m_sb.sb_blocksize - zero_offset; 426 zero_len = mp->m_sb.sb_blocksize - zero_offset;
426 if (isize + zero_len > offset) 427 if (isize + zero_len > offset)
427 zero_len = offset - isize; 428 zero_len = offset - isize;
429 *did_zeroing = true;
428 return xfs_iozero(ip, isize, zero_len); 430 return xfs_iozero(ip, isize, zero_len);
429} 431}
430 432
@@ -443,7 +445,8 @@ int /* error (positive) */
443xfs_zero_eof( 445xfs_zero_eof(
444 struct xfs_inode *ip, 446 struct xfs_inode *ip,
445 xfs_off_t offset, /* starting I/O offset */ 447 xfs_off_t offset, /* starting I/O offset */
446 xfs_fsize_t isize) /* current inode size */ 448 xfs_fsize_t isize, /* current inode size */
449 bool *did_zeroing)
447{ 450{
448 struct xfs_mount *mp = ip->i_mount; 451 struct xfs_mount *mp = ip->i_mount;
449 xfs_fileoff_t start_zero_fsb; 452 xfs_fileoff_t start_zero_fsb;
@@ -465,7 +468,7 @@ xfs_zero_eof(
465 * We only zero a part of that block so it is handled specially. 468 * We only zero a part of that block so it is handled specially.
466 */ 469 */
467 if (XFS_B_FSB_OFFSET(mp, isize) != 0) { 470 if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
468 error = xfs_zero_last_block(ip, offset, isize); 471 error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
469 if (error) 472 if (error)
470 return error; 473 return error;
471 } 474 }
@@ -525,6 +528,7 @@ xfs_zero_eof(
525 if (error) 528 if (error)
526 return error; 529 return error;
527 530
531 *did_zeroing = true;
528 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 532 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
529 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 533 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
530 } 534 }
@@ -567,13 +571,15 @@ restart:
567 * having to redo all checks before. 571 * having to redo all checks before.
568 */ 572 */
569 if (*pos > i_size_read(inode)) { 573 if (*pos > i_size_read(inode)) {
574 bool zero = false;
575
570 if (*iolock == XFS_IOLOCK_SHARED) { 576 if (*iolock == XFS_IOLOCK_SHARED) {
571 xfs_rw_iunlock(ip, *iolock); 577 xfs_rw_iunlock(ip, *iolock);
572 *iolock = XFS_IOLOCK_EXCL; 578 *iolock = XFS_IOLOCK_EXCL;
573 xfs_rw_ilock(ip, *iolock); 579 xfs_rw_ilock(ip, *iolock);
574 goto restart; 580 goto restart;
575 } 581 }
576 error = xfs_zero_eof(ip, *pos, i_size_read(inode)); 582 error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
577 if (error) 583 if (error)
578 return error; 584 return error;
579 } 585 }
@@ -846,6 +852,9 @@ xfs_file_fallocate(
846 if (error) 852 if (error)
847 goto out_unlock; 853 goto out_unlock;
848 854
855 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
856 iolock |= XFS_MMAPLOCK_EXCL;
857
849 if (mode & FALLOC_FL_PUNCH_HOLE) { 858 if (mode & FALLOC_FL_PUNCH_HOLE) {
850 error = xfs_free_file_space(ip, offset, len); 859 error = xfs_free_file_space(ip, offset, len);
851 if (error) 860 if (error)
@@ -1028,20 +1037,6 @@ xfs_file_mmap(
1028} 1037}
1029 1038
1030/* 1039/*
1031 * mmap()d file has taken write protection fault and is being made
1032 * writable. We can set the page state up correctly for a writable
1033 * page, which means we can do correct delalloc accounting (ENOSPC
1034 * checking!) and unwritten extent mapping.
1035 */
1036STATIC int
1037xfs_vm_page_mkwrite(
1038 struct vm_area_struct *vma,
1039 struct vm_fault *vmf)
1040{
1041 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1042}
1043
1044/*
1045 * This type is designed to indicate the type of offset we would like 1040 * This type is designed to indicate the type of offset we would like
1046 * to search from page cache for xfs_seek_hole_data(). 1041 * to search from page cache for xfs_seek_hole_data().
1047 */ 1042 */
@@ -1416,6 +1411,55 @@ xfs_file_llseek(
1416 } 1411 }
1417} 1412}
1418 1413
1414/*
1415 * Locking for serialisation of IO during page faults. This results in a lock
1416 * ordering of:
1417 *
1418 * mmap_sem (MM)
1419 * i_mmap_lock (XFS - truncate serialisation)
1420 * page_lock (MM)
1421 * i_lock (XFS - extent map serialisation)
1422 */
1423STATIC int
1424xfs_filemap_fault(
1425 struct vm_area_struct *vma,
1426 struct vm_fault *vmf)
1427{
1428 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1429 int error;
1430
1431 trace_xfs_filemap_fault(ip);
1432
1433 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1434 error = filemap_fault(vma, vmf);
1435 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1436
1437 return error;
1438}
1439
1440/*
1441 * mmap()d file has taken write protection fault and is being made writable. We
1442 * can set the page state up correctly for a writable page, which means we can
1443 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1444 * mapping.
1445 */
1446STATIC int
1447xfs_filemap_page_mkwrite(
1448 struct vm_area_struct *vma,
1449 struct vm_fault *vmf)
1450{
1451 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1452 int error;
1453
1454 trace_xfs_filemap_page_mkwrite(ip);
1455
1456 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1457 error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
1458 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1459
1460 return error;
1461}
1462
1419const struct file_operations xfs_file_operations = { 1463const struct file_operations xfs_file_operations = {
1420 .llseek = xfs_file_llseek, 1464 .llseek = xfs_file_llseek,
1421 .read = new_sync_read, 1465 .read = new_sync_read,
@@ -1448,7 +1492,7 @@ const struct file_operations xfs_dir_file_operations = {
1448}; 1492};
1449 1493
1450static const struct vm_operations_struct xfs_file_vm_ops = { 1494static const struct vm_operations_struct xfs_file_vm_ops = {
1451 .fault = filemap_fault, 1495 .fault = xfs_filemap_fault,
1452 .map_pages = filemap_map_pages, 1496 .map_pages = filemap_map_pages,
1453 .page_mkwrite = xfs_vm_page_mkwrite, 1497 .page_mkwrite = xfs_filemap_page_mkwrite,
1454}; 1498};
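
The two wrappers above are the consumer side of the new i_mmaplock: faults take it shared, while truncate and hole punch take it exclusive, so the extent map can be torn down without a fault racing in. Reduced to a user-space model (hypothetical names):

#include <pthread.h>

static pthread_rwlock_t mmaplock = PTHREAD_RWLOCK_INITIALIZER;

static void page_fault_model(void (*fault)(void))
{
	pthread_rwlock_rdlock(&mmaplock);	/* XFS_MMAPLOCK_SHARED */
	fault();		/* filemap_fault() / block_page_mkwrite() body */
	pthread_rwlock_unlock(&mmaplock);
}

static void truncate_model(void (*invalidate)(void))
{
	pthread_rwlock_wrlock(&mmaplock);	/* XFS_MMAPLOCK_EXCL */
	invalidate();		/* no fault can run concurrently */
	pthread_rwlock_unlock(&mmaplock);
}

Faults stay shared against each other, so this serialises faults against invalidation without serialising faults against one another.
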
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a2e86e8a0fea..8f9f854376c6 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
322 322
323 pip = xfs_filestream_get_parent(ip); 323 pip = xfs_filestream_get_parent(ip);
324 if (!pip) 324 if (!pip)
325 goto out; 325 return NULLAGNUMBER;
326 326
327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
328 if (mru) { 328 if (mru) {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 74efe5b760dc..cb7e8a29dfb6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -637,12 +637,13 @@ xfs_fs_counts(
637 xfs_mount_t *mp, 637 xfs_mount_t *mp,
638 xfs_fsop_counts_t *cnt) 638 xfs_fsop_counts_t *cnt)
639{ 639{
640 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 640 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
641 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
642 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
643 XFS_ALLOC_SET_ASIDE(mp);
644
641 spin_lock(&mp->m_sb_lock); 645 spin_lock(&mp->m_sb_lock);
642 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
643 cnt->freertx = mp->m_sb.sb_frextents; 646 cnt->freertx = mp->m_sb.sb_frextents;
644 cnt->freeino = mp->m_sb.sb_ifree;
645 cnt->allocino = mp->m_sb.sb_icount;
646 spin_unlock(&mp->m_sb_lock); 647 spin_unlock(&mp->m_sb_lock);
647 return 0; 648 return 0;
648} 649}
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
692 * what to do. This means that the amount of free space can 693 * what to do. This means that the amount of free space can
693 * change while we do this, so we need to retry if we end up 694 * change while we do this, so we need to retry if we end up
694 * trying to reserve more space than is available. 695 * trying to reserve more space than is available.
695 *
696 * We also use the xfs_mod_incore_sb() interface so that we
697 * don't have to care about whether per cpu counter are
698 * enabled, disabled or even compiled in....
699 */ 696 */
700retry: 697retry:
701 spin_lock(&mp->m_sb_lock); 698 spin_lock(&mp->m_sb_lock);
702 xfs_icsb_sync_counters_locked(mp, 0);
703 699
704 /* 700 /*
705 * If our previous reservation was larger than the current value, 701 * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
716 } else { 712 } else {
717 __int64_t free; 713 __int64_t free;
718 714
719 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 715 free = percpu_counter_sum(&mp->m_fdblocks) -
716 XFS_ALLOC_SET_ASIDE(mp);
720 if (!free) 717 if (!free)
721 goto out; /* ENOSPC and fdblks_delta = 0 */ 718 goto out; /* ENOSPC and fdblks_delta = 0 */
722 719
@@ -755,8 +752,7 @@ out:
755 * the extra reserve blocks from the reserve..... 752 * the extra reserve blocks from the reserve.....
756 */ 753 */
757 int error; 754 int error;
758 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 755 error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
759 fdblks_delta, 0);
760 if (error == -ENOSPC) 756 if (error == -ENOSPC)
761 goto retry; 757 goto retry;
762 } 758 }
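
xfs_reserve_blocks() keeps its optimistic shape around the new counter: sample the free count (an exact percpu_counter_sum() now), attempt the delta, and retry on -ENOSPC if another CPU consumed the space in between. The same sample-and-retry idiom, reduced to a self-contained user-space sketch (hypothetical names):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic int64_t fdblocks;

/* Take up to 'request' blocks; retry if another thread races us to them. */
static int64_t reserve_blocks(int64_t request)
{
	for (;;) {
		int64_t free = atomic_load(&fdblocks);		/* sample */
		int64_t take = request < free ? request : free;

		if (take <= 0)
			return 0;				/* ENOSPC */
		if (atomic_compare_exchange_weak(&fdblocks, &free, free - take))
			return take;				/* got them */
		/* counter moved under us: same as the -ENOSPC goto retry */
	}
}
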
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d0414f305967..d6ebc85192b7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
117} 117}
118 118
119/* 119/*
120 * The xfs inode contains 2 locks: a multi-reader lock called the 120 * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and
121 * i_iolock and a multi-reader lock called the i_lock. This routine 121 * the i_lock. This routine allows various combinations of the locks to be
122 * allows either or both of the locks to be obtained. 122 * obtained.
123 * 123 *
124 * The 2 locks should always be ordered so that the IO lock is 124 * The 3 locks should always be ordered so that the IO lock is obtained first,
125 * obtained first in order to prevent deadlock. 125 * the mmap lock second and the ilock last in order to prevent deadlock.
126 * 126 *
127 * ip -- the inode being locked 127 * Basic locking order:
128 * lock_flags -- this parameter indicates the inode's locks 128 *
129 * to be locked. It can be: 129 * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
130 * XFS_IOLOCK_SHARED, 130 *
131 * XFS_IOLOCK_EXCL, 131 * mmap_sem locking order:
132 * XFS_ILOCK_SHARED, 132 *
133 * XFS_ILOCK_EXCL, 133 * i_iolock -> page lock -> mmap_sem
134 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 134 * mmap_sem -> i_mmap_lock -> page_lock
135 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 135 *
136 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 136 * The difference in mmap_sem locking order means that we cannot hold the
137 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 137 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
138 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
139 * in get_user_pages() to map the user pages into the kernel address space for
140 * direct IO. Similarly, the i_iolock cannot be taken inside a page fault because
141 * page faults already hold the mmap_sem.
142 *
143 * Hence to serialise fully against both syscall and mmap based IO, we need to
144 * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
145 * taken in places where we need to invalidate the page cache in a race
146 * free manner (e.g. truncate, hole punch and other extent manipulation
147 * functions).
138 */ 148 */
139void 149void
140xfs_ilock( 150xfs_ilock(
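
A compilable reduction of the ordering rule the comment establishes - iolock, then mmaplock, then ilock, never the reverse (hypothetical names; the kernel uses mrlocks with lockdep subclasses rather than bare rwlocks):

#include <pthread.h>

static struct inode_locks {
	pthread_rwlock_t iolock;	/* IO submission */
	pthread_rwlock_t mmaplock;	/* page fault vs truncate */
	pthread_rwlock_t ilock;		/* extent map */
} il = {
	PTHREAD_RWLOCK_INITIALIZER,
	PTHREAD_RWLOCK_INITIALIZER,
	PTHREAD_RWLOCK_INITIALIZER,
};

#define IOLOCK_EXCL	(1 << 0)
#define MMAPLOCK_EXCL	(1 << 1)
#define ILOCK_EXCL	(1 << 2)

static void ilock_model(int flags)
{
	/* always descend: iolock, then mmaplock, then ilock */
	if (flags & IOLOCK_EXCL)
		pthread_rwlock_wrlock(&il.iolock);
	if (flags & MMAPLOCK_EXCL)
		pthread_rwlock_wrlock(&il.mmaplock);
	if (flags & ILOCK_EXCL)
		pthread_rwlock_wrlock(&il.ilock);
}

Every path that takes more than one of these - xfs_ilock(), xfs_ilock_nowait(), the fallocate and swap-extents changes above - obeys the same descent, which is what makes the ABBA deadlock between syscall IO, page faults and transactions impossible.
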
@@ -150,6 +160,8 @@ xfs_ilock(
150 */ 160 */
151 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 161 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
152 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 162 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
163 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
164 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
153 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 165 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
154 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 166 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
155 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 167 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
159 else if (lock_flags & XFS_IOLOCK_SHARED) 171 else if (lock_flags & XFS_IOLOCK_SHARED)
160 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 172 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
161 173
174 if (lock_flags & XFS_MMAPLOCK_EXCL)
175 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
176 else if (lock_flags & XFS_MMAPLOCK_SHARED)
177 mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
178
162 if (lock_flags & XFS_ILOCK_EXCL) 179 if (lock_flags & XFS_ILOCK_EXCL)
163 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 180 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
164 else if (lock_flags & XFS_ILOCK_SHARED) 181 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
191 */ 208 */
192 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 209 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
193 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 210 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
211 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
212 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
194 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 213 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
195 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 214 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
196 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 215 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
202 if (!mrtryaccess(&ip->i_iolock)) 221 if (!mrtryaccess(&ip->i_iolock))
203 goto out; 222 goto out;
204 } 223 }
224
225 if (lock_flags & XFS_MMAPLOCK_EXCL) {
226 if (!mrtryupdate(&ip->i_mmaplock))
227 goto out_undo_iolock;
228 } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
229 if (!mrtryaccess(&ip->i_mmaplock))
230 goto out_undo_iolock;
231 }
232
205 if (lock_flags & XFS_ILOCK_EXCL) { 233 if (lock_flags & XFS_ILOCK_EXCL) {
206 if (!mrtryupdate(&ip->i_lock)) 234 if (!mrtryupdate(&ip->i_lock))
207 goto out_undo_iolock; 235 goto out_undo_mmaplock;
208 } else if (lock_flags & XFS_ILOCK_SHARED) { 236 } else if (lock_flags & XFS_ILOCK_SHARED) {
209 if (!mrtryaccess(&ip->i_lock)) 237 if (!mrtryaccess(&ip->i_lock))
210 goto out_undo_iolock; 238 goto out_undo_mmaplock;
211 } 239 }
212 return 1; 240 return 1;
213 241
214 out_undo_iolock: 242out_undo_mmaplock:
243 if (lock_flags & XFS_MMAPLOCK_EXCL)
244 mrunlock_excl(&ip->i_mmaplock);
245 else if (lock_flags & XFS_MMAPLOCK_SHARED)
246 mrunlock_shared(&ip->i_mmaplock);
247out_undo_iolock:
215 if (lock_flags & XFS_IOLOCK_EXCL) 248 if (lock_flags & XFS_IOLOCK_EXCL)
216 mrunlock_excl(&ip->i_iolock); 249 mrunlock_excl(&ip->i_iolock);
217 else if (lock_flags & XFS_IOLOCK_SHARED) 250 else if (lock_flags & XFS_IOLOCK_SHARED)
218 mrunlock_shared(&ip->i_iolock); 251 mrunlock_shared(&ip->i_iolock);
219 out: 252out:
220 return 0; 253 return 0;
221} 254}
222 255
@@ -244,6 +277,8 @@ xfs_iunlock(
244 */ 277 */
245 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 278 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
246 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 279 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
280 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
281 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
247 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 282 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
248 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 283 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
249 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 284 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
254 else if (lock_flags & XFS_IOLOCK_SHARED) 289 else if (lock_flags & XFS_IOLOCK_SHARED)
255 mrunlock_shared(&ip->i_iolock); 290 mrunlock_shared(&ip->i_iolock);
256 291
292 if (lock_flags & XFS_MMAPLOCK_EXCL)
293 mrunlock_excl(&ip->i_mmaplock);
294 else if (lock_flags & XFS_MMAPLOCK_SHARED)
295 mrunlock_shared(&ip->i_mmaplock);
296
257 if (lock_flags & XFS_ILOCK_EXCL) 297 if (lock_flags & XFS_ILOCK_EXCL)
258 mrunlock_excl(&ip->i_lock); 298 mrunlock_excl(&ip->i_lock);
259 else if (lock_flags & XFS_ILOCK_SHARED) 299 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
271 xfs_inode_t *ip, 311 xfs_inode_t *ip,
272 uint lock_flags) 312 uint lock_flags)
273{ 313{
274 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 314 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
275 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 315 ASSERT((lock_flags &
316 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
276 317
277 if (lock_flags & XFS_ILOCK_EXCL) 318 if (lock_flags & XFS_ILOCK_EXCL)
278 mrdemote(&ip->i_lock); 319 mrdemote(&ip->i_lock);
320 if (lock_flags & XFS_MMAPLOCK_EXCL)
321 mrdemote(&ip->i_mmaplock);
279 if (lock_flags & XFS_IOLOCK_EXCL) 322 if (lock_flags & XFS_IOLOCK_EXCL)
280 mrdemote(&ip->i_iolock); 323 mrdemote(&ip->i_iolock);
281 324
@@ -294,6 +337,12 @@ xfs_isilocked(
294 return rwsem_is_locked(&ip->i_lock.mr_lock); 337 return rwsem_is_locked(&ip->i_lock.mr_lock);
295 } 338 }
296 339
340 if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
341 if (!(lock_flags & XFS_MMAPLOCK_SHARED))
342 return !!ip->i_mmaplock.mr_writer;
343 return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
344 }
345
297 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 346 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
298 if (!(lock_flags & XFS_IOLOCK_SHARED)) 347 if (!(lock_flags & XFS_IOLOCK_SHARED))
299 return !!ip->i_iolock.mr_writer; 348 return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
314#endif 363#endif
315 364
316/* 365/*
317 * Bump the subclass so xfs_lock_inodes() acquires each lock with 366 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
318 * a different value 367 * value. This shouldn't be called for page fault locking, but we also need to
368 * ensure we don't overrun the number of lockdep subclasses for the iolock or
369 * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
319 */ 370 */
320static inline int 371static inline int
321xfs_lock_inumorder(int lock_mode, int subclass) 372xfs_lock_inumorder(int lock_mode, int subclass)
322{ 373{
323 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 374 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
375 ASSERT(subclass + XFS_LOCK_INUMORDER <
376 (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
324 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 377 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
378 }
379
380 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
381 ASSERT(subclass + XFS_LOCK_INUMORDER <
382 (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
383 lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
384 XFS_MMAPLOCK_SHIFT;
385 }
386
325 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 387 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
326 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 388 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
327 389
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
329} 391}
330 392
331/* 393/*
332 * The following routine will lock n inodes in exclusive mode. 394 * The following routine will lock n inodes in exclusive mode. We assume the
333 * We assume the caller calls us with the inodes in i_ino order. 395 * caller calls us with the inodes in i_ino order.
334 * 396 *
335 * We need to detect deadlock where an inode that we lock 397 * We need to detect deadlock where an inode that we lock is in the AIL and we
336 * is in the AIL and we start waiting for another inode that is locked 398 * start waiting for another inode that is locked by a thread in a long running
337 * by a thread in a long running transaction (such as truncate). This can 399 * transaction (such as truncate). This can result in deadlock since the long
338 * result in deadlock since the long running trans might need to wait 400 * running trans might need to wait for the inode we just locked in order to
339 * for the inode we just locked in order to push the tail and free space 401 * push the tail and free space in the log.
340 * in the log.
341 */ 402 */
342void 403void
343xfs_lock_inodes( 404xfs_lock_inodes(
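
The new asserts guard the bit-packing trick xfs_lock_inumorder() relies on: each lock class owns a field of bits inside lock_mode, and the per-inode subclass is shifted into the right field. A standalone sketch with made-up shift values (not the kernel's):

#include <assert.h>

#define IOLOCK_SHIFT	16	/* illustrative values only */
#define MMAPLOCK_SHIFT	20
#define ILOCK_SHIFT	24

static int lock_inumorder(int lock_mode, int subclass)
{
	/* subclass must fit between this field's shift and the next one's */
	assert(subclass < (1 << (MMAPLOCK_SHIFT - IOLOCK_SHIFT)));
	return lock_mode | (subclass << IOLOCK_SHIFT);
}

Overrunning a field would silently corrupt the neighbouring lock's lockdep subclass, which is exactly the failure the new ASSERTs catch.
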
@@ -348,30 +409,27 @@ xfs_lock_inodes(
348 int attempts = 0, i, j, try_lock; 409 int attempts = 0, i, j, try_lock;
349 xfs_log_item_t *lp; 410 xfs_log_item_t *lp;
350 411
351 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 412 /* currently supports between 2 and 5 inodes */
413 ASSERT(ips && inodes >= 2 && inodes <= 5);
352 414
353 try_lock = 0; 415 try_lock = 0;
354 i = 0; 416 i = 0;
355
356again: 417again:
357 for (; i < inodes; i++) { 418 for (; i < inodes; i++) {
358 ASSERT(ips[i]); 419 ASSERT(ips[i]);
359 420
360 if (i && (ips[i] == ips[i-1])) /* Already locked */ 421 if (i && (ips[i] == ips[i - 1])) /* Already locked */
361 continue; 422 continue;
362 423
363 /* 424 /*
364 * If try_lock is not set yet, make sure all locked inodes 425 * If try_lock is not set yet, make sure all locked inodes are
365 * are not in the AIL. 426 * not in the AIL. If any are, set try_lock to be used later.
366 * If any are, set try_lock to be used later.
367 */ 427 */
368
369 if (!try_lock) { 428 if (!try_lock) {
370 for (j = (i - 1); j >= 0 && !try_lock; j--) { 429 for (j = (i - 1); j >= 0 && !try_lock; j--) {
371 lp = (xfs_log_item_t *)ips[j]->i_itemp; 430 lp = (xfs_log_item_t *)ips[j]->i_itemp;
372 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 431 if (lp && (lp->li_flags & XFS_LI_IN_AIL))
373 try_lock++; 432 try_lock++;
374 }
375 } 433 }
376 } 434 }
377 435
@@ -381,51 +439,42 @@ again:
381 * we can't get any, we must release all we have 439 * we can't get any, we must release all we have
382 * and try again. 440 * and try again.
383 */ 441 */
442 if (!try_lock) {
443 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
444 continue;
445 }
446
447 /* try_lock means we have an inode locked that is in the AIL. */
448 ASSERT(i != 0);
449 if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
450 continue;
384 451
385 if (try_lock) { 452 /*
386 /* try_lock must be 0 if i is 0. */ 453 * Unlock all previous guys and try again. xfs_iunlock will try
454 * to push the tail if the inode is in the AIL.
455 */
456 attempts++;
457 for (j = i - 1; j >= 0; j--) {
387 /* 458 /*
388 * try_lock means we have an inode locked 459 * Check to see if we've already unlocked this one. Not
389 * that is in the AIL. 460 * the first one going back, and the inode ptr is the
461 * same.
390 */ 462 */
391 ASSERT(i != 0); 463 if (j != (i - 1) && ips[j] == ips[j + 1])
392 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 464 continue;
393 attempts++;
394
395 /*
396 * Unlock all previous guys and try again.
397 * xfs_iunlock will try to push the tail
398 * if the inode is in the AIL.
399 */
400
401 for(j = i - 1; j >= 0; j--) {
402
403 /*
404 * Check to see if we've already
405 * unlocked this one.
406 * Not the first one going back,
407 * and the inode ptr is the same.
408 */
409 if ((j != (i - 1)) && ips[j] ==
410 ips[j+1])
411 continue;
412
413 xfs_iunlock(ips[j], lock_mode);
414 }
415 465
416 if ((attempts % 5) == 0) { 466 xfs_iunlock(ips[j], lock_mode);
417 delay(1); /* Don't just spin the CPU */ 467 }
468
469 if ((attempts % 5) == 0) {
470 delay(1); /* Don't just spin the CPU */
418#ifdef DEBUG 471#ifdef DEBUG
419 xfs_lock_delays++; 472 xfs_lock_delays++;
420#endif 473#endif
421 }
422 i = 0;
423 try_lock = 0;
424 goto again;
425 }
426 } else {
427 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
428 } 474 }
475 i = 0;
476 try_lock = 0;
477 goto again;
429 } 478 }
430 479
431#ifdef DEBUG 480#ifdef DEBUG
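
The restructured loop is the classic trylock-and-back-off deadlock avoidance: once any held inode may be pinned in the AIL, stop blocking, and on a failed trylock drop everything and restart so the log tail can be pushed. Simplified to user-space (always trylocking, where the kernel only falls back to trylock after the first AIL hit):

#include <pthread.h>
#include <sched.h>

static int lock_all(pthread_mutex_t **m, int n)
{
	int attempts = 0;
	int i, j;

again:
	for (i = 0; i < n; i++) {
		if (pthread_mutex_trylock(m[i]) == 0)
			continue;

		/* contention: release everything we hold and start over */
		for (j = i - 1; j >= 0; j--)
			pthread_mutex_unlock(m[j]);
		if ((++attempts % 5) == 0)
			sched_yield();	/* don't just spin the CPU */
		goto again;
	}
	return 0;
}
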
@@ -440,10 +489,10 @@ again:
440} 489}
441 490
442/* 491/*
443 * xfs_lock_two_inodes() can only be used to lock one type of lock 492 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
444 * at a time - the iolock or the ilock, but not both at once. If 493 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
445 * we lock both at once, lockdep will report false positives saying 494 * lock more than one at a time, lockdep will report false positives saying we
446 * we have violated locking orders. 495 * have violated locking orders.
447 */ 496 */
448void 497void
449xfs_lock_two_inodes( 498xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
455 int attempts = 0; 504 int attempts = 0;
456 xfs_log_item_t *lp; 505 xfs_log_item_t *lp;
457 506
458 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 507 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
459 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 508 ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
509 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
510 } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
511 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
512
460 ASSERT(ip0->i_ino != ip1->i_ino); 513 ASSERT(ip0->i_ino != ip1->i_ino);
461 514
462 if (ip0->i_ino > ip1->i_ino) { 515 if (ip0->i_ino > ip1->i_ino) {
@@ -2615,19 +2668,22 @@ xfs_remove(
2615/* 2668/*
2616 * Enter all inodes for a rename transaction into a sorted array. 2669 * Enter all inodes for a rename transaction into a sorted array.
2617 */ 2670 */
2671#define __XFS_SORT_INODES 5
2618STATIC void 2672STATIC void
2619xfs_sort_for_rename( 2673xfs_sort_for_rename(
2620 xfs_inode_t *dp1, /* in: old (source) directory inode */ 2674 struct xfs_inode *dp1, /* in: old (source) directory inode */
2621 xfs_inode_t *dp2, /* in: new (target) directory inode */ 2675 struct xfs_inode *dp2, /* in: new (target) directory inode */
2622 xfs_inode_t *ip1, /* in: inode of old entry */ 2676 struct xfs_inode *ip1, /* in: inode of old entry */
2623 xfs_inode_t *ip2, /* in: inode of new entry, if it 2677 struct xfs_inode *ip2, /* in: inode of new entry */
2624 already exists, NULL otherwise. */ 2678 struct xfs_inode *wip, /* in: whiteout inode */
2625 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 2679 struct xfs_inode **i_tab,/* out: sorted array of inodes */
2626 int *num_inodes) /* out: number of inodes in array */ 2680 int *num_inodes) /* in/out: inodes in array */
2627{ 2681{
2628 xfs_inode_t *temp;
2629 int i, j; 2682 int i, j;
2630 2683
2684 ASSERT(*num_inodes == __XFS_SORT_INODES);
2685 memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2686
2631 /* 2687 /*
2632 * i_tab contains a list of pointers to inodes. We initialize 2688 * i_tab contains a list of pointers to inodes. We initialize
2633 * the table here & we'll sort it. We will then use it to 2689 * the table here & we'll sort it. We will then use it to
@@ -2635,25 +2691,24 @@ xfs_sort_for_rename(
2635 * 2691 *
2636 * Note that the table may contain duplicates. e.g., dp1 == dp2. 2692 * Note that the table may contain duplicates. e.g., dp1 == dp2.
2637 */ 2693 */
2638 i_tab[0] = dp1; 2694 i = 0;
2639 i_tab[1] = dp2; 2695 i_tab[i++] = dp1;
2640 i_tab[2] = ip1; 2696 i_tab[i++] = dp2;
2641 if (ip2) { 2697 i_tab[i++] = ip1;
2642 *num_inodes = 4; 2698 if (ip2)
2643 i_tab[3] = ip2; 2699 i_tab[i++] = ip2;
2644 } else { 2700 if (wip)
2645 *num_inodes = 3; 2701 i_tab[i++] = wip;
2646 i_tab[3] = NULL; 2702 *num_inodes = i;
2647 }
2648 2703
2649 /* 2704 /*
2650 * Sort the elements via bubble sort. (Remember, there are at 2705 * Sort the elements via bubble sort. (Remember, there are at
2651 * most 4 elements to sort, so this is adequate.) 2706 * most 5 elements to sort, so this is adequate.)
2652 */ 2707 */
2653 for (i = 0; i < *num_inodes; i++) { 2708 for (i = 0; i < *num_inodes; i++) {
2654 for (j = 1; j < *num_inodes; j++) { 2709 for (j = 1; j < *num_inodes; j++) {
2655 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 2710 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
2656 temp = i_tab[j]; 2711 struct xfs_inode *temp = i_tab[j];
2657 i_tab[j] = i_tab[j-1]; 2712 i_tab[j] = i_tab[j-1];
2658 i_tab[j-1] = temp; 2713 i_tab[j-1] = temp;
2659 } 2714 }
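
The enlarged sort exists purely to impose a global lock order: two rename transactions touching overlapping inodes must agree on who locks first, and ascending i_ino is the tiebreak. An equivalent standalone version of the rule (the kernel keeps the open-coded bubble sort since n <= 5):

#include <stdint.h>
#include <stdlib.h>

struct inode { uint64_t i_ino; };

static int by_ino(const void *a, const void *b)
{
	uint64_t x = (*(struct inode *const *)a)->i_ino;
	uint64_t y = (*(struct inode *const *)b)->i_ino;

	return (x > y) - (x < y);
}

static void sort_for_rename(struct inode **tab, int n)
{
	qsort(tab, n, sizeof(*tab), by_ino);	/* lock in ascending i_ino order */
}
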
@@ -2661,6 +2716,31 @@ xfs_sort_for_rename(
2661 } 2716 }
2662} 2717}
2663 2718
2719static int
2720xfs_finish_rename(
2721 struct xfs_trans *tp,
2722 struct xfs_bmap_free *free_list)
2723{
2724 int committed = 0;
2725 int error;
2726
2727 /*
2728 * If this is a synchronous mount, make sure that the rename transaction
2729 * goes to disk before returning to the user.
2730 */
2731 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2732 xfs_trans_set_sync(tp);
2733
2734 error = xfs_bmap_finish(&tp, free_list, &committed);
2735 if (error) {
2736 xfs_bmap_cancel(free_list);
2737 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2738 return error;
2739 }
2740
2741 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2742}
2743
2664/* 2744/*
2665 * xfs_cross_rename() 2745 * xfs_cross_rename()
2666 * 2746 *
@@ -2689,14 +2769,14 @@ xfs_cross_rename(
2689 ip2->i_ino, 2769 ip2->i_ino,
2690 first_block, free_list, spaceres); 2770 first_block, free_list, spaceres);
2691 if (error) 2771 if (error)
2692 goto out; 2772 goto out_trans_abort;
2693 2773
2694 /* Swap inode number for dirent in second parent */ 2774 /* Swap inode number for dirent in second parent */
2695 error = xfs_dir_replace(tp, dp2, name2, 2775 error = xfs_dir_replace(tp, dp2, name2,
2696 ip1->i_ino, 2776 ip1->i_ino,
2697 first_block, free_list, spaceres); 2777 first_block, free_list, spaceres);
2698 if (error) 2778 if (error)
2699 goto out; 2779 goto out_trans_abort;
2700 2780
2701 /* 2781 /*
2702 * If we're renaming one or more directories across different parents, 2782 * If we're renaming one or more directories across different parents,
@@ -2711,16 +2791,16 @@ xfs_cross_rename(
2711 dp1->i_ino, first_block, 2791 dp1->i_ino, first_block,
2712 free_list, spaceres); 2792 free_list, spaceres);
2713 if (error) 2793 if (error)
2714 goto out; 2794 goto out_trans_abort;
2715 2795
2716 /* transfer ip2 ".." reference to dp1 */ 2796 /* transfer ip2 ".." reference to dp1 */
2717 if (!S_ISDIR(ip1->i_d.di_mode)) { 2797 if (!S_ISDIR(ip1->i_d.di_mode)) {
2718 error = xfs_droplink(tp, dp2); 2798 error = xfs_droplink(tp, dp2);
2719 if (error) 2799 if (error)
2720 goto out; 2800 goto out_trans_abort;
2721 error = xfs_bumplink(tp, dp1); 2801 error = xfs_bumplink(tp, dp1);
2722 if (error) 2802 if (error)
2723 goto out; 2803 goto out_trans_abort;
2724 } 2804 }
2725 2805
2726 /* 2806 /*
@@ -2738,16 +2818,16 @@ xfs_cross_rename(
2738 dp2->i_ino, first_block, 2818 dp2->i_ino, first_block,
2739 free_list, spaceres); 2819 free_list, spaceres);
2740 if (error) 2820 if (error)
2741 goto out; 2821 goto out_trans_abort;
2742 2822
2743 /* transfer ip1 ".." reference to dp2 */ 2823 /* transfer ip1 ".." reference to dp2 */
2744 if (!S_ISDIR(ip2->i_d.di_mode)) { 2824 if (!S_ISDIR(ip2->i_d.di_mode)) {
2745 error = xfs_droplink(tp, dp1); 2825 error = xfs_droplink(tp, dp1);
2746 if (error) 2826 if (error)
2747 goto out; 2827 goto out_trans_abort;
2748 error = xfs_bumplink(tp, dp2); 2828 error = xfs_bumplink(tp, dp2);
2749 if (error) 2829 if (error)
2750 goto out; 2830 goto out_trans_abort;
2751 } 2831 }
2752 2832
2753 /* 2833 /*
@@ -2775,66 +2855,108 @@ xfs_cross_rename(
2775 } 2855 }
2776 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2856 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2777 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2857 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2778out: 2858 return xfs_finish_rename(tp, free_list);
2859
2860out_trans_abort:
2861 xfs_bmap_cancel(free_list);
2862 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2779 return error; 2863 return error;
2780} 2864}
2781 2865
2782/* 2866/*
2867 * xfs_rename_alloc_whiteout()
2868 *
2869 * Return a referenced, unlinked, unlocked inode that can be used as a
2870 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
2871 * crash between allocating the inode and linking it into the rename transaction,
2872 * recovery will free the inode and we won't leak it.
2873 */
2874static int
2875xfs_rename_alloc_whiteout(
2876 struct xfs_inode *dp,
2877 struct xfs_inode **wip)
2878{
2879 struct xfs_inode *tmpfile;
2880 int error;
2881
2882 error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
2883 if (error)
2884 return error;
2885
2886 /* Satisfy xfs_bumplink that this is a real tmpfile */
2887 xfs_finish_inode_setup(tmpfile);
2888 VFS_I(tmpfile)->i_state |= I_LINKABLE;
2889
2890 *wip = tmpfile;
2891 return 0;
2892}
2893
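
For context, the consumer of this new path is renameat2(2) with RENAME_WHITEOUT, used by overlayfs to record deletions on the upper layer. A minimal user-space exercise of the flag, via the raw syscall since the glibc wrapper arrived much later (assumes kernel headers that define SYS_renameat2):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)	/* from <linux/fs.h> */
#endif

int main(void)
{
	/* moves "old" to "new" and leaves a whiteout char device at "old" */
	if (syscall(SYS_renameat2, AT_FDCWD, "old", AT_FDCWD, "new",
		    RENAME_WHITEOUT) != 0) {
		perror("renameat2");
		return 1;
	}
	return 0;
}
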
2894/*
2783 * xfs_rename 2895 * xfs_rename
2784 */ 2896 */
2785int 2897int
2786xfs_rename( 2898xfs_rename(
2787 xfs_inode_t *src_dp, 2899 struct xfs_inode *src_dp,
2788 struct xfs_name *src_name, 2900 struct xfs_name *src_name,
2789 xfs_inode_t *src_ip, 2901 struct xfs_inode *src_ip,
2790 xfs_inode_t *target_dp, 2902 struct xfs_inode *target_dp,
2791 struct xfs_name *target_name, 2903 struct xfs_name *target_name,
2792 xfs_inode_t *target_ip, 2904 struct xfs_inode *target_ip,
2793 unsigned int flags) 2905 unsigned int flags)
2794{ 2906{
2795 xfs_trans_t *tp = NULL; 2907 struct xfs_mount *mp = src_dp->i_mount;
2796 xfs_mount_t *mp = src_dp->i_mount; 2908 struct xfs_trans *tp;
2797 int new_parent; /* moving to a new dir */ 2909 struct xfs_bmap_free free_list;
2798 int src_is_directory; /* src_name is a directory */ 2910 xfs_fsblock_t first_block;
2799 int error; 2911 struct xfs_inode *wip = NULL; /* whiteout inode */
2800 xfs_bmap_free_t free_list; 2912 struct xfs_inode *inodes[__XFS_SORT_INODES];
2801 xfs_fsblock_t first_block; 2913 int num_inodes = __XFS_SORT_INODES;
2802 int cancel_flags; 2914 bool new_parent = (src_dp != target_dp);
2803 int committed; 2915 bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
2804 xfs_inode_t *inodes[4]; 2916 int cancel_flags = 0;
2805 int spaceres; 2917 int spaceres;
2806 int num_inodes; 2918 int error;
2807 2919
2808 trace_xfs_rename(src_dp, target_dp, src_name, target_name); 2920 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
2809 2921
2810 new_parent = (src_dp != target_dp); 2922 if ((flags & RENAME_EXCHANGE) && !target_ip)
2811 src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 2923 return -EINVAL;
2924
2925 /*
2926 * If we are doing a whiteout operation, allocate the whiteout inode
2927 * we will be placing at the target and ensure the type is set
2928 * appropriately.
2929 */
2930 if (flags & RENAME_WHITEOUT) {
2931 ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
2932 error = xfs_rename_alloc_whiteout(target_dp, &wip);
2933 if (error)
2934 return error;
2935
2936 /* setup target dirent info as whiteout */
2937 src_name->type = XFS_DIR3_FT_CHRDEV;
2938 }
2812 2939
2813 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 2940 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
2814 inodes, &num_inodes); 2941 inodes, &num_inodes);
2815 2942
2816 xfs_bmap_init(&free_list, &first_block);
2817 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 2943 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
2818 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2819 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 2944 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
2820 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2945 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
2821 if (error == -ENOSPC) { 2946 if (error == -ENOSPC) {
2822 spaceres = 0; 2947 spaceres = 0;
2823 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2948 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
2824 } 2949 }
2825 if (error) { 2950 if (error)
2826 xfs_trans_cancel(tp, 0); 2951 goto out_trans_cancel;
2827 goto std_return; 2952 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2828 }
2829 2953
2830 /* 2954 /*
2831 * Attach the dquots to the inodes 2955 * Attach the dquots to the inodes
2832 */ 2956 */
2833 error = xfs_qm_vop_rename_dqattach(inodes); 2957 error = xfs_qm_vop_rename_dqattach(inodes);
2834 if (error) { 2958 if (error)
2835 xfs_trans_cancel(tp, cancel_flags); 2959 goto out_trans_cancel;
2836 goto std_return;
2837 }
2838 2960
2839 /* 2961 /*
2840 * Lock all the participating inodes. Depending upon whether 2962 * Lock all the participating inodes. Depending upon whether
@@ -2855,6 +2977,8 @@ xfs_rename(
2855 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2977 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
2856 if (target_ip) 2978 if (target_ip)
2857 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2979 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
2980 if (wip)
2981 xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
2858 2982
2859 /* 2983 /*
2860 * If we are using project inheritance, we only allow renames 2984 * If we are using project inheritance, we only allow renames
@@ -2864,20 +2988,16 @@ xfs_rename(
2864 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2988 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2865 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2989 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
2866 error = -EXDEV; 2990 error = -EXDEV;
2867 goto error_return; 2991 goto out_trans_cancel;
2868 } 2992 }
2869 2993
2870 /* 2994 xfs_bmap_init(&free_list, &first_block);
2871 * Handle RENAME_EXCHANGE flags 2995
2872 */ 2996 /* RENAME_EXCHANGE is unique from here on. */
2873 if (flags & RENAME_EXCHANGE) { 2997 if (flags & RENAME_EXCHANGE)
2874 error = xfs_cross_rename(tp, src_dp, src_name, src_ip, 2998 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
2875 target_dp, target_name, target_ip, 2999 target_dp, target_name, target_ip,
2876 &free_list, &first_block, spaceres); 3000 &free_list, &first_block, spaceres);
2877 if (error)
2878 goto abort_return;
2879 goto finish_rename;
2880 }
2881 3001
2882 /* 3002 /*
2883 * Set up the target. 3003 * Set up the target.
@@ -2890,7 +3010,7 @@ xfs_rename(
2890 if (!spaceres) { 3010 if (!spaceres) {
2891 error = xfs_dir_canenter(tp, target_dp, target_name); 3011 error = xfs_dir_canenter(tp, target_dp, target_name);
2892 if (error) 3012 if (error)
2893 goto error_return; 3013 goto out_trans_cancel;
2894 } 3014 }
2895 /* 3015 /*
2896 * If target does not exist and the rename crosses 3016 * If target does not exist and the rename crosses
@@ -2901,9 +3021,9 @@ xfs_rename(
2901 src_ip->i_ino, &first_block, 3021 src_ip->i_ino, &first_block,
2902 &free_list, spaceres); 3022 &free_list, spaceres);
2903 if (error == -ENOSPC) 3023 if (error == -ENOSPC)
2904 goto error_return; 3024 goto out_bmap_cancel;
2905 if (error) 3025 if (error)
2906 goto abort_return; 3026 goto out_trans_abort;
2907 3027
2908 xfs_trans_ichgtime(tp, target_dp, 3028 xfs_trans_ichgtime(tp, target_dp,
2909 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3029 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2911,7 +3031,7 @@ xfs_rename(
2911 if (new_parent && src_is_directory) { 3031 if (new_parent && src_is_directory) {
2912 error = xfs_bumplink(tp, target_dp); 3032 error = xfs_bumplink(tp, target_dp);
2913 if (error) 3033 if (error)
2914 goto abort_return; 3034 goto out_trans_abort;
2915 } 3035 }
2916 } else { /* target_ip != NULL */ 3036 } else { /* target_ip != NULL */
2917 /* 3037 /*
@@ -2926,7 +3046,7 @@ xfs_rename(
2926 if (!(xfs_dir_isempty(target_ip)) || 3046 if (!(xfs_dir_isempty(target_ip)) ||
2927 (target_ip->i_d.di_nlink > 2)) { 3047 (target_ip->i_d.di_nlink > 2)) {
2928 error = -EEXIST; 3048 error = -EEXIST;
2929 goto error_return; 3049 goto out_trans_cancel;
2930 } 3050 }
2931 } 3051 }
2932 3052
@@ -2943,7 +3063,7 @@ xfs_rename(
2943 src_ip->i_ino, 3063 src_ip->i_ino,
2944 &first_block, &free_list, spaceres); 3064 &first_block, &free_list, spaceres);
2945 if (error) 3065 if (error)
2946 goto abort_return; 3066 goto out_trans_abort;
2947 3067
2948 xfs_trans_ichgtime(tp, target_dp, 3068 xfs_trans_ichgtime(tp, target_dp,
2949 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3069 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2954,7 +3074,7 @@ xfs_rename(
2954 */ 3074 */
2955 error = xfs_droplink(tp, target_ip); 3075 error = xfs_droplink(tp, target_ip);
2956 if (error) 3076 if (error)
2957 goto abort_return; 3077 goto out_trans_abort;
2958 3078
2959 if (src_is_directory) { 3079 if (src_is_directory) {
2960 /* 3080 /*
@@ -2962,7 +3082,7 @@ xfs_rename(
2962 */ 3082 */
2963 error = xfs_droplink(tp, target_ip); 3083 error = xfs_droplink(tp, target_ip);
2964 if (error) 3084 if (error)
2965 goto abort_return; 3085 goto out_trans_abort;
2966 } 3086 }
2967 } /* target_ip != NULL */ 3087 } /* target_ip != NULL */
2968 3088
@@ -2979,7 +3099,7 @@ xfs_rename(
2979 &first_block, &free_list, spaceres); 3099 &first_block, &free_list, spaceres);
2980 ASSERT(error != -EEXIST); 3100 ASSERT(error != -EEXIST);
2981 if (error) 3101 if (error)
2982 goto abort_return; 3102 goto out_trans_abort;
2983 } 3103 }
2984 3104
2985 /* 3105 /*
@@ -3005,49 +3125,67 @@ xfs_rename(
3005 */ 3125 */
3006 error = xfs_droplink(tp, src_dp); 3126 error = xfs_droplink(tp, src_dp);
3007 if (error) 3127 if (error)
3008 goto abort_return; 3128 goto out_trans_abort;
3009 } 3129 }
3010 3130
3011 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3131 /*
3132 * For whiteouts, we only need to update the source dirent with the
3133 * inode number of the whiteout inode rather than removing it
3134 * altogether.
3135 */
3136 if (wip) {
3137 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
3012 &first_block, &free_list, spaceres); 3138 &first_block, &free_list, spaceres);
3139 } else
3140 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3141 &first_block, &free_list, spaceres);
3013 if (error) 3142 if (error)
3014 goto abort_return; 3143 goto out_trans_abort;
3015
3016 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3017 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3018 if (new_parent)
3019 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3020 3144
3021finish_rename:
3022 /* 3145 /*
3023 * If this is a synchronous mount, make sure that the 3146 * For whiteouts, we need to bump the link count on the whiteout inode.
3024 * rename transaction goes to disk before returning to 3147 * This means that failures all the way up to this point leave the inode
3025 * the user. 3148 * on the unlinked list and so cleanup is a simple matter of dropping
3149 * the remaining reference to it. If we fail here after bumping the link
3150 * count, we're shutting down the filesystem so we'll never see the
3151 * intermediate state on disk.
3026 */ 3152 */
3027 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3153 if (wip) {
3028 xfs_trans_set_sync(tp); 3154 ASSERT(wip->i_d.di_nlink == 0);
3029 } 3155 error = xfs_bumplink(tp, wip);
3156 if (error)
3157 goto out_trans_abort;
3158 error = xfs_iunlink_remove(tp, wip);
3159 if (error)
3160 goto out_trans_abort;
3161 xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
3030 3162
3031 error = xfs_bmap_finish(&tp, &free_list, &committed); 3163 /*
3032 if (error) { 3164 * Now we have a real link, clear the "I'm a tmpfile" state
3033 xfs_bmap_cancel(&free_list); 3165 * flag from the inode so it doesn't accidentally get misused in
3034 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3166 * future.
3035 XFS_TRANS_ABORT)); 3167 */
3036 goto std_return; 3168 VFS_I(wip)->i_state &= ~I_LINKABLE;
3037 } 3169 }
3038 3170
3039 /* 3171 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3040 * trans_commit will unlock src_ip, target_ip & decrement 3172 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3041 * the vnode references. 3173 if (new_parent)
3042 */ 3174 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3043 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3044 3175
3045 abort_return: 3176 error = xfs_finish_rename(tp, &free_list);
3177 if (wip)
3178 IRELE(wip);
3179 return error;
3180
3181out_trans_abort:
3046 cancel_flags |= XFS_TRANS_ABORT; 3182 cancel_flags |= XFS_TRANS_ABORT;
3047 error_return: 3183out_bmap_cancel:
3048 xfs_bmap_cancel(&free_list); 3184 xfs_bmap_cancel(&free_list);
3185out_trans_cancel:
3049 xfs_trans_cancel(tp, cancel_flags); 3186 xfs_trans_cancel(tp, cancel_flags);
3050 std_return: 3187 if (wip)
3188 IRELE(wip);
3051 return error; 3189 return error;
3052} 3190}
3053 3191
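The rename rework above retires the source dirent in one of two ways: a plain rename removes it, while RENAME_WHITEOUT repoints it at the freshly allocated whiteout inode (note the deliberate ordering: the whiteout's link count is only bumped once everything else has succeeded, so earlier failures leave it safely on the unlinked list). A minimal standalone C sketch of the dirent decision; dir_replace() and dir_remove() are hypothetical stand-ins for xfs_dir_replace() and xfs_dir_removename():

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-ins for the XFS directory operations */
static int dir_replace(const char *name, unsigned long new_ino)
{
	printf("repoint %s at inode %lu\n", name, new_ino);
	return 0;
}

static int dir_remove(const char *name)
{
	printf("remove %s\n", name);
	return 0;
}

/*
 * Mirrors the branch in xfs_rename(): for RENAME_WHITEOUT the source
 * dirent is updated in place to point at the whiteout inode; otherwise
 * it is removed altogether.
 */
static int retire_src_dirent(const char *src_name, bool whiteout,
			     unsigned long wip_ino)
{
	if (whiteout)
		return dir_replace(src_name, wip_ino);
	return dir_remove(src_name);
}

int main(void)
{
	retire_src_dirent("oldname", true, 131);	/* whiteout rename */
	retire_src_dirent("oldname", false, 0);		/* plain rename */
	return 0;
}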
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8e82b41d2050..8f22d20368d8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
56 struct xfs_inode_log_item *i_itemp; /* logging information */ 56 struct xfs_inode_log_item *i_itemp; /* logging information */
57 mrlock_t i_lock; /* inode lock */ 57 mrlock_t i_lock; /* inode lock */
58 mrlock_t i_iolock; /* inode IO lock */ 58 mrlock_t i_iolock; /* inode IO lock */
59 mrlock_t i_mmaplock; /* inode mmap IO lock */
59 atomic_t i_pincount; /* inode pin count */ 60 atomic_t i_pincount; /* inode pin count */
60 spinlock_t i_flags_lock; /* inode i_flags lock */ 61 spinlock_t i_flags_lock; /* inode i_flags lock */
61 /* Miscellaneous state. */ 62 /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
263#define XFS_IOLOCK_SHARED (1<<1) 264#define XFS_IOLOCK_SHARED (1<<1)
264#define XFS_ILOCK_EXCL (1<<2) 265#define XFS_ILOCK_EXCL (1<<2)
265#define XFS_ILOCK_SHARED (1<<3) 266#define XFS_ILOCK_SHARED (1<<3)
267#define XFS_MMAPLOCK_EXCL (1<<4)
268#define XFS_MMAPLOCK_SHARED (1<<5)
266 269
267#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 270#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
268 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 271 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
272 | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
269 273
270#define XFS_LOCK_FLAGS \ 274#define XFS_LOCK_FLAGS \
271 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 275 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
272 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 276 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
273 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 277 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
274 { XFS_ILOCK_SHARED, "ILOCK_SHARED" } 278 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
279 { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
280 { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
275 281
276 282
277/* 283/*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
302#define XFS_IOLOCK_SHIFT 16 308#define XFS_IOLOCK_SHIFT 16
303#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) 309#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
304 310
311#define XFS_MMAPLOCK_SHIFT 20
312
305#define XFS_ILOCK_SHIFT 24 313#define XFS_ILOCK_SHIFT 24
306#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 314#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
307#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) 315#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
308#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) 316#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
309 317
310#define XFS_IOLOCK_DEP_MASK 0x00ff0000 318#define XFS_IOLOCK_DEP_MASK 0x000f0000
319#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
311#define XFS_ILOCK_DEP_MASK 0xff000000 320#define XFS_ILOCK_DEP_MASK 0xff000000
312#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) 321#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
322 XFS_MMAPLOCK_DEP_MASK | \
323 XFS_ILOCK_DEP_MASK)
313 324
314#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) 325#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
315#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 326 >> XFS_IOLOCK_SHIFT)
327#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
328 >> XFS_MMAPLOCK_SHIFT)
329#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
330 >> XFS_ILOCK_SHIFT)
316 331
317/* 332/*
318 * For multiple groups support: if S_ISGID bit is set in the parent 333 * For multiple groups support: if S_ISGID bit is set in the parent
@@ -384,10 +399,11 @@ enum xfs_prealloc_flags {
384 XFS_PREALLOC_INVISIBLE = (1 << 4), 399 XFS_PREALLOC_INVISIBLE = (1 << 4),
385}; 400};
386 401
387int xfs_update_prealloc_flags(struct xfs_inode *, 402int xfs_update_prealloc_flags(struct xfs_inode *ip,
388 enum xfs_prealloc_flags); 403 enum xfs_prealloc_flags flags);
389int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 404int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
390int xfs_iozero(struct xfs_inode *, loff_t, size_t); 405 xfs_fsize_t isize, bool *did_zeroing);
406int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
391 407
392 408
393/* from xfs_iops.c */ 409/* from xfs_iops.c */
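The new MMAPLOCK lockdep annotations pack a 4-bit subclass per lock class into one flags word; note the IOLOCK dep mask shrinks from 0x00ff0000 to 0x000f0000 to make room for the MMAPLOCK nibble at bit 20. A standalone check of the round trip, with the values copied from the hunk above:

#include <stdio.h>

#define XFS_MMAPLOCK_SHIFT	20
#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
#define XFS_MMAPLOCK_DEP(flags) \
	(((flags) & XFS_MMAPLOCK_DEP_MASK) >> XFS_MMAPLOCK_SHIFT)

int main(void)
{
	unsigned int flags = 3 << XFS_MMAPLOCK_SHIFT;	/* subclass 3 */

	/* prints 3: the subclass survives the pack/extract round trip */
	printf("%u\n", XFS_MMAPLOCK_DEP(flags));
	return 0;
}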
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3a21cc71fda0..5f4a396f5186 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -631,7 +631,7 @@ xfs_ioc_space(
631 631
632 if (filp->f_flags & O_DSYNC) 632 if (filp->f_flags & O_DSYNC)
633 flags |= XFS_PREALLOC_SYNC; 633 flags |= XFS_PREALLOC_SYNC;
634 if (ioflags & XFS_IO_INVIS) 634 if (ioflags & XFS_IO_INVIS)
635 flags |= XFS_PREALLOC_INVISIBLE; 635 flags |= XFS_PREALLOC_INVISIBLE;
636 636
637 error = mnt_want_write_file(filp); 637 error = mnt_want_write_file(filp);
@@ -643,6 +643,9 @@ xfs_ioc_space(
643 if (error) 643 if (error)
644 goto out_unlock; 644 goto out_unlock;
645 645
646 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
647 iolock |= XFS_MMAPLOCK_EXCL;
648
646 switch (bf->l_whence) { 649 switch (bf->l_whence) {
647 case 0: /*SEEK_SET*/ 650 case 0: /*SEEK_SET*/
648 break; 651 break;
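The idiom in this hunk is lock-flag accumulation: each lock taken is OR-ed into iolock so the single xfs_iunlock(ip, iolock) on the exit path releases exactly what was acquired. A toy standalone illustration (the flag values here are illustrative, not the XFS ones):

#include <stdio.h>

#define IOLOCK_EXCL	(1U << 0)	/* illustrative values only */
#define MMAPLOCK_EXCL	(1U << 4)

int main(void)
{
	unsigned int held = IOLOCK_EXCL;	/* taken at entry */

	held |= MMAPLOCK_EXCL;			/* taken mid-function */
	/* ... extent manipulation work ... */
	printf("unlock mask: %#x\n", held);	/* one unlock drops both */
	return 0;
}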
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ccb1dd0d509e..38e633bad8c2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), 460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
461 alloc_blocks); 461 alloc_blocks);
462 462
463 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 463 freesp = percpu_counter_read_positive(&mp->m_fdblocks);
464 freesp = mp->m_sb.sb_fdblocks;
465 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { 464 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
466 shift = 2; 465 shift = 2;
467 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) 466 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
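The replacement reads free space straight from the m_fdblocks percpu counter and throttles speculative preallocation as it falls through the low-space thresholds. A standalone sketch of the shift logic, assuming the remaining levels continue the pattern the hunk shows for the 5% and 4% thresholds:

/* indices into the thresholds array, following the XFS_LOWSP_* enum */
enum { LOWSP_1 = 0, LOWSP_2, LOWSP_3, LOWSP_4, LOWSP_5, LOWSP_MAX };

/*
 * How many times to halve the preallocation size: 0 while free space
 * is comfortable, growing by one for each threshold crossed.
 */
static unsigned int prealloc_shift(long long freesp,
				   const long long low[LOWSP_MAX])
{
	unsigned int shift = 0;

	if (freesp < low[LOWSP_5]) {
		shift = 2;
		if (freesp < low[LOWSP_4])
			shift++;
		if (freesp < low[LOWSP_3])
			shift++;
		if (freesp < low[LOWSP_2])
			shift++;
		if (freesp < low[LOWSP_1])
			shift++;
	}
	return shift;
}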
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1d4efee4be17..2f1839e4dd1b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -394,7 +394,7 @@ xfs_vn_rename(
394 struct xfs_name oname; 394 struct xfs_name oname;
395 struct xfs_name nname; 395 struct xfs_name nname;
396 396
397 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 397 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
398 return -EINVAL; 398 return -EINVAL;
399 399
400 /* if we are exchanging files, we need to set i_mode of both files */ 400 /* if we are exchanging files, we need to set i_mode of both files */
@@ -756,6 +756,7 @@ xfs_setattr_size(
756 int error; 756 int error;
757 uint lock_flags = 0; 757 uint lock_flags = 0;
758 uint commit_flags = 0; 758 uint commit_flags = 0;
759 bool did_zeroing = false;
759 760
760 trace_xfs_setattr(ip); 761 trace_xfs_setattr(ip);
761 762
@@ -770,6 +771,7 @@ xfs_setattr_size(
770 return error; 771 return error;
771 772
772 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 773 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
774 ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
773 ASSERT(S_ISREG(ip->i_d.di_mode)); 775 ASSERT(S_ISREG(ip->i_d.di_mode));
774 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 776 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
775 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); 777 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -799,20 +801,16 @@ xfs_setattr_size(
799 return error; 801 return error;
800 802
801 /* 803 /*
802 * Now we can make the changes. Before we join the inode to the 804 * File data changes must be complete before we start the transaction to
803 * transaction, take care of the part of the truncation that must be 805 * modify the inode. This needs to be done before joining the inode to
804 * done without the inode lock. This needs to be done before joining 806 * the transaction because the inode cannot be unlocked once it is a
805 * the inode to the transaction, because the inode cannot be unlocked 807 * part of the transaction.
806 * once it is a part of the transaction. 808 *
809 * Start with zeroing any data block beyond EOF that we may expose on
810 * file extension.
807 */ 811 */
808 if (newsize > oldsize) { 812 if (newsize > oldsize) {
809 /* 813 error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
810 * Do the first part of growing a file: zero any data in the
811 * last block that is beyond the old EOF. We need to do this
812 * before the inode is joined to the transaction to modify
813 * i_size.
814 */
815 error = xfs_zero_eof(ip, newsize, oldsize);
816 if (error) 814 if (error)
817 return error; 815 return error;
818 } 816 }
@@ -822,75 +820,42 @@ xfs_setattr_size(
822 * any previous writes that are beyond the on disk EOF and the new 820 * any previous writes that are beyond the on disk EOF and the new
823 * EOF that have not been written out need to be written here. If we 821 * EOF that have not been written out need to be written here. If we
824 * do not write the data out, we expose ourselves to the null files 822 * do not write the data out, we expose ourselves to the null files
825 * problem. 823 * problem. Note that this includes any block zeroing we did above;
826 * 824 * otherwise those blocks may not be zeroed after a crash.
827 * Only flush from the on disk size to the smaller of the in memory
828 * file size or the new size as that's the range we really care about
829 * here and prevents waiting for other data not within the range we
830 * care about here.
831 */ 825 */
832 if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { 826 if (newsize > ip->i_d.di_size &&
827 (oldsize != ip->i_d.di_size || did_zeroing)) {
833 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 828 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
834 ip->i_d.di_size, newsize); 829 ip->i_d.di_size, newsize);
835 if (error) 830 if (error)
836 return error; 831 return error;
837 } 832 }
838 833
839 /* 834 /* Now wait for all direct I/O to complete. */
840 * Wait for all direct I/O to complete.
841 */
842 inode_dio_wait(inode); 835 inode_dio_wait(inode);
843 836
844 /* 837 /*
845 * Do all the page cache truncate work outside the transaction context 838 * We've already locked out new page faults, so now we can safely remove
846 * as the "lock" order is page lock->log space reservation. i.e. 839 * pages from the page cache knowing they won't get refaulted until we
847 * locking pages inside the transaction can ABBA deadlock with 840 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
 848 writeback. We have to do the VFS inode size update before we truncate 841 * drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
849 * the pagecache, however, to avoid racing with page faults beyond the 842 * PTEs on extending truncates and hence ensures sub-page block size
850 * new EOF they are not serialised against truncate operations except by 843 * filesystems are correctly handled, too.
851 * page locks and size updates.
852 * 844 *
853 * Hence we are in a situation where a truncate can fail with ENOMEM 845 * We have to do all the page cache truncate work outside the
854 * from xfs_trans_reserve(), but having already truncated the in-memory 846 * transaction context as the "lock" order is page lock->log space
855 * version of the file (i.e. made user visible changes). There's not 847 * reservation as defined by extent allocation in the writeback path.
856 * much we can do about this, except to hope that the caller sees ENOMEM 848 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
857 * and retries the truncate operation. 849 * having already truncated the in-memory version of the file (i.e. made
850 * user visible changes). There's not much we can do about this, except
851 * to hope that the caller sees ENOMEM and retries the truncate
852 * operation.
858 */ 853 */
859 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 854 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
860 if (error) 855 if (error)
861 return error; 856 return error;
862 truncate_setsize(inode, newsize); 857 truncate_setsize(inode, newsize);
863 858
864 /*
865 * The "we can't serialise against page faults" pain gets worse.
866 *
867 * If the file is mapped then we have to clean the page at the old EOF
868 * when extending the file. Extending the file can expose changes the
869 * underlying page mapping (e.g. from beyond EOF to a hole or
870 * unwritten), and so on the next attempt to write to that page we need
871 * to remap it for write. i.e. we need .page_mkwrite() to be called.
872 * Hence we need to clean the page to clean the pte and so a new write
873 * fault will be triggered appropriately.
874 *
875 * If we do it before we change the inode size, then we can race with a
876 * page fault that maps the page with exactly the same problem. If we do
877 * it after we change the file size, then a new page fault can come in
878 * and allocate space before we've run the rest of the truncate
879 * transaction. That's kinda grotesque, but it's better than have data
880 * over a hole, and so that's the lesser evil that has been chosen here.
881 *
882 * The real solution, however, is to have some mechanism for locking out
883 * page faults while a truncate is in progress.
884 */
885 if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
886 error = filemap_write_and_wait_range(
887 VFS_I(ip)->i_mapping,
888 round_down(oldsize, PAGE_CACHE_SIZE),
889 round_up(oldsize, PAGE_CACHE_SIZE) - 1);
890 if (error)
891 return error;
892 }
893
894 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 859 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
895 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 860 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
896 if (error) 861 if (error)
@@ -989,8 +954,12 @@ xfs_vn_setattr(
989 954
990 xfs_ilock(ip, iolock); 955 xfs_ilock(ip, iolock);
991 error = xfs_break_layouts(dentry->d_inode, &iolock, true); 956 error = xfs_break_layouts(dentry->d_inode, &iolock, true);
992 if (!error) 957 if (!error) {
958 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
959 iolock |= XFS_MMAPLOCK_EXCL;
960
993 error = xfs_setattr_size(ip, iattr); 961 error = xfs_setattr_size(ip, iattr);
962 }
994 xfs_iunlock(ip, iolock); 963 xfs_iunlock(ip, iolock);
995 } else { 964 } else {
996 error = xfs_setattr_nonsize(ip, iattr, 0); 965 error = xfs_setattr_nonsize(ip, iattr, 0);
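The reworked flush condition is the subtle part of this hunk: dirty data beyond the on-disk size must reach the disk before the size-change transaction, and that now explicitly includes blocks zeroed by xfs_zero_eof() (tracked via did_zeroing). Restating the predicate from the hunk above as standalone C:

#include <stdbool.h>

/*
 * Flush [disk_size, newsize) before starting the truncate transaction
 * when the range may hold dirty data: either writes landed beyond the
 * on-disk size, or we just zeroed blocks for the extension.
 */
static bool need_preflush(long long newsize, long long oldsize,
			  long long disk_size, bool did_zeroing)
{
	return newsize > disk_size &&
	       (oldsize != disk_size || did_zeroing);
}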
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c31d2c2eadc4..7c7842c85a08 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
116#undef XFS_NATIVE_HOST 116#undef XFS_NATIVE_HOST
117#endif 117#endif
118 118
119/*
120 * Feature macros (disable/enable)
121 */
122#ifdef CONFIG_SMP
123#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
124#else
125#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
126#endif
127
128#define irix_sgid_inherit xfs_params.sgid_inherit.val 119#define irix_sgid_inherit xfs_params.sgid_inherit.val
129#define irix_symlink_mode xfs_params.symlink_mode.val 120#define irix_symlink_mode xfs_params.symlink_mode.val
130#define xfs_panic_mask xfs_params.panic_mask.val 121#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a5a945fc3bdc..4f5784f85a5b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4463,10 +4463,10 @@ xlog_do_recover(
4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
4465 ASSERT(xfs_sb_good_version(sbp)); 4465 ASSERT(xfs_sb_good_version(sbp));
4466 xfs_reinit_percpu_counters(log->l_mp);
4467
4466 xfs_buf_relse(bp); 4468 xfs_buf_relse(bp);
4467 4469
4468 /* We've re-read the superblock so re-initialize per-cpu counters */
4469 xfs_icsb_reinit_counters(log->l_mp);
4470 4470
4471 xlog_recover_check_summary(log); 4471 xlog_recover_check_summary(log);
4472 4472
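The body of xfs_reinit_percpu_counters() is not part of this diff; presumably it reseeds the three generic counters from the just-re-read superblock, along the lines of this kernel-style sketch (an assumption, not the patch's actual code):

/* hedged sketch -- the real body is not shown in this diff */
void xfs_reinit_percpu_counters(struct xfs_mount *mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}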
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e63eea2..2ce7ee3b4ec1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
43#include "xfs_sysfs.h" 43#include "xfs_sysfs.h"
44 44
45 45
46#ifdef HAVE_PERCPU_SB
47STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
48 int);
49STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
50 int);
51STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
52#else
53
54#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
55#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
56#endif
57
58static DEFINE_MUTEX(xfs_uuid_table_mutex); 46static DEFINE_MUTEX(xfs_uuid_table_mutex);
59static int xfs_uuid_table_size; 47static int xfs_uuid_table_size;
60static uuid_t *xfs_uuid_table; 48static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
347 goto reread; 335 goto reread;
348 } 336 }
349 337
350 /* Initialize per-cpu counters */ 338 xfs_reinit_percpu_counters(mp);
351 xfs_icsb_reinit_counters(mp);
352 339
353 /* no need to be quiet anymore, so reset the buf ops */ 340 /* no need to be quiet anymore, so reset the buf ops */
354 bp->b_ops = &xfs_sb_buf_ops; 341 bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
1087 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) 1074 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
1088 return 0; 1075 return 0;
1089 1076
1090 xfs_icsb_sync_counters(mp, 0);
1091
1092 /* 1077 /*
1093 * we don't need to do this if we are updating the superblock 1078 * we don't need to do this if we are updating the superblock
1094 * counters on every modification. 1079 * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
1099 return xfs_sync_sb(mp, true); 1084 return xfs_sync_sb(mp, true);
1100} 1085}
1101 1086
1102/* 1087int
1103 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply 1088xfs_mod_icount(
1104 * a delta to a specified field in the in-core superblock. Simply 1089 struct xfs_mount *mp,
1105 * switch on the field indicated and apply the delta to that field. 1090 int64_t delta)
1106 * Fields are not allowed to dip below zero, so if the delta would
1107 * do this do not apply it and return EINVAL.
1108 *
1109 * The m_sb_lock must be held when this routine is called.
1110 */
1111STATIC int
1112xfs_mod_incore_sb_unlocked(
1113 xfs_mount_t *mp,
1114 xfs_sb_field_t field,
1115 int64_t delta,
1116 int rsvd)
1117{ 1091{
1118 int scounter; /* short counter for 32 bit fields */ 1092 /* deltas are +/-64, hence the large batch size of 128. */
1119 long long lcounter; /* long counter for 64 bit fields */ 1093 __percpu_counter_add(&mp->m_icount, delta, 128);
1120 long long res_used, rem; 1094 if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
1121
1122 /*
1123 * With the in-core superblock spin lock held, switch
1124 * on the indicated field. Apply the delta to the
1125 * proper field. If the fields value would dip below
1126 * 0, then do not apply the delta and return EINVAL.
1127 */
1128 switch (field) {
1129 case XFS_SBS_ICOUNT:
1130 lcounter = (long long)mp->m_sb.sb_icount;
1131 lcounter += delta;
1132 if (lcounter < 0) {
1133 ASSERT(0);
1134 return -EINVAL;
1135 }
1136 mp->m_sb.sb_icount = lcounter;
1137 return 0;
1138 case XFS_SBS_IFREE:
1139 lcounter = (long long)mp->m_sb.sb_ifree;
1140 lcounter += delta;
1141 if (lcounter < 0) {
1142 ASSERT(0);
1143 return -EINVAL;
1144 }
1145 mp->m_sb.sb_ifree = lcounter;
1146 return 0;
1147 case XFS_SBS_FDBLOCKS:
1148 lcounter = (long long)
1149 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1150 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1151
1152 if (delta > 0) { /* Putting blocks back */
1153 if (res_used > delta) {
1154 mp->m_resblks_avail += delta;
1155 } else {
1156 rem = delta - res_used;
1157 mp->m_resblks_avail = mp->m_resblks;
1158 lcounter += rem;
1159 }
1160 } else { /* Taking blocks away */
1161 lcounter += delta;
1162 if (lcounter >= 0) {
1163 mp->m_sb.sb_fdblocks = lcounter +
1164 XFS_ALLOC_SET_ASIDE(mp);
1165 return 0;
1166 }
1167
1168 /*
1169 * We are out of blocks, use any available reserved
1170 * blocks if were allowed to.
1171 */
1172 if (!rsvd)
1173 return -ENOSPC;
1174
1175 lcounter = (long long)mp->m_resblks_avail + delta;
1176 if (lcounter >= 0) {
1177 mp->m_resblks_avail = lcounter;
1178 return 0;
1179 }
1180 printk_once(KERN_WARNING
1181 "Filesystem \"%s\": reserve blocks depleted! "
1182 "Consider increasing reserve pool size.",
1183 mp->m_fsname);
1184 return -ENOSPC;
1185 }
1186
1187 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1188 return 0;
1189 case XFS_SBS_FREXTENTS:
1190 lcounter = (long long)mp->m_sb.sb_frextents;
1191 lcounter += delta;
1192 if (lcounter < 0) {
1193 return -ENOSPC;
1194 }
1195 mp->m_sb.sb_frextents = lcounter;
1196 return 0;
1197 case XFS_SBS_DBLOCKS:
1198 lcounter = (long long)mp->m_sb.sb_dblocks;
1199 lcounter += delta;
1200 if (lcounter < 0) {
1201 ASSERT(0);
1202 return -EINVAL;
1203 }
1204 mp->m_sb.sb_dblocks = lcounter;
1205 return 0;
1206 case XFS_SBS_AGCOUNT:
1207 scounter = mp->m_sb.sb_agcount;
1208 scounter += delta;
1209 if (scounter < 0) {
1210 ASSERT(0);
1211 return -EINVAL;
1212 }
1213 mp->m_sb.sb_agcount = scounter;
1214 return 0;
1215 case XFS_SBS_IMAX_PCT:
1216 scounter = mp->m_sb.sb_imax_pct;
1217 scounter += delta;
1218 if (scounter < 0) {
1219 ASSERT(0);
1220 return -EINVAL;
1221 }
1222 mp->m_sb.sb_imax_pct = scounter;
1223 return 0;
1224 case XFS_SBS_REXTSIZE:
1225 scounter = mp->m_sb.sb_rextsize;
1226 scounter += delta;
1227 if (scounter < 0) {
1228 ASSERT(0);
1229 return -EINVAL;
1230 }
1231 mp->m_sb.sb_rextsize = scounter;
1232 return 0;
1233 case XFS_SBS_RBMBLOCKS:
1234 scounter = mp->m_sb.sb_rbmblocks;
1235 scounter += delta;
1236 if (scounter < 0) {
1237 ASSERT(0);
1238 return -EINVAL;
1239 }
1240 mp->m_sb.sb_rbmblocks = scounter;
1241 return 0;
1242 case XFS_SBS_RBLOCKS:
1243 lcounter = (long long)mp->m_sb.sb_rblocks;
1244 lcounter += delta;
1245 if (lcounter < 0) {
1246 ASSERT(0);
1247 return -EINVAL;
1248 }
1249 mp->m_sb.sb_rblocks = lcounter;
1250 return 0;
1251 case XFS_SBS_REXTENTS:
1252 lcounter = (long long)mp->m_sb.sb_rextents;
1253 lcounter += delta;
1254 if (lcounter < 0) {
1255 ASSERT(0);
1256 return -EINVAL;
1257 }
1258 mp->m_sb.sb_rextents = lcounter;
1259 return 0;
1260 case XFS_SBS_REXTSLOG:
1261 scounter = mp->m_sb.sb_rextslog;
1262 scounter += delta;
1263 if (scounter < 0) {
1264 ASSERT(0);
1265 return -EINVAL;
1266 }
1267 mp->m_sb.sb_rextslog = scounter;
1268 return 0;
1269 default:
1270 ASSERT(0); 1095 ASSERT(0);
1096 percpu_counter_add(&mp->m_icount, -delta);
1271 return -EINVAL; 1097 return -EINVAL;
1272 } 1098 }
1099 return 0;
1273} 1100}
1274 1101
1275/*
1276 * xfs_mod_incore_sb() is used to change a field in the in-core
1277 * superblock structure by the specified delta. This modification
1278 * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked()
1279 * routine to do the work.
1280 */
1281int 1102int
1282xfs_mod_incore_sb( 1103xfs_mod_ifree(
1283 struct xfs_mount *mp, 1104 struct xfs_mount *mp,
1284 xfs_sb_field_t field, 1105 int64_t delta)
1285 int64_t delta,
1286 int rsvd)
1287{ 1106{
1288 int status; 1107 percpu_counter_add(&mp->m_ifree, delta);
1289 1108 if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
1290#ifdef HAVE_PERCPU_SB 1109 ASSERT(0);
1291 ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); 1110 percpu_counter_add(&mp->m_ifree, -delta);
1292#endif 1111 return -EINVAL;
1293 spin_lock(&mp->m_sb_lock); 1112 }
1294 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1113 return 0;
1295 spin_unlock(&mp->m_sb_lock);
1296
1297 return status;
1298} 1114}
1299 1115
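Both new helpers follow the same optimistic pattern: apply the delta, then check for underflow and roll it back. Modeled on a plain integer below; the percpu versions behave the same way, except that percpu_counter_compare() is only approximately ordered against concurrent updates, which is why the underflow path is an ASSERT-and-undo rather than a hard guarantee:

/* standalone model of the xfs_mod_icount()/xfs_mod_ifree() pattern */
static int mod_counter(long long *cnt, long long delta)
{
	*cnt += delta;
	if (*cnt < 0) {
		*cnt -= delta;	/* undo the update */
		return -22;	/* -EINVAL, as in the helpers above */
	}
	return 0;
}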
1300/*
1301 * Change more than one field in the in-core superblock structure at a time.
1302 *
1303 * The fields and changes to those fields are specified in the array of
1304 * xfs_mod_sb structures passed in. Either all of the specified deltas
1305 * will be applied or none of them will. If any modified field dips below 0,
1306 * then all modifications will be backed out and EINVAL will be returned.
1307 *
1308 * Note that this function may not be used for the superblock values that
1309 * are tracked with the in-memory per-cpu counters - a direct call to
1310 * xfs_icsb_modify_counters is required for these.
1311 */
1312int 1116int
1313xfs_mod_incore_sb_batch( 1117xfs_mod_fdblocks(
1314 struct xfs_mount *mp, 1118 struct xfs_mount *mp,
1315 xfs_mod_sb_t *msb, 1119 int64_t delta,
1316 uint nmsb, 1120 bool rsvd)
1317 int rsvd)
1318{ 1121{
1319 xfs_mod_sb_t *msbp; 1122 int64_t lcounter;
1320 int error = 0; 1123 long long res_used;
1124 s32 batch;
1125
1126 if (delta > 0) {
1127 /*
1128 * If the reserve pool is depleted, put blocks back into it
1129 * first. Most of the time the pool is full.
1130 */
1131 if (likely(mp->m_resblks == mp->m_resblks_avail)) {
1132 percpu_counter_add(&mp->m_fdblocks, delta);
1133 return 0;
1134 }
1135
1136 spin_lock(&mp->m_sb_lock);
1137 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1138
1139 if (res_used > delta) {
1140 mp->m_resblks_avail += delta;
1141 } else {
1142 delta -= res_used;
1143 mp->m_resblks_avail = mp->m_resblks;
1144 percpu_counter_add(&mp->m_fdblocks, delta);
1145 }
1146 spin_unlock(&mp->m_sb_lock);
1147 return 0;
1148 }
1321 1149
1322 /* 1150 /*
1323 * Loop through the array of mod structures and apply each individually. 1151 * Taking blocks away, need to be more accurate the closer we
1324 * If any fail, then back out all those which have already been applied. 1152 * are to zero.
1325 * Do all of this within the scope of the m_sb_lock so that all of the 1153 *
1326 * changes will be atomic. 1154 * batch size is set to a maximum of 1024 blocks - if we are
 1155 * allocating or freeing extents larger than this then we aren't
1156 * going to be hammering the counter lock so a lock per update
1157 * is not a problem.
1158 *
1159 * If the counter has a value of less than 2 * max batch size,
 1160 * then make everything serialise as we are really close to
1161 * ENOSPC.
1162 */
1163#define __BATCH 1024
1164 if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
1165 batch = 1;
1166 else
1167 batch = __BATCH;
1168#undef __BATCH
1169
1170 __percpu_counter_add(&mp->m_fdblocks, delta, batch);
1171 if (percpu_counter_compare(&mp->m_fdblocks,
1172 XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
1173 /* we had space! */
1174 return 0;
1175 }
1176
1177 /*
1178 * lock up the sb for dipping into reserves before releasing the space
1179 * that took us to ENOSPC.
1327 */ 1180 */
1328 spin_lock(&mp->m_sb_lock); 1181 spin_lock(&mp->m_sb_lock);
1329 for (msbp = msb; msbp < (msb + nmsb); msbp++) { 1182 percpu_counter_add(&mp->m_fdblocks, -delta);
1330 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1183 if (!rsvd)
1331 msbp->msb_field > XFS_SBS_FDBLOCKS); 1184 goto fdblocks_enospc;
1332 1185
1333 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, 1186 lcounter = (long long)mp->m_resblks_avail + delta;
1334 msbp->msb_delta, rsvd); 1187 if (lcounter >= 0) {
1335 if (error) 1188 mp->m_resblks_avail = lcounter;
1336 goto unwind; 1189 spin_unlock(&mp->m_sb_lock);
1190 return 0;
1337 } 1191 }
1192 printk_once(KERN_WARNING
1193 "Filesystem \"%s\": reserve blocks depleted! "
1194 "Consider increasing reserve pool size.",
1195 mp->m_fsname);
1196fdblocks_enospc:
1338 spin_unlock(&mp->m_sb_lock); 1197 spin_unlock(&mp->m_sb_lock);
1339 return 0; 1198 return -ENOSPC;
1199}
1340 1200
1341unwind: 1201int
1342 while (--msbp >= msb) { 1202xfs_mod_frextents(
1343 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, 1203 struct xfs_mount *mp,
1344 -msbp->msb_delta, rsvd); 1204 int64_t delta)
1345 ASSERT(error == 0); 1205{
1346 } 1206 int64_t lcounter;
1207 int ret = 0;
1208
1209 spin_lock(&mp->m_sb_lock);
1210 lcounter = mp->m_sb.sb_frextents + delta;
1211 if (lcounter < 0)
1212 ret = -ENOSPC;
1213 else
1214 mp->m_sb.sb_frextents = lcounter;
1347 spin_unlock(&mp->m_sb_lock); 1215 spin_unlock(&mp->m_sb_lock);
1348 return error; 1216 return ret;
1349} 1217}
1350 1218
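Two details of xfs_mod_fdblocks() deserve emphasis: frees refill a depleted reserve pool before the free-space counter, and the percpu batch shrinks to 1 near ENOSPC so every update is folded into the global count immediately and the comparison stays accurate. A standalone model of the batch choice:

#define BATCH	1024	/* matches __BATCH in the function above */

/*
 * Near ENOSPC every modification must serialise so the global count
 * stays accurate; far from it, a large batch keeps updates CPU-local.
 */
static int fdblocks_batch(long long fdblocks)
{
	return fdblocks < 2 * BATCH ? 1 : BATCH;
}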
1351/* 1219/*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
1407 } 1275 }
1408 return 0; 1276 return 0;
1409} 1277}
1410
1411#ifdef HAVE_PERCPU_SB
1412/*
1413 * Per-cpu incore superblock counters
1414 *
1415 * Simple concept, difficult implementation
1416 *
1417 * Basically, replace the incore superblock counters with a distributed per cpu
1418 * counter for contended fields (e.g. free block count).
1419 *
1420 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
1421 * hence needs to be accurately read when we are running low on space. Hence
1422 * there is a method to enable and disable the per-cpu counters based on how
1423 * much "stuff" is available in them.
1424 *
1425 * Basically, a counter is enabled if there is enough free resource to justify
1426 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
1427 * ENOSPC), then we disable the counters to synchronise all callers and
1428 * re-distribute the available resources.
1429 *
1430 * If, once we redistributed the available resources, we still get a failure,
1431 * we disable the per-cpu counter and go through the slow path.
1432 *
1433 * The slow path is the current xfs_mod_incore_sb() function. This means that
1434 * when we disable a per-cpu counter, we need to drain its resources back to
1435 * the global superblock. We do this after disabling the counter to prevent
1436 * more threads from queueing up on the counter.
1437 *
1438 * Essentially, this means that we still need a lock in the fast path to enable
1439 * synchronisation between the global counters and the per-cpu counters. This
1440 * is not a problem because the lock will be local to a CPU almost all the time
1441 * and have little contention except when we get to ENOSPC conditions.
1442 *
1443 * Basically, this lock becomes a barrier that enables us to lock out the fast
1444 * path while we do things like enabling and disabling counters and
1445 * synchronising the counters.
1446 *
1447 * Locking rules:
1448 *
1449 * 1. m_sb_lock before picking up per-cpu locks
1450 * 2. per-cpu locks always picked up via for_each_online_cpu() order
1451 * 3. accurate counter sync requires m_sb_lock + per cpu locks
1452 * 4. modifying per-cpu counters requires holding per-cpu lock
1453 * 5. modifying global counters requires holding m_sb_lock
1454 * 6. enabling or disabling a counter requires holding the m_sb_lock
1455 * and _none_ of the per-cpu locks.
1456 *
1457 * Disabled counters are only ever re-enabled by a balance operation
1458 * that results in more free resources per CPU than a given threshold.
1459 * To ensure counters don't remain disabled, they are rebalanced when
1460 * the global resource goes above a higher threshold (i.e. some hysteresis
1461 * is present to prevent thrashing).
1462 */
1463
1464#ifdef CONFIG_HOTPLUG_CPU
1465/*
1466 * hot-plug CPU notifier support.
1467 *
1468 * We need a notifier per filesystem as we need to be able to identify
1469 * the filesystem to balance the counters out. This is achieved by
1470 * having a notifier block embedded in the xfs_mount_t and doing pointer
1471 * magic to get the mount pointer from the notifier block address.
1472 */
1473STATIC int
1474xfs_icsb_cpu_notify(
1475 struct notifier_block *nfb,
1476 unsigned long action,
1477 void *hcpu)
1478{
1479 xfs_icsb_cnts_t *cntp;
1480 xfs_mount_t *mp;
1481
1482 mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
1483 cntp = (xfs_icsb_cnts_t *)
1484 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
1485 switch (action) {
1486 case CPU_UP_PREPARE:
1487 case CPU_UP_PREPARE_FROZEN:
1488 /* Easy Case - initialize the area and locks, and
1489 * then rebalance when online does everything else for us. */
1490 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1491 break;
1492 case CPU_ONLINE:
1493 case CPU_ONLINE_FROZEN:
1494 xfs_icsb_lock(mp);
1495 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1496 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1497 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1498 xfs_icsb_unlock(mp);
1499 break;
1500 case CPU_DEAD:
1501 case CPU_DEAD_FROZEN:
1502 /* Disable all the counters, then fold the dead cpu's
1503 * count into the total on the global superblock and
1504 * re-enable the counters. */
1505 xfs_icsb_lock(mp);
1506 spin_lock(&mp->m_sb_lock);
1507 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
1508 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
1509 xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
1510
1511 mp->m_sb.sb_icount += cntp->icsb_icount;
1512 mp->m_sb.sb_ifree += cntp->icsb_ifree;
1513 mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
1514
1515 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1516
1517 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
1518 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
1519 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
1520 spin_unlock(&mp->m_sb_lock);
1521 xfs_icsb_unlock(mp);
1522 break;
1523 }
1524
1525 return NOTIFY_OK;
1526}
1527#endif /* CONFIG_HOTPLUG_CPU */
1528
1529int
1530xfs_icsb_init_counters(
1531 xfs_mount_t *mp)
1532{
1533 xfs_icsb_cnts_t *cntp;
1534 int i;
1535
1536 mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
1537 if (mp->m_sb_cnts == NULL)
1538 return -ENOMEM;
1539
1540 for_each_online_cpu(i) {
1541 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1542 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1543 }
1544
1545 mutex_init(&mp->m_icsb_mutex);
1546
1547 /*
1548 * start with all counters disabled so that the
1549 * initial balance kicks us off correctly
1550 */
1551 mp->m_icsb_counters = -1;
1552
1553#ifdef CONFIG_HOTPLUG_CPU
1554 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1555 mp->m_icsb_notifier.priority = 0;
1556 register_hotcpu_notifier(&mp->m_icsb_notifier);
1557#endif /* CONFIG_HOTPLUG_CPU */
1558
1559 return 0;
1560}
1561
1562void
1563xfs_icsb_reinit_counters(
1564 xfs_mount_t *mp)
1565{
1566 xfs_icsb_lock(mp);
1567 /*
1568 * start with all counters disabled so that the
1569 * initial balance kicks us off correctly
1570 */
1571 mp->m_icsb_counters = -1;
1572 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1573 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1574 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1575 xfs_icsb_unlock(mp);
1576}
1577
1578void
1579xfs_icsb_destroy_counters(
1580 xfs_mount_t *mp)
1581{
1582 if (mp->m_sb_cnts) {
1583 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1584 free_percpu(mp->m_sb_cnts);
1585 }
1586 mutex_destroy(&mp->m_icsb_mutex);
1587}
1588
1589STATIC void
1590xfs_icsb_lock_cntr(
1591 xfs_icsb_cnts_t *icsbp)
1592{
1593 while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
1594 ndelay(1000);
1595 }
1596}
1597
1598STATIC void
1599xfs_icsb_unlock_cntr(
1600 xfs_icsb_cnts_t *icsbp)
1601{
1602 clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
1603}
1604
1605
1606STATIC void
1607xfs_icsb_lock_all_counters(
1608 xfs_mount_t *mp)
1609{
1610 xfs_icsb_cnts_t *cntp;
1611 int i;
1612
1613 for_each_online_cpu(i) {
1614 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1615 xfs_icsb_lock_cntr(cntp);
1616 }
1617}
1618
1619STATIC void
1620xfs_icsb_unlock_all_counters(
1621 xfs_mount_t *mp)
1622{
1623 xfs_icsb_cnts_t *cntp;
1624 int i;
1625
1626 for_each_online_cpu(i) {
1627 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1628 xfs_icsb_unlock_cntr(cntp);
1629 }
1630}
1631
1632STATIC void
1633xfs_icsb_count(
1634 xfs_mount_t *mp,
1635 xfs_icsb_cnts_t *cnt,
1636 int flags)
1637{
1638 xfs_icsb_cnts_t *cntp;
1639 int i;
1640
1641 memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
1642
1643 if (!(flags & XFS_ICSB_LAZY_COUNT))
1644 xfs_icsb_lock_all_counters(mp);
1645
1646 for_each_online_cpu(i) {
1647 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1648 cnt->icsb_icount += cntp->icsb_icount;
1649 cnt->icsb_ifree += cntp->icsb_ifree;
1650 cnt->icsb_fdblocks += cntp->icsb_fdblocks;
1651 }
1652
1653 if (!(flags & XFS_ICSB_LAZY_COUNT))
1654 xfs_icsb_unlock_all_counters(mp);
1655}
1656
1657STATIC int
1658xfs_icsb_counter_disabled(
1659 xfs_mount_t *mp,
1660 xfs_sb_field_t field)
1661{
1662 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1663 return test_bit(field, &mp->m_icsb_counters);
1664}
1665
1666STATIC void
1667xfs_icsb_disable_counter(
1668 xfs_mount_t *mp,
1669 xfs_sb_field_t field)
1670{
1671 xfs_icsb_cnts_t cnt;
1672
1673 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1674
1675 /*
1676 * If we are already disabled, then there is nothing to do
1677 * here. We check before locking all the counters to avoid
1678 * the expensive lock operation when being called in the
1679 * slow path and the counter is already disabled. This is
1680 * safe because the only time we set or clear this state is under
1681 * the m_icsb_mutex.
1682 */
1683 if (xfs_icsb_counter_disabled(mp, field))
1684 return;
1685
1686 xfs_icsb_lock_all_counters(mp);
1687 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
1688 /* drain back to superblock */
1689
1690 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
1691 switch(field) {
1692 case XFS_SBS_ICOUNT:
1693 mp->m_sb.sb_icount = cnt.icsb_icount;
1694 break;
1695 case XFS_SBS_IFREE:
1696 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1697 break;
1698 case XFS_SBS_FDBLOCKS:
1699 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1700 break;
1701 default:
1702 BUG();
1703 }
1704 }
1705
1706 xfs_icsb_unlock_all_counters(mp);
1707}
1708
1709STATIC void
1710xfs_icsb_enable_counter(
1711 xfs_mount_t *mp,
1712 xfs_sb_field_t field,
1713 uint64_t count,
1714 uint64_t resid)
1715{
1716 xfs_icsb_cnts_t *cntp;
1717 int i;
1718
1719 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1720
1721 xfs_icsb_lock_all_counters(mp);
1722 for_each_online_cpu(i) {
1723 cntp = per_cpu_ptr(mp->m_sb_cnts, i);
1724 switch (field) {
1725 case XFS_SBS_ICOUNT:
1726 cntp->icsb_icount = count + resid;
1727 break;
1728 case XFS_SBS_IFREE:
1729 cntp->icsb_ifree = count + resid;
1730 break;
1731 case XFS_SBS_FDBLOCKS:
1732 cntp->icsb_fdblocks = count + resid;
1733 break;
1734 default:
1735 BUG();
1736 break;
1737 }
1738 resid = 0;
1739 }
1740 clear_bit(field, &mp->m_icsb_counters);
1741 xfs_icsb_unlock_all_counters(mp);
1742}
1743
1744void
1745xfs_icsb_sync_counters_locked(
1746 xfs_mount_t *mp,
1747 int flags)
1748{
1749 xfs_icsb_cnts_t cnt;
1750
1751 xfs_icsb_count(mp, &cnt, flags);
1752
1753 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
1754 mp->m_sb.sb_icount = cnt.icsb_icount;
1755 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
1756 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1757 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
1758 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1759}
1760
1761/*
1762 * Accurate update of per-cpu counters to incore superblock
1763 */
1764void
1765xfs_icsb_sync_counters(
1766 xfs_mount_t *mp,
1767 int flags)
1768{
1769 spin_lock(&mp->m_sb_lock);
1770 xfs_icsb_sync_counters_locked(mp, flags);
1771 spin_unlock(&mp->m_sb_lock);
1772}
1773
1774/*
1775 * Balance and enable/disable counters as necessary.
1776 *
1777 * Thresholds for re-enabling counters are somewhat magic. inode counts are
 1778 * chosen to be the same number as a single on-disk allocation chunk per CPU, and
 1779 * free blocks is something far enough from zero that we aren't going to thrash when we
1780 * get near ENOSPC. We also need to supply a minimum we require per cpu to
1781 * prevent looping endlessly when xfs_alloc_space asks for more than will
1782 * be distributed to a single CPU but each CPU has enough blocks to be
1783 * reenabled.
1784 *
1785 * Note that we can be called when counters are already disabled.
1786 * xfs_icsb_disable_counter() optimises the counter locking in this case to
1787 * prevent locking every per-cpu counter needlessly.
1788 */
1789
1790#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
1791#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
1792 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
1793STATIC void
1794xfs_icsb_balance_counter_locked(
1795 xfs_mount_t *mp,
1796 xfs_sb_field_t field,
1797 int min_per_cpu)
1798{
1799 uint64_t count, resid;
1800 int weight = num_online_cpus();
1801 uint64_t min = (uint64_t)min_per_cpu;
1802
1803 /* disable counter and sync counter */
1804 xfs_icsb_disable_counter(mp, field);
1805
 1806 /* update counters - first CPU gets residual */
1807 switch (field) {
1808 case XFS_SBS_ICOUNT:
1809 count = mp->m_sb.sb_icount;
1810 resid = do_div(count, weight);
1811 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1812 return;
1813 break;
1814 case XFS_SBS_IFREE:
1815 count = mp->m_sb.sb_ifree;
1816 resid = do_div(count, weight);
1817 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1818 return;
1819 break;
1820 case XFS_SBS_FDBLOCKS:
1821 count = mp->m_sb.sb_fdblocks;
1822 resid = do_div(count, weight);
1823 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
1824 return;
1825 break;
1826 default:
1827 BUG();
1828 count = resid = 0; /* quiet, gcc */
1829 break;
1830 }
1831
1832 xfs_icsb_enable_counter(mp, field, count, resid);
1833}
1834
1835STATIC void
1836xfs_icsb_balance_counter(
1837 xfs_mount_t *mp,
1838 xfs_sb_field_t fields,
1839 int min_per_cpu)
1840{
1841 spin_lock(&mp->m_sb_lock);
1842 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
1843 spin_unlock(&mp->m_sb_lock);
1844}
1845
1846int
1847xfs_icsb_modify_counters(
1848 xfs_mount_t *mp,
1849 xfs_sb_field_t field,
1850 int64_t delta,
1851 int rsvd)
1852{
1853 xfs_icsb_cnts_t *icsbp;
1854 long long lcounter; /* long counter for 64 bit fields */
1855 int ret = 0;
1856
1857 might_sleep();
1858again:
1859 preempt_disable();
1860 icsbp = this_cpu_ptr(mp->m_sb_cnts);
1861
1862 /*
1863 * if the counter is disabled, go to slow path
1864 */
1865 if (unlikely(xfs_icsb_counter_disabled(mp, field)))
1866 goto slow_path;
1867 xfs_icsb_lock_cntr(icsbp);
1868 if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
1869 xfs_icsb_unlock_cntr(icsbp);
1870 goto slow_path;
1871 }
1872
1873 switch (field) {
1874 case XFS_SBS_ICOUNT:
1875 lcounter = icsbp->icsb_icount;
1876 lcounter += delta;
1877 if (unlikely(lcounter < 0))
1878 goto balance_counter;
1879 icsbp->icsb_icount = lcounter;
1880 break;
1881
1882 case XFS_SBS_IFREE:
1883 lcounter = icsbp->icsb_ifree;
1884 lcounter += delta;
1885 if (unlikely(lcounter < 0))
1886 goto balance_counter;
1887 icsbp->icsb_ifree = lcounter;
1888 break;
1889
1890 case XFS_SBS_FDBLOCKS:
1891 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
1892
1893 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1894 lcounter += delta;
1895 if (unlikely(lcounter < 0))
1896 goto balance_counter;
1897 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1898 break;
1899 default:
1900 BUG();
1901 break;
1902 }
1903 xfs_icsb_unlock_cntr(icsbp);
1904 preempt_enable();
1905 return 0;
1906
1907slow_path:
1908 preempt_enable();
1909
1910 /*
1911 * serialise with a mutex so we don't burn lots of cpu on
1912 * the superblock lock. We still need to hold the superblock
1913 * lock, however, when we modify the global structures.
1914 */
1915 xfs_icsb_lock(mp);
1916
1917 /*
1918 * Now running atomically.
1919 *
1920 * If the counter is enabled, someone has beaten us to rebalancing.
1921 * Drop the lock and try again in the fast path....
1922 */
1923 if (!(xfs_icsb_counter_disabled(mp, field))) {
1924 xfs_icsb_unlock(mp);
1925 goto again;
1926 }
1927
1928 /*
1929 * The counter is currently disabled. Because we are
1930 * running atomically here, we know a rebalance cannot
1931 * be in progress. Hence we can go straight to operating
1932 * on the global superblock. We do not call xfs_mod_incore_sb()
1933 * here even though we need to get the m_sb_lock. Doing so
1934 * will cause us to re-enter this function and deadlock.
1935 * Hence we get the m_sb_lock ourselves and then call
1936 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
1937 * directly on the global counters.
1938 */
1939 spin_lock(&mp->m_sb_lock);
1940 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1941 spin_unlock(&mp->m_sb_lock);
1942
1943 /*
1944 * Now that we've modified the global superblock, we
1945 * may be able to re-enable the distributed counters
1946 * (e.g. lots of space just got freed). After that
1947 * we are done.
1948 */
1949 if (ret != -ENOSPC)
1950 xfs_icsb_balance_counter(mp, field, 0);
1951 xfs_icsb_unlock(mp);
1952 return ret;
1953
1954balance_counter:
1955 xfs_icsb_unlock_cntr(icsbp);
1956 preempt_enable();
1957
1958 /*
1959 * We may have multiple threads here if multiple per-cpu
1960 * counters run dry at the same time. This will mean we can
1961 * do more balances than strictly necessary but it is not
1962 * the common slowpath case.
1963 */
1964 xfs_icsb_lock(mp);
1965
1966 /*
 1967 * Now running atomically.
1968 *
1969 * This will leave the counter in the correct state for future
1970 * accesses. After the rebalance, we simply try again and our retry
1971 * will either succeed through the fast path or slow path without
1972 * another balance operation being required.
1973 */
1974 xfs_icsb_balance_counter(mp, field, delta);
1975 xfs_icsb_unlock(mp);
1976 goto again;
1977}
1978
1979#endif
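For contrast with the generic percpu counters replacing it, here is a toy model of the deleted balance operation: drain every per-cpu bucket into the global value, then hand each CPU an equal share with the first CPU taking the remainder (per the "first CPU gets residual" comment above). In the toy the global is zeroed afterwards, since the buckets then hold the whole value:

#define NCPU 4

/* toy model of xfs_icsb_balance_counter_locked()/_enable_counter() */
static void icsb_balance(unsigned long long percpu[NCPU],
			 unsigned long long *global)
{
	unsigned long long share, resid;
	int i;

	for (i = 0; i < NCPU; i++) {	/* drain: counter "disabled" */
		*global += percpu[i];
		percpu[i] = 0;
	}
	share = *global / NCPU;
	resid = *global % NCPU;
	for (i = 0; i < NCPU; i++)	/* redistribute, re-enable */
		percpu[i] = share + (i == 0 ? resid : 0);
	*global = 0;
}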
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0d8abd6364d9..8c995a2ccb6f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,8 +18,6 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21#ifdef __KERNEL__
22
23struct xlog; 21struct xlog;
24struct xfs_inode; 22struct xfs_inode;
25struct xfs_mru_cache; 23struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
29struct xfs_dir_ops; 27struct xfs_dir_ops;
30struct xfs_da_geometry; 28struct xfs_da_geometry;
31 29
32#ifdef HAVE_PERCPU_SB
33
34/*
35 * Valid per-cpu incore superblock counters. Note that if you add new counters,
36 * you may need to define new counter disabled bit field descriptors as there
37 * are more possible fields in the superblock that can fit in a bitfield on a
 38 * 32 bit platform. The XFS_SBS_* values for the current counters just
39 * fit.
40 */
41typedef struct xfs_icsb_cnts {
42 uint64_t icsb_fdblocks;
43 uint64_t icsb_ifree;
44 uint64_t icsb_icount;
45 unsigned long icsb_flags;
46} xfs_icsb_cnts_t;
47
48#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
49
50#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
51
52extern int xfs_icsb_init_counters(struct xfs_mount *);
53extern void xfs_icsb_reinit_counters(struct xfs_mount *);
54extern void xfs_icsb_destroy_counters(struct xfs_mount *);
55extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
56extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
57extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
58 int64_t, int);
59
60#else
61#define xfs_icsb_init_counters(mp) (0)
62#define xfs_icsb_destroy_counters(mp) do { } while (0)
63#define xfs_icsb_reinit_counters(mp) do { } while (0)
64#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
65#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
66#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
67 xfs_mod_incore_sb(mp, field, delta, rsvd)
68#endif
69
70/* dynamic preallocation free space thresholds, 5% down to 1% */ 30/* dynamic preallocation free space thresholds, 5% down to 1% */
71enum { 31enum {
72 XFS_LOWSP_1_PCNT = 0, 32 XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
81 struct super_block *m_super; 41 struct super_block *m_super;
82 xfs_tid_t m_tid; /* next unused tid for fs */ 42 xfs_tid_t m_tid; /* next unused tid for fs */
83 struct xfs_ail *m_ail; /* fs active log item list */ 43 struct xfs_ail *m_ail; /* fs active log item list */
84 xfs_sb_t m_sb; /* copy of fs superblock */ 44
45 struct xfs_sb m_sb; /* copy of fs superblock */
85 spinlock_t m_sb_lock; /* sb counter lock */ 46 spinlock_t m_sb_lock; /* sb counter lock */
47 struct percpu_counter m_icount; /* allocated inodes counter */
48 struct percpu_counter m_ifree; /* free inodes counter */
49 struct percpu_counter m_fdblocks; /* free block counter */
50
86 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 51 struct xfs_buf *m_sb_bp; /* buffer for superblock */
87 char *m_fsname; /* filesystem name */ 52 char *m_fsname; /* filesystem name */
88 int m_fsname_len; /* strlen of fs name */ 53 int m_fsname_len; /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
152 const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ 117 const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
153 uint m_chsize; /* size of next field */ 118 uint m_chsize; /* size of next field */
154 atomic_t m_active_trans; /* number trans frozen */ 119 atomic_t m_active_trans; /* number trans frozen */
155#ifdef HAVE_PERCPU_SB
156 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
157 unsigned long m_icsb_counters; /* disabled per-cpu counters */
158 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
159 struct mutex m_icsb_mutex; /* balancer sync lock */
160#endif
161 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 120 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
162 struct delayed_work m_reclaim_work; /* background inode reclaim */ 121 struct delayed_work m_reclaim_work; /* background inode reclaim */
163 struct delayed_work m_eofblocks_work; /* background eof blocks 122 struct delayed_work m_eofblocks_work; /* background eof blocks
@@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
301} 260}
302 261
303/* 262/*
304 * Per-cpu superblock locking functions
305 */
306#ifdef HAVE_PERCPU_SB
307static inline void
308xfs_icsb_lock(xfs_mount_t *mp)
309{
310 mutex_lock(&mp->m_icsb_mutex);
311}
312
313static inline void
314xfs_icsb_unlock(xfs_mount_t *mp)
315{
316 mutex_unlock(&mp->m_icsb_mutex);
317}
318#else
319#define xfs_icsb_lock(mp)
320#define xfs_icsb_unlock(mp)
321#endif
322
323/*
324 * This structure is for use by the xfs_mod_incore_sb_batch() routine.
 325 * xfs_growfs can specify a few fields whose deltas exceed the int limit
326 */
327typedef struct xfs_mod_sb {
328 xfs_sb_field_t msb_field; /* Field to modify, see below */
329 int64_t msb_delta; /* Change to make to specified field */
330} xfs_mod_sb_t;
331
332/*
333 * Per-ag incore structure, copies of information in agf and agi, to improve the 263 * Per-ag incore structure, copies of information in agf and agi, to improve the
334 * performance of allocation group selection. 264 * performance of allocation group selection.
335 */ 265 */
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
383extern int xfs_mountfs(xfs_mount_t *mp); 313extern int xfs_mountfs(xfs_mount_t *mp);
384extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, 314extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
385 xfs_agnumber_t *maxagi); 315 xfs_agnumber_t *maxagi);
386
387extern void xfs_unmountfs(xfs_mount_t *); 316extern void xfs_unmountfs(xfs_mount_t *);
388extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 317
389extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 318extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
390 uint, int); 319extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
320extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
321 bool reserved);
322extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
323
391extern int xfs_mount_log_sb(xfs_mount_t *); 324extern int xfs_mount_log_sb(xfs_mount_t *);
392extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 325extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
393extern int xfs_readsb(xfs_mount_t *, int); 326extern int xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
399 332
400extern void xfs_set_low_space_thresholds(struct xfs_mount *); 333extern void xfs_set_low_space_thresholds(struct xfs_mount *);
401 334
402#endif /* __KERNEL__ */
403
404#endif /* __XFS_MOUNT_H__ */ 335#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 30ecca3037e3..f8a674d7f092 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
437 if (!mru || !mru->lists) 437 if (!mru || !mru->lists)
438 return -EINVAL; 438 return -EINVAL;
439 439
440 if (radix_tree_preload(GFP_KERNEL)) 440 if (radix_tree_preload(GFP_NOFS))
441 return -ENOMEM; 441 return -ENOMEM;
442 442
443 INIT_LIST_HEAD(&elem->list_node); 443 INIT_LIST_HEAD(&elem->list_node);
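
Switching the preload to GFP_NOFS prevents memory reclaim from re-entering the filesystem while the MRU cache insert path holds fs-internal state. For reference, the standard preload idiom the surrounding code follows, sketched in isolation; example_insert() and its tree_lock parameter are placeholders, not XFS code:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

/*
 * radix_tree_preload() preallocates tree nodes and returns with
 * preemption disabled on success; the insert then runs under the
 * caller's own lock, and radix_tree_preload_end() re-enables
 * preemption.
 */
static int
example_insert(
	struct radix_tree_root	*root,
	spinlock_t		*tree_lock,
	unsigned long		index,
	void			*item)
{
	int			error;

	if (radix_tree_preload(GFP_NOFS))
		return -ENOMEM;

	spin_lock(tree_lock);
	error = radix_tree_insert(root, index, item);
	spin_unlock(tree_lock);
	radix_tree_preload_end();
	return error;
}
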
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index cbb424f4d93a..981a657eca39 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -305,8 +305,10 @@ xfs_fs_commit_blocks(
305 305
306 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 306 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
307 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 307 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
308 if (error) 308 if (error) {
309 xfs_trans_cancel(tp, 0);
309 goto out_drop_iolock; 310 goto out_drop_iolock;
311 }
310 312
311 xfs_ilock(ip, XFS_ILOCK_EXCL); 313 xfs_ilock(ip, XFS_ILOCK_EXCL);
312 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 314 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
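
The added cancel fixes a transaction leak: in this kernel, xfs_trans_alloc() always hands back a transaction, so a failed xfs_trans_reserve() must be answered with xfs_trans_cancel() or the allocation is lost. A hedged sketch of the idiom, using only names from the hunk above; example_start_trans() itself is hypothetical:

static int
example_start_trans(
	struct xfs_mount	*mp,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);	/* nothing joined yet: plain cancel */
		return error;
	}
	*tpp = tp;
	return 0;
}
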
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c6b22e1e77ed..5538468c7f63 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -841,6 +841,11 @@ xfs_qm_reset_dqcounts(
841 */ 841 */
842 xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, 842 xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
843 "xfs_quotacheck"); 843 "xfs_quotacheck");
844 /*
845 * Reset type in case we are reusing group quota file for
846 * project quotas or vice versa
847 */
848 ddq->d_flags = type;
844 ddq->d_bcount = 0; 849 ddq->d_bcount = 0;
845 ddq->d_icount = 0; 850 ddq->d_icount = 0;
846 ddq->d_rtbcount = 0; 851 ddq->d_rtbcount = 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 02718638dc12..5f357ca97e76 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
966 atomic_set(&ip->i_pincount, 0); 966 atomic_set(&ip->i_pincount, 0);
967 spin_lock_init(&ip->i_flags_lock); 967 spin_lock_init(&ip->i_flags_lock);
968 968
969 mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
970 "xfsino", ip->i_ino);
969 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 971 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
970 "xfsino", ip->i_ino); 972 "xfsino", ip->i_ino);
971} 973}
@@ -1013,24 +1015,6 @@ xfs_free_fsname(
1013 kfree(mp->m_logname); 1015 kfree(mp->m_logname);
1014} 1016}
1015 1017
1016STATIC void
1017xfs_fs_put_super(
1018 struct super_block *sb)
1019{
1020 struct xfs_mount *mp = XFS_M(sb);
1021
1022 xfs_notice(mp, "Unmounting Filesystem");
1023 xfs_filestream_unmount(mp);
1024 xfs_unmountfs(mp);
1025
1026 xfs_freesb(mp);
1027 xfs_icsb_destroy_counters(mp);
1028 xfs_destroy_mount_workqueues(mp);
1029 xfs_close_devices(mp);
1030 xfs_free_fsname(mp);
1031 kfree(mp);
1032}
1033
1034STATIC int 1018STATIC int
1035xfs_fs_sync_fs( 1019xfs_fs_sync_fs(
1036 struct super_block *sb, 1020 struct super_block *sb,
@@ -1066,6 +1050,9 @@ xfs_fs_statfs(
1066 xfs_sb_t *sbp = &mp->m_sb; 1050 xfs_sb_t *sbp = &mp->m_sb;
1067 struct xfs_inode *ip = XFS_I(dentry->d_inode); 1051 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1068 __uint64_t fakeinos, id; 1052 __uint64_t fakeinos, id;
1053 __uint64_t icount;
1054 __uint64_t ifree;
1055 __uint64_t fdblocks;
1069 xfs_extlen_t lsize; 1056 xfs_extlen_t lsize;
1070 __int64_t ffree; 1057 __int64_t ffree;
1071 1058
@@ -1076,17 +1063,21 @@ xfs_fs_statfs(
1076 statp->f_fsid.val[0] = (u32)id; 1063 statp->f_fsid.val[0] = (u32)id;
1077 statp->f_fsid.val[1] = (u32)(id >> 32); 1064 statp->f_fsid.val[1] = (u32)(id >> 32);
1078 1065
1079 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 1066 icount = percpu_counter_sum(&mp->m_icount);
1067 ifree = percpu_counter_sum(&mp->m_ifree);
1068 fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1080 1069
1081 spin_lock(&mp->m_sb_lock); 1070 spin_lock(&mp->m_sb_lock);
1082 statp->f_bsize = sbp->sb_blocksize; 1071 statp->f_bsize = sbp->sb_blocksize;
1083 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 1072 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1084 statp->f_blocks = sbp->sb_dblocks - lsize; 1073 statp->f_blocks = sbp->sb_dblocks - lsize;
1085 statp->f_bfree = statp->f_bavail = 1074 spin_unlock(&mp->m_sb_lock);
1086 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1075
1076 statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1077 statp->f_bavail = statp->f_bfree;
1078
1087 fakeinos = statp->f_bfree << sbp->sb_inopblog; 1079 fakeinos = statp->f_bfree << sbp->sb_inopblog;
1088 statp->f_files = 1080 statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1089 MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1090 if (mp->m_maxicount) 1081 if (mp->m_maxicount)
1091 statp->f_files = min_t(typeof(statp->f_files), 1082 statp->f_files = min_t(typeof(statp->f_files),
1092 statp->f_files, 1083 statp->f_files,
@@ -1098,10 +1089,9 @@ xfs_fs_statfs(
1098 sbp->sb_icount); 1089 sbp->sb_icount);
1099 1090
1100 /* make sure statp->f_ffree does not underflow */ 1091 /* make sure statp->f_ffree does not underflow */
1101 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1092 ffree = statp->f_files - (icount - ifree);
1102 statp->f_ffree = max_t(__int64_t, ffree, 0); 1093 statp->f_ffree = max_t(__int64_t, ffree, 0);
1103 1094
1104 spin_unlock(&mp->m_sb_lock);
1105 1095
1106 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1096 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1107 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 1097 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
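
statfs now snapshots the three counters with percpu_counter_sum() before taking m_sb_lock, so the lock covers only the remaining struct xfs_sb fields. A self-contained sketch of the percpu_counter semantics this relies on, with hypothetical values; example_counter() is not XFS code:

#include <linux/percpu_counter.h>

/*
 * percpu_counter_add() is the cheap per-cpu fastpath used by the
 * xfs_mod_*() helpers; percpu_counter_sum() folds every cpu's delta
 * into an accurate (if instantaneously stale) total, which is what
 * statfs wants.  percpu_counter_read() would be the fast
 * approximate read.
 */
static int
example_counter(void)
{
	struct percpu_counter	c;
	s64			total;
	int			error;

	error = percpu_counter_init(&c, 0, GFP_KERNEL);
	if (error)
		return error;
	percpu_counter_add(&c, 42);
	total = percpu_counter_sum(&c);	/* == 42 */
	percpu_counter_destroy(&c);
	return total == 42 ? 0 : -EINVAL;
}
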
@@ -1388,6 +1378,51 @@ xfs_finish_flags(
1388 return 0; 1378 return 0;
1389} 1379}
1390 1380
1381static int
1382xfs_init_percpu_counters(
1383 struct xfs_mount *mp)
1384{
1385 int error;
1386
1387 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1388 if (error)
1389 return -ENOMEM;
1390
1391 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1392 if (error)
1393 goto free_icount;
1394
1395 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1396 if (error)
1397 goto free_ifree;
1398
1399 return 0;
1400
1401free_ifree:
1402 percpu_counter_destroy(&mp->m_ifree);
1403free_icount:
1404 percpu_counter_destroy(&mp->m_icount);
1405 return -ENOMEM;
1406}
1407
1408void
1409xfs_reinit_percpu_counters(
1410 struct xfs_mount *mp)
1411{
1412 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1413 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1414 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1415}
1416
1417static void
1418xfs_destroy_percpu_counters(
1419 struct xfs_mount *mp)
1420{
1421 percpu_counter_destroy(&mp->m_icount);
1422 percpu_counter_destroy(&mp->m_ifree);
1423 percpu_counter_destroy(&mp->m_fdblocks);
1424}
1425
1391STATIC int 1426STATIC int
1392xfs_fs_fill_super( 1427xfs_fs_fill_super(
1393 struct super_block *sb, 1428 struct super_block *sb,
@@ -1436,7 +1471,7 @@ xfs_fs_fill_super(
1436 if (error) 1471 if (error)
1437 goto out_close_devices; 1472 goto out_close_devices;
1438 1473
1439 error = xfs_icsb_init_counters(mp); 1474 error = xfs_init_percpu_counters(mp);
1440 if (error) 1475 if (error)
1441 goto out_destroy_workqueues; 1476 goto out_destroy_workqueues;
1442 1477
@@ -1494,7 +1529,7 @@ xfs_fs_fill_super(
1494 out_free_sb: 1529 out_free_sb:
1495 xfs_freesb(mp); 1530 xfs_freesb(mp);
1496 out_destroy_counters: 1531 out_destroy_counters:
1497 xfs_icsb_destroy_counters(mp); 1532 xfs_destroy_percpu_counters(mp);
1498out_destroy_workqueues: 1533out_destroy_workqueues:
1499 xfs_destroy_mount_workqueues(mp); 1534 xfs_destroy_mount_workqueues(mp);
1500 out_close_devices: 1535 out_close_devices:
@@ -1511,6 +1546,24 @@ out_destroy_workqueues:
1511 goto out_free_sb; 1546 goto out_free_sb;
1512} 1547}
1513 1548
1549STATIC void
1550xfs_fs_put_super(
1551 struct super_block *sb)
1552{
1553 struct xfs_mount *mp = XFS_M(sb);
1554
1555 xfs_notice(mp, "Unmounting Filesystem");
1556 xfs_filestream_unmount(mp);
1557 xfs_unmountfs(mp);
1558
1559 xfs_freesb(mp);
1560 xfs_destroy_percpu_counters(mp);
1561 xfs_destroy_mount_workqueues(mp);
1562 xfs_close_devices(mp);
1563 xfs_free_fsname(mp);
1564 kfree(mp);
1565}
1566
1514STATIC struct dentry * 1567STATIC struct dentry *
1515xfs_fs_mount( 1568xfs_fs_mount(
1516 struct file_system_type *fs_type, 1569 struct file_system_type *fs_type,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 2b830c2f322e..499058fea303 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
72extern const struct xattr_handler *xfs_xattr_handlers[]; 72extern const struct xattr_handler *xfs_xattr_handlers[];
73extern const struct quotactl_ops xfs_quotactl_operations; 73extern const struct quotactl_ops xfs_quotactl_operations;
74 74
75extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
76
75#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 77#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
76 78
77#endif /* __XFS_SUPER_H__ */ 79#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7e45fa155ea8..b2a45cc9eceb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
115 __entry->refcount = refcount; 115 __entry->refcount = refcount;
116 __entry->caller_ip = caller_ip; 116 __entry->caller_ip = caller_ip;
117 ), 117 ),
118 TP_printk("dev %d:%d agno %u refcount %d caller %pf", 118 TP_printk("dev %d:%d agno %u refcount %d caller %ps",
119 MAJOR(__entry->dev), MINOR(__entry->dev), 119 MAJOR(__entry->dev), MINOR(__entry->dev),
120 __entry->agno, 120 __entry->agno,
121 __entry->refcount, 121 __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
239 __entry->caller_ip = caller_ip; 239 __entry->caller_ip = caller_ip;
240 ), 240 ),
241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
242 "offset %lld block %lld count %lld flag %d caller %pf", 242 "offset %lld block %lld count %lld flag %d caller %ps",
243 MAJOR(__entry->dev), MINOR(__entry->dev), 243 MAJOR(__entry->dev), MINOR(__entry->dev),
244 __entry->ino, 244 __entry->ino,
245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
283 __entry->caller_ip = caller_ip; 283 __entry->caller_ip = caller_ip;
284 ), 284 ),
285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
286 "offset %lld block %lld count %lld flag %d caller %pf", 286 "offset %lld block %lld count %lld flag %d caller %ps",
287 MAJOR(__entry->dev), MINOR(__entry->dev), 287 MAJOR(__entry->dev), MINOR(__entry->dev),
288 __entry->ino, 288 __entry->ino,
289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
329 __entry->caller_ip = caller_ip; 329 __entry->caller_ip = caller_ip;
330 ), 330 ),
331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
332 "lock %d flags %s caller %pf", 332 "lock %d flags %s caller %ps",
333 MAJOR(__entry->dev), MINOR(__entry->dev), 333 MAJOR(__entry->dev), MINOR(__entry->dev),
334 (unsigned long long)__entry->bno, 334 (unsigned long long)__entry->bno,
335 __entry->nblks, 335 __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
402 __entry->caller_ip = caller_ip; 402 __entry->caller_ip = caller_ip;
403 ), 403 ),
404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
405 "lock %d flags %s caller %pf", 405 "lock %d flags %s caller %ps",
406 MAJOR(__entry->dev), MINOR(__entry->dev), 406 MAJOR(__entry->dev), MINOR(__entry->dev),
407 (unsigned long long)__entry->bno, 407 (unsigned long long)__entry->bno,
408 __entry->buffer_length, 408 __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
447 __entry->caller_ip = caller_ip; 447 __entry->caller_ip = caller_ip;
448 ), 448 ),
449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
450 "lock %d error %d flags %s caller %pf", 450 "lock %d error %d flags %s caller %ps",
451 MAJOR(__entry->dev), MINOR(__entry->dev), 451 MAJOR(__entry->dev), MINOR(__entry->dev),
452 (unsigned long long)__entry->bno, 452 (unsigned long long)__entry->bno,
453 __entry->buffer_length, 453 __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
613 __entry->lock_flags = lock_flags; 613 __entry->lock_flags = lock_flags;
614 __entry->caller_ip = caller_ip; 614 __entry->caller_ip = caller_ip;
615 ), 615 ),
616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", 616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
617 MAJOR(__entry->dev), MINOR(__entry->dev), 617 MAJOR(__entry->dev), MINOR(__entry->dev),
618 __entry->ino, 618 __entry->ino,
619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), 619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -686,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
686DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); 686DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
687DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); 687DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
688 688
689DEFINE_INODE_EVENT(xfs_filemap_fault);
690DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
691
689DECLARE_EVENT_CLASS(xfs_iref_class, 692DECLARE_EVENT_CLASS(xfs_iref_class,
690 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 693 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
691 TP_ARGS(ip, caller_ip), 694 TP_ARGS(ip, caller_ip),
@@ -703,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
703 __entry->pincount = atomic_read(&ip->i_pincount); 706 __entry->pincount = atomic_read(&ip->i_pincount);
704 __entry->caller_ip = caller_ip; 707 __entry->caller_ip = caller_ip;
705 ), 708 ),
706 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", 709 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
707 MAJOR(__entry->dev), MINOR(__entry->dev), 710 MAJOR(__entry->dev), MINOR(__entry->dev),
708 __entry->ino, 711 __entry->ino,
709 __entry->count, 712 __entry->count,
@@ -1334,7 +1337,7 @@ TRACE_EVENT(xfs_bunmap,
1334 __entry->flags = flags; 1337 __entry->flags = flags;
1335 ), 1338 ),
1336 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1339 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
1337 "flags %s caller %pf", 1340 "flags %s caller %ps",
1338 MAJOR(__entry->dev), MINOR(__entry->dev), 1341 MAJOR(__entry->dev), MINOR(__entry->dev),
1339 __entry->ino, 1342 __entry->ino,
1340 __entry->size, 1343 __entry->size,
@@ -1467,7 +1470,7 @@ TRACE_EVENT(xfs_agf,
1467 ), 1470 ),
1468 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1471 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1469 "levels b %u c %u flfirst %u fllast %u flcount %u " 1472 "levels b %u c %u flfirst %u fllast %u flcount %u "
1470 "freeblks %u longest %u caller %pf", 1473 "freeblks %u longest %u caller %ps",
1471 MAJOR(__entry->dev), MINOR(__entry->dev), 1474 MAJOR(__entry->dev), MINOR(__entry->dev),
1472 __entry->agno, 1475 __entry->agno,
1473 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), 1476 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
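
All of these format-string changes are the same fix: caller_ip in these events is a raw return address (__return_address), not a function pointer, and %pf dereferences a function descriptor first on descriptor-based ABIs such as ia64 and ppc64, mis-resolving plain addresses there. A minimal illustration of the distinction:

/*
 * %ps resolves a plain text address to a symbol name; %pf is only
 * correct for genuine function pointers on descriptor ABIs.  Saved
 * return addresses therefore want %ps:
 */
xfs_notice(mp, "called from %ps", (void *)__return_address);
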
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index eb90cd59a0ec..220ef2c906b2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -173,7 +173,7 @@ xfs_trans_reserve(
173 uint rtextents) 173 uint rtextents)
174{ 174{
175 int error = 0; 175 int error = 0;
176 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 176 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
177 177
178 /* Mark this thread as being in a transaction */ 178 /* Mark this thread as being in a transaction */
179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
184 * fail if the count would go below zero. 184 * fail if the count would go below zero.
185 */ 185 */
186 if (blocks > 0) { 186 if (blocks > 0) {
187 error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 187 error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
188 -((int64_t)blocks), rsvd);
189 if (error != 0) { 188 if (error != 0) {
190 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 189 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
191 return -ENOSPC; 190 return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
236 * fail if the count would go below zero. 235 * fail if the count would go below zero.
237 */ 236 */
238 if (rtextents > 0) { 237 if (rtextents > 0) {
239 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 238 error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
240 -((int64_t)rtextents), rsvd);
241 if (error) { 239 if (error) {
242 error = -ENOSPC; 240 error = -ENOSPC;
243 goto undo_log; 241 goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
268 266
269undo_blocks: 267undo_blocks:
270 if (blocks > 0) { 268 if (blocks > 0) {
 271 xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 269 xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd);
272 (int64_t)blocks, rsvd);
273 tp->t_blk_res = 0; 270 tp->t_blk_res = 0;
274 } 271 }
275 272
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
488 sizeof(sbp->sb_frextents) - 1); 485 sizeof(sbp->sb_frextents) - 1);
489} 486}
490 487
488STATIC int
489xfs_sb_mod8(
490 uint8_t *field,
491 int8_t delta)
492{
493 int8_t counter = *field;
494
495 counter += delta;
496 if (counter < 0) {
497 ASSERT(0);
498 return -EINVAL;
499 }
500 *field = counter;
501 return 0;
502}
503
504STATIC int
505xfs_sb_mod32(
506 uint32_t *field,
507 int32_t delta)
508{
509 int32_t counter = *field;
510
511 counter += delta;
512 if (counter < 0) {
513 ASSERT(0);
514 return -EINVAL;
515 }
516 *field = counter;
517 return 0;
518}
519
520STATIC int
521xfs_sb_mod64(
522 uint64_t *field,
523 int64_t delta)
524{
525 int64_t counter = *field;
526
527 counter += delta;
528 if (counter < 0) {
529 ASSERT(0);
530 return -EINVAL;
531 }
532 *field = counter;
533 return 0;
534}
535
491/* 536/*
492 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations 537 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
493 * and apply superblock counter changes to the in-core superblock. The 538 * and apply superblock counter changes to the in-core superblock. The
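
The three xfs_sb_mod* helpers above share one contract: apply the delta only if the result stays non-negative, otherwise assert and return -EINVAL with the field left unmodified. That no-partial-update property is what lets the unwind labels further down re-apply negated deltas for exactly the steps that succeeded. A small illustration with hypothetical values; example_sb_mod() is not XFS code:

static void
example_sb_mod(void)
{
	uint32_t	agcount = 4;
	int		error;

	error = xfs_sb_mod32(&agcount, 2);	/* 0; agcount is now 6 */
	error = xfs_sb_mod32(&agcount, -10);	/* -EINVAL; agcount still 6
						 * (trips the internal ASSERT
						 * on DEBUG kernels) */
}
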
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
495 * applied to the in-core superblock. The idea is that that has already been 540 * applied to the in-core superblock. The idea is that that has already been
496 * done. 541 * done.
497 * 542 *
498 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
499 * However, we have to ensure that we only modify each superblock field only
500 * once because the application of the delta values may not be atomic. That can
 501 * lead to ENOSPC races occurring if we have two separate modifications of the
502 * free space counter to put back the entire reservation and then take away
503 * what we used.
504 *
505 * If we are not logging superblock counters, then the inode allocated/free and 543 * If we are not logging superblock counters, then the inode allocated/free and
506 * used block counts are not updated in the on disk superblock. In this case, 544 * used block counts are not updated in the on disk superblock. In this case,
507 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 545 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
509 */ 547 */
510void 548void
511xfs_trans_unreserve_and_mod_sb( 549xfs_trans_unreserve_and_mod_sb(
512 xfs_trans_t *tp) 550 struct xfs_trans *tp)
513{ 551{
514 xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ 552 struct xfs_mount *mp = tp->t_mountp;
515 xfs_mod_sb_t *msbp; 553 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
516 xfs_mount_t *mp = tp->t_mountp; 554 int64_t blkdelta = 0;
517 /* REFERENCED */ 555 int64_t rtxdelta = 0;
518 int error; 556 int64_t idelta = 0;
519 int rsvd; 557 int64_t ifreedelta = 0;
520 int64_t blkdelta = 0; 558 int error;
521 int64_t rtxdelta = 0;
522 int64_t idelta = 0;
523 int64_t ifreedelta = 0;
524
525 msbp = msb;
526 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
527 559
528 /* calculate deltas */ 560 /* calculate deltas */
529 if (tp->t_blk_res > 0) 561 if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
547 579
548 /* apply the per-cpu counters */ 580 /* apply the per-cpu counters */
549 if (blkdelta) { 581 if (blkdelta) {
550 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 582 error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
551 blkdelta, rsvd);
552 if (error) 583 if (error)
553 goto out; 584 goto out;
554 } 585 }
555 586
556 if (idelta) { 587 if (idelta) {
557 error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, 588 error = xfs_mod_icount(mp, idelta);
558 idelta, rsvd);
559 if (error) 589 if (error)
560 goto out_undo_fdblocks; 590 goto out_undo_fdblocks;
561 } 591 }
562 592
563 if (ifreedelta) { 593 if (ifreedelta) {
564 error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, 594 error = xfs_mod_ifree(mp, ifreedelta);
565 ifreedelta, rsvd);
566 if (error) 595 if (error)
567 goto out_undo_icount; 596 goto out_undo_icount;
568 } 597 }
569 598
599 if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
600 return;
601
570 /* apply remaining deltas */ 602 /* apply remaining deltas */
571 if (rtxdelta != 0) { 603 spin_lock(&mp->m_sb_lock);
572 msbp->msb_field = XFS_SBS_FREXTENTS; 604 if (rtxdelta) {
573 msbp->msb_delta = rtxdelta; 605 error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
574 msbp++; 606 if (error)
607 goto out_undo_ifree;
575 } 608 }
576 609
577 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 610 if (tp->t_dblocks_delta != 0) {
578 if (tp->t_dblocks_delta != 0) { 611 error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
579 msbp->msb_field = XFS_SBS_DBLOCKS; 612 if (error)
580 msbp->msb_delta = tp->t_dblocks_delta; 613 goto out_undo_frextents;
581 msbp++;
582 }
583 if (tp->t_agcount_delta != 0) {
584 msbp->msb_field = XFS_SBS_AGCOUNT;
585 msbp->msb_delta = tp->t_agcount_delta;
586 msbp++;
587 }
588 if (tp->t_imaxpct_delta != 0) {
589 msbp->msb_field = XFS_SBS_IMAX_PCT;
590 msbp->msb_delta = tp->t_imaxpct_delta;
591 msbp++;
592 }
593 if (tp->t_rextsize_delta != 0) {
594 msbp->msb_field = XFS_SBS_REXTSIZE;
595 msbp->msb_delta = tp->t_rextsize_delta;
596 msbp++;
597 }
598 if (tp->t_rbmblocks_delta != 0) {
599 msbp->msb_field = XFS_SBS_RBMBLOCKS;
600 msbp->msb_delta = tp->t_rbmblocks_delta;
601 msbp++;
602 }
603 if (tp->t_rblocks_delta != 0) {
604 msbp->msb_field = XFS_SBS_RBLOCKS;
605 msbp->msb_delta = tp->t_rblocks_delta;
606 msbp++;
607 }
608 if (tp->t_rextents_delta != 0) {
609 msbp->msb_field = XFS_SBS_REXTENTS;
610 msbp->msb_delta = tp->t_rextents_delta;
611 msbp++;
612 }
613 if (tp->t_rextslog_delta != 0) {
614 msbp->msb_field = XFS_SBS_REXTSLOG;
615 msbp->msb_delta = tp->t_rextslog_delta;
616 msbp++;
617 }
618 } 614 }
619 615 if (tp->t_agcount_delta != 0) {
620 /* 616 error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
621 * If we need to change anything, do it.
622 */
623 if (msbp > msb) {
624 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
625 (uint)(msbp - msb), rsvd);
626 if (error) 617 if (error)
627 goto out_undo_ifreecount; 618 goto out_undo_dblocks;
628 } 619 }
629 620 if (tp->t_imaxpct_delta != 0) {
621 error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
622 if (error)
623 goto out_undo_agcount;
624 }
625 if (tp->t_rextsize_delta != 0) {
626 error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
627 tp->t_rextsize_delta);
628 if (error)
629 goto out_undo_imaxpct;
630 }
631 if (tp->t_rbmblocks_delta != 0) {
632 error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
633 tp->t_rbmblocks_delta);
634 if (error)
635 goto out_undo_rextsize;
636 }
637 if (tp->t_rblocks_delta != 0) {
638 error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
639 if (error)
640 goto out_undo_rbmblocks;
641 }
642 if (tp->t_rextents_delta != 0) {
643 error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
644 tp->t_rextents_delta);
645 if (error)
646 goto out_undo_rblocks;
647 }
648 if (tp->t_rextslog_delta != 0) {
649 error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
650 tp->t_rextslog_delta);
651 if (error)
652 goto out_undo_rextents;
653 }
654 spin_unlock(&mp->m_sb_lock);
630 return; 655 return;
631 656
632out_undo_ifreecount: 657out_undo_rextents:
658 if (tp->t_rextents_delta)
659 xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
660out_undo_rblocks:
661 if (tp->t_rblocks_delta)
662 xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
663out_undo_rbmblocks:
664 if (tp->t_rbmblocks_delta)
665 xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
666out_undo_rextsize:
667 if (tp->t_rextsize_delta)
668 xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
669out_undo_imaxpct:
 670 if (tp->t_imaxpct_delta)
671 xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
672out_undo_agcount:
673 if (tp->t_agcount_delta)
674 xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
675out_undo_dblocks:
676 if (tp->t_dblocks_delta)
677 xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
678out_undo_frextents:
679 if (rtxdelta)
680 xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
681out_undo_ifree:
682 spin_unlock(&mp->m_sb_lock);
633 if (ifreedelta) 683 if (ifreedelta)
634 xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); 684 xfs_mod_ifree(mp, -ifreedelta);
635out_undo_icount: 685out_undo_icount:
636 if (idelta) 686 if (idelta)
637 xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); 687 xfs_mod_icount(mp, -idelta);
638out_undo_fdblocks: 688out_undo_fdblocks:
639 if (blkdelta) 689 if (blkdelta)
640 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); 690 xfs_mod_fdblocks(mp, -blkdelta, rsvd);
641out: 691out:
642 ASSERT(error == 0); 692 ASSERT(error == 0);
643 return; 693 return;