author		Jens Axboe <jaxboe@fusionio.com>	2010-10-19 03:13:04 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-10-19 03:13:04 -0400
commit		fa251f89903d73989e2f63e13d0eaed1e07ce0da (patch)
tree		3f7fe779941e3b6d67754dd7c44a32f48ea47c74 /fs/xfs
parent		dd3932eddf428571762596e17b65f5dc92ca361b (diff)
parent		cd07202cc8262e1669edff0d97715f3dd9260917 (diff)
Merge branch 'v2.6.36-rc8' into for-2.6.37/barrier
Conflicts:
block/blk-core.c
drivers/block/loop.c
mm/swapfile.c
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--	fs/xfs/linux-2.6/xfs_aops.c	13
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.c	11
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.h	1
-rw-r--r--	fs/xfs/linux-2.6/xfs_ioctl.c	9
-rw-r--r--	fs/xfs/linux-2.6/xfs_iops.c	2
-rw-r--r--	fs/xfs/linux-2.6/xfs_super.c	9
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	61
-rw-r--r--	fs/xfs/xfs_bmap.c	14
-rw-r--r--	fs/xfs/xfs_fs.h	4
-rw-r--r--	fs/xfs/xfs_fsops.c	31
-rw-r--r--	fs/xfs/xfs_fsops.h	2
-rw-r--r--	fs/xfs/xfs_ialloc.c	16
-rw-r--r--	fs/xfs/xfs_inode.c	49
-rw-r--r--	fs/xfs/xfs_log.c	7
-rw-r--r--	fs/xfs/xfs_log_cil.c	271
-rw-r--r--	fs/xfs/xfs_log_priv.h	50
-rw-r--r--	fs/xfs/xfs_trans.c	5
-rw-r--r--	fs/xfs/xfs_trans_priv.h	3
-rw-r--r--	fs/xfs/xfs_vnodeops.c	13
19 files changed, 332 insertions, 239 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 15412fe15c3a..b552f816de15 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -852,8 +852,8 @@ xfs_convert_page(
 		SetPageUptodate(page);
 
 		if (count) {
-			wbc->nr_to_write--;
-			if (wbc->nr_to_write <= 0)
+			if (--wbc->nr_to_write <= 0 &&
+			    wbc->sync_mode == WB_SYNC_NONE)
 				done = 1;
 		}
 		xfs_start_page_writeback(page, !page_dirty, count);
@@ -1068,7 +1068,7 @@ xfs_vm_writepage(
 	 * by themselves.
 	 */
 	if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-		goto out_fail;
+		goto redirty;
 
 	/*
 	 * We need a transaction if there are delalloc or unwritten buffers
@@ -1080,7 +1080,7 @@ xfs_vm_writepage(
 	 */
 	xfs_count_page_state(page, &delalloc, &unwritten);
 	if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
-		goto out_fail;
+		goto redirty;
 
 	/* Is this page beyond the end of the file? */
 	offset = i_size_read(inode);
@@ -1245,12 +1245,15 @@ error:
 	if (iohead)
 		xfs_cancel_ioend(iohead);
 
+	if (err == -EAGAIN)
+		goto redirty;
+
 	xfs_aops_discard_page(page);
 	ClearPageUptodate(page);
 	unlock_page(page);
 	return err;
 
-out_fail:
+redirty:
 	redirty_page_for_writepage(wbc, page);
 	unlock_page(page);
 	return 0;
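An aside on the first xfs_aops.c hunk: the page-quota check now only terminates writeback early for background (WB_SYNC_NONE) passes, so a data-integrity sync keeps writing every page. A minimal userspace sketch of that decision logic — the names mirror the kernel's writeback_control, but this is illustrative, not kernel code:

#include <stdio.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

struct writeback_control {
	long nr_to_write;		/* page budget for this pass */
	enum sync_mode sync_mode;
};

/* returns nonzero when the caller may stop writing pages */
static int quota_exhausted(struct writeback_control *wbc)
{
	/*
	 * The budget is decremented in every mode, but only a
	 * background (WB_SYNC_NONE) pass is allowed to stop early;
	 * an integrity sync must push all remaining dirty pages.
	 */
	return --wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE;
}

int main(void)
{
	struct writeback_control bg = { 1, WB_SYNC_NONE };
	struct writeback_control integrity = { 1, WB_SYNC_ALL };

	printf("background stops early: %d\n", quota_exhausted(&bg));
	printf("integrity stops early:  %d\n", quota_exhausted(&integrity));
	return 0;
}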
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b93ea3342281..1846a0dd7035 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -440,12 +440,7 @@ _xfs_buf_find(
 		ASSERT(btp == bp->b_target);
 		if (bp->b_file_offset == range_base &&
 		    bp->b_buffer_length == range_length) {
-			/*
-			 * If we look at something, bring it to the
-			 * front of the list for next time.
-			 */
 			atomic_inc(&bp->b_hold);
-			list_move(&bp->b_hash_list, &hash->bh_list);
 			goto found;
 		}
 	}
@@ -1431,8 +1426,7 @@ xfs_alloc_bufhash(
 {
 	unsigned int		i;
 
-	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
-	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+	btp->bt_hashshift = external ? 3 : 12;	/* 8 or 4096 buckets */
 	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
 					 sizeof(xfs_bufhash_t));
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
@@ -1926,7 +1920,8 @@ xfs_buf_init(void)
 	if (!xfs_buf_zone)
 		goto out;
 
-	xfslogd_workqueue = create_workqueue("xfslogd");
+	xfslogd_workqueue = alloc_workqueue("xfslogd",
+					WQ_RESCUER | WQ_HIGHPRI, 1);
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
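The xfs_alloc_bufhash change grows the per-device buffer hash from 256 to 4096 buckets and drops the stored bt_hashmask, since a power-of-two table's mask follows directly from its shift. A standalone sketch of that arithmetic (illustrative only; hash_bucket is an invented name):

#include <stdio.h>

/*
 * A power-of-two table is fully described by its shift: the bucket
 * count is 1 << shift and the mask is one less, so storing a separate
 * mask field (the removed bt_hashmask) is redundant.
 */
static unsigned int hash_bucket(unsigned int shift, unsigned long key)
{
	return key & ((1u << shift) - 1);	/* mask derived on the fly */
}

int main(void)
{
	printf("external target: %u buckets\n", 1u << 3);	/* 8 */
	printf("internal target: %u buckets\n", 1u << 12);	/* 4096 */
	printf("key 0xdeadbeef -> bucket %u\n",
	       hash_bucket(12, 0xdeadbeefUL));
	return 0;
}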
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index d533d64e2c3e..9d021c73ea52 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -128,7 +128,6 @@ typedef struct xfs_buftarg {
 	size_t			bt_smask;
 
 	/* per device buffer hash table */
-	uint			bt_hashmask;
 	uint			bt_hashshift;
 	xfs_bufhash_t		*bt_hash;
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 237f5ffb2ee8..3b9e626f7cd1 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
 {
 	struct fsxattr		fa;
 
+	memset(&fa, 0, sizeof(struct fsxattr));
+
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	fa.fsx_xflags = xfs_ip2xflags(ip);
 	fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
@@ -907,6 +909,13 @@ xfs_ioctl_setattr(
 		return XFS_ERROR(EIO);
 
 	/*
+	 * Disallow 32bit project ids because on-disk structure
+	 * is 16bit only.
+	 */
+	if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+		return XFS_ERROR(EINVAL);
+
+	/*
 	 * If disk quotas is on, we make sure that the dquots do exist on disk,
 	 * before we start any other transactions. Trying to do this later
 	 * is messy. We don't care to take a readlock to look at the ids
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 68be25dcd301..b1fc2a6bfe83 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -664,7 +664,7 @@ xfs_vn_fiemap(
 			fieinfo->fi_extents_max + 1;
 	bm.bmv_count = min_t(__s32, bm.bmv_count,
 			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC;
+	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
 		bm.bmv_iflags |= BMV_IF_ATTRFORK;
 	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5fa7a30cc3f0..08fd3102128c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1225,6 +1225,7 @@ xfs_fs_statfs(
 	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
 	__uint64_t		fakeinos, id;
 	xfs_extlen_t		lsize;
+	__int64_t		ffree;
 
 	statp->f_type = XFS_SB_MAGIC;
 	statp->f_namelen = MAXNAMELEN - 1;
@@ -1248,7 +1249,11 @@ xfs_fs_statfs(
 		statp->f_files = min_t(typeof(statp->f_files),
 					statp->f_files,
 					mp->m_maxicount);
-	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+
+	/* make sure statp->f_ffree does not underflow */
+	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+	statp->f_ffree = max_t(__int64_t, ffree, 0);
+
 	spin_unlock(&mp->m_sb_lock);
 
 	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1401,7 +1406,7 @@ xfs_fs_freeze(
 
 	xfs_save_resvblks(mp);
 	xfs_quiesce_attr(mp);
-	return -xfs_fs_log_dummy(mp, SYNC_WAIT);
+	return -xfs_fs_log_dummy(mp, SYNC_WAIT);
 }
 
 STATIC int
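The xfs_fs_statfs hunk fixes a classic unsigned-underflow bug: when sb_icount - sb_ifree exceeds the clamped f_files value, the old subtraction wrapped to an enormous free-inode count. A self-contained sketch of the guard, with made-up numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t f_files = 100;			/* clamped inode count */
	uint64_t icount = 150, ifree = 10;	/* 140 inodes in use */

	/* do the subtraction in a signed type, then clamp at zero */
	int64_t ffree = (int64_t)(f_files - (icount - ifree));
	uint64_t f_ffree = ffree > 0 ? (uint64_t)ffree : 0;

	/* without the clamp, 100 - 140 wraps to roughly 2^64 */
	printf("f_ffree = %llu\n", (unsigned long long)f_ffree);
	return 0;
}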
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index dfcbd98d1599..81976ffed7d6 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -34,6 +34,7 @@
 #include "xfs_inode_item.h"
 #include "xfs_quota.h"
 #include "xfs_trace.h"
+#include "xfs_fsops.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -341,38 +342,6 @@ xfs_sync_attr(
 }
 
 STATIC int
-xfs_commit_dummy_trans(
-	struct xfs_mount	*mp,
-	uint			flags)
-{
-	struct xfs_inode	*ip = mp->m_rootip;
-	struct xfs_trans	*tp;
-	int			error;
-
-	/*
-	 * Put a dummy transaction in the log to tell recovery
-	 * that all others are OK.
-	 */
-	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_commit(tp, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	/* the log force ensures this transaction is pushed to disk */
-	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-	return error;
-}
-
-STATIC int
 xfs_sync_fsdata(
 	struct xfs_mount	*mp)
 {
@@ -432,7 +401,7 @@ xfs_quiesce_data(
 
 	/* mark the log as covered if needed */
 	if (xfs_log_need_covered(mp))
-		error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
+		error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
 
 	/* flush data-only devices */
 	if (mp->m_rtdev_targp)
@@ -563,7 +532,7 @@ xfs_flush_inodes(
 /*
  * Every sync period we need to unpin all items, reclaim inodes and sync
  * disk quotas.  We might need to cover the log to indicate that the
- * filesystem is idle.
+ * filesystem is idle and not frozen.
  */
 STATIC void
 xfs_sync_worker(
@@ -577,8 +546,9 @@ xfs_sync_worker(
 		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-		if (xfs_log_need_covered(mp))
-			error = xfs_commit_dummy_trans(mp, 0);
+		if (mp->m_super->s_frozen == SB_UNFROZEN &&
+		    xfs_log_need_covered(mp))
+			error = xfs_fs_log_dummy(mp, 0);
 	}
 	mp->m_sync_seq++;
 	wake_up(&mp->m_wait_single_sync_task);
@@ -698,14 +668,11 @@ xfs_inode_set_reclaim_tag(
 	xfs_perag_put(pag);
 }
 
-void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t	*mp,
+STATIC void
+__xfs_inode_clear_reclaim(
 	xfs_perag_t	*pag,
 	xfs_inode_t	*ip)
 {
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 	pag->pag_ici_reclaimable--;
 	if (!pag->pag_ici_reclaimable) {
 		/* clear the reclaim tag from the perag radix tree */
@@ -719,6 +686,17 @@ __xfs_inode_clear_reclaim_tag(
 	}
 }
 
+void
+__xfs_inode_clear_reclaim_tag(
+	xfs_mount_t	*mp,
+	xfs_perag_t	*pag,
+	xfs_inode_t	*ip)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	__xfs_inode_clear_reclaim(pag, ip);
+}
+
 /*
  * Inodes in different states need to be treated differently, and the return
  * value of xfs_iflush is not sufficient to get this right. The following table
@@ -868,6 +846,7 @@ reclaim:
 	if (!radix_tree_delete(&pag->pag_ici_root,
 				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
 		ASSERT(0);
+	__xfs_inode_clear_reclaim(pag, ip);
 	write_unlock(&pag->pag_ici_lock);
 
 	/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 23f14e595c18..f90dadd5a968 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5533,12 +5533,24 @@ xfs_getbmap(
 					map[i].br_startblock))
 				goto out_free_map;
 
-			nexleft--;
 			bmv->bmv_offset =
 				out[cur_ext].bmv_offset +
 				out[cur_ext].bmv_length;
 			bmv->bmv_length =
 				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+
+			/*
+			 * In case we don't want to return the hole,
+			 * don't increase cur_ext so that we can reuse
+			 * it in the next loop.
+			 */
+			if ((iflags & BMV_IF_NO_HOLES) &&
+			    map[i].br_startblock == HOLESTARTBLOCK) {
+				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
+				continue;
+			}
+
+			nexleft--;
 			bmv->bmv_entries++;
 			cur_ext++;
 		}
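The xfs_getbmap hunk implements BMV_IF_NO_HOLES by zeroing the just-filled output slot and reusing it on the next iteration instead of advancing cur_ext. The same slot-reuse loop in miniature (HOLE and the array contents are invented):

#include <stdio.h>
#include <string.h>

#define HOLE (-2)	/* stand-in for HOLESTARTBLOCK */

int main(void)
{
	int blocks[] = { 100, HOLE, 200, HOLE, 300 };
	int out[5];
	int cur = 0;

	for (int i = 0; i < 5; i++) {
		out[cur] = blocks[i];
		if (blocks[i] == HOLE) {
			/* wipe the slot and let the next entry reuse it */
			memset(&out[cur], 0, sizeof(out[cur]));
			continue;
		}
		cur++;
	}
	for (int i = 0; i < cur; i++)
		printf("entry %d: %d\n", i, out[i]);	/* 100 200 300 */
	return 0;
}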
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..87c2e9d02288 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
 #define BMV_IF_NO_DMAPI_READ	0x2	/* Do not generate DMAPI read event */
 #define BMV_IF_PREALLOC		0x4	/* rtn status BMV_OF_PREALLOC if req */
 #define BMV_IF_DELALLOC		0x8	/* rtn status BMV_OF_DELALLOC if req */
+#define BMV_IF_NO_HOLES		0x10	/* Do not return holes */
 #define BMV_IF_VALID	\
-	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
+	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|	\
+	 BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
 
 /* bmv_oflags values - returned for each non-header segment */
 #define BMV_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
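BMV_IF_NO_HOLES extends the user-visible getbmapx interface. A hedged sketch of a userspace caller requesting hole-free extent data; it assumes the xfsprogs headers (xfs/xfs.h) export struct getbmapx, the BMV_IF_* flags and XFS_IOC_GETBMAPX, and it trims error handling:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* struct getbmapx, BMV_IF_*, XFS_IOC_GETBMAPX */

int main(int argc, char **argv)
{
	struct getbmapx bmx[17];	/* slot 0 is the header */
	int fd, i;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	memset(bmx, 0, sizeof(bmx));
	bmx[0].bmv_offset = 0;
	bmx[0].bmv_length = -1;		/* map to end of file */
	bmx[0].bmv_count = 17;		/* header plus up to 16 extents */
	bmx[0].bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;

	if (ioctl(fd, XFS_IOC_GETBMAPX, bmx) < 0)
		return 1;
	for (i = 1; i <= bmx[0].bmv_entries; i++)
		printf("extent %d: offset %lld, length %lld (512b units)\n",
		       i, (long long)bmx[i].bmv_offset,
		       (long long)bmx[i].bmv_length);
	close(fd);
	return 0;
}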
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dbca5f5c37ba..43b1d5699335 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -604,31 +604,36 @@ out:
 	return 0;
 }
 
+/*
+ * Dump a transaction into the log that contains no real change. This is needed
+ * to be able to make the log dirty or stamp the current tail LSN into the log
+ * during the covering operation.
+ *
+ * We cannot use an inode here for this - that will push dirty state back up
+ * into the VFS and then periodic inode flushing will prevent log covering from
+ * making progress. Hence we log a field in the superblock instead.
+ */
 int
 xfs_fs_log_dummy(
-	xfs_mount_t	*mp)
+	xfs_mount_t	*mp,
+	int		flags)
 {
 	xfs_trans_t	*tp;
-	xfs_inode_t	*ip;
 	int		error;
 
 	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+					XFS_DEFAULT_LOG_COUNT);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
 	}
 
-	ip = mp->m_rootip;
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return error;
+	/* log the UUID because it is an unchanging field */
+	xfs_mod_sb(tp, XFS_SB_UUID);
+	if (flags & SYNC_WAIT)
+		xfs_trans_set_sync(tp);
+	return xfs_trans_commit(tp, 0);
 }
 
 int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9..a786c5212c1e 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
 				xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(xfs_mount_t *mp);
+extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
 
 #endif	/* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index abf80ae1e95b..5371d2dc360e 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1213,7 +1213,6 @@ xfs_imap_lookup(
 	struct xfs_inobt_rec_incore rec;
 	struct xfs_btree_cur	*cur;
 	struct xfs_buf		*agbp;
-	xfs_agino_t		startino;
 	int			error;
 	int			i;
 
@@ -1227,13 +1226,13 @@ xfs_imap_lookup(
 	}
 
 	/*
-	 * derive and lookup the exact inode record for the given agino. If the
-	 * record cannot be found, then it's an invalid inode number and we
-	 * should abort.
+	 * Lookup the inode record for the given agino. If the record cannot be
+	 * found, then it's an invalid inode number and we should abort. Once
+	 * we have a record, we need to ensure it contains the inode number
+	 * we are looking up.
 	 */
 	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
-	startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
-	error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
 	if (!error) {
 		if (i)
 			error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1246,6 +1245,11 @@ xfs_imap_lookup(
 	if (error)
 		return error;
 
+	/* check that the returned record contains the required inode */
+	if (rec.ir_startino > agino ||
+	    rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
+		return EINVAL;
+
 	/* for untrusted inodes check it is allocated first */
 	if ((flags & XFS_IGET_UNTRUSTED) &&
 	    (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
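The xfs_imap_lookup change swaps an exact-match lookup of a derived chunk start for a less-or-equal lookup on the agino itself, which is why the containment check is added below it. The pattern in miniature, with a plain sorted array standing in for the inode btree:

#include <stdio.h>

#define CHUNK 64	/* stand-in for XFS_IALLOC_INODES() */

/* greatest start <= key, or -1: the XFS_LOOKUP_LE analogue */
static int lookup_le(const int *starts, int n, int key)
{
	int found = -1;

	for (int i = 0; i < n && starts[i] <= key; i++)
		found = i;
	return found;
}

int main(void)
{
	int starts[] = { 0, 64, 256 };	/* sparse: 128..255 unallocated */
	int key = 130;
	int i = lookup_le(starts, 3, key);

	/*
	 * An LE lookup succeeds even when the key falls in a gap, so
	 * the containment check from the patch is still required.
	 */
	if (i < 0 || starts[i] + CHUNK <= key)
		printf("inode %d: invalid (EINVAL)\n", key);
	else
		printf("inode %d: chunk starting at %d\n", key, starts[i]);
	return 0;
}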
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb4f23c..34798f391c49 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
 	return 0;
 }
 
+/*
+ * A big issue when freeing the inode cluster is is that we _cannot_ skip any
+ * inodes that are in memory - they all must be marked stale and attached to
+ * the cluster buffer.
+ */
 STATIC void
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
 	}
 
 	for (j = 0; j < nbufs; j++, inum += ninodes) {
-		int	found = 0;
-
 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
 					 XFS_INO_TO_AGBNO(mp, inum));
 
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
 		/*
 		 * Walk the inodes already attached to the buffer and mark them
 		 * stale. These will all have the flush locks held, so an
-		 * in-memory inode walk can't lock them.
+		 * in-memory inode walk can't lock them. By marking them all
+		 * stale first, we will not attempt to lock them in the loop
+		 * below as the XFS_ISTALE flag will be set.
 		 */
 		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 		while (lip) {
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
 					&iip->ili_flush_lsn,
 					&iip->ili_item.li_lsn);
 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
-				found++;
 			}
 			lip = lip->li_bio_list;
 		}
 
+
 		/*
 		 * For each inode in memory attempt to add it to the inode
 		 * buffer and set it up for being staled on buffer IO
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
 		 * even trying to lock them.
 		 */
 		for (i = 0; i < ninodes; i++) {
+retry:
 			read_lock(&pag->pag_ici_lock);
 			ip = radix_tree_lookup(&pag->pag_ici_root,
 					XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
 				continue;
 			}
 
-			/* don't try to lock/unlock the current inode */
+			/*
+			 * Don't try to lock/unlock the current inode, but we
+			 * _cannot_ skip the other inodes that we did not find
+			 * in the list attached to the buffer and are not
+			 * already marked stale. If we can't lock it, back off
+			 * and retry.
+			 */
 			if (ip != free_ip &&
 			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
 				read_unlock(&pag->pag_ici_lock);
-				continue;
+				delay(1);
+				goto retry;
 			}
 			read_unlock(&pag->pag_ici_lock);
 
-			if (!xfs_iflock_nowait(ip)) {
-				if (ip != free_ip)
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				continue;
-			}
-
+			xfs_iflock(ip);
 			xfs_iflags_set(ip, XFS_ISTALE);
-			if (xfs_inode_clean(ip)) {
-				ASSERT(ip != free_ip);
-				xfs_ifunlock(ip);
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				continue;
-			}
 
+			/*
+			 * we don't need to attach clean inodes or those only
+			 * with unlogged changes (which we throw away, anyway).
+			 */
 			iip = ip->i_itemp;
-			if (!iip) {
-				/* inode with unlogged changes only */
+			if (!iip || xfs_inode_clean(ip)) {
 				ASSERT(ip != free_ip);
 				ip->i_update_core = 0;
 				xfs_ifunlock(ip);
 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
 				continue;
 			}
-			found++;
 
 			iip->ili_last_fields = iip->ili_format.ilf_fields;
 			iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
-		if (found)
-			xfs_trans_stale_inode_buf(tp, bp);
+		xfs_trans_stale_inode_buf(tp, bp);
 		xfs_trans_binval(tp, bp);
 	}
 
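The xfs_ifree_cluster rework replaces "skip on trylock failure" with back-off-and-retry, because correctness demands that every in-memory inode of the freed cluster be marked stale. A compact userspace sketch of the pattern using pthreads in place of the kernel locks (illustrative only):

#include <pthread.h>
#include <unistd.h>

/*
 * Correctness requires that every item be processed, so lock
 * contention is handled by waiting and retrying, never by skipping
 * (the behaviour the old xfs_ifree_cluster code had).
 */
static void stale_all(pthread_mutex_t *locks, int n)
{
	for (int i = 0; i < n; i++) {
retry:
		if (pthread_mutex_trylock(&locks[i]) != 0) {
			usleep(1000);	/* like delay(1): back off... */
			goto retry;	/* ...but never skip the item */
		}
		/* mark the item stale here */
		pthread_mutex_unlock(&locks[i]);
	}
}

int main(void)
{
	pthread_mutex_t locks[4];

	for (int i = 0; i < 4; i++)
		pthread_mutex_init(&locks[i], NULL);
	stale_all(locks, 4);
	return 0;
}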
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 430a8fc02c1f..ba8e36e0b4e7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3002,7 +3002,8 @@ _xfs_log_force(
 
 	XFS_STATS_INC(xs_log_force);
 
-	xlog_cil_push(log, 1);
+	if (log->l_cilp)
+		xlog_cil_force(log);
 
 	spin_lock(&log->l_icloglock);
 
@@ -3154,7 +3155,7 @@ _xfs_log_force_lsn(
 	XFS_STATS_INC(xs_log_force);
 
 	if (log->l_cilp) {
-		lsn = xlog_cil_push_lsn(log, lsn);
+		lsn = xlog_cil_force_lsn(log, lsn);
 		if (lsn == NULLCOMMITLSN)
 			return 0;
 	}
@@ -3711,7 +3712,7 @@ xfs_log_force_umount(
 	 * call below.
 	 */
 	if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
-		xlog_cil_push(log, 1);
+		xlog_cil_force(log);
 
 	/*
 	 * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19ac..7e206fc1fa36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
 	ctx->sequence = 1;
 	ctx->cil = cil;
 	cil->xc_ctx = ctx;
+	cil->xc_current_sequence = ctx->sequence;
 
 	cil->xc_log = log;
 	log->l_cilp = cil;
@@ -269,15 +270,10 @@ xlog_cil_insert(
 static void
 xlog_cil_format_items(
 	struct log		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*ticket,
-	xfs_lsn_t		*start_lsn)
+	struct xfs_log_vec	*log_vector)
 {
 	struct xfs_log_vec *lv;
 
-	if (start_lsn)
-		*start_lsn = log->l_cilp->xc_ctx->sequence;
-
 	ASSERT(log_vector);
 	for (lv = log_vector; lv; lv = lv->lv_next) {
 		void *ptr;
@@ -301,9 +297,24 @@ xlog_cil_format_items(
 			ptr += vec->i_len;
 		}
 		ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+	}
+}
 
+static void
+xlog_cil_insert_items(
+	struct log		*log,
+	struct xfs_log_vec	*log_vector,
+	struct xlog_ticket	*ticket,
+	xfs_lsn_t		*start_lsn)
+{
+	struct xfs_log_vec *lv;
+
+	if (start_lsn)
+		*start_lsn = log->l_cilp->xc_ctx->sequence;
+
+	ASSERT(log_vector);
+	for (lv = log_vector; lv; lv = lv->lv_next)
 		xlog_cil_insert(log, ticket, lv->lv_item, lv);
-	}
 }
 
 static void
@@ -321,80 +332,6 @@ xlog_cil_free_logvec(
 }
 
 /*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
- */
-int
-xfs_log_commit_cil(
-	struct xfs_mount	*mp,
-	struct xfs_trans	*tp,
-	struct xfs_log_vec	*log_vector,
-	xfs_lsn_t		*commit_lsn,
-	int			flags)
-{
-	struct log		*log = mp->m_log;
-	int			log_flags = 0;
-	int			push = 0;
-
-	if (flags & XFS_TRANS_RELEASE_LOG_RES)
-		log_flags = XFS_LOG_REL_PERM_RESERV;
-
-	if (XLOG_FORCED_SHUTDOWN(log)) {
-		xlog_cil_free_logvec(log_vector);
-		return XFS_ERROR(EIO);
-	}
-
-	/* lock out background commit */
-	down_read(&log->l_cilp->xc_ctx_lock);
-	xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
-
-	/* check we didn't blow the reservation */
-	if (tp->t_ticket->t_curr_res < 0)
-		xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
-	/* attach the transaction to the CIL if it has any busy extents */
-	if (!list_empty(&tp->t_busy)) {
-		spin_lock(&log->l_cilp->xc_cil_lock);
-		list_splice_init(&tp->t_busy,
-					&log->l_cilp->xc_ctx->busy_extents);
-		spin_unlock(&log->l_cilp->xc_cil_lock);
-	}
-
-	tp->t_commit_lsn = *commit_lsn;
-	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
-	xfs_trans_unreserve_and_mod_sb(tp);
-
-	/* check for background commit before unlock */
-	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
-		push = 1;
-	up_read(&log->l_cilp->xc_ctx_lock);
-
-	/*
-	 * We need to push CIL every so often so we don't cache more than we
-	 * can fit in the log. The limit really is that a checkpoint can't be
-	 * more than half the log (the current checkpoint is not allowed to
-	 * overwrite the previous checkpoint), but commit latency and memory
-	 * usage limit this to a smaller size in most cases.
-	 */
-	if (push)
-		xlog_cil_push(log, 0);
-	return 0;
-}
-
-/*
  * Mark all items committed and clear busy extents. We free the log vector
  * chains in a separate pass so that we unpin the log items as quickly as
  * possible.
@@ -427,13 +364,23 @@ xlog_cil_committed(
 }
 
 /*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq flag is zero, then it
+ * is a background flush and so we can chose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allows log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
  */
-int
+STATIC int
 xlog_cil_push(
 	struct log		*log,
-	int			push_now)
+	xfs_lsn_t		push_seq)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_log_vec	*lv;
@@ -453,12 +400,20 @@ xlog_cil_push(
 	if (!cil)
 		return 0;
 
+	ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
 	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
-	/* lock out transaction commit, but don't block on background push */
+	/*
+	 * Lock out transaction commit, but don't block for background pushes
+	 * unless we are well over the CIL space limit. See the definition of
+	 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+	 * used here.
+	 */
 	if (!down_write_trylock(&cil->xc_ctx_lock)) {
-		if (!push_now)
+		if (!push_seq &&
+		    cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
 			goto out_free_ticket;
 		down_write(&cil->xc_ctx_lock);
 	}
@@ -469,7 +424,11 @@ xlog_cil_push(
 		goto out_skip;
 
 	/* check for spurious background flush */
-	if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+	if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+		goto out_skip;
+
+	/* check for a previously pushed seqeunce */
+	if (push_seq && push_seq < cil->xc_ctx->sequence)
 		goto out_skip;
 
 	/*
@@ -515,6 +474,13 @@ xlog_cil_push(
 	cil->xc_ctx = new_ctx;
 
 	/*
+	 * mirror the new sequence into the cil structure so that we can do
+	 * unlocked checks against the current sequence in log forces without
+	 * risking deferencing a freed context pointer.
+	 */
+	cil->xc_current_sequence = new_ctx->sequence;
+
+	/*
 	 * The switch is now done, so we can drop the context lock and move out
 	 * of a shared context. We can't just go straight to the commit record,
 	 * though - we need to synchronise with previous and future commits so
@@ -626,6 +592,102 @@ out_abort:
 }
 
 /*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_log_vec	*log_vector,
+	xfs_lsn_t		*commit_lsn,
+	int			flags)
+{
+	struct log		*log = mp->m_log;
+	int			log_flags = 0;
+	int			push = 0;
+
+	if (flags & XFS_TRANS_RELEASE_LOG_RES)
+		log_flags = XFS_LOG_REL_PERM_RESERV;
+
+	if (XLOG_FORCED_SHUTDOWN(log)) {
+		xlog_cil_free_logvec(log_vector);
+		return XFS_ERROR(EIO);
+	}
+
+	/*
+	 * do all the hard work of formatting items (including memory
+	 * allocation) outside the CIL context lock. This prevents stalling CIL
+	 * pushes when we are low on memory and a transaction commit spends a
+	 * lot of time in memory reclaim.
+	 */
+	xlog_cil_format_items(log, log_vector);
+
+	/* lock out background commit */
+	down_read(&log->l_cilp->xc_ctx_lock);
+	xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+	/* check we didn't blow the reservation */
+	if (tp->t_ticket->t_curr_res < 0)
+		xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+	/* attach the transaction to the CIL if it has any busy extents */
+	if (!list_empty(&tp->t_busy)) {
+		spin_lock(&log->l_cilp->xc_cil_lock);
+		list_splice_init(&tp->t_busy,
+					&log->l_cilp->xc_ctx->busy_extents);
+		spin_unlock(&log->l_cilp->xc_cil_lock);
+	}
+
+	tp->t_commit_lsn = *commit_lsn;
+	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+	xfs_trans_unreserve_and_mod_sb(tp);
+
+	/*
+	 * Once all the items of the transaction have been copied to the CIL,
+	 * the items can be unlocked and freed.
+	 *
+	 * This needs to be done before we drop the CIL context lock because we
+	 * have to update state in the log items and unlock them before they go
+	 * to disk. If we don't, then the CIL checkpoint can race with us and
+	 * we can run checkpoint completion before we've updated and unlocked
+	 * the log items. This affects (at least) processing of stale buffers,
+	 * inodes and EFIs.
+	 */
+	xfs_trans_free_items(tp, *commit_lsn, 0);
+
+	/* check for background commit before unlock */
+	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+		push = 1;
+
+	up_read(&log->l_cilp->xc_ctx_lock);
+
+	/*
+	 * We need to push CIL every so often so we don't cache more than we
+	 * can fit in the log. The limit really is that a checkpoint can't be
+	 * more than half the log (the current checkpoint is not allowed to
+	 * overwrite the previous checkpoint), but commit latency and memory
+	 * usage limit this to a smaller size in most cases.
+	 */
+	if (push)
+		xlog_cil_push(log, 0);
+	return 0;
+}
+
+/*
  * Conditionally push the CIL based on the sequence passed in.
  *
  * We only need to push if we haven't already pushed the sequence
@@ -639,39 +701,34 @@ out_abort:
  * commit lsn is there. It'll be empty, so this is broken for now.
  */
 xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
 	struct log	*log,
-	xfs_lsn_t	push_seq)
+	xfs_lsn_t	sequence)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx;
 	xfs_lsn_t		commit_lsn = NULLCOMMITLSN;
 
-restart:
-	down_write(&cil->xc_ctx_lock);
-	ASSERT(push_seq <= cil->xc_ctx->sequence);
-
-	/* check to see if we need to force out the current context */
-	if (push_seq == cil->xc_ctx->sequence) {
-		up_write(&cil->xc_ctx_lock);
-		xlog_cil_push(log, 1);
-		goto restart;
-	}
+	ASSERT(sequence <= cil->xc_current_sequence);
+
+	/*
+	 * check to see if we need to force out the current context.
+	 * xlog_cil_push() handles racing pushes for the same sequence,
+	 * so no need to deal with it here.
+	 */
+	if (sequence == cil->xc_current_sequence)
+		xlog_cil_push(log, sequence);
 
 	/*
 	 * See if we can find a previous sequence still committing.
-	 * We can drop the flush lock as soon as we have the cil lock
-	 * because we are now only comparing contexts protected by
-	 * the cil lock.
-	 *
 	 * We need to wait for all previous sequence commits to complete
 	 * before allowing the force of push_seq to go ahead. Hence block
 	 * on commits for those as well.
 	 */
+restart:
 	spin_lock(&cil->xc_cil_lock);
-	up_write(&cil->xc_ctx_lock);
 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
-		if (ctx->sequence > push_seq)
+		if (ctx->sequence > sequence)
 			continue;
 		if (!ctx->commit_lsn) {
 			/*
@@ -681,7 +738,7 @@ restart:
 			sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
 			goto restart;
 		}
-		if (ctx->sequence != push_seq)
+		if (ctx->sequence != sequence)
 			continue;
 		/* found it! */
 		commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,16 +422,17 @@ struct xfs_cil {
 	struct rw_semaphore	xc_ctx_lock;
 	struct list_head	xc_committing;
 	sv_t			xc_commit_wait;
+	xfs_lsn_t		xc_current_sequence;
 };
 
 /*
- * The amount of log space we should the CIL to aggregate is difficult to size.
- * Whatever we chose we have to make we can get a reservation for the log space
- * effectively, that it is large enough to capture sufficient relogging to
- * reduce log buffer IO significantly, but it is not too large for the log or
- * induces too much latency when writing out through the iclogs. We track both
- * space consumed and the number of vectors in the checkpoint context, so we
- * need to decide which to use for limiting.
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
  *
  * Every log buffer we write out during a push needs a header reserved, which
  * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -458,16 +459,21 @@ struct xfs_cil {
  * checkpoint transaction ticket is specific to the checkpoint context, rather
  * than the CIL itself.
  *
- * With dynamic reservations, we can basically make up arbitrary limits for the
- * checkpoint size so long as they don't violate any other size rules. Hence
- * the initial maximum size for the checkpoint transaction will be set to a
- * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit
- * right now based on the latency of writing out a large amount of data through
- * the circular iclog buffers.
+ * With dynamic reservations, we can effectively make up arbitrary limits for
+ * the checkpoint size so long as they don't violate any other size rules.
+ * Recovery imposes a rule that no transaction exceed half the log, so we are
+ * limited by that. Furthermore, the log transaction reservation subsystem
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+ * In order to keep background CIL push efficient, we will set a lower
+ * threshold at which background pushing is attempted without blocking current
+ * transaction commits. A separate, higher bound defines when CIL pushes are
+ * enforced to ensure we stay within our maximum checkpoint size bounds.
+ * threshold, yet give us plenty of space for aggregation on large logs.
  */
-
-#define XLOG_CIL_SPACE_LIMIT(log) \
-	(min((log->l_logsize >> 2), (8 * 1024 * 1024)))
+#define XLOG_CIL_SPACE_LIMIT(log)	(log->l_logsize >> 3)
+#define XLOG_CIL_HARD_SPACE_LIMIT(log)	(3 * (log->l_logsize >> 4))
 
 /*
  * The reservation head lsn is not made up of a cycle number and block number.
@@ -562,8 +568,16 @@ int	xlog_cil_init(struct log *log);
 void	xlog_cil_init_post_recovery(struct log *log);
 void	xlog_cil_destroy(struct log *log);
 
-int	xlog_cil_push(struct log *log, int push_now);
-xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
+/*
+ * CIL force routines
+ */
+xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+
+static inline void
+xlog_cil_force(struct log *log)
+{
+	xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+}
 
 /*
 * Unmount record type is used as a pseudo transaction type for the ticket.
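The two new macros encode separate thresholds: background CIL pushes start once the CIL reaches 1/8 of the log, and transaction commits only block on a push past the hard limit at 3/16. A trivial sketch of that arithmetic, assuming l_logsize is the log size in bytes:

#include <stdio.h>

int main(void)
{
	long logsize = 64L << 20;		/* a 64 MiB log */
	long soft = logsize >> 3;		/* XLOG_CIL_SPACE_LIMIT */
	long hard = 3 * (logsize >> 4);		/* XLOG_CIL_HARD_SPACE_LIMIT */

	printf("background push from %ld MiB\n", soft >> 20);	/* 8 */
	printf("commits block from   %ld MiB\n", hard >> 20);	/* 12 */
	return 0;
}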
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdca7416c754..1c47edaea0d2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1167,7 +1167,7 @@ xfs_trans_del_item(
 * Unlock all of the items of a transaction and free all the descriptors
 * of that transaction.
 */
-STATIC void
+void
 xfs_trans_free_items(
 	struct xfs_trans	*tp,
 	xfs_lsn_t		commit_lsn,
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
 		return error;
 
 	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-
-	/* xfs_trans_free_items() unlocks them first */
-	xfs_trans_free_items(tp, *commit_lsn, 0);
 	xfs_trans_free(tp);
 	return 0;
 }
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index e2d93d8ead7b..62da86c90de5 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -25,7 +25,8 @@ struct xfs_trans;
 
 void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void	xfs_trans_del_item(struct xfs_log_item *);
-
+void	xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
+				int flags);
 void	xfs_trans_item_committed(struct xfs_log_item *lip,
 					xfs_lsn_t commit_lsn, int aborted);
 void	xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 66d585c6917c..4c7c7bfb2b2f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space(
 			e = allocatesize_fsb;
 		}
 
+		/*
+		 * The transaction reservation is limited to a 32-bit block
+		 * count, hence we need to limit the number of blocks we are
+		 * trying to reserve to avoid an overflow. We can't allocate
+		 * more than @nimaps extents, and an extent is limited on disk
+		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
+		 */
+		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
 		if (unlikely(rt)) {
-			resrtextents = qblocks = (uint)(e - s);
+			resrtextents = qblocks = resblks;
 			resrtextents /= mp->m_sb.sb_rextsize;
 			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 			quota_flag = XFS_QMOPT_RES_RTBLKS;
 		} else {
 			resrtextents = 0;
-			resblks = qblocks = \
-				XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
 			quota_flag = XFS_QMOPT_RES_REGBLKS;
 		}
 
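The xfs_alloc_file_space hunk clamps the 64-bit block count to nimaps * MAXEXTLEN before it reaches the 32-bit transaction reservation interface. A standalone sketch of the clamp; the MAXEXTLEN value mirrors the 21-bit on-disk extent length field:

#include <stdio.h>
#include <stdint.h>

#define MAXEXTLEN ((1u << 21) - 1)	/* 21-bit on-disk extent length */

int main(void)
{
	uint64_t len_fsb = 1ULL << 33;	/* request of 2^33 blocks */
	uint64_t nimaps = 1;
	uint64_t limit = MAXEXTLEN * nimaps;

	/* the min_t() clamp from the patch, spelled out */
	uint64_t resblks = len_fsb < limit ? len_fsb : limit;

	printf("reserving %llu blocks (fits in 32 bits: %s)\n",
	       (unsigned long long)resblks,
	       resblks <= UINT32_MAX ? "yes" : "no");
	return 0;
}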