Diffstat (limited to 'fs/xfs')
 -rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c   |  13
 -rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c    |   8
 -rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h    |   1
 -rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c  |   7
 -rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c   |   2
 -rw-r--r--  fs/xfs/linux-2.6/xfs_super.c  |   9
 -rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c   |  42
 -rw-r--r--  fs/xfs/xfs_bmap.c             |  14
 -rw-r--r--  fs/xfs/xfs_fs.h               |   4
 -rw-r--r--  fs/xfs/xfs_fsops.c            |  31
 -rw-r--r--  fs/xfs/xfs_fsops.h            |   2
 -rw-r--r--  fs/xfs/xfs_ialloc.c           |  16
 -rw-r--r--  fs/xfs/xfs_inode.c            |  49
 -rw-r--r--  fs/xfs/xfs_log.c              |   7
 -rw-r--r--  fs/xfs/xfs_log_cil.c          | 263
 -rw-r--r--  fs/xfs/xfs_log_priv.h         |  13
 -rw-r--r--  fs/xfs/xfs_trans.c            |   5
 -rw-r--r--  fs/xfs/xfs_trans_priv.h       |   3
 -rw-r--r--  fs/xfs/xfs_vnodeops.c         |  13
 19 files changed, 286 insertions, 216 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 15412fe15c3a..b552f816de15 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -852,8 +852,8 @@ xfs_convert_page(
 			SetPageUptodate(page);
 
 		if (count) {
-			wbc->nr_to_write--;
-			if (wbc->nr_to_write <= 0)
+			if (--wbc->nr_to_write <= 0 &&
+			    wbc->sync_mode == WB_SYNC_NONE)
 				done = 1;
 		}
 		xfs_start_page_writeback(page, !page_dirty, count);
@@ -1068,7 +1068,7 @@ xfs_vm_writepage(
 	 * by themselves.
 	 */
 	if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-		goto out_fail;
+		goto redirty;
 
 	/*
 	 * We need a transaction if there are delalloc or unwritten buffers
@@ -1080,7 +1080,7 @@ xfs_vm_writepage(
 	 */
 	xfs_count_page_state(page, &delalloc, &unwritten);
 	if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
-		goto out_fail;
+		goto redirty;
 
 	/* Is this page beyond the end of the file? */
 	offset = i_size_read(inode);
@@ -1245,12 +1245,15 @@ error:
 	if (iohead)
 		xfs_cancel_ioend(iohead);
 
+	if (err == -EAGAIN)
+		goto redirty;
+
 	xfs_aops_discard_page(page);
 	ClearPageUptodate(page);
 	unlock_page(page);
 	return err;
 
-out_fail:
+redirty:
 	redirty_page_for_writepage(wbc, page);
 	unlock_page(page);
 	return 0;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ea79072f5210..d72cf2bb054a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -440,12 +440,7 @@ _xfs_buf_find(
 		ASSERT(btp == bp->b_target);
 		if (bp->b_file_offset == range_base &&
 		    bp->b_buffer_length == range_length) {
-			/*
-			 * If we look at something, bring it to the
-			 * front of the list for next time.
-			 */
 			atomic_inc(&bp->b_hold);
-			list_move(&bp->b_hash_list, &hash->bh_list);
 			goto found;
 		}
 	}
@@ -1443,8 +1438,7 @@ xfs_alloc_bufhash(
 {
 	unsigned int		i;
 
-	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
-	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+	btp->bt_hashshift = external ? 3 : 12;	/* 8 or 4096 buckets */
 	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
 					 sizeof(xfs_bufhash_t));
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index d072e5ff923b..2a05614f0b92 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -137,7 +137,6 @@ typedef struct xfs_buftarg {
 	size_t			bt_smask;
 
 	/* per device buffer hash table */
-	uint			bt_hashmask;
 	uint			bt_hashshift;
 	xfs_bufhash_t		*bt_hash;
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 237f5ffb2ee8..4fec427b83ef 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -907,6 +907,13 @@ xfs_ioctl_setattr(
 		return XFS_ERROR(EIO);
 
 	/*
+	 * Disallow 32bit project ids because on-disk structure
+	 * is 16bit only.
+	 */
+	if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+		return XFS_ERROR(EINVAL);
+
+	/*
 	 * If disk quotas is on, we make sure that the dquots do exist on disk,
 	 * before we start any other transactions. Trying to do this later
 	 * is messy. We don't care to take a readlock to look at the ids
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 68be25dcd301..b1fc2a6bfe83 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -664,7 +664,7 @@ xfs_vn_fiemap(
 				fieinfo->fi_extents_max + 1;
 	bm.bmv_count = min_t(__s32, bm.bmv_count,
 			(PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC;
+	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
 		bm.bmv_iflags |= BMV_IF_ATTRFORK;
 	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 15c35b62ff14..a4e07974955b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1226,6 +1226,7 @@ xfs_fs_statfs(
 	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
 	__uint64_t		fakeinos, id;
 	xfs_extlen_t		lsize;
+	__int64_t		ffree;
 
 	statp->f_type = XFS_SB_MAGIC;
 	statp->f_namelen = MAXNAMELEN - 1;
@@ -1249,7 +1250,11 @@ xfs_fs_statfs(
 	statp->f_files = min_t(typeof(statp->f_files),
 					statp->f_files,
 					mp->m_maxicount);
-	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+
+	/* make sure statp->f_ffree does not underflow */
+	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+	statp->f_ffree = max_t(__int64_t, ffree, 0);
+
 	spin_unlock(&mp->m_sb_lock);
 
 	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1402,7 +1407,7 @@ xfs_fs_freeze(
 
 	xfs_save_resvblks(mp);
 	xfs_quiesce_attr(mp);
-	return -xfs_fs_log_dummy(mp);
+	return -xfs_fs_log_dummy(mp, SYNC_WAIT);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index dfcbd98d1599..d59c4a65d492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -34,6 +34,7 @@
 #include "xfs_inode_item.h"
 #include "xfs_quota.h"
 #include "xfs_trace.h"
+#include "xfs_fsops.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -341,38 +342,6 @@ xfs_sync_attr(
 }
 
 STATIC int
-xfs_commit_dummy_trans(
-	struct xfs_mount	*mp,
-	uint			flags)
-{
-	struct xfs_inode	*ip = mp->m_rootip;
-	struct xfs_trans	*tp;
-	int			error;
-
-	/*
-	 * Put a dummy transaction in the log to tell recovery
-	 * that all others are OK.
-	 */
-	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_commit(tp, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	/* the log force ensures this transaction is pushed to disk */
-	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-	return error;
-}
-
-STATIC int
 xfs_sync_fsdata(
 	struct xfs_mount	*mp)
 {
@@ -432,7 +401,7 @@ xfs_quiesce_data(
 
 	/* mark the log as covered if needed */
 	if (xfs_log_need_covered(mp))
-		error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
+		error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
 
 	/* flush data-only devices */
 	if (mp->m_rtdev_targp)
@@ -563,7 +532,7 @@ xfs_flush_inodes(
 /*
  * Every sync period we need to unpin all items, reclaim inodes and sync
  * disk quotas. We might need to cover the log to indicate that the
- * filesystem is idle.
+ * filesystem is idle and not frozen.
  */
 STATIC void
 xfs_sync_worker(
@@ -577,8 +546,9 @@ xfs_sync_worker(
 		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-		if (xfs_log_need_covered(mp))
-			error = xfs_commit_dummy_trans(mp, 0);
+		if (mp->m_super->s_frozen == SB_UNFROZEN &&
+		    xfs_log_need_covered(mp))
+			error = xfs_fs_log_dummy(mp, 0);
 	}
 	mp->m_sync_seq++;
 	wake_up(&mp->m_wait_single_sync_task);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 23f14e595c18..f90dadd5a968 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5533,12 +5533,24 @@ xfs_getbmap(
 					map[i].br_startblock))
 				goto out_free_map;
 
-			nexleft--;
 			bmv->bmv_offset =
 				out[cur_ext].bmv_offset +
 				out[cur_ext].bmv_length;
 			bmv->bmv_length =
 				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+
+			/*
+			 * In case we don't want to return the hole,
+			 * don't increase cur_ext so that we can reuse
+			 * it in the next loop.
+			 */
+			if ((iflags & BMV_IF_NO_HOLES) &&
+			    map[i].br_startblock == HOLESTARTBLOCK) {
+				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
+				continue;
+			}
+
+			nexleft--;
 			bmv->bmv_entries++;
 			cur_ext++;
 		}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..87c2e9d02288 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
 #define BMV_IF_NO_DMAPI_READ	0x2	/* Do not generate DMAPI read event */
 #define BMV_IF_PREALLOC		0x4	/* rtn status BMV_OF_PREALLOC if req */
 #define BMV_IF_DELALLOC		0x8	/* rtn status BMV_OF_DELALLOC if req */
+#define BMV_IF_NO_HOLES		0x10	/* Do not return holes */
 #define BMV_IF_VALID	\
-	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
+	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|	\
+	 BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
 
 /* bmv_oflags values - returned for each non-header segment */
 #define BMV_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dbca5f5c37ba..43b1d5699335 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -604,31 +604,36 @@ out:
 	return 0;
 }
 
+/*
+ * Dump a transaction into the log that contains no real change. This is needed
+ * to be able to make the log dirty or stamp the current tail LSN into the log
+ * during the covering operation.
+ *
+ * We cannot use an inode here for this - that will push dirty state back up
+ * into the VFS and then periodic inode flushing will prevent log covering from
+ * making progress. Hence we log a field in the superblock instead.
+ */
 int
 xfs_fs_log_dummy(
-	xfs_mount_t	*mp)
+	xfs_mount_t	*mp,
+	int		flags)
 {
 	xfs_trans_t	*tp;
-	xfs_inode_t	*ip;
 	int		error;
 
 	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+					XFS_DEFAULT_LOG_COUNT);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
 	}
 
-	ip = mp->m_rootip;
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return error;
+	/* log the UUID because it is an unchanging field */
+	xfs_mod_sb(tp, XFS_SB_UUID);
+	if (flags & SYNC_WAIT)
+		xfs_trans_set_sync(tp);
+	return xfs_trans_commit(tp, 0);
 }
 
 int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9..a786c5212c1e 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
 				xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(xfs_mount_t *mp);
+extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
 
 #endif	/* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index abf80ae1e95b..5371d2dc360e 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1213,7 +1213,6 @@ xfs_imap_lookup(
 	struct xfs_inobt_rec_incore rec;
 	struct xfs_btree_cur	*cur;
 	struct xfs_buf		*agbp;
-	xfs_agino_t		startino;
 	int			error;
 	int			i;
 
@@ -1227,13 +1226,13 @@ xfs_imap_lookup(
 	}
 
 	/*
-	 * derive and lookup the exact inode record for the given agino. If the
-	 * record cannot be found, then it's an invalid inode number and we
-	 * should abort.
+	 * Lookup the inode record for the given agino. If the record cannot be
+	 * found, then it's an invalid inode number and we should abort. Once
+	 * we have a record, we need to ensure it contains the inode number
+	 * we are looking up.
 	 */
 	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
-	startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
-	error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
 	if (!error) {
 		if (i)
 			error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1246,6 +1245,11 @@ xfs_imap_lookup(
 	if (error)
 		return error;
 
+	/* check that the returned record contains the required inode */
+	if (rec.ir_startino > agino ||
+	    rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
+		return EINVAL;
+
 	/* for untrusted inodes check it is allocated first */
 	if ((flags & XFS_IGET_UNTRUSTED) &&
 	    (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb4f23c..34798f391c49 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
 	return 0;
 }
 
+/*
+ * A big issue when freeing the inode cluster is is that we _cannot_ skip any
+ * inodes that are in memory - they all must be marked stale and attached to
+ * the cluster buffer.
+ */
 STATIC void
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
 	}
 
 	for (j = 0; j < nbufs; j++, inum += ninodes) {
-		int	found = 0;
-
 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
 					 XFS_INO_TO_AGBNO(mp, inum));
 
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
 		/*
 		 * Walk the inodes already attached to the buffer and mark them
 		 * stale. These will all have the flush locks held, so an
-		 * in-memory inode walk can't lock them.
+		 * in-memory inode walk can't lock them. By marking them all
+		 * stale first, we will not attempt to lock them in the loop
+		 * below as the XFS_ISTALE flag will be set.
 		 */
 		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 		while (lip) {
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
 						&iip->ili_flush_lsn,
 						&iip->ili_item.li_lsn);
 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
-				found++;
 			}
 			lip = lip->li_bio_list;
 		}
 
+
 		/*
 		 * For each inode in memory attempt to add it to the inode
 		 * buffer and set it up for being staled on buffer IO
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
 		 * even trying to lock them.
 		 */
 		for (i = 0; i < ninodes; i++) {
+retry:
 			read_lock(&pag->pag_ici_lock);
 			ip = radix_tree_lookup(&pag->pag_ici_root,
 					XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
 				continue;
 			}
 
-			/* don't try to lock/unlock the current inode */
+			/*
+			 * Don't try to lock/unlock the current inode, but we
+			 * _cannot_ skip the other inodes that we did not find
+			 * in the list attached to the buffer and are not
+			 * already marked stale. If we can't lock it, back off
+			 * and retry.
+			 */
 			if (ip != free_ip &&
 			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
 				read_unlock(&pag->pag_ici_lock);
-				continue;
+				delay(1);
+				goto retry;
 			}
 			read_unlock(&pag->pag_ici_lock);
 
-			if (!xfs_iflock_nowait(ip)) {
-				if (ip != free_ip)
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				continue;
-			}
-
+			xfs_iflock(ip);
 			xfs_iflags_set(ip, XFS_ISTALE);
-			if (xfs_inode_clean(ip)) {
-				ASSERT(ip != free_ip);
-				xfs_ifunlock(ip);
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				continue;
-			}
 
+			/*
+			 * we don't need to attach clean inodes or those only
+			 * with unlogged changes (which we throw away, anyway).
+			 */
 			iip = ip->i_itemp;
-			if (!iip) {
-				/* inode with unlogged changes only */
+			if (!iip || xfs_inode_clean(ip)) {
 				ASSERT(ip != free_ip);
 				ip->i_update_core = 0;
 				xfs_ifunlock(ip);
 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
 				continue;
 			}
-			found++;
 
 			iip->ili_last_fields = iip->ili_format.ilf_fields;
 			iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
-		if (found)
-			xfs_trans_stale_inode_buf(tp, bp);
+		xfs_trans_stale_inode_buf(tp, bp);
 		xfs_trans_binval(tp, bp);
 	}
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 925d572bf0f4..33f718f92a48 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3015,7 +3015,8 @@ _xfs_log_force(
 
 	XFS_STATS_INC(xs_log_force);
 
-	xlog_cil_push(log, 1);
+	if (log->l_cilp)
+		xlog_cil_force(log);
 
 	spin_lock(&log->l_icloglock);
 
@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
 	XFS_STATS_INC(xs_log_force);
 
 	if (log->l_cilp) {
-		lsn = xlog_cil_push_lsn(log, lsn);
+		lsn = xlog_cil_force_lsn(log, lsn);
 		if (lsn == NULLCOMMITLSN)
 			return 0;
 	}
@@ -3724,7 +3725,7 @@ xfs_log_force_umount(
 	 * call below.
 	 */
 	if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
-		xlog_cil_push(log, 1);
+		xlog_cil_force(log);
 
 	/*
 	 * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19ac..ed575fb4b495 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
 	ctx->sequence = 1;
 	ctx->cil = cil;
 	cil->xc_ctx = ctx;
+	cil->xc_current_sequence = ctx->sequence;
 
 	cil->xc_log = log;
 	log->l_cilp = cil;
@@ -269,15 +270,10 @@ xlog_cil_insert(
 static void
 xlog_cil_format_items(
 	struct log		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*ticket,
-	xfs_lsn_t		*start_lsn)
+	struct xfs_log_vec	*log_vector)
 {
 	struct xfs_log_vec *lv;
 
-	if (start_lsn)
-		*start_lsn = log->l_cilp->xc_ctx->sequence;
-
 	ASSERT(log_vector);
 	for (lv = log_vector; lv; lv = lv->lv_next) {
 		void *ptr;
@@ -301,9 +297,24 @@ xlog_cil_format_items(
 			ptr += vec->i_len;
 		}
 		ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+	}
+}
+
+static void
+xlog_cil_insert_items(
+	struct log		*log,
+	struct xfs_log_vec	*log_vector,
+	struct xlog_ticket	*ticket,
+	xfs_lsn_t		*start_lsn)
+{
+	struct xfs_log_vec *lv;
+
+	if (start_lsn)
+		*start_lsn = log->l_cilp->xc_ctx->sequence;
 
+	ASSERT(log_vector);
+	for (lv = log_vector; lv; lv = lv->lv_next)
 		xlog_cil_insert(log, ticket, lv->lv_item, lv);
-	}
 }
 
 static void
@@ -321,80 +332,6 @@ xlog_cil_free_logvec(
 }
 
 /*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
- */
-int
-xfs_log_commit_cil(
-	struct xfs_mount	*mp,
-	struct xfs_trans	*tp,
-	struct xfs_log_vec	*log_vector,
-	xfs_lsn_t		*commit_lsn,
-	int			flags)
-{
-	struct log		*log = mp->m_log;
-	int			log_flags = 0;
-	int			push = 0;
-
-	if (flags & XFS_TRANS_RELEASE_LOG_RES)
-		log_flags = XFS_LOG_REL_PERM_RESERV;
-
-	if (XLOG_FORCED_SHUTDOWN(log)) {
-		xlog_cil_free_logvec(log_vector);
-		return XFS_ERROR(EIO);
-	}
-
-	/* lock out background commit */
-	down_read(&log->l_cilp->xc_ctx_lock);
-	xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
-
-	/* check we didn't blow the reservation */
-	if (tp->t_ticket->t_curr_res < 0)
-		xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
-	/* attach the transaction to the CIL if it has any busy extents */
-	if (!list_empty(&tp->t_busy)) {
-		spin_lock(&log->l_cilp->xc_cil_lock);
-		list_splice_init(&tp->t_busy,
-					&log->l_cilp->xc_ctx->busy_extents);
-		spin_unlock(&log->l_cilp->xc_cil_lock);
-	}
-
-	tp->t_commit_lsn = *commit_lsn;
-	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
-	xfs_trans_unreserve_and_mod_sb(tp);
-
-	/* check for background commit before unlock */
-	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
-		push = 1;
-	up_read(&log->l_cilp->xc_ctx_lock);
-
-	/*
-	 * We need to push CIL every so often so we don't cache more than we
-	 * can fit in the log. The limit really is that a checkpoint can't be
-	 * more than half the log (the current checkpoint is not allowed to
-	 * overwrite the previous checkpoint), but commit latency and memory
-	 * usage limit this to a smaller size in most cases.
-	 */
-	if (push)
-		xlog_cil_push(log, 0);
-	return 0;
-}
-
-/*
  * Mark all items committed and clear busy extents. We free the log vector
  * chains in a separate pass so that we unpin the log items as quickly as
  * possible.
@@ -427,13 +364,23 @@ xlog_cil_committed(
 }
 
 /*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq flag is zero, then it
+ * is a background flush and so we can chose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allows log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
  */
-int
+STATIC int
 xlog_cil_push(
 	struct log		*log,
-	int			push_now)
+	xfs_lsn_t		push_seq)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_log_vec	*lv;
@@ -453,12 +400,14 @@ xlog_cil_push(
 	if (!cil)
 		return 0;
 
+	ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
 	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
 	/* lock out transaction commit, but don't block on background push */
 	if (!down_write_trylock(&cil->xc_ctx_lock)) {
-		if (!push_now)
+		if (!push_seq)
 			goto out_free_ticket;
 		down_write(&cil->xc_ctx_lock);
 	}
@@ -469,7 +418,11 @@ xlog_cil_push(
 		goto out_skip;
 
 	/* check for spurious background flush */
-	if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+	if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+		goto out_skip;
+
+	/* check for a previously pushed seqeunce */
+	if (push_seq < cil->xc_ctx->sequence)
 		goto out_skip;
 
 	/*
@@ -515,6 +468,13 @@ xlog_cil_push(
 	cil->xc_ctx = new_ctx;
 
 	/*
+	 * mirror the new sequence into the cil structure so that we can do
+	 * unlocked checks against the current sequence in log forces without
+	 * risking deferencing a freed context pointer.
+	 */
+	cil->xc_current_sequence = new_ctx->sequence;
+
+	/*
 	 * The switch is now done, so we can drop the context lock and move out
 	 * of a shared context. We can't just go straight to the commit record,
 	 * though - we need to synchronise with previous and future commits so
@@ -626,6 +586,102 @@ out_abort:
 }
 
 /*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_log_vec	*log_vector,
+	xfs_lsn_t		*commit_lsn,
+	int			flags)
+{
+	struct log		*log = mp->m_log;
+	int			log_flags = 0;
+	int			push = 0;
+
+	if (flags & XFS_TRANS_RELEASE_LOG_RES)
+		log_flags = XFS_LOG_REL_PERM_RESERV;
+
+	if (XLOG_FORCED_SHUTDOWN(log)) {
+		xlog_cil_free_logvec(log_vector);
+		return XFS_ERROR(EIO);
+	}
+
+	/*
+	 * do all the hard work of formatting items (including memory
+	 * allocation) outside the CIL context lock. This prevents stalling CIL
+	 * pushes when we are low on memory and a transaction commit spends a
+	 * lot of time in memory reclaim.
+	 */
+	xlog_cil_format_items(log, log_vector);
+
+	/* lock out background commit */
+	down_read(&log->l_cilp->xc_ctx_lock);
+	xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+	/* check we didn't blow the reservation */
+	if (tp->t_ticket->t_curr_res < 0)
+		xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+	/* attach the transaction to the CIL if it has any busy extents */
+	if (!list_empty(&tp->t_busy)) {
+		spin_lock(&log->l_cilp->xc_cil_lock);
+		list_splice_init(&tp->t_busy,
+					&log->l_cilp->xc_ctx->busy_extents);
+		spin_unlock(&log->l_cilp->xc_cil_lock);
+	}
+
+	tp->t_commit_lsn = *commit_lsn;
+	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+	xfs_trans_unreserve_and_mod_sb(tp);
+
+	/*
+	 * Once all the items of the transaction have been copied to the CIL,
+	 * the items can be unlocked and freed.
+	 *
+	 * This needs to be done before we drop the CIL context lock because we
+	 * have to update state in the log items and unlock them before they go
+	 * to disk. If we don't, then the CIL checkpoint can race with us and
+	 * we can run checkpoint completion before we've updated and unlocked
+	 * the log items. This affects (at least) processing of stale buffers,
+	 * inodes and EFIs.
+	 */
+	xfs_trans_free_items(tp, *commit_lsn, 0);
+
+	/* check for background commit before unlock */
+	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+		push = 1;
+
+	up_read(&log->l_cilp->xc_ctx_lock);
+
+	/*
+	 * We need to push CIL every so often so we don't cache more than we
+	 * can fit in the log. The limit really is that a checkpoint can't be
+	 * more than half the log (the current checkpoint is not allowed to
+	 * overwrite the previous checkpoint), but commit latency and memory
+	 * usage limit this to a smaller size in most cases.
+	 */
+	if (push)
+		xlog_cil_push(log, 0);
+	return 0;
+}
+
+/*
  * Conditionally push the CIL based on the sequence passed in.
  *
  * We only need to push if we haven't already pushed the sequence
@@ -639,39 +695,34 @@ out_abort:
  * commit lsn is there. It'll be empty, so this is broken for now.
  */
 xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
 	struct log	*log,
-	xfs_lsn_t	push_seq)
+	xfs_lsn_t	sequence)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx;
 	xfs_lsn_t		commit_lsn = NULLCOMMITLSN;
 
-restart:
-	down_write(&cil->xc_ctx_lock);
-	ASSERT(push_seq <= cil->xc_ctx->sequence);
-
-	/* check to see if we need to force out the current context */
-	if (push_seq == cil->xc_ctx->sequence) {
-		up_write(&cil->xc_ctx_lock);
-		xlog_cil_push(log, 1);
-		goto restart;
-	}
+	ASSERT(sequence <= cil->xc_current_sequence);
+
+	/*
+	 * check to see if we need to force out the current context.
+	 * xlog_cil_push() handles racing pushes for the same sequence,
+	 * so no need to deal with it here.
+	 */
+	if (sequence == cil->xc_current_sequence)
+		xlog_cil_push(log, sequence);
 
 	/*
 	 * See if we can find a previous sequence still committing.
-	 * We can drop the flush lock as soon as we have the cil lock
-	 * because we are now only comparing contexts protected by
-	 * the cil lock.
-	 *
 	 * We need to wait for all previous sequence commits to complete
 	 * before allowing the force of push_seq to go ahead. Hence block
 	 * on commits for those as well.
 	 */
+restart:
 	spin_lock(&cil->xc_cil_lock);
-	up_write(&cil->xc_ctx_lock);
 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
-		if (ctx->sequence > push_seq)
+		if (ctx->sequence > sequence)
 			continue;
 		if (!ctx->commit_lsn) {
 			/*
@@ -681,7 +732,7 @@ restart:
 			sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
 			goto restart;
 		}
-		if (ctx->sequence != push_seq)
+		if (ctx->sequence != sequence)
 			continue;
 		/* found it! */
 		commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..ced52b98b322 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,6 +422,7 @@ struct xfs_cil {
 	struct rw_semaphore	xc_ctx_lock;
 	struct list_head	xc_committing;
 	sv_t			xc_commit_wait;
+	xfs_lsn_t		xc_current_sequence;
 };
 
 /*
@@ -562,8 +563,16 @@ int	xlog_cil_init(struct log *log);
 void	xlog_cil_init_post_recovery(struct log *log);
 void	xlog_cil_destroy(struct log *log);
 
-int	xlog_cil_push(struct log *log, int push_now);
-xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
+/*
+ * CIL force routines
+ */
+xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+
+static inline void
+xlog_cil_force(struct log *log)
+{
+	xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+}
 
 /*
  * Unmount record type is used as a pseudo transaction type for the ticket.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdca7416c754..1c47edaea0d2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1167,7 +1167,7 @@ xfs_trans_del_item(
  * Unlock all of the items of a transaction and free all the descriptors
  * of that transaction.
  */
-STATIC void
+void
 xfs_trans_free_items(
 	struct xfs_trans	*tp,
 	xfs_lsn_t		commit_lsn,
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
 		return error;
 
 	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-
-	/* xfs_trans_free_items() unlocks them first */
-	xfs_trans_free_items(tp, *commit_lsn, 0);
 	xfs_trans_free(tp);
 	return 0;
 }
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index e2d93d8ead7b..62da86c90de5 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -25,7 +25,8 @@ struct xfs_trans;
 
 void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void	xfs_trans_del_item(struct xfs_log_item *);
-
+void	xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
+				int flags);
 void	xfs_trans_item_committed(struct xfs_log_item *lip,
 					xfs_lsn_t commit_lsn, int aborted);
 void	xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 66d585c6917c..4c7c7bfb2b2f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space(
 			e = allocatesize_fsb;
 		}
 
+		/*
+		 * The transaction reservation is limited to a 32-bit block
+		 * count, hence we need to limit the number of blocks we are
+		 * trying to reserve to avoid an overflow. We can't allocate
+		 * more than @nimaps extents, and an extent is limited on disk
+		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
+		 */
+		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
 		if (unlikely(rt)) {
-			resrtextents = qblocks = (uint)(e - s);
+			resrtextents = qblocks = resblks;
 			resrtextents /= mp->m_sb.sb_rextsize;
 			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 			quota_flag = XFS_QMOPT_RES_RTBLKS;
 		} else {
 			resrtextents = 0;
-			resblks = qblocks = \
-				XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
 			quota_flag = XFS_QMOPT_RES_REGBLKS;
 		}
 