aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJie Liu <jeff.liu@oracle.com>2014-05-19 18:24:26 -0400
committerDave Chinner <david@fromorbit.com>2014-05-19 18:24:26 -0400
commit8695d27ec34b19c58a0dc25bfcce3f2c6cf0699d (patch)
tree99b82a3b5c902294b4828f77efe5ae28e7ef7dd5
parent7c166350b15cbec4ed9357563461b6e1d2a44ea9 (diff)
xfs: fix infinite loop at xfs_vm_writepage on 32bit system
Writing to a file at an offset greater than 16TB on a 32-bit system and then triggering page write-back via sync(1) will cause a task hang. # block_size=4096 # offset=$(((2**32 - 1) * $block_size)) # xfs_io -f -c "pwrite $offset $block_size" /storage/test_file # sync INFO: task sync:2590 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. sync D c1064a28 0 2590 2097 0x00000000 ..... Call Trace: [<c1064a28>] ? ttwu_do_wakeup+0x18/0x130 [<c1066d0e>] ? try_to_wake_up+0x1ce/0x220 [<c1066dbf>] ? wake_up_process+0x1f/0x40 [<c104fc2e>] ? wake_up_worker+0x1e/0x30 [<c15b6083>] schedule+0x23/0x60 [<c15b3c2d>] schedule_timeout+0x18d/0x1f0 [<c12a143e>] ? do_raw_spin_unlock+0x4e/0x90 [<c10515f1>] ? __queue_delayed_work+0x91/0x150 [<c12a12ef>] ? do_raw_spin_lock+0x3f/0x100 [<c12a143e>] ? do_raw_spin_unlock+0x4e/0x90 [<c15b5b5d>] wait_for_completion+0x7d/0xc0 [<c1066d60>] ? try_to_wake_up+0x220/0x220 [<c116a4d2>] sync_inodes_sb+0x92/0x180 [<c116fb05>] sync_inodes_one_sb+0x15/0x20 [<c114a8f8>] iterate_supers+0xb8/0xc0 [<c116faf0>] ? fdatawrite_one_bdev+0x20/0x20 [<c116fc21>] sys_sync+0x31/0x80 [<c15be18d>] sysenter_do_call+0x12/0x28 This issue can be triggered via xfstests/generic/308. The reason is that the end_index is an unsigned long with a maximum value of '2^32-1=4294967295' on a 32-bit platform, and the given offset causes it to wrap to 0, so that the following code will repeat again and again until the hung task timeout is reported: end_index = offset >> PAGE_CACHE_SHIFT; last_index = (offset - 1) >> PAGE_CACHE_SHIFT; if (page->index >= end_index) { unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); /* * Just skip the page if it is fully outside i_size, e.g. due * to a truncate operation that is in progress. 
*/ if (page->index >= end_index + 1 || offset_into_page == 0) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ unlock_page(page); return 0; } In order to check whether a page is fully outside i_size or not, we can fix the code logic as below: if (page->index > end_index || (page->index == end_index && offset_into_page == 0)) Secondly, there is still another similar issue when calculating the end offset for mapping the filesystem blocks to the file blocks for delalloc. With the same test as above, running umount(8) will cause a kernel panic if CONFIG_XFS_DEBUG is enabled: XFS: Assertion failed: XFS_FORCED_SHUTDOWN(ip->i_mount) || \ ip->i_delayed_blks == 0, file: fs/xfs/xfs_super.c, line: 964 kernel BUG at fs/xfs/xfs_message.c:108! invalid opcode: 0000 [#1] SMP task: edddc100 ti: ec6ee000 task.ti: ec6ee000 EIP: 0060:[<f83d87cb>] EFLAGS: 00010296 CPU: 1 EIP is at assfail+0x2b/0x30 [xfs] .............. Call Trace: [<f83d9cd4>] xfs_fs_destroy_inode+0x74/0x120 [xfs] [<c115ddf1>] destroy_inode+0x31/0x50 [<c115deff>] evict+0xef/0x170 [<c115dfb2>] dispose_list+0x32/0x40 [<c115ea3a>] evict_inodes+0xca/0xe0 [<c1149706>] generic_shutdown_super+0x46/0xd0 [<c11497b9>] kill_block_super+0x29/0x70 [<c1149a14>] deactivate_locked_super+0x44/0x70 [<c114a427>] deactivate_super+0x47/0x60 [<c1161c3d>] mntput_no_expire+0xcd/0x120 [<c1162ae8>] SyS_umount+0xa8/0x370 [<c1162dce>] SyS_oldumount+0x1e/0x20 [<c15be18d>] sysenter_do_call+0x12/0x28 That is because the end_offset is evaluated to 0, for the same reason as above; hence the mapping and conversion of delalloc file blocks to filesystem blocks did not happen. This patch fixes both issues. Reported-by: Michael L. Semon <mlsemon35@gmail.com> Signed-off-by: Jie Liu <jeff.liu@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--fs/xfs/xfs_aops.c49
1 files changed, 43 insertions, 6 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0479c32c5eb1..d1b99b692ccb 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -982,7 +982,32 @@ xfs_vm_writepage(
982 offset = i_size_read(inode); 982 offset = i_size_read(inode);
983 end_index = offset >> PAGE_CACHE_SHIFT; 983 end_index = offset >> PAGE_CACHE_SHIFT;
984 last_index = (offset - 1) >> PAGE_CACHE_SHIFT; 984 last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
985 if (page->index >= end_index) { 985
986 /*
987 * The page index is less than the end_index, adjust the end_offset
988 * to the highest offset that this page should represent.
989 * -----------------------------------------------------
990 * | file mapping | <EOF> |
991 * -----------------------------------------------------
992 * | Page ... | Page N-2 | Page N-1 | Page N | |
993 * ^--------------------------------^----------|--------
994 * | desired writeback range | see else |
995 * ---------------------------------^------------------|
996 */
997 if (page->index < end_index)
998 end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
999 else {
1000 /*
1001 * Check whether the page to write out is beyond or straddles
1002 * i_size or not.
1003 * -------------------------------------------------------
1004 * | file mapping | <EOF> |
1005 * -------------------------------------------------------
1006 * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
1007 * ^--------------------------------^-----------|---------
1008 * | | Straddles |
1009 * ---------------------------------^-----------|--------|
1010 */
986 unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); 1011 unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
987 1012
988 /* 1013 /*
@@ -990,24 +1015,36 @@ xfs_vm_writepage(
990 * truncate operation that is in progress. We must redirty the 1015 * truncate operation that is in progress. We must redirty the
991 * page so that reclaim stops reclaiming it. Otherwise 1016 * page so that reclaim stops reclaiming it. Otherwise
992 * xfs_vm_releasepage() is called on it and gets confused. 1017 * xfs_vm_releasepage() is called on it and gets confused.
1018 *
1019 * Note that the end_index is unsigned long, it would overflow
1020 * if the given offset is greater than 16TB on 32-bit system
1021 * and if we do check the page is fully outside i_size or not
1022 * via "if (page->index >= end_index + 1)" as "end_index + 1"
1023 * will be evaluated to 0. Hence this page will be redirtied
1024 * and be written out repeatedly which would result in an
1025 * infinite loop, the user program that performs this operation
1026 * will hang. Instead, we can verify this situation by checking
1027 * if the page to write is totally beyond the i_size or if its
1028 * offset is just equal to the EOF.
993 */ 1029 */
994 if (page->index >= end_index + 1 || offset_into_page == 0) 1030 if (page->index > end_index ||
1031 (page->index == end_index && offset_into_page == 0))
995 goto redirty; 1032 goto redirty;
996 1033
997 /* 1034 /*
998 * The page straddles i_size. It must be zeroed out on each 1035 * The page straddles i_size. It must be zeroed out on each
999 * and every writepage invocation because it may be mmapped. 1036 * and every writepage invocation because it may be mmapped.
1000 * "A file is mapped in multiples of the page size. For a file 1037 * "A file is mapped in multiples of the page size. For a file
1001 * that is not a multiple of the page size, the remaining 1038 * that is not a multiple of the page size, the remaining
1002 * memory is zeroed when mapped, and writes to that region are 1039 * memory is zeroed when mapped, and writes to that region are
1003 * not written out to the file." 1040 * not written out to the file."
1004 */ 1041 */
1005 zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); 1042 zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
1043
1044 /* Adjust the end_offset to the end of file */
1045 end_offset = offset;
1006 } 1046 }
1007 1047
1008 end_offset = min_t(unsigned long long,
1009 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
1010 offset);
1011 len = 1 << inode->i_blkbits; 1048 len = 1 << inode->i_blkbits;
1012 1049
1013 bh = head = page_buffers(page); 1050 bh = head = page_buffers(page);