aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2014-11-27 21:59:58 -0500
committerDave Chinner <david@fromorbit.com>2014-11-27 21:59:58 -0500
commit78c931b8be75456562b55ed4e27878f1519e1367 (patch)
treea3a3b996307a11f0b292f734416da70f6622532b
parentcac7f2429872d3733dc3f9915857b1691da2eb2f (diff)
xfs: replace global xfslogd wq with per-mount wq
The xfslogd workqueue is a global, single-job workqueue for buffer ioend processing. This means we allow for a single work item at a time for all possible XFS mounts on a system. fsstress testing in loopback XFS over XFS configurations has reproduced xfslogd deadlocks due to the single threaded nature of the queue and dependencies introduced between the separate XFS instances by online discard (-o discard). Discard over a loopback device converts the discard request to a hole punch (fallocate) on the underlying file. Online discard requests are issued synchronously and from xfslogd context in XFS, hence the xfslogd workqueue is blocked in the upper fs waiting on a hole punch request to be serviced in the lower fs. If the lower fs issues I/O that depends on xfslogd to complete, both filesystems end up hung indefinitely. This is reproduced reliably by generic/013 on XFS->loop->XFS test devices with the '-o discard' mount option. Further, docker implementations appear to use this kind of configuration for container instance filesystems by default (container fs->dm->loop->base fs) and therefore are subject to this deadlock when running on XFS. Replace the global xfslogd workqueue with a per-mount variant. This guarantees each mount access to a single worker and prevents deadlocks due to inter-fs dependencies introduced by discard. Since the queue is only responsible for buffer iodone processing at this point in time, rename xfslogd to xfs-buf. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--fs/xfs/xfs_buf.c12
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_super.c11
3 files changed, 12 insertions, 12 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 24b4ebea0d4d..c06d790a3000 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -44,8 +44,6 @@
44 44
45static kmem_zone_t *xfs_buf_zone; 45static kmem_zone_t *xfs_buf_zone;
46 46
47static struct workqueue_struct *xfslogd_workqueue;
48
49#ifdef XFS_BUF_LOCK_TRACKING 47#ifdef XFS_BUF_LOCK_TRACKING
50# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 48# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
51# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) 49# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
@@ -1053,7 +1051,7 @@ xfs_buf_ioend_async(
1053 struct xfs_buf *bp) 1051 struct xfs_buf *bp)
1054{ 1052{
1055 INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); 1053 INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
1056 queue_work(xfslogd_workqueue, &bp->b_iodone_work); 1054 queue_work(bp->b_target->bt_mount->m_buf_workqueue, &bp->b_iodone_work);
1057} 1055}
1058 1056
1059void 1057void
@@ -1882,15 +1880,8 @@ xfs_buf_init(void)
1882 if (!xfs_buf_zone) 1880 if (!xfs_buf_zone)
1883 goto out; 1881 goto out;
1884 1882
1885 xfslogd_workqueue = alloc_workqueue("xfslogd",
1886 WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1);
1887 if (!xfslogd_workqueue)
1888 goto out_free_buf_zone;
1889
1890 return 0; 1883 return 0;
1891 1884
1892 out_free_buf_zone:
1893 kmem_zone_destroy(xfs_buf_zone);
1894 out: 1885 out:
1895 return -ENOMEM; 1886 return -ENOMEM;
1896} 1887}
@@ -1898,6 +1889,5 @@ xfs_buf_init(void)
1898void 1889void
1899xfs_buf_terminate(void) 1890xfs_buf_terminate(void)
1900{ 1891{
1901 destroy_workqueue(xfslogd_workqueue);
1902 kmem_zone_destroy(xfs_buf_zone); 1892 kmem_zone_destroy(xfs_buf_zone);
1903} 1893}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b0447c86e7e2..394bc711171a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -168,6 +168,7 @@ typedef struct xfs_mount {
168 /* low free space thresholds */ 168 /* low free space thresholds */
169 struct xfs_kobj m_kobj; 169 struct xfs_kobj m_kobj;
170 170
171 struct workqueue_struct *m_buf_workqueue;
171 struct workqueue_struct *m_data_workqueue; 172 struct workqueue_struct *m_data_workqueue;
172 struct workqueue_struct *m_unwritten_workqueue; 173 struct workqueue_struct *m_unwritten_workqueue;
173 struct workqueue_struct *m_cil_workqueue; 174 struct workqueue_struct *m_cil_workqueue;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 9f622feda6a4..03e3cc242902 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -842,10 +842,16 @@ STATIC int
842xfs_init_mount_workqueues( 842xfs_init_mount_workqueues(
843 struct xfs_mount *mp) 843 struct xfs_mount *mp)
844{ 844{
845 mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
846 WQ_MEM_RECLAIM|WQ_HIGHPRI|WQ_FREEZABLE, 1,
847 mp->m_fsname);
848 if (!mp->m_buf_workqueue)
849 goto out;
850
845 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", 851 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
846 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 852 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
847 if (!mp->m_data_workqueue) 853 if (!mp->m_data_workqueue)
848 goto out; 854 goto out_destroy_buf;
849 855
850 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", 856 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
851 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 857 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
@@ -884,6 +890,8 @@ out_destroy_unwritten:
884 destroy_workqueue(mp->m_unwritten_workqueue); 890 destroy_workqueue(mp->m_unwritten_workqueue);
885out_destroy_data_iodone_queue: 891out_destroy_data_iodone_queue:
886 destroy_workqueue(mp->m_data_workqueue); 892 destroy_workqueue(mp->m_data_workqueue);
893out_destroy_buf:
894 destroy_workqueue(mp->m_buf_workqueue);
887out: 895out:
888 return -ENOMEM; 896 return -ENOMEM;
889} 897}
@@ -898,6 +906,7 @@ xfs_destroy_mount_workqueues(
898 destroy_workqueue(mp->m_cil_workqueue); 906 destroy_workqueue(mp->m_cil_workqueue);
899 destroy_workqueue(mp->m_data_workqueue); 907 destroy_workqueue(mp->m_data_workqueue);
900 destroy_workqueue(mp->m_unwritten_workqueue); 908 destroy_workqueue(mp->m_unwritten_workqueue);
909 destroy_workqueue(mp->m_buf_workqueue);
901} 910}
902 911
903/* 912/*