author		Mike Snitzer <snitzer@redhat.com>	2014-10-28 18:34:52 -0400
committer	Mike Snitzer <snitzer@redhat.com>	2014-11-19 12:31:17 -0500
commit		ffcc39364160663cda1a3c358f4537302a92459b (patch)
tree		bc7defa66e5c79b036ed21092a8005b72879559b
parent		80e96c5484be788f277eead9cabf88cf8e430419 (diff)
dm: enhance internal suspend and resume interface
Rename dm_internal_{suspend,resume} to dm_internal_{suspend,resume}_fast
-- dm-stats will continue using these methods to avoid all the extra
suspend/resume logic that is not needed in order to quickly flush IO.

Introduce dm_internal_suspend_noflush() variant that actually calls the
mapped_device's target callbacks -- otherwise target-specific hooks are
avoided (e.g. dm-thin's thin_presuspend and thin_postsuspend).

Common code between dm_internal_{suspend_noflush,resume} and
dm_{suspend,resume} was factored out as __dm_{suspend,resume}.

Update dm_internal_{suspend_noflush,resume} to always take and release
the mapped_device's suspend_lock.  Also update dm_{suspend,resume} to be
aware of potential for DM_INTERNAL_SUSPEND_FLAG to be set and respond
accordingly by interruptibly waiting for the DM_INTERNAL_SUSPEND_FLAG to
be cleared.  Add lockdep annotation to dm_suspend() and dm_resume().

The existing DM_SUSPEND_FLAG remains unchanged.  DM_INTERNAL_SUSPEND_FLAG
is set by dm_internal_suspend_noflush() and cleared by
dm_internal_resume().

Both DM_SUSPEND_FLAG and DM_INTERNAL_SUSPEND_FLAG may be set if a device
was already suspended when dm_internal_suspend_noflush() was called --
this can be thought of as a "nested suspend".  A "nested suspend" can
occur with legacy userspace dm-thin code that might suspend all active
thin volumes before suspending the pool for resize.

But otherwise, in the normal dm-thin-pool suspend case moving forward:
the thin-pool will have DM_SUSPEND_FLAG set and all active thins from
that thin-pool will have DM_INTERNAL_SUSPEND_FLAG set.

Also add DM_INTERNAL_SUSPEND_FLAG to status report.  This new
DM_INTERNAL_SUSPEND_FLAG state is being reported to assist with
debugging (e.g. 'dmsetup info' will report an internally suspended
device accordingly).

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
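To make the "nested suspend" flag semantics above concrete, here is a
minimal userspace C sketch.  It is illustrative only: struct fake_md and
these helpers are stand-ins invented for this example, not the kernel
API -- the real logic is in the drivers/md/dm.c hunks below.

/*
 * Illustrative-only model of the nested-suspend bookkeeping described
 * above; fake_md and these helpers are stand-ins, not the kernel API.
 */
#include <stdio.h>

#define DMF_SUSPENDED            (1u << 0) /* userspace-driven suspend */
#define DMF_SUSPENDED_INTERNALLY (1u << 1) /* kernel-driven internal suspend */

struct fake_md {
	unsigned flags;
};

static void internal_suspend_noflush(struct fake_md *md)
{
	if (md->flags & DMF_SUSPENDED_INTERNALLY)
		return; /* nested internal suspend: already done */

	/*
	 * If userspace already suspended the device, only record the
	 * internal suspend ("nested suspend") -- both flags end up set
	 * and no extra quiescing work is needed.
	 */
	md->flags |= DMF_SUSPENDED_INTERNALLY;
}

static void internal_resume(struct fake_md *md)
{
	if (!(md->flags & DMF_SUSPENDED_INTERNALLY))
		return; /* resume from nested internal suspend */

	/* Clear only the internal flag; a userspace suspend persists. */
	md->flags &= ~DMF_SUSPENDED_INTERNALLY;
}

int main(void)
{
	/* Legacy dm-thin userspace suspended this thin volume first. */
	struct fake_md thin = { .flags = DMF_SUSPENDED };

	internal_suspend_noflush(&thin); /* "nested suspend": both flags set */
	printf("nested:       SUSPEND=%d INTERNAL_SUSPEND=%d\n",
	       !!(thin.flags & DMF_SUSPENDED),
	       !!(thin.flags & DMF_SUSPENDED_INTERNALLY));

	internal_resume(&thin); /* internal flag cleared, userspace suspend remains */
	printf("after resume: SUSPEND=%d INTERNAL_SUSPEND=%d\n",
	       !!(thin.flags & DMF_SUSPENDED),
	       !!(thin.flags & DMF_SUSPENDED_INTERNALLY));

	return 0;
}

Running this prints both flags set after the nested suspend, and only
SUSPEND still set after the internal resume -- matching the semantics
the message describes.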
-rw-r--r--	drivers/md/dm-ioctl.c          |   5
-rw-r--r--	drivers/md/dm-stats.c          |   2
-rw-r--r--	drivers/md/dm.c                | 229
-rw-r--r--	drivers/md/dm.h                |   9
-rw-r--r--	include/uapi/linux/dm-ioctl.h  |   5
5 files changed, 192 insertions(+), 58 deletions(-)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0be9381365d7..73f791bb9ea4 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	int srcu_idx;
 
 	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
-			  DM_ACTIVE_PRESENT_FLAG);
+			  DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
 
 	if (dm_suspended_md(md))
 		param->flags |= DM_SUSPEND_FLAG;
 
+	if (dm_suspended_internally_md(md))
+		param->flags |= DM_INTERNAL_SUSPEND_FLAG;
+
 	if (dm_test_deferred_remove_flag(md))
 		param->flags |= DM_DEFERRED_REMOVE;
 
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 28a90122a5a8..42a057aa3811 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md,
 		return 1;
 
 	id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
-			     dm_internal_suspend, dm_internal_resume, md);
+			     dm_internal_suspend_fast, dm_internal_resume_fast, md);
 	if (id < 0)
 		return id;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f84de3215982..a0ece87ad426 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -19,6 +19,7 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/delay.h>
+#include <linux/wait.h>
 
 #include <trace/events/block.h>
 
@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_NOFLUSH_SUSPENDING 5
 #define DMF_MERGE_IS_OPTIONAL 6
 #define DMF_DEFERRED_REMOVE 7
+#define DMF_SUSPENDED_INTERNALLY 8
 
 /*
  * A dummy definition to make RCU happy.
@@ -2718,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md)
 }
 
 /*
- * We need to be able to change a mapping table under a mounted
- * filesystem. For example we might want to move some data in
- * the background. Before the table can be swapped with
- * dm_bind_table, dm_suspend must be called to flush any in
- * flight bios and ensure that any further io gets deferred.
- */
-/*
- * Suspend mechanism in request-based dm.
- *
- * 1. Flush all I/Os by lock_fs() if needed.
- * 2. Stop dispatching any I/O by stopping the request_queue.
- * 3. Wait for all in-flight I/Os to be completed or requeued.
- *
- * To abort suspend, start the request_queue.
+ * If __dm_suspend returns 0, the device is completely quiescent
+ * now. There is no request-processing activity. All new requests
+ * are being added to md->deferred list.
+ *
+ * Caller must hold md->suspend_lock
  */
-int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+			unsigned suspend_flags, int interruptible)
 {
-	struct dm_table *map = NULL;
-	int r = 0;
-	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
-	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
-
-	mutex_lock(&md->suspend_lock);
-
-	if (dm_suspended_md(md)) {
-		r = -EINVAL;
-		goto out_unlock;
-	}
-
-	map = rcu_dereference(md->map);
+	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+	int r;
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2772,7 +2756,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		r = lock_fs(md);
 		if (r) {
 			dm_table_presuspend_undo_targets(map);
-			goto out_unlock;
+			return r;
 		}
 	}
 
@@ -2806,7 +2790,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 * We call dm_wait_for_completion to wait for all existing requests
 	 * to finish.
 	 */
-	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
+	r = dm_wait_for_completion(md, interruptible);
 
 	if (noflush)
 		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2822,14 +2806,55 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 		unlock_fs(md);
 		dm_table_presuspend_undo_targets(map);
-		goto out_unlock; /* pushback list is already flushed, so skip flush */
+		/* pushback list is already flushed, so skip flush */
 	}
 
-	/*
-	 * If dm_wait_for_completion returned 0, the device is completely
-	 * quiescent now. There is no request-processing activity. All new
-	 * requests are being added to md->deferred list.
-	 */
+	return r;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem. For example we might want to move some data in
+ * the background. Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight bios and ensure that any further io gets deferred.
+ */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
+ *
+ * To abort suspend, start the request_queue.
+ */
+int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+{
+	struct dm_table *map = NULL;
+	int r = 0;
+
+retry:
+	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+	if (dm_suspended_md(md)) {
+		r = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (dm_suspended_internally_md(md)) {
+		/* already internally suspended, wait for internal resume */
+		mutex_unlock(&md->suspend_lock);
+		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+		if (r)
+			return r;
+		goto retry;
+	}
+
+	map = rcu_dereference(md->map);
+
+	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
+	if (r)
+		goto out_unlock;
 
 	set_bit(DMF_SUSPENDED, &md->flags);
 
@@ -2840,35 +2865,57 @@ out_unlock:
 	return r;
 }
 
+static int __dm_resume(struct mapped_device *md, struct dm_table *map)
+{
+	if (map) {
+		int r = dm_table_resume_targets(map);
+		if (r)
+			return r;
+	}
+
+	dm_queue_flush(md);
+
+	/*
+	 * Flushing deferred I/Os must be done after targets are resumed
+	 * so that mapping of targets can work correctly.
+	 * Request-based dm is queueing the deferred I/Os in its request_queue.
+	 */
+	if (dm_request_based(md))
+		start_queue(md->queue);
+
+	unlock_fs(md);
+
+	return 0;
+}
+
 int dm_resume(struct mapped_device *md)
 {
 	int r = -EINVAL;
 	struct dm_table *map = NULL;
 
-	mutex_lock(&md->suspend_lock);
+retry:
+	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
 	if (!dm_suspended_md(md))
 		goto out;
 
+	if (dm_suspended_internally_md(md)) {
+		/* already internally suspended, wait for internal resume */
+		mutex_unlock(&md->suspend_lock);
+		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+		if (r)
+			return r;
+		goto retry;
+	}
+
 	map = rcu_dereference(md->map);
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
-	r = dm_table_resume_targets(map);
+	r = __dm_resume(md, map);
 	if (r)
 		goto out;
 
-	dm_queue_flush(md);
-
-	/*
-	 * Flushing deferred I/Os must be done after targets are resumed
-	 * so that mapping of targets can work correctly.
-	 * Request-based dm is queueing the deferred I/Os in its request_queue.
-	 */
-	if (dm_request_based(md))
-		start_queue(md->queue);
-
-	unlock_fs(md);
-
 	clear_bit(DMF_SUSPENDED, &md->flags);
 
 	r = 0;
@@ -2882,15 +2929,80 @@ out:
  * Internal suspend/resume works like userspace-driven suspend. It waits
  * until all bios finish and prevents issuing new bios to the target drivers.
  * It may be used only from the kernel.
- *
- * Internal suspend holds md->suspend_lock, which prevents interaction with
- * userspace-driven suspend.
  */
 
-void dm_internal_suspend(struct mapped_device *md)
+static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
-	mutex_lock(&md->suspend_lock);
+	struct dm_table *map = NULL;
+
+	if (dm_suspended_internally_md(md))
+		return; /* nested internal suspend */
+
+	if (dm_suspended_md(md)) {
+		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+		return; /* nest suspend */
+	}
+
+	map = rcu_dereference(md->map);
+
+	/*
+	 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
+	 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
+	 * would require changing .presuspend to return an error -- avoid this
+	 * until there is a need for more elaborate variants of internal suspend.
+	 */
+	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+
+	set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+
+	dm_table_postsuspend_targets(map);
+}
+
+static void __dm_internal_resume(struct mapped_device *md)
+{
+	if (!dm_suspended_internally_md(md))
+		return; /* resume from nested internal suspend */
+
 	if (dm_suspended_md(md))
+		goto done; /* resume from nested suspend */
+
+	/*
+	 * NOTE: existing callers don't need to call dm_table_resume_targets
+	 * (which may fail -- so best to avoid it for now by passing NULL map)
+	 */
+	(void) __dm_resume(md, NULL);
+
+done:
+	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
+}
+
+void dm_internal_suspend_noflush(struct mapped_device *md)
+{
+	mutex_lock(&md->suspend_lock);
+	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
+	mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
+
+void dm_internal_resume(struct mapped_device *md)
+{
+	mutex_lock(&md->suspend_lock);
+	__dm_internal_resume(md);
+	mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_resume);
+
+/*
+ * Fast variants of internal suspend/resume hold md->suspend_lock,
+ * which prevents interaction with userspace-driven suspend.
+ */
+
+void dm_internal_suspend_fast(struct mapped_device *md)
+{
+	mutex_lock(&md->suspend_lock);
+	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
 		return;
 
 	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@ -2899,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md)
 	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 }
 
-void dm_internal_resume(struct mapped_device *md)
+void dm_internal_resume_fast(struct mapped_device *md)
 {
-	if (dm_suspended_md(md))
+	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
 		goto done;
 
 	dm_queue_flush(md);
@@ -2987,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md)
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_suspended_internally_md(struct mapped_device *md)
+{
+	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+}
+
 int dm_test_deferred_remove_flag(struct mapped_device *md)
 {
 	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 781994093bf5..84b0f9e4ba6c 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -130,6 +130,15 @@ int dm_deleting_md(struct mapped_device *md);
 int dm_suspended_md(struct mapped_device *md);
 
 /*
+ * Internal suspend and resume methods.
+ */
+int dm_suspended_internally_md(struct mapped_device *md);
+void dm_internal_suspend_fast(struct mapped_device *md);
+void dm_internal_resume_fast(struct mapped_device *md);
+void dm_internal_suspend_noflush(struct mapped_device *md);
+void dm_internal_resume(struct mapped_device *md);
+
+/*
  * Test if the device is scheduled for deferred remove.
  */
 int dm_test_deferred_remove_flag(struct mapped_device *md);
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 2be66f4be2f9..a570d7b5796c 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -352,4 +352,9 @@ enum {
  */
 #define DM_DEFERRED_REMOVE		(1 << 17) /* In/Out */
 
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG	(1 << 18) /* Out */
+
 #endif	/* _LINUX_DM_IOCTL_H */