Diffstat (limited to 'drivers')
-rw-r--r--   drivers/md/dm-bio-prison.c                            | 186
-rw-r--r--   drivers/md/dm-bio-prison.h                            |  28
-rw-r--r--   drivers/md/dm-bufio.c                                 | 226
-rw-r--r--   drivers/md/dm-cache-block-types.h                     |  11
-rw-r--r--   drivers/md/dm-cache-metadata.c                        |  34
-rw-r--r--   drivers/md/dm-cache-metadata.h                        |   6
-rw-r--r--   drivers/md/dm-cache-policy-mq.c                       |  82
-rw-r--r--   drivers/md/dm-cache-target.c                          | 378
-rw-r--r--   drivers/md/dm-crypt.c                                 |   2
-rw-r--r--   drivers/md/dm-ioctl.c                                 |   5
-rw-r--r--   drivers/md/dm-stats.c                                 |   2
-rw-r--r--   drivers/md/dm-table.c                                 |  36
-rw-r--r--   drivers/md/dm-thin-metadata.c                         |  35
-rw-r--r--   drivers/md/dm-thin-metadata.h                         |   9
-rw-r--r--   drivers/md/dm-thin.c                                  | 760
-rw-r--r--   drivers/md/dm.c                                       | 273
-rw-r--r--   drivers/md/dm.h                                       |  10
-rw-r--r--   drivers/md/persistent-data/dm-array.c                 |   4
-rw-r--r--   drivers/md/persistent-data/dm-space-map-metadata.c    |   8
-rw-r--r--   drivers/md/persistent-data/dm-transaction-manager.c   |  77
-rw-r--r--   drivers/md/persistent-data/dm-transaction-manager.h   |   7
21 files changed, 1610 insertions, 569 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index f752d12081ff..be065300e93c 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,68 +14,38 @@ | |||
14 | 14 | ||
15 | /*----------------------------------------------------------------*/ | 15 | /*----------------------------------------------------------------*/ |
16 | 16 | ||
17 | struct bucket { | 17 | #define MIN_CELLS 1024 |
18 | spinlock_t lock; | ||
19 | struct hlist_head cells; | ||
20 | }; | ||
21 | 18 | ||
22 | struct dm_bio_prison { | 19 | struct dm_bio_prison { |
20 | spinlock_t lock; | ||
23 | mempool_t *cell_pool; | 21 | mempool_t *cell_pool; |
24 | 22 | struct rb_root cells; | |
25 | unsigned nr_buckets; | ||
26 | unsigned hash_mask; | ||
27 | struct bucket *buckets; | ||
28 | }; | 23 | }; |
29 | 24 | ||
30 | /*----------------------------------------------------------------*/ | ||
31 | |||
32 | static uint32_t calc_nr_buckets(unsigned nr_cells) | ||
33 | { | ||
34 | uint32_t n = 128; | ||
35 | |||
36 | nr_cells /= 4; | ||
37 | nr_cells = min(nr_cells, 8192u); | ||
38 | |||
39 | while (n < nr_cells) | ||
40 | n <<= 1; | ||
41 | |||
42 | return n; | ||
43 | } | ||
44 | |||
45 | static struct kmem_cache *_cell_cache; | 25 | static struct kmem_cache *_cell_cache; |
46 | 26 | ||
47 | static void init_bucket(struct bucket *b) | 27 | /*----------------------------------------------------------------*/ |
48 | { | ||
49 | spin_lock_init(&b->lock); | ||
50 | INIT_HLIST_HEAD(&b->cells); | ||
51 | } | ||
52 | 28 | ||
53 | /* | 29 | /* |
54 | * @nr_cells should be the number of cells you want in use _concurrently_. | 30 | * @nr_cells should be the number of cells you want in use _concurrently_. |
55 | * Don't confuse it with the number of distinct keys. | 31 | * Don't confuse it with the number of distinct keys. |
56 | */ | 32 | */ |
57 | struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) | 33 | struct dm_bio_prison *dm_bio_prison_create(void) |
58 | { | 34 | { |
59 | unsigned i; | 35 | struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); |
60 | uint32_t nr_buckets = calc_nr_buckets(nr_cells); | ||
61 | size_t len = sizeof(struct dm_bio_prison) + | ||
62 | (sizeof(struct bucket) * nr_buckets); | ||
63 | struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); | ||
64 | 36 | ||
65 | if (!prison) | 37 | if (!prison) |
66 | return NULL; | 38 | return NULL; |
67 | 39 | ||
68 | prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); | 40 | spin_lock_init(&prison->lock); |
41 | |||
42 | prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); | ||
69 | if (!prison->cell_pool) { | 43 | if (!prison->cell_pool) { |
70 | kfree(prison); | 44 | kfree(prison); |
71 | return NULL; | 45 | return NULL; |
72 | } | 46 | } |
73 | 47 | ||
74 | prison->nr_buckets = nr_buckets; | 48 | prison->cells = RB_ROOT; |
75 | prison->hash_mask = nr_buckets - 1; | ||
76 | prison->buckets = (struct bucket *) (prison + 1); | ||
77 | for (i = 0; i < nr_buckets; i++) | ||
78 | init_bucket(prison->buckets + i); | ||
79 | 49 | ||
80 | return prison; | 50 | return prison; |
81 | } | 51 | } |
@@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison, | |||
101 | } | 71 | } |
102 | EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); | 72 | EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); |
103 | 73 | ||
104 | static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) | 74 | static void __setup_new_cell(struct dm_cell_key *key, |
75 | struct bio *holder, | ||
76 | struct dm_bio_prison_cell *cell) | ||
105 | { | 77 | { |
106 | const unsigned long BIG_PRIME = 4294967291UL; | 78 | memcpy(&cell->key, key, sizeof(cell->key)); |
107 | uint64_t hash = key->block * BIG_PRIME; | 79 | cell->holder = holder; |
108 | 80 | bio_list_init(&cell->bios); | |
109 | return (uint32_t) (hash & prison->hash_mask); | ||
110 | } | 81 | } |
111 | 82 | ||
112 | static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) | 83 | static int cmp_keys(struct dm_cell_key *lhs, |
84 | struct dm_cell_key *rhs) | ||
113 | { | 85 | { |
114 | return (lhs->virtual == rhs->virtual) && | 86 | if (lhs->virtual < rhs->virtual) |
115 | (lhs->dev == rhs->dev) && | 87 | return -1; |
116 | (lhs->block == rhs->block); | ||
117 | } | ||
118 | 88 | ||
119 | static struct bucket *get_bucket(struct dm_bio_prison *prison, | 89 | if (lhs->virtual > rhs->virtual) |
120 | struct dm_cell_key *key) | 90 | return 1; |
121 | { | ||
122 | return prison->buckets + hash_key(prison, key); | ||
123 | } | ||
124 | 91 | ||
125 | static struct dm_bio_prison_cell *__search_bucket(struct bucket *b, | 92 | if (lhs->dev < rhs->dev) |
126 | struct dm_cell_key *key) | 93 | return -1; |
127 | { | ||
128 | struct dm_bio_prison_cell *cell; | ||
129 | 94 | ||
130 | hlist_for_each_entry(cell, &b->cells, list) | 95 | if (lhs->dev > rhs->dev) |
131 | if (keys_equal(&cell->key, key)) | 96 | return 1; |
132 | return cell; | ||
133 | 97 | ||
134 | return NULL; | 98 | if (lhs->block_end <= rhs->block_begin) |
135 | } | 99 | return -1; |
136 | 100 | ||
137 | static void __setup_new_cell(struct bucket *b, | 101 | if (lhs->block_begin >= rhs->block_end) |
138 | struct dm_cell_key *key, | 102 | return 1; |
139 | struct bio *holder, | 103 | |
140 | struct dm_bio_prison_cell *cell) | 104 | return 0; |
141 | { | ||
142 | memcpy(&cell->key, key, sizeof(cell->key)); | ||
143 | cell->holder = holder; | ||
144 | bio_list_init(&cell->bios); | ||
145 | hlist_add_head(&cell->list, &b->cells); | ||
146 | } | 105 | } |
147 | 106 | ||
148 | static int __bio_detain(struct bucket *b, | 107 | static int __bio_detain(struct dm_bio_prison *prison, |
149 | struct dm_cell_key *key, | 108 | struct dm_cell_key *key, |
150 | struct bio *inmate, | 109 | struct bio *inmate, |
151 | struct dm_bio_prison_cell *cell_prealloc, | 110 | struct dm_bio_prison_cell *cell_prealloc, |
152 | struct dm_bio_prison_cell **cell_result) | 111 | struct dm_bio_prison_cell **cell_result) |
153 | { | 112 | { |
154 | struct dm_bio_prison_cell *cell; | 113 | int r; |
155 | 114 | struct rb_node **new = &prison->cells.rb_node, *parent = NULL; | |
156 | cell = __search_bucket(b, key); | 115 | |
157 | if (cell) { | 116 | while (*new) { |
158 | if (inmate) | 117 | struct dm_bio_prison_cell *cell = |
159 | bio_list_add(&cell->bios, inmate); | 118 | container_of(*new, struct dm_bio_prison_cell, node); |
160 | *cell_result = cell; | 119 | |
161 | return 1; | 120 | r = cmp_keys(key, &cell->key); |
121 | |||
122 | parent = *new; | ||
123 | if (r < 0) | ||
124 | new = &((*new)->rb_left); | ||
125 | else if (r > 0) | ||
126 | new = &((*new)->rb_right); | ||
127 | else { | ||
128 | if (inmate) | ||
129 | bio_list_add(&cell->bios, inmate); | ||
130 | *cell_result = cell; | ||
131 | return 1; | ||
132 | } | ||
162 | } | 133 | } |
163 | 134 | ||
164 | __setup_new_cell(b, key, inmate, cell_prealloc); | 135 | __setup_new_cell(key, inmate, cell_prealloc); |
165 | *cell_result = cell_prealloc; | 136 | *cell_result = cell_prealloc; |
137 | |||
138 | rb_link_node(&cell_prealloc->node, parent, new); | ||
139 | rb_insert_color(&cell_prealloc->node, &prison->cells); | ||
140 | |||
166 | return 0; | 141 | return 0; |
167 | } | 142 | } |
168 | 143 | ||
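The hash-table-to-rbtree switch works because cmp_keys() above treats any two overlapping keys as equal: since a cell is only inserted when no "equal" (overlapping) node exists, the tree never holds two overlapping ranges, and an overlapping lookup always lands on the existing cell. A minimal, self-contained sketch of that comparator idea, using plain integer ranges and hypothetical names (range_key, cmp_range) rather than the kernel structures:

#include <stdio.h>

/* Hypothetical stand-in for dm_cell_key: a half-open block range [begin, end). */
struct range_key {
	unsigned long long begin;
	unsigned long long end;
};

/*
 * Same ordering rule as cmp_keys(): strictly below sorts first, strictly
 * above sorts later, any overlap compares equal.  Over a set of
 * non-overlapping stored ranges this is a total order, so it is a valid
 * rb-tree key comparison.
 */
static int cmp_range(const struct range_key *lhs, const struct range_key *rhs)
{
	if (lhs->end <= rhs->begin)
		return -1;
	if (lhs->begin >= rhs->end)
		return 1;
	return 0;	/* overlap: same cell */
}

int main(void)
{
	struct range_key held = { 16, 24 };	/* cell already in the tree */
	struct range_key hit  = { 20, 21 };	/* single block inside it   */
	struct range_key miss = { 24, 32 };	/* adjacent, no overlap     */

	printf("hit  -> %d\n", cmp_range(&hit, &held));		/* 0: detained in the same cell */
	printf("miss -> %d\n", cmp_range(&miss, &held));	/* 1: would become a new cell   */
	return 0;
}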
@@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison, | |||
174 | { | 149 | { |
175 | int r; | 150 | int r; |
176 | unsigned long flags; | 151 | unsigned long flags; |
177 | struct bucket *b = get_bucket(prison, key); | ||
178 | 152 | ||
179 | spin_lock_irqsave(&b->lock, flags); | 153 | spin_lock_irqsave(&prison->lock, flags); |
180 | r = __bio_detain(b, key, inmate, cell_prealloc, cell_result); | 154 | r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); |
181 | spin_unlock_irqrestore(&b->lock, flags); | 155 | spin_unlock_irqrestore(&prison->lock, flags); |
182 | 156 | ||
183 | return r; | 157 | return r; |
184 | } | 158 | } |
@@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell); | |||
205 | /* | 179 | /* |
206 | * @inmates must have been initialised prior to this call | 180 | * @inmates must have been initialised prior to this call |
207 | */ | 181 | */ |
208 | static void __cell_release(struct dm_bio_prison_cell *cell, | 182 | static void __cell_release(struct dm_bio_prison *prison, |
183 | struct dm_bio_prison_cell *cell, | ||
209 | struct bio_list *inmates) | 184 | struct bio_list *inmates) |
210 | { | 185 | { |
211 | hlist_del(&cell->list); | 186 | rb_erase(&cell->node, &prison->cells); |
212 | 187 | ||
213 | if (inmates) { | 188 | if (inmates) { |
214 | if (cell->holder) | 189 | if (cell->holder) |
@@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison, | |||
222 | struct bio_list *bios) | 197 | struct bio_list *bios) |
223 | { | 198 | { |
224 | unsigned long flags; | 199 | unsigned long flags; |
225 | struct bucket *b = get_bucket(prison, &cell->key); | ||
226 | 200 | ||
227 | spin_lock_irqsave(&b->lock, flags); | 201 | spin_lock_irqsave(&prison->lock, flags); |
228 | __cell_release(cell, bios); | 202 | __cell_release(prison, cell, bios); |
229 | spin_unlock_irqrestore(&b->lock, flags); | 203 | spin_unlock_irqrestore(&prison->lock, flags); |
230 | } | 204 | } |
231 | EXPORT_SYMBOL_GPL(dm_cell_release); | 205 | EXPORT_SYMBOL_GPL(dm_cell_release); |
232 | 206 | ||
233 | /* | 207 | /* |
234 | * Sometimes we don't want the holder, just the additional bios. | 208 | * Sometimes we don't want the holder, just the additional bios. |
235 | */ | 209 | */ |
236 | static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, | 210 | static void __cell_release_no_holder(struct dm_bio_prison *prison, |
211 | struct dm_bio_prison_cell *cell, | ||
237 | struct bio_list *inmates) | 212 | struct bio_list *inmates) |
238 | { | 213 | { |
239 | hlist_del(&cell->list); | 214 | rb_erase(&cell->node, &prison->cells); |
240 | bio_list_merge(inmates, &cell->bios); | 215 | bio_list_merge(inmates, &cell->bios); |
241 | } | 216 | } |
242 | 217 | ||
@@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, | |||
245 | struct bio_list *inmates) | 220 | struct bio_list *inmates) |
246 | { | 221 | { |
247 | unsigned long flags; | 222 | unsigned long flags; |
248 | struct bucket *b = get_bucket(prison, &cell->key); | ||
249 | 223 | ||
250 | spin_lock_irqsave(&b->lock, flags); | 224 | spin_lock_irqsave(&prison->lock, flags); |
251 | __cell_release_no_holder(cell, inmates); | 225 | __cell_release_no_holder(prison, cell, inmates); |
252 | spin_unlock_irqrestore(&b->lock, flags); | 226 | spin_unlock_irqrestore(&prison->lock, flags); |
253 | } | 227 | } |
254 | EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); | 228 | EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); |
255 | 229 | ||
@@ -267,6 +241,20 @@ void dm_cell_error(struct dm_bio_prison *prison, | |||
267 | } | 241 | } |
268 | EXPORT_SYMBOL_GPL(dm_cell_error); | 242 | EXPORT_SYMBOL_GPL(dm_cell_error); |
269 | 243 | ||
244 | void dm_cell_visit_release(struct dm_bio_prison *prison, | ||
245 | void (*visit_fn)(void *, struct dm_bio_prison_cell *), | ||
246 | void *context, | ||
247 | struct dm_bio_prison_cell *cell) | ||
248 | { | ||
249 | unsigned long flags; | ||
250 | |||
251 | spin_lock_irqsave(&prison->lock, flags); | ||
252 | visit_fn(context, cell); | ||
253 | rb_erase(&cell->node, &prison->cells); | ||
254 | spin_unlock_irqrestore(&prison->lock, flags); | ||
255 | } | ||
256 | EXPORT_SYMBOL_GPL(dm_cell_visit_release); | ||
257 | |||
270 | /*----------------------------------------------------------------*/ | 258 | /*----------------------------------------------------------------*/ |
271 | 259 | ||
272 | #define DEFERRED_SET_SIZE 64 | 260 | #define DEFERRED_SET_SIZE 64 |
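dm_cell_visit_release(), added above, inspects a cell and erases it from the tree inside a single locked section, so no new inmate can be added between the visit and the release. A sketch of how a client might use it; the visitor shown here (steal_bios) and the surrounding helper are hypothetical illustrations built only from the interfaces in this patch, not a complete compilable unit:

/* Hypothetical visitor: move the holder and any queued inmates onto a
 * list owned by the caller, while the prison lock is still held. */
static void steal_bios(void *context, struct dm_bio_prison_cell *cell)
{
	struct bio_list *bios = context;

	if (cell->holder)
		bio_list_add(bios, cell->holder);
	bio_list_merge(bios, &cell->bios);
}

static void drain_cell(struct dm_bio_prison *prison,
		       struct dm_bio_prison_cell *cell)
{
	struct bio_list bios;

	bio_list_init(&bios);

	/* Visit and rb_erase() happen under one spin_lock_irqsave(). */
	dm_cell_visit_release(prison, steal_bios, &bios, cell);

	/*
	 * The cell is now out of the tree and the collected bios can be
	 * requeued or issued; the cell structure itself is handed back
	 * with dm_bio_prison_free_cell() when the caller is done.
	 */
}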
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 6805a142b750..74cf01144b1f 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -10,8 +10,8 @@ | |||
10 | #include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ | 10 | #include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ |
11 | #include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ | 11 | #include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ |
12 | 12 | ||
13 | #include <linux/list.h> | ||
14 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/rbtree.h> | ||
15 | 15 | ||
16 | /*----------------------------------------------------------------*/ | 16 | /*----------------------------------------------------------------*/ |
17 | 17 | ||
@@ -23,11 +23,14 @@ | |||
23 | */ | 23 | */ |
24 | struct dm_bio_prison; | 24 | struct dm_bio_prison; |
25 | 25 | ||
26 | /* FIXME: this needs to be more abstract */ | 26 | /* |
27 | * Keys define a range of blocks within either a virtual or physical | ||
28 | * device. | ||
29 | */ | ||
27 | struct dm_cell_key { | 30 | struct dm_cell_key { |
28 | int virtual; | 31 | int virtual; |
29 | dm_thin_id dev; | 32 | dm_thin_id dev; |
30 | dm_block_t block; | 33 | dm_block_t block_begin, block_end; |
31 | }; | 34 | }; |
32 | 35 | ||
33 | /* | 36 | /* |
@@ -35,13 +38,15 @@ struct dm_cell_key { | |||
35 | * themselves. | 38 | * themselves. |
36 | */ | 39 | */ |
37 | struct dm_bio_prison_cell { | 40 | struct dm_bio_prison_cell { |
38 | struct hlist_node list; | 41 | struct list_head user_list; /* for client use */ |
42 | struct rb_node node; | ||
43 | |||
39 | struct dm_cell_key key; | 44 | struct dm_cell_key key; |
40 | struct bio *holder; | 45 | struct bio *holder; |
41 | struct bio_list bios; | 46 | struct bio_list bios; |
42 | }; | 47 | }; |
43 | 48 | ||
44 | struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); | 49 | struct dm_bio_prison *dm_bio_prison_create(void); |
45 | void dm_bio_prison_destroy(struct dm_bio_prison *prison); | 50 | void dm_bio_prison_destroy(struct dm_bio_prison *prison); |
46 | 51 | ||
47 | /* | 52 | /* |
@@ -57,7 +62,7 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison, | |||
57 | struct dm_bio_prison_cell *cell); | 62 | struct dm_bio_prison_cell *cell); |
58 | 63 | ||
59 | /* | 64 | /* |
60 | * Creates, or retrieves a cell for the given key. | 65 | * Creates, or retrieves a cell that overlaps the given key. |
61 | * | 66 | * |
62 | * Returns 1 if pre-existing cell returned, zero if new cell created using | 67 | * Returns 1 if pre-existing cell returned, zero if new cell created using |
63 | * @cell_prealloc. | 68 | * @cell_prealloc. |
@@ -68,7 +73,8 @@ int dm_get_cell(struct dm_bio_prison *prison, | |||
68 | struct dm_bio_prison_cell **cell_result); | 73 | struct dm_bio_prison_cell **cell_result); |
69 | 74 | ||
70 | /* | 75 | /* |
71 | * An atomic op that combines retrieving a cell, and adding a bio to it. | 76 | * An atomic op that combines retrieving or creating a cell, and adding a |
77 | * bio to it. | ||
72 | * | 78 | * |
73 | * Returns 1 if the cell was already held, 0 if @inmate is the new holder. | 79 | * Returns 1 if the cell was already held, 0 if @inmate is the new holder. |
74 | */ | 80 | */ |
@@ -87,6 +93,14 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, | |||
87 | void dm_cell_error(struct dm_bio_prison *prison, | 93 | void dm_cell_error(struct dm_bio_prison *prison, |
88 | struct dm_bio_prison_cell *cell, int error); | 94 | struct dm_bio_prison_cell *cell, int error); |
89 | 95 | ||
96 | /* | ||
97 | * Visits the cell and then releases. Guarantees no new inmates are | ||
98 | * inserted between the visit and release. | ||
99 | */ | ||
100 | void dm_cell_visit_release(struct dm_bio_prison *prison, | ||
101 | void (*visit_fn)(void *, struct dm_bio_prison_cell *), | ||
102 | void *context, struct dm_bio_prison_cell *cell); | ||
103 | |||
90 | /*----------------------------------------------------------------*/ | 104 | /*----------------------------------------------------------------*/ |
91 | 105 | ||
92 | /* | 106 | /* |
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index afe79719ea32..c33b49792b87 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
15 | #include <linux/shrinker.h> | 15 | #include <linux/shrinker.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/rbtree.h> | ||
17 | 18 | ||
18 | #define DM_MSG_PREFIX "bufio" | 19 | #define DM_MSG_PREFIX "bufio" |
19 | 20 | ||
@@ -34,26 +35,23 @@ | |||
34 | /* | 35 | /* |
35 | * Check buffer ages in this interval (seconds) | 36 | * Check buffer ages in this interval (seconds) |
36 | */ | 37 | */ |
37 | #define DM_BUFIO_WORK_TIMER_SECS 10 | 38 | #define DM_BUFIO_WORK_TIMER_SECS 30 |
38 | 39 | ||
39 | /* | 40 | /* |
40 | * Free buffers when they are older than this (seconds) | 41 | * Free buffers when they are older than this (seconds) |
41 | */ | 42 | */ |
42 | #define DM_BUFIO_DEFAULT_AGE_SECS 60 | 43 | #define DM_BUFIO_DEFAULT_AGE_SECS 300 |
43 | 44 | ||
44 | /* | 45 | /* |
45 | * The number of bvec entries that are embedded directly in the buffer. | 46 | * The nr of bytes of cached data to keep around. |
46 | * If the chunk size is larger, dm-io is used to do the io. | ||
47 | */ | 47 | */ |
48 | #define DM_BUFIO_INLINE_VECS 16 | 48 | #define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024) |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * Buffer hash | 51 | * The number of bvec entries that are embedded directly in the buffer. |
52 | * If the chunk size is larger, dm-io is used to do the io. | ||
52 | */ | 53 | */ |
53 | #define DM_BUFIO_HASH_BITS 20 | 54 | #define DM_BUFIO_INLINE_VECS 16 |
54 | #define DM_BUFIO_HASH(block) \ | ||
55 | ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \ | ||
56 | ((1 << DM_BUFIO_HASH_BITS) - 1)) | ||
57 | 55 | ||
58 | /* | 56 | /* |
59 | * Don't try to use kmem_cache_alloc for blocks larger than this. | 57 | * Don't try to use kmem_cache_alloc for blocks larger than this. |
@@ -106,7 +104,7 @@ struct dm_bufio_client { | |||
106 | 104 | ||
107 | unsigned minimum_buffers; | 105 | unsigned minimum_buffers; |
108 | 106 | ||
109 | struct hlist_head *cache_hash; | 107 | struct rb_root buffer_tree; |
110 | wait_queue_head_t free_buffer_wait; | 108 | wait_queue_head_t free_buffer_wait; |
111 | 109 | ||
112 | int async_write_error; | 110 | int async_write_error; |
@@ -135,7 +133,7 @@ enum data_mode { | |||
135 | }; | 133 | }; |
136 | 134 | ||
137 | struct dm_buffer { | 135 | struct dm_buffer { |
138 | struct hlist_node hash_list; | 136 | struct rb_node node; |
139 | struct list_head lru_list; | 137 | struct list_head lru_list; |
140 | sector_t block; | 138 | sector_t block; |
141 | void *data; | 139 | void *data; |
@@ -223,6 +221,7 @@ static DEFINE_SPINLOCK(param_spinlock); | |||
223 | * Buffers are freed after this timeout | 221 | * Buffers are freed after this timeout |
224 | */ | 222 | */ |
225 | static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; | 223 | static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; |
224 | static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; | ||
226 | 225 | ||
227 | static unsigned long dm_bufio_peak_allocated; | 226 | static unsigned long dm_bufio_peak_allocated; |
228 | static unsigned long dm_bufio_allocated_kmem_cache; | 227 | static unsigned long dm_bufio_allocated_kmem_cache; |
@@ -253,6 +252,53 @@ static LIST_HEAD(dm_bufio_all_clients); | |||
253 | */ | 252 | */ |
254 | static DEFINE_MUTEX(dm_bufio_clients_lock); | 253 | static DEFINE_MUTEX(dm_bufio_clients_lock); |
255 | 254 | ||
255 | /*---------------------------------------------------------------- | ||
256 | * A red/black tree acts as an index for all the buffers. | ||
257 | *--------------------------------------------------------------*/ | ||
258 | static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) | ||
259 | { | ||
260 | struct rb_node *n = c->buffer_tree.rb_node; | ||
261 | struct dm_buffer *b; | ||
262 | |||
263 | while (n) { | ||
264 | b = container_of(n, struct dm_buffer, node); | ||
265 | |||
266 | if (b->block == block) | ||
267 | return b; | ||
268 | |||
269 | n = (b->block < block) ? n->rb_left : n->rb_right; | ||
270 | } | ||
271 | |||
272 | return NULL; | ||
273 | } | ||
274 | |||
275 | static void __insert(struct dm_bufio_client *c, struct dm_buffer *b) | ||
276 | { | ||
277 | struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL; | ||
278 | struct dm_buffer *found; | ||
279 | |||
280 | while (*new) { | ||
281 | found = container_of(*new, struct dm_buffer, node); | ||
282 | |||
283 | if (found->block == b->block) { | ||
284 | BUG_ON(found != b); | ||
285 | return; | ||
286 | } | ||
287 | |||
288 | parent = *new; | ||
289 | new = (found->block < b->block) ? | ||
290 | &((*new)->rb_left) : &((*new)->rb_right); | ||
291 | } | ||
292 | |||
293 | rb_link_node(&b->node, parent, new); | ||
294 | rb_insert_color(&b->node, &c->buffer_tree); | ||
295 | } | ||
296 | |||
297 | static void __remove(struct dm_bufio_client *c, struct dm_buffer *b) | ||
298 | { | ||
299 | rb_erase(&b->node, &c->buffer_tree); | ||
300 | } | ||
301 | |||
256 | /*----------------------------------------------------------------*/ | 302 | /*----------------------------------------------------------------*/ |
257 | 303 | ||
258 | static void adjust_total_allocated(enum data_mode data_mode, long diff) | 304 | static void adjust_total_allocated(enum data_mode data_mode, long diff) |
@@ -434,7 +480,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) | |||
434 | b->block = block; | 480 | b->block = block; |
435 | b->list_mode = dirty; | 481 | b->list_mode = dirty; |
436 | list_add(&b->lru_list, &c->lru[dirty]); | 482 | list_add(&b->lru_list, &c->lru[dirty]); |
437 | hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]); | 483 | __insert(b->c, b); |
438 | b->last_accessed = jiffies; | 484 | b->last_accessed = jiffies; |
439 | } | 485 | } |
440 | 486 | ||
@@ -448,7 +494,7 @@ static void __unlink_buffer(struct dm_buffer *b) | |||
448 | BUG_ON(!c->n_buffers[b->list_mode]); | 494 | BUG_ON(!c->n_buffers[b->list_mode]); |
449 | 495 | ||
450 | c->n_buffers[b->list_mode]--; | 496 | c->n_buffers[b->list_mode]--; |
451 | hlist_del(&b->hash_list); | 497 | __remove(b->c, b); |
452 | list_del(&b->lru_list); | 498 | list_del(&b->lru_list); |
453 | } | 499 | } |
454 | 500 | ||
@@ -532,6 +578,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, | |||
532 | end_io(&b->bio, r); | 578 | end_io(&b->bio, r); |
533 | } | 579 | } |
534 | 580 | ||
581 | static void inline_endio(struct bio *bio, int error) | ||
582 | { | ||
583 | bio_end_io_t *end_fn = bio->bi_private; | ||
584 | |||
585 | /* | ||
586 | * Reset the bio to free any attached resources | ||
587 | * (e.g. bio integrity profiles). | ||
588 | */ | ||
589 | bio_reset(bio); | ||
590 | |||
591 | end_fn(bio, error); | ||
592 | } | ||
593 | |||
535 | static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, | 594 | static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, |
536 | bio_end_io_t *end_io) | 595 | bio_end_io_t *end_io) |
537 | { | 596 | { |
@@ -543,7 +602,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, | |||
543 | b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; | 602 | b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; |
544 | b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits; | 603 | b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits; |
545 | b->bio.bi_bdev = b->c->bdev; | 604 | b->bio.bi_bdev = b->c->bdev; |
546 | b->bio.bi_end_io = end_io; | 605 | b->bio.bi_end_io = inline_endio; |
606 | /* | ||
607 | * Use of .bi_private isn't a problem here because | ||
608 | * the dm_buffer's inline bio is local to bufio. | ||
609 | */ | ||
610 | b->bio.bi_private = end_io; | ||
547 | 611 | ||
548 | /* | 612 | /* |
549 | * We assume that if len >= PAGE_SIZE ptr is page-aligned. | 613 | * We assume that if len >= PAGE_SIZE ptr is page-aligned. |
@@ -887,23 +951,6 @@ static void __check_watermark(struct dm_bufio_client *c, | |||
887 | __write_dirty_buffers_async(c, 1, write_list); | 951 | __write_dirty_buffers_async(c, 1, write_list); |
888 | } | 952 | } |
889 | 953 | ||
890 | /* | ||
891 | * Find a buffer in the hash. | ||
892 | */ | ||
893 | static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) | ||
894 | { | ||
895 | struct dm_buffer *b; | ||
896 | |||
897 | hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)], | ||
898 | hash_list) { | ||
899 | dm_bufio_cond_resched(); | ||
900 | if (b->block == block) | ||
901 | return b; | ||
902 | } | ||
903 | |||
904 | return NULL; | ||
905 | } | ||
906 | |||
907 | /*---------------------------------------------------------------- | 954 | /*---------------------------------------------------------------- |
908 | * Getting a buffer | 955 | * Getting a buffer |
909 | *--------------------------------------------------------------*/ | 956 | *--------------------------------------------------------------*/ |
@@ -1433,45 +1480,52 @@ static void drop_buffers(struct dm_bufio_client *c) | |||
1433 | } | 1480 | } |
1434 | 1481 | ||
1435 | /* | 1482 | /* |
1436 | * Test if the buffer is unused and too old, and commit it. | 1483 | * We may not be able to evict this buffer if IO pending or the client |
1484 | * is still using it. Caller is expected to know buffer is too old. | ||
1485 | * | ||
1437 | * And if GFP_NOFS is used, we must not do any I/O because we hold | 1486 | * And if GFP_NOFS is used, we must not do any I/O because we hold |
1438 | * dm_bufio_clients_lock and we would risk deadlock if the I/O gets | 1487 | * dm_bufio_clients_lock and we would risk deadlock if the I/O gets |
1439 | * rerouted to different bufio client. | 1488 | * rerouted to different bufio client. |
1440 | */ | 1489 | */ |
1441 | static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, | 1490 | static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) |
1442 | unsigned long max_jiffies) | ||
1443 | { | 1491 | { |
1444 | if (jiffies - b->last_accessed < max_jiffies) | ||
1445 | return 0; | ||
1446 | |||
1447 | if (!(gfp & __GFP_FS)) { | 1492 | if (!(gfp & __GFP_FS)) { |
1448 | if (test_bit(B_READING, &b->state) || | 1493 | if (test_bit(B_READING, &b->state) || |
1449 | test_bit(B_WRITING, &b->state) || | 1494 | test_bit(B_WRITING, &b->state) || |
1450 | test_bit(B_DIRTY, &b->state)) | 1495 | test_bit(B_DIRTY, &b->state)) |
1451 | return 0; | 1496 | return false; |
1452 | } | 1497 | } |
1453 | 1498 | ||
1454 | if (b->hold_count) | 1499 | if (b->hold_count) |
1455 | return 0; | 1500 | return false; |
1456 | 1501 | ||
1457 | __make_buffer_clean(b); | 1502 | __make_buffer_clean(b); |
1458 | __unlink_buffer(b); | 1503 | __unlink_buffer(b); |
1459 | __free_buffer_wake(b); | 1504 | __free_buffer_wake(b); |
1460 | 1505 | ||
1461 | return 1; | 1506 | return true; |
1462 | } | 1507 | } |
1463 | 1508 | ||
1464 | static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, | 1509 | static unsigned get_retain_buffers(struct dm_bufio_client *c) |
1465 | gfp_t gfp_mask) | 1510 | { |
1511 | unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); | ||
1512 | return retain_bytes / c->block_size; | ||
1513 | } | ||
1514 | |||
1515 | static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, | ||
1516 | gfp_t gfp_mask) | ||
1466 | { | 1517 | { |
1467 | int l; | 1518 | int l; |
1468 | struct dm_buffer *b, *tmp; | 1519 | struct dm_buffer *b, *tmp; |
1469 | long freed = 0; | 1520 | unsigned long freed = 0; |
1521 | unsigned long count = nr_to_scan; | ||
1522 | unsigned retain_target = get_retain_buffers(c); | ||
1470 | 1523 | ||
1471 | for (l = 0; l < LIST_SIZE; l++) { | 1524 | for (l = 0; l < LIST_SIZE; l++) { |
1472 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { | 1525 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { |
1473 | freed += __cleanup_old_buffer(b, gfp_mask, 0); | 1526 | if (__try_evict_buffer(b, gfp_mask)) |
1474 | if (!--nr_to_scan) | 1527 | freed++; |
1528 | if (!--nr_to_scan || ((count - freed) <= retain_target)) | ||
1475 | return freed; | 1529 | return freed; |
1476 | dm_bufio_cond_resched(); | 1530 | dm_bufio_cond_resched(); |
1477 | } | 1531 | } |
@@ -1533,11 +1587,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign | |||
1533 | r = -ENOMEM; | 1587 | r = -ENOMEM; |
1534 | goto bad_client; | 1588 | goto bad_client; |
1535 | } | 1589 | } |
1536 | c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS); | 1590 | c->buffer_tree = RB_ROOT; |
1537 | if (!c->cache_hash) { | ||
1538 | r = -ENOMEM; | ||
1539 | goto bad_hash; | ||
1540 | } | ||
1541 | 1591 | ||
1542 | c->bdev = bdev; | 1592 | c->bdev = bdev; |
1543 | c->block_size = block_size; | 1593 | c->block_size = block_size; |
@@ -1556,9 +1606,6 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign | |||
1556 | c->n_buffers[i] = 0; | 1606 | c->n_buffers[i] = 0; |
1557 | } | 1607 | } |
1558 | 1608 | ||
1559 | for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) | ||
1560 | INIT_HLIST_HEAD(&c->cache_hash[i]); | ||
1561 | |||
1562 | mutex_init(&c->lock); | 1609 | mutex_init(&c->lock); |
1563 | INIT_LIST_HEAD(&c->reserved_buffers); | 1610 | INIT_LIST_HEAD(&c->reserved_buffers); |
1564 | c->need_reserved_buffers = reserved_buffers; | 1611 | c->need_reserved_buffers = reserved_buffers; |
@@ -1632,8 +1679,6 @@ bad_cache: | |||
1632 | } | 1679 | } |
1633 | dm_io_client_destroy(c->dm_io); | 1680 | dm_io_client_destroy(c->dm_io); |
1634 | bad_dm_io: | 1681 | bad_dm_io: |
1635 | vfree(c->cache_hash); | ||
1636 | bad_hash: | ||
1637 | kfree(c); | 1682 | kfree(c); |
1638 | bad_client: | 1683 | bad_client: |
1639 | return ERR_PTR(r); | 1684 | return ERR_PTR(r); |
@@ -1660,9 +1705,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) | |||
1660 | 1705 | ||
1661 | mutex_unlock(&dm_bufio_clients_lock); | 1706 | mutex_unlock(&dm_bufio_clients_lock); |
1662 | 1707 | ||
1663 | for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) | 1708 | BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree)); |
1664 | BUG_ON(!hlist_empty(&c->cache_hash[i])); | ||
1665 | |||
1666 | BUG_ON(c->need_reserved_buffers); | 1709 | BUG_ON(c->need_reserved_buffers); |
1667 | 1710 | ||
1668 | while (!list_empty(&c->reserved_buffers)) { | 1711 | while (!list_empty(&c->reserved_buffers)) { |
@@ -1680,36 +1723,60 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) | |||
1680 | BUG_ON(c->n_buffers[i]); | 1723 | BUG_ON(c->n_buffers[i]); |
1681 | 1724 | ||
1682 | dm_io_client_destroy(c->dm_io); | 1725 | dm_io_client_destroy(c->dm_io); |
1683 | vfree(c->cache_hash); | ||
1684 | kfree(c); | 1726 | kfree(c); |
1685 | } | 1727 | } |
1686 | EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); | 1728 | EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); |
1687 | 1729 | ||
1688 | static void cleanup_old_buffers(void) | 1730 | static unsigned get_max_age_hz(void) |
1689 | { | 1731 | { |
1690 | unsigned long max_age = ACCESS_ONCE(dm_bufio_max_age); | 1732 | unsigned max_age = ACCESS_ONCE(dm_bufio_max_age); |
1691 | struct dm_bufio_client *c; | ||
1692 | 1733 | ||
1693 | if (max_age > ULONG_MAX / HZ) | 1734 | if (max_age > UINT_MAX / HZ) |
1694 | max_age = ULONG_MAX / HZ; | 1735 | max_age = UINT_MAX / HZ; |
1695 | 1736 | ||
1696 | mutex_lock(&dm_bufio_clients_lock); | 1737 | return max_age * HZ; |
1697 | list_for_each_entry(c, &dm_bufio_all_clients, client_list) { | 1738 | } |
1698 | if (!dm_bufio_trylock(c)) | ||
1699 | continue; | ||
1700 | 1739 | ||
1701 | while (!list_empty(&c->lru[LIST_CLEAN])) { | 1740 | static bool older_than(struct dm_buffer *b, unsigned long age_hz) |
1702 | struct dm_buffer *b; | 1741 | { |
1703 | b = list_entry(c->lru[LIST_CLEAN].prev, | 1742 | return (jiffies - b->last_accessed) >= age_hz; |
1704 | struct dm_buffer, lru_list); | 1743 | } |
1705 | if (!__cleanup_old_buffer(b, 0, max_age * HZ)) | 1744 | |
1706 | break; | 1745 | static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) |
1707 | dm_bufio_cond_resched(); | 1746 | { |
1708 | } | 1747 | struct dm_buffer *b, *tmp; |
1748 | unsigned retain_target = get_retain_buffers(c); | ||
1749 | unsigned count; | ||
1750 | |||
1751 | dm_bufio_lock(c); | ||
1752 | |||
1753 | count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; | ||
1754 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) { | ||
1755 | if (count <= retain_target) | ||
1756 | break; | ||
1757 | |||
1758 | if (!older_than(b, age_hz)) | ||
1759 | break; | ||
1760 | |||
1761 | if (__try_evict_buffer(b, 0)) | ||
1762 | count--; | ||
1709 | 1763 | ||
1710 | dm_bufio_unlock(c); | ||
1711 | dm_bufio_cond_resched(); | 1764 | dm_bufio_cond_resched(); |
1712 | } | 1765 | } |
1766 | |||
1767 | dm_bufio_unlock(c); | ||
1768 | } | ||
1769 | |||
1770 | static void cleanup_old_buffers(void) | ||
1771 | { | ||
1772 | unsigned long max_age_hz = get_max_age_hz(); | ||
1773 | struct dm_bufio_client *c; | ||
1774 | |||
1775 | mutex_lock(&dm_bufio_clients_lock); | ||
1776 | |||
1777 | list_for_each_entry(c, &dm_bufio_all_clients, client_list) | ||
1778 | __evict_old_buffers(c, max_age_hz); | ||
1779 | |||
1713 | mutex_unlock(&dm_bufio_clients_lock); | 1780 | mutex_unlock(&dm_bufio_clients_lock); |
1714 | } | 1781 | } |
1715 | 1782 | ||
@@ -1834,6 +1901,9 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); | |||
1834 | module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); | 1901 | module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); |
1835 | MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); | 1902 | MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); |
1836 | 1903 | ||
1904 | module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR); | ||
1905 | MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory"); | ||
1906 | |||
1837 | module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); | 1907 | module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); |
1838 | MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); | 1908 | MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); |
1839 | 1909 | ||
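Two knobs now govern dm-bufio's background eviction: a buffer becomes a candidate once it is older than max_age_seconds, but eviction stops once no more than retain_bytes worth of buffers would remain (256 KiB by default, i.e. 64 buffers at a 4 KiB block size). A small self-contained sketch of the three checks involved, using plain integers instead of jiffies and the client structure; the names and the assumed HZ value are illustrative only:

#include <stdio.h>
#include <limits.h>

#define HZ 250	/* assumed tick rate for the example */

/* Mirrors get_max_age_hz(): clamp before multiplying so that
 * max_age * HZ cannot overflow an unsigned int. */
static unsigned max_age_hz(unsigned max_age_seconds)
{
	if (max_age_seconds > UINT_MAX / HZ)
		max_age_seconds = UINT_MAX / HZ;
	return max_age_seconds * HZ;
}

/* Mirrors get_retain_buffers(): byte budget divided by buffer size. */
static unsigned retain_buffers(unsigned retain_bytes, unsigned block_size)
{
	return retain_bytes / block_size;
}

/* Mirrors older_than(): jiffies-style age comparison. */
static int older_than(unsigned long now, unsigned long last_accessed,
		      unsigned long age_hz)
{
	return (now - last_accessed) >= age_hz;
}

int main(void)
{
	printf("age threshold: %u ticks\n", max_age_hz(300));			/* 75000 */
	printf("retain target: %u buffers\n", retain_buffers(256 * 1024, 4096));	/* 64 */
	printf("old? %d\n", older_than(100000, 20000, max_age_hz(300)));	/* 1 */
	return 0;
}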
diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h
index aac0e2df06be..bed4ad4e1b7c 100644
--- a/drivers/md/dm-cache-block-types.h
+++ b/drivers/md/dm-cache-block-types.h
@@ -19,6 +19,7 @@ | |||
19 | 19 | ||
20 | typedef dm_block_t __bitwise__ dm_oblock_t; | 20 | typedef dm_block_t __bitwise__ dm_oblock_t; |
21 | typedef uint32_t __bitwise__ dm_cblock_t; | 21 | typedef uint32_t __bitwise__ dm_cblock_t; |
22 | typedef dm_block_t __bitwise__ dm_dblock_t; | ||
22 | 23 | ||
23 | static inline dm_oblock_t to_oblock(dm_block_t b) | 24 | static inline dm_oblock_t to_oblock(dm_block_t b) |
24 | { | 25 | { |
@@ -40,4 +41,14 @@ static inline uint32_t from_cblock(dm_cblock_t b) | |||
40 | return (__force uint32_t) b; | 41 | return (__force uint32_t) b; |
41 | } | 42 | } |
42 | 43 | ||
44 | static inline dm_dblock_t to_dblock(dm_block_t b) | ||
45 | { | ||
46 | return (__force dm_dblock_t) b; | ||
47 | } | ||
48 | |||
49 | static inline dm_block_t from_dblock(dm_dblock_t b) | ||
50 | { | ||
51 | return (__force dm_block_t) b; | ||
52 | } | ||
53 | |||
43 | #endif /* DM_CACHE_BLOCK_TYPES_H */ | 54 | #endif /* DM_CACHE_BLOCK_TYPES_H */ |
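The new dm_dblock_t exists so that discard-block indices can no longer be mixed up with origin-block (dm_oblock_t) indices now that the two block sizes are decoupled: __bitwise__ makes each typedef a distinct type under sparse, and the to_dblock()/from_dblock() helpers use __force to convert deliberately. A standalone sketch of the pattern with hypothetical example_* names; outside the kernel the annotations expand to nothing, so the stub macros below are an assumption made purely so the sketch compiles with an ordinary compiler:

/* Under sparse ("make C=1") __bitwise__ marks a typedef as a distinct type;
 * here the stubs make it a no-op so the sketch builds anywhere. */
#define __bitwise__
#define __force

typedef unsigned long long __bitwise__ example_oblock_t;	/* origin block  */
typedef unsigned long long __bitwise__ example_dblock_t;	/* discard block */

static inline example_oblock_t example_to_oblock(unsigned long long b)
{
	return (__force example_oblock_t) b;
}

static inline example_dblock_t example_to_dblock(unsigned long long b)
{
	return (__force example_dblock_t) b;
}

static void mark_discarded(example_dblock_t b) { (void)b; }

static void caller(example_oblock_t ob)
{
	(void)ob;
	/* mark_discarded(ob);  <-- sparse would warn: incorrect type in argument 1 */
	mark_discarded(example_to_dblock(0));	/* explicit conversion is fine */
}

int main(void)
{
	caller(example_to_oblock(5));
	return 0;
}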
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 06709257adde..9fc616c2755e 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -109,7 +109,7 @@ struct dm_cache_metadata { | |||
109 | dm_block_t discard_root; | 109 | dm_block_t discard_root; |
110 | 110 | ||
111 | sector_t discard_block_size; | 111 | sector_t discard_block_size; |
112 | dm_oblock_t discard_nr_blocks; | 112 | dm_dblock_t discard_nr_blocks; |
113 | 113 | ||
114 | sector_t data_block_size; | 114 | sector_t data_block_size; |
115 | dm_cblock_t cache_blocks; | 115 | dm_cblock_t cache_blocks; |
@@ -329,7 +329,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) | |||
329 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); | 329 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); |
330 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); | 330 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); |
331 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); | 331 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); |
332 | disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); | 332 | disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); |
333 | disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); | 333 | disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); |
334 | disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); | 334 | disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); |
335 | disk_super->cache_blocks = cpu_to_le32(0); | 335 | disk_super->cache_blocks = cpu_to_le32(0); |
@@ -528,7 +528,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, | |||
528 | cmd->hint_root = le64_to_cpu(disk_super->hint_root); | 528 | cmd->hint_root = le64_to_cpu(disk_super->hint_root); |
529 | cmd->discard_root = le64_to_cpu(disk_super->discard_root); | 529 | cmd->discard_root = le64_to_cpu(disk_super->discard_root); |
530 | cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); | 530 | cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); |
531 | cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks)); | 531 | cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); |
532 | cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); | 532 | cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); |
533 | cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); | 533 | cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); |
534 | strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); | 534 | strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); |
@@ -626,7 +626,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, | |||
626 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); | 626 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); |
627 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); | 627 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); |
628 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); | 628 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); |
629 | disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); | 629 | disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); |
630 | disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); | 630 | disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); |
631 | strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); | 631 | strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); |
632 | disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); | 632 | disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); |
@@ -797,15 +797,15 @@ out: | |||
797 | 797 | ||
798 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, | 798 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, |
799 | sector_t discard_block_size, | 799 | sector_t discard_block_size, |
800 | dm_oblock_t new_nr_entries) | 800 | dm_dblock_t new_nr_entries) |
801 | { | 801 | { |
802 | int r; | 802 | int r; |
803 | 803 | ||
804 | down_write(&cmd->root_lock); | 804 | down_write(&cmd->root_lock); |
805 | r = dm_bitset_resize(&cmd->discard_info, | 805 | r = dm_bitset_resize(&cmd->discard_info, |
806 | cmd->discard_root, | 806 | cmd->discard_root, |
807 | from_oblock(cmd->discard_nr_blocks), | 807 | from_dblock(cmd->discard_nr_blocks), |
808 | from_oblock(new_nr_entries), | 808 | from_dblock(new_nr_entries), |
809 | false, &cmd->discard_root); | 809 | false, &cmd->discard_root); |
810 | if (!r) { | 810 | if (!r) { |
811 | cmd->discard_block_size = discard_block_size; | 811 | cmd->discard_block_size = discard_block_size; |
@@ -818,28 +818,28 @@ int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, | |||
818 | return r; | 818 | return r; |
819 | } | 819 | } |
820 | 820 | ||
821 | static int __set_discard(struct dm_cache_metadata *cmd, dm_oblock_t b) | 821 | static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) |
822 | { | 822 | { |
823 | return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, | 823 | return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, |
824 | from_oblock(b), &cmd->discard_root); | 824 | from_dblock(b), &cmd->discard_root); |
825 | } | 825 | } |
826 | 826 | ||
827 | static int __clear_discard(struct dm_cache_metadata *cmd, dm_oblock_t b) | 827 | static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) |
828 | { | 828 | { |
829 | return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, | 829 | return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, |
830 | from_oblock(b), &cmd->discard_root); | 830 | from_dblock(b), &cmd->discard_root); |
831 | } | 831 | } |
832 | 832 | ||
833 | static int __is_discarded(struct dm_cache_metadata *cmd, dm_oblock_t b, | 833 | static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b, |
834 | bool *is_discarded) | 834 | bool *is_discarded) |
835 | { | 835 | { |
836 | return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, | 836 | return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, |
837 | from_oblock(b), &cmd->discard_root, | 837 | from_dblock(b), &cmd->discard_root, |
838 | is_discarded); | 838 | is_discarded); |
839 | } | 839 | } |
840 | 840 | ||
841 | static int __discard(struct dm_cache_metadata *cmd, | 841 | static int __discard(struct dm_cache_metadata *cmd, |
842 | dm_oblock_t dblock, bool discard) | 842 | dm_dblock_t dblock, bool discard) |
843 | { | 843 | { |
844 | int r; | 844 | int r; |
845 | 845 | ||
@@ -852,7 +852,7 @@ static int __discard(struct dm_cache_metadata *cmd, | |||
852 | } | 852 | } |
853 | 853 | ||
854 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, | 854 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, |
855 | dm_oblock_t dblock, bool discard) | 855 | dm_dblock_t dblock, bool discard) |
856 | { | 856 | { |
857 | int r; | 857 | int r; |
858 | 858 | ||
@@ -870,8 +870,8 @@ static int __load_discards(struct dm_cache_metadata *cmd, | |||
870 | dm_block_t b; | 870 | dm_block_t b; |
871 | bool discard; | 871 | bool discard; |
872 | 872 | ||
873 | for (b = 0; b < from_oblock(cmd->discard_nr_blocks); b++) { | 873 | for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { |
874 | dm_oblock_t dblock = to_oblock(b); | 874 | dm_dblock_t dblock = to_dblock(b); |
875 | 875 | ||
876 | if (cmd->clean_when_opened) { | 876 | if (cmd->clean_when_opened) { |
877 | r = __is_discarded(cmd, dblock, &discard); | 877 | r = __is_discarded(cmd, dblock, &discard); |
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 7383c90ccdb8..4ecc403be283 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -70,14 +70,14 @@ dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd); | |||
70 | 70 | ||
71 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, | 71 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, |
72 | sector_t discard_block_size, | 72 | sector_t discard_block_size, |
73 | dm_oblock_t new_nr_entries); | 73 | dm_dblock_t new_nr_entries); |
74 | 74 | ||
75 | typedef int (*load_discard_fn)(void *context, sector_t discard_block_size, | 75 | typedef int (*load_discard_fn)(void *context, sector_t discard_block_size, |
76 | dm_oblock_t dblock, bool discarded); | 76 | dm_dblock_t dblock, bool discarded); |
77 | int dm_cache_load_discards(struct dm_cache_metadata *cmd, | 77 | int dm_cache_load_discards(struct dm_cache_metadata *cmd, |
78 | load_discard_fn fn, void *context); | 78 | load_discard_fn fn, void *context); |
79 | 79 | ||
80 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_oblock_t dblock, bool discard); | 80 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard); |
81 | 81 | ||
82 | int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock); | 82 | int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock); |
83 | int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock); | 83 | int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock); |
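dm_cache_load_discards() replays the on-disk discard bitset through a callback, one discard block at a time, using the load_discard_fn signature above. A minimal sketch of what such a callback could look like; the context structure and the bitmap bookkeeping are hypothetical, shown only to illustrate the shape of the hook (it is a fragment, not a standalone program):

/* Hypothetical context: an in-core bitmap the target rebuilds at load time. */
struct discard_load_ctx {
	unsigned long *bitset;	/* one bit per discard block */
};

static int load_discard(void *context, sector_t discard_block_size,
			dm_dblock_t dblock, bool discarded)
{
	struct discard_load_ctx *ctx = context;

	/* dm_dblock_t is __bitwise; from_dblock() strips it back to a
	 * plain dm_block_t index.  discard_block_size is available if the
	 * caller needs to rescale to its own discard granularity. */
	if (discarded)
		set_bit(from_dblock(dblock), ctx->bitset);
	else
		clear_bit(from_dblock(dblock), ctx->bitset);

	return 0;
}

/* Usage: r = dm_cache_load_discards(cmd, load_discard, &ctx); */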
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 0e385e40909e..13f547a4eeb6 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -181,24 +181,30 @@ static void queue_shift_down(struct queue *q) | |||
181 | * Gives us the oldest entry of the lowest popoulated level. If the first | 181 | * Gives us the oldest entry of the lowest popoulated level. If the first |
182 | * level is emptied then we shift down one level. | 182 | * level is emptied then we shift down one level. |
183 | */ | 183 | */ |
184 | static struct list_head *queue_pop(struct queue *q) | 184 | static struct list_head *queue_peek(struct queue *q) |
185 | { | 185 | { |
186 | unsigned level; | 186 | unsigned level; |
187 | struct list_head *r; | ||
188 | 187 | ||
189 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | 188 | for (level = 0; level < NR_QUEUE_LEVELS; level++) |
190 | if (!list_empty(q->qs + level)) { | 189 | if (!list_empty(q->qs + level)) |
191 | r = q->qs[level].next; | 190 | return q->qs[level].next; |
192 | list_del(r); | ||
193 | 191 | ||
194 | /* have we just emptied the bottom level? */ | 192 | return NULL; |
195 | if (level == 0 && list_empty(q->qs)) | 193 | } |
196 | queue_shift_down(q); | ||
197 | 194 | ||
198 | return r; | 195 | static struct list_head *queue_pop(struct queue *q) |
199 | } | 196 | { |
197 | struct list_head *r = queue_peek(q); | ||
200 | 198 | ||
201 | return NULL; | 199 | if (r) { |
200 | list_del(r); | ||
201 | |||
202 | /* have we just emptied the bottom level? */ | ||
203 | if (list_empty(q->qs)) | ||
204 | queue_shift_down(q); | ||
205 | } | ||
206 | |||
207 | return r; | ||
202 | } | 208 | } |
203 | 209 | ||
204 | static struct list_head *list_pop(struct list_head *lh) | 210 | static struct list_head *list_pop(struct list_head *lh) |
@@ -383,13 +389,6 @@ struct mq_policy { | |||
383 | unsigned generation; | 389 | unsigned generation; |
384 | unsigned generation_period; /* in lookups (will probably change) */ | 390 | unsigned generation_period; /* in lookups (will probably change) */ |
385 | 391 | ||
386 | /* | ||
387 | * Entries in the pre_cache whose hit count passes the promotion | ||
388 | * threshold move to the cache proper. Working out the correct | ||
389 | * value for the promotion_threshold is crucial to this policy. | ||
390 | */ | ||
391 | unsigned promote_threshold; | ||
392 | |||
393 | unsigned discard_promote_adjustment; | 392 | unsigned discard_promote_adjustment; |
394 | unsigned read_promote_adjustment; | 393 | unsigned read_promote_adjustment; |
395 | unsigned write_promote_adjustment; | 394 | unsigned write_promote_adjustment; |
@@ -406,6 +405,7 @@ struct mq_policy { | |||
406 | #define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1 | 405 | #define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1 |
407 | #define DEFAULT_READ_PROMOTE_ADJUSTMENT 4 | 406 | #define DEFAULT_READ_PROMOTE_ADJUSTMENT 4 |
408 | #define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8 | 407 | #define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8 |
408 | #define DISCOURAGE_DEMOTING_DIRTY_THRESHOLD 128 | ||
409 | 409 | ||
410 | /*----------------------------------------------------------------*/ | 410 | /*----------------------------------------------------------------*/ |
411 | 411 | ||
@@ -518,6 +518,12 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q) | |||
518 | return e; | 518 | return e; |
519 | } | 519 | } |
520 | 520 | ||
521 | static struct entry *peek(struct queue *q) | ||
522 | { | ||
523 | struct list_head *h = queue_peek(q); | ||
524 | return h ? container_of(h, struct entry, list) : NULL; | ||
525 | } | ||
526 | |||
521 | /* | 527 | /* |
522 | * Has this entry already been updated? | 528 | * Has this entry already been updated? |
523 | */ | 529 | */ |
@@ -570,10 +576,6 @@ static void check_generation(struct mq_policy *mq) | |||
570 | break; | 576 | break; |
571 | } | 577 | } |
572 | } | 578 | } |
573 | |||
574 | mq->promote_threshold = nr ? total / nr : 1; | ||
575 | if (mq->promote_threshold * nr < total) | ||
576 | mq->promote_threshold++; | ||
577 | } | 579 | } |
578 | } | 580 | } |
579 | 581 | ||
@@ -641,6 +643,30 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) | |||
641 | } | 643 | } |
642 | 644 | ||
643 | /* | 645 | /* |
646 | * Entries in the pre_cache whose hit count passes the promotion | ||
647 | * threshold move to the cache proper. Working out the correct | ||
648 | * value for the promotion_threshold is crucial to this policy. | ||
649 | */ | ||
650 | static unsigned promote_threshold(struct mq_policy *mq) | ||
651 | { | ||
652 | struct entry *e; | ||
653 | |||
654 | if (any_free_cblocks(mq)) | ||
655 | return 0; | ||
656 | |||
657 | e = peek(&mq->cache_clean); | ||
658 | if (e) | ||
659 | return e->hit_count; | ||
660 | |||
661 | e = peek(&mq->cache_dirty); | ||
662 | if (e) | ||
663 | return e->hit_count + DISCOURAGE_DEMOTING_DIRTY_THRESHOLD; | ||
664 | |||
665 | /* This should never happen */ | ||
666 | return 0; | ||
667 | } | ||
668 | |||
669 | /* | ||
644 | * We modify the basic promotion_threshold depending on the specific io. | 670 | * We modify the basic promotion_threshold depending on the specific io. |
645 | * | 671 | * |
646 | * If the origin block has been discarded then there's no cost to copy it | 672 | * If the origin block has been discarded then there's no cost to copy it |
@@ -653,7 +679,7 @@ static unsigned adjusted_promote_threshold(struct mq_policy *mq, | |||
653 | bool discarded_oblock, int data_dir) | 679 | bool discarded_oblock, int data_dir) |
654 | { | 680 | { |
655 | if (data_dir == READ) | 681 | if (data_dir == READ) |
656 | return mq->promote_threshold + mq->read_promote_adjustment; | 682 | return promote_threshold(mq) + mq->read_promote_adjustment; |
657 | 683 | ||
658 | if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) { | 684 | if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) { |
659 | /* | 685 | /* |
@@ -663,7 +689,7 @@ static unsigned adjusted_promote_threshold(struct mq_policy *mq, | |||
663 | return mq->discard_promote_adjustment; | 689 | return mq->discard_promote_adjustment; |
664 | } | 690 | } |
665 | 691 | ||
666 | return mq->promote_threshold + mq->write_promote_adjustment; | 692 | return promote_threshold(mq) + mq->write_promote_adjustment; |
667 | } | 693 | } |
668 | 694 | ||
669 | static bool should_promote(struct mq_policy *mq, struct entry *e, | 695 | static bool should_promote(struct mq_policy *mq, struct entry *e, |
@@ -839,7 +865,8 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock, | |||
839 | if (e && in_cache(mq, e)) | 865 | if (e && in_cache(mq, e)) |
840 | r = cache_entry_found(mq, e, result); | 866 | r = cache_entry_found(mq, e, result); |
841 | 867 | ||
842 | else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) | 868 | else if (mq->tracker.thresholds[PATTERN_SEQUENTIAL] && |
869 | iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) | ||
843 | result->op = POLICY_MISS; | 870 | result->op = POLICY_MISS; |
844 | 871 | ||
845 | else if (e) | 872 | else if (e) |
@@ -1230,7 +1257,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, | |||
1230 | mq->tick = 0; | 1257 | mq->tick = 0; |
1231 | mq->hit_count = 0; | 1258 | mq->hit_count = 0; |
1232 | mq->generation = 0; | 1259 | mq->generation = 0; |
1233 | mq->promote_threshold = 0; | ||
1234 | mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT; | 1260 | mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT; |
1235 | mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT; | 1261 | mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT; |
1236 | mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT; | 1262 | mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT; |
@@ -1265,7 +1291,7 @@ bad_pre_cache_init: | |||
1265 | 1291 | ||
1266 | static struct dm_cache_policy_type mq_policy_type = { | 1292 | static struct dm_cache_policy_type mq_policy_type = { |
1267 | .name = "mq", | 1293 | .name = "mq", |
1268 | .version = {1, 2, 0}, | 1294 | .version = {1, 3, 0}, |
1269 | .hint_size = 4, | 1295 | .hint_size = 4, |
1270 | .owner = THIS_MODULE, | 1296 | .owner = THIS_MODULE, |
1271 | .create = mq_create | 1297 | .create = mq_create |
@@ -1273,7 +1299,7 @@ static struct dm_cache_policy_type mq_policy_type = { | |||
1273 | 1299 | ||
1274 | static struct dm_cache_policy_type default_policy_type = { | 1300 | static struct dm_cache_policy_type default_policy_type = { |
1275 | .name = "default", | 1301 | .name = "default", |
1276 | .version = {1, 2, 0}, | 1302 | .version = {1, 3, 0}, |
1277 | .hint_size = 4, | 1303 | .hint_size = 4, |
1278 | .owner = THIS_MODULE, | 1304 | .owner = THIS_MODULE, |
1279 | .create = mq_create, | 1305 | .create = mq_create, |
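Instead of maintaining a running promote_threshold, the mq policy now derives it on demand: zero while free cache blocks remain, otherwise the hit count of the oldest clean entry (the cheapest block to evict), or the oldest dirty entry's count plus a 128-hit penalty to discourage demoting dirty data; the per-I/O adjustment (+1 discard, +4 read, +8 write by default) is then added on top. A small standalone sketch of that arithmetic with plain numbers; the snapshot structure and names are hypothetical, this is the decision logic, not the kernel's data structures:

#include <stdio.h>
#include <stdbool.h>

#define DISCOURAGE_DEMOTING_DIRTY_THRESHOLD 128

/* Hypothetical snapshot of the policy state. */
struct mq_snapshot {
	bool any_free_cblocks;
	bool have_clean;		/* cache_clean queue non-empty */
	unsigned oldest_clean_hits;
	bool have_dirty;		/* cache_dirty queue non-empty */
	unsigned oldest_dirty_hits;
};

static unsigned promote_threshold(const struct mq_snapshot *mq)
{
	if (mq->any_free_cblocks)
		return 0;
	if (mq->have_clean)
		return mq->oldest_clean_hits;
	if (mq->have_dirty)
		return mq->oldest_dirty_hits + DISCOURAGE_DEMOTING_DIRTY_THRESHOLD;
	return 0;	/* "should never happen" per the comment above */
}

int main(void)
{
	struct mq_snapshot mq = {
		.any_free_cblocks = false,
		.have_clean = true,  .oldest_clean_hits = 10,
		.have_dirty = true,  .oldest_dirty_hits = 3,
	};
	unsigned entry_hits = 14;	/* hit count of a pre-cache entry */

	/* Default read adjustment is +4: promote once hits reach 10 + 4. */
	printf("promote on read?  %d\n", entry_hits >= promote_threshold(&mq) + 4);	/* 1 */
	/* Default write adjustment is +8: 14 < 18, so not yet. */
	printf("promote on write? %d\n", entry_hits >= promote_threshold(&mq) + 8);	/* 0 */
	return 0;
}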
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 7130505c2425..1e96d7889f51 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) | |||
95 | 95 | ||
96 | /*----------------------------------------------------------------*/ | 96 | /*----------------------------------------------------------------*/ |
97 | 97 | ||
98 | #define PRISON_CELLS 1024 | ||
99 | #define MIGRATION_POOL_SIZE 128 | 98 | #define MIGRATION_POOL_SIZE 128 |
100 | #define COMMIT_PERIOD HZ | 99 | #define COMMIT_PERIOD HZ |
101 | #define MIGRATION_COUNT_WINDOW 10 | 100 | #define MIGRATION_COUNT_WINDOW 10 |
@@ -237,8 +236,9 @@ struct cache { | |||
237 | /* | 236 | /* |
238 | * origin_blocks entries, discarded if set. | 237 | * origin_blocks entries, discarded if set. |
239 | */ | 238 | */ |
240 | dm_oblock_t discard_nr_blocks; | 239 | dm_dblock_t discard_nr_blocks; |
241 | unsigned long *discard_bitset; | 240 | unsigned long *discard_bitset; |
241 | uint32_t discard_block_size; /* a power of 2 times sectors per block */ | ||
242 | 242 | ||
243 | /* | 243 | /* |
244 | * Rather than reconstructing the table line for the status we just | 244 | * Rather than reconstructing the table line for the status we just |
@@ -310,6 +310,7 @@ struct dm_cache_migration { | |||
310 | dm_cblock_t cblock; | 310 | dm_cblock_t cblock; |
311 | 311 | ||
312 | bool err:1; | 312 | bool err:1; |
313 | bool discard:1; | ||
313 | bool writeback:1; | 314 | bool writeback:1; |
314 | bool demote:1; | 315 | bool demote:1; |
315 | bool promote:1; | 316 | bool promote:1; |
@@ -433,11 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel | |||
433 | 434 | ||
434 | /*----------------------------------------------------------------*/ | 435 | /*----------------------------------------------------------------*/ |
435 | 436 | ||
436 | static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) | 437 | static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key) |
437 | { | 438 | { |
438 | key->virtual = 0; | 439 | key->virtual = 0; |
439 | key->dev = 0; | 440 | key->dev = 0; |
440 | key->block = from_oblock(oblock); | 441 | key->block_begin = from_oblock(begin); |
442 | key->block_end = from_oblock(end); | ||
441 | } | 443 | } |
442 | 444 | ||
443 | /* | 445 | /* |
@@ -447,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) | |||
447 | */ | 449 | */ |
448 | typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); | 450 | typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); |
449 | 451 | ||
450 | static int bio_detain(struct cache *cache, dm_oblock_t oblock, | 452 | static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end, |
451 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, | 453 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, |
452 | cell_free_fn free_fn, void *free_context, | 454 | cell_free_fn free_fn, void *free_context, |
453 | struct dm_bio_prison_cell **cell_result) | 455 | struct dm_bio_prison_cell **cell_result) |
454 | { | 456 | { |
455 | int r; | 457 | int r; |
456 | struct dm_cell_key key; | 458 | struct dm_cell_key key; |
457 | 459 | ||
458 | build_key(oblock, &key); | 460 | build_key(oblock_begin, oblock_end, &key); |
459 | r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); | 461 | r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); |
460 | if (r) | 462 | if (r) |
461 | free_fn(free_context, cell_prealloc); | 463 | free_fn(free_context, cell_prealloc); |
@@ -463,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock, | |||
463 | return r; | 465 | return r; |
464 | } | 466 | } |
465 | 467 | ||
468 | static int bio_detain(struct cache *cache, dm_oblock_t oblock, | ||
469 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, | ||
470 | cell_free_fn free_fn, void *free_context, | ||
471 | struct dm_bio_prison_cell **cell_result) | ||
472 | { | ||
473 | dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); | ||
474 | return bio_detain_range(cache, oblock, end, bio, | ||
475 | cell_prealloc, free_fn, free_context, cell_result); | ||
476 | } | ||
477 | |||
466 | static int get_cell(struct cache *cache, | 478 | static int get_cell(struct cache *cache, |
467 | dm_oblock_t oblock, | 479 | dm_oblock_t oblock, |
468 | struct prealloc *structs, | 480 | struct prealloc *structs, |
@@ -474,7 +486,7 @@ static int get_cell(struct cache *cache, | |||
474 | 486 | ||
475 | cell_prealloc = prealloc_get_cell(structs); | 487 | cell_prealloc = prealloc_get_cell(structs); |
476 | 488 | ||
477 | build_key(oblock, &key); | 489 | build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key); |
478 | r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); | 490 | r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); |
479 | if (r) | 491 | if (r) |
480 | prealloc_put_cell(structs, cell_prealloc); | 492 | prealloc_put_cell(structs, cell_prealloc); |
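The hunk above widens struct dm_cell_key from a single block to a half-open [block_begin, block_end) range, keeping bio_detain() as a one-block wrapper around bio_detain_range(). A minimal sketch of the idea, assuming a simplified key type rather than the real dm-bio-prison structures:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative key covering the half-open block range [block_begin, block_end). */
struct range_key {
        uint64_t dev;
        uint64_t block_begin;
        uint64_t block_end;
};

/* Two keys conflict iff they are on the same device and their ranges overlap. */
static bool keys_overlap(const struct range_key *a, const struct range_key *b)
{
        return a->dev == b->dev &&
               a->block_begin < b->block_end &&
               b->block_begin < a->block_end;
}

/* A single-block detain is just a degenerate one-block range. */
static void build_single_block_key(uint64_t dev, uint64_t block,
                                   struct range_key *key)
{
        key->dev = dev;
        key->block_begin = block;
        key->block_end = block + 1;
}

With range keys a large discard can be held in one cell covering every origin block it touches, instead of being split into per-block cells.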
@@ -524,33 +536,57 @@ static dm_block_t block_div(dm_block_t b, uint32_t n) | |||
524 | return b; | 536 | return b; |
525 | } | 537 | } |
526 | 538 | ||
527 | static void set_discard(struct cache *cache, dm_oblock_t b) | 539 | static dm_block_t oblocks_per_dblock(struct cache *cache) |
540 | { | ||
541 | dm_block_t oblocks = cache->discard_block_size; | ||
542 | |||
543 | if (block_size_is_power_of_two(cache)) | ||
544 | oblocks >>= cache->sectors_per_block_shift; | ||
545 | else | ||
546 | oblocks = block_div(oblocks, cache->sectors_per_block); | ||
547 | |||
548 | return oblocks; | ||
549 | } | ||
550 | |||
551 | static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) | ||
552 | { | ||
553 | return to_dblock(block_div(from_oblock(oblock), | ||
554 | oblocks_per_dblock(cache))); | ||
555 | } | ||
556 | |||
557 | static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock) | ||
558 | { | ||
559 | return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache)); | ||
560 | } | ||
561 | |||
562 | static void set_discard(struct cache *cache, dm_dblock_t b) | ||
528 | { | 563 | { |
529 | unsigned long flags; | 564 | unsigned long flags; |
530 | 565 | ||
566 | BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks)); | ||
531 | atomic_inc(&cache->stats.discard_count); | 567 | atomic_inc(&cache->stats.discard_count); |
532 | 568 | ||
533 | spin_lock_irqsave(&cache->lock, flags); | 569 | spin_lock_irqsave(&cache->lock, flags); |
534 | set_bit(from_oblock(b), cache->discard_bitset); | 570 | set_bit(from_dblock(b), cache->discard_bitset); |
535 | spin_unlock_irqrestore(&cache->lock, flags); | 571 | spin_unlock_irqrestore(&cache->lock, flags); |
536 | } | 572 | } |
537 | 573 | ||
538 | static void clear_discard(struct cache *cache, dm_oblock_t b) | 574 | static void clear_discard(struct cache *cache, dm_dblock_t b) |
539 | { | 575 | { |
540 | unsigned long flags; | 576 | unsigned long flags; |
541 | 577 | ||
542 | spin_lock_irqsave(&cache->lock, flags); | 578 | spin_lock_irqsave(&cache->lock, flags); |
543 | clear_bit(from_oblock(b), cache->discard_bitset); | 579 | clear_bit(from_dblock(b), cache->discard_bitset); |
544 | spin_unlock_irqrestore(&cache->lock, flags); | 580 | spin_unlock_irqrestore(&cache->lock, flags); |
545 | } | 581 | } |
546 | 582 | ||
547 | static bool is_discarded(struct cache *cache, dm_oblock_t b) | 583 | static bool is_discarded(struct cache *cache, dm_dblock_t b) |
548 | { | 584 | { |
549 | int r; | 585 | int r; |
550 | unsigned long flags; | 586 | unsigned long flags; |
551 | 587 | ||
552 | spin_lock_irqsave(&cache->lock, flags); | 588 | spin_lock_irqsave(&cache->lock, flags); |
553 | r = test_bit(from_oblock(b), cache->discard_bitset); | 589 | r = test_bit(from_dblock(b), cache->discard_bitset); |
554 | spin_unlock_irqrestore(&cache->lock, flags); | 590 | spin_unlock_irqrestore(&cache->lock, flags); |
555 | 591 | ||
556 | return r; | 592 | return r; |
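With the discard bitset indexed by discard block (dblock) rather than origin block (oblock), the conversions above reduce to integer division and multiplication by the number of origin blocks per discard block. A standalone sketch with made-up block sizes (the sizes and the printf driver are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Both sizes are in 512-byte sectors; the discard block is a multiple of the cache block. */
static uint64_t oblocks_per_dblock(uint64_t discard_block_sectors,
                                   uint64_t cache_block_sectors)
{
        return discard_block_sectors / cache_block_sectors;
}

static uint64_t oblock_to_dblock(uint64_t oblock, uint64_t per)
{
        return oblock / per;            /* round down */
}

static uint64_t dblock_to_oblock(uint64_t dblock, uint64_t per)
{
        return dblock * per;            /* first origin block covered */
}

int main(void)
{
        /* e.g. 512-sector (256KiB) cache blocks, 4096-sector (2MiB) discard blocks */
        uint64_t per = oblocks_per_dblock(4096, 512);   /* 8 */

        printf("oblock 20 -> dblock %llu\n",
               (unsigned long long)oblock_to_dblock(20, per));          /* 2 */
        printf("dblock 2 starts at oblock %llu\n",
               (unsigned long long)dblock_to_oblock(2, per));           /* 16 */
        return 0;
}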
@@ -562,7 +598,8 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) | |||
562 | unsigned long flags; | 598 | unsigned long flags; |
563 | 599 | ||
564 | spin_lock_irqsave(&cache->lock, flags); | 600 | spin_lock_irqsave(&cache->lock, flags); |
565 | r = test_bit(from_oblock(b), cache->discard_bitset); | 601 | r = test_bit(from_dblock(oblock_to_dblock(cache, b)), |
602 | cache->discard_bitset); | ||
566 | spin_unlock_irqrestore(&cache->lock, flags); | 603 | spin_unlock_irqrestore(&cache->lock, flags); |
567 | 604 | ||
568 | return r; | 605 | return r; |
@@ -687,7 +724,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, | |||
687 | check_if_tick_bio_needed(cache, bio); | 724 | check_if_tick_bio_needed(cache, bio); |
688 | remap_to_origin(cache, bio); | 725 | remap_to_origin(cache, bio); |
689 | if (bio_data_dir(bio) == WRITE) | 726 | if (bio_data_dir(bio) == WRITE) |
690 | clear_discard(cache, oblock); | 727 | clear_discard(cache, oblock_to_dblock(cache, oblock)); |
691 | } | 728 | } |
692 | 729 | ||
693 | static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, | 730 | static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, |
@@ -697,7 +734,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, | |||
697 | remap_to_cache(cache, bio, cblock); | 734 | remap_to_cache(cache, bio, cblock); |
698 | if (bio_data_dir(bio) == WRITE) { | 735 | if (bio_data_dir(bio) == WRITE) { |
699 | set_dirty(cache, oblock, cblock); | 736 | set_dirty(cache, oblock, cblock); |
700 | clear_discard(cache, oblock); | 737 | clear_discard(cache, oblock_to_dblock(cache, oblock)); |
701 | } | 738 | } |
702 | } | 739 | } |
703 | 740 | ||
@@ -951,10 +988,14 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) | |||
951 | } | 988 | } |
952 | 989 | ||
953 | } else { | 990 | } else { |
954 | clear_dirty(cache, mg->new_oblock, mg->cblock); | 991 | if (mg->requeue_holder) { |
955 | if (mg->requeue_holder) | 992 | clear_dirty(cache, mg->new_oblock, mg->cblock); |
956 | cell_defer(cache, mg->new_ocell, true); | 993 | cell_defer(cache, mg->new_ocell, true); |
957 | else { | 994 | } else { |
995 | /* | ||
996 | * The block was promoted via an overwrite, so it's dirty. | ||
997 | */ | ||
998 | set_dirty(cache, mg->new_oblock, mg->cblock); | ||
958 | bio_endio(mg->new_ocell->holder, 0); | 999 | bio_endio(mg->new_ocell->holder, 0); |
959 | cell_defer(cache, mg->new_ocell, false); | 1000 | cell_defer(cache, mg->new_ocell, false); |
960 | } | 1001 | } |
@@ -978,7 +1019,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) | |||
978 | wake_worker(cache); | 1019 | wake_worker(cache); |
979 | } | 1020 | } |
980 | 1021 | ||
981 | static void issue_copy_real(struct dm_cache_migration *mg) | 1022 | static void issue_copy(struct dm_cache_migration *mg) |
982 | { | 1023 | { |
983 | int r; | 1024 | int r; |
984 | struct dm_io_region o_region, c_region; | 1025 | struct dm_io_region o_region, c_region; |
@@ -1057,11 +1098,46 @@ static void avoid_copy(struct dm_cache_migration *mg) | |||
1057 | migration_success_pre_commit(mg); | 1098 | migration_success_pre_commit(mg); |
1058 | } | 1099 | } |
1059 | 1100 | ||
1060 | static void issue_copy(struct dm_cache_migration *mg) | 1101 | static void calc_discard_block_range(struct cache *cache, struct bio *bio, |
1102 | dm_dblock_t *b, dm_dblock_t *e) | ||
1103 | { | ||
1104 | sector_t sb = bio->bi_iter.bi_sector; | ||
1105 | sector_t se = bio_end_sector(bio); | ||
1106 | |||
1107 | *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); | ||
1108 | |||
1109 | if (se - sb < cache->discard_block_size) | ||
1110 | *e = *b; | ||
1111 | else | ||
1112 | *e = to_dblock(block_div(se, cache->discard_block_size)); | ||
1113 | } | ||
1114 | |||
1115 | static void issue_discard(struct dm_cache_migration *mg) | ||
1116 | { | ||
1117 | dm_dblock_t b, e; | ||
1118 | struct bio *bio = mg->new_ocell->holder; | ||
1119 | |||
1120 | calc_discard_block_range(mg->cache, bio, &b, &e); | ||
1121 | while (b != e) { | ||
1122 | set_discard(mg->cache, b); | ||
1123 | b = to_dblock(from_dblock(b) + 1); | ||
1124 | } | ||
1125 | |||
1126 | bio_endio(bio, 0); | ||
1127 | cell_defer(mg->cache, mg->new_ocell, false); | ||
1128 | free_migration(mg); | ||
1129 | } | ||
1130 | |||
1131 | static void issue_copy_or_discard(struct dm_cache_migration *mg) | ||
1061 | { | 1132 | { |
1062 | bool avoid; | 1133 | bool avoid; |
1063 | struct cache *cache = mg->cache; | 1134 | struct cache *cache = mg->cache; |
1064 | 1135 | ||
1136 | if (mg->discard) { | ||
1137 | issue_discard(mg); | ||
1138 | return; | ||
1139 | } | ||
1140 | |||
1065 | if (mg->writeback || mg->demote) | 1141 | if (mg->writeback || mg->demote) |
1066 | avoid = !is_dirty(cache, mg->cblock) || | 1142 | avoid = !is_dirty(cache, mg->cblock) || |
1067 | is_discarded_oblock(cache, mg->old_oblock); | 1143 | is_discarded_oblock(cache, mg->old_oblock); |
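calc_discard_block_range() only selects discard blocks the bio covers completely: the start sector is rounded up to a discard-block boundary, the end sector is rounded down, and a bio too small to span a whole block yields an empty range (b == e), which process_discard_bio() below completes without marking anything. A userspace sketch of the arithmetic (the sector numbers and block size are made up):

#include <stdint.h>
#include <stdio.h>

static uint64_t div_up(uint64_t n, uint64_t d)
{
        return (n + d - 1) / d;
}

/*
 * Compute the half-open range [*b, *e) of discard blocks fully covered
 * by the sector range [sb, se).
 */
static void calc_range(uint64_t sb, uint64_t se, uint64_t block_sectors,
                       uint64_t *b, uint64_t *e)
{
        *b = div_up(sb, block_sectors);         /* round the start up */

        if (se - sb < block_sectors)
                *e = *b;                        /* too small: empty range */
        else
                *e = se / block_sectors;        /* round the end down */
}

int main(void)
{
        uint64_t b, e;

        /* 2048-sector discard blocks; a discard covering sectors [1000, 9000) */
        calc_range(1000, 9000, 2048, &b, &e);
        printf("blocks [%llu, %llu) are fully covered\n",
               (unsigned long long)b, (unsigned long long)e);   /* [1, 4) */
        return 0;
}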
@@ -1070,13 +1146,14 @@ static void issue_copy(struct dm_cache_migration *mg) | |||
1070 | 1146 | ||
1071 | avoid = is_discarded_oblock(cache, mg->new_oblock); | 1147 | avoid = is_discarded_oblock(cache, mg->new_oblock); |
1072 | 1148 | ||
1073 | if (!avoid && bio_writes_complete_block(cache, bio)) { | 1149 | if (writeback_mode(&cache->features) && |
1150 | !avoid && bio_writes_complete_block(cache, bio)) { | ||
1074 | issue_overwrite(mg, bio); | 1151 | issue_overwrite(mg, bio); |
1075 | return; | 1152 | return; |
1076 | } | 1153 | } |
1077 | } | 1154 | } |
1078 | 1155 | ||
1079 | avoid ? avoid_copy(mg) : issue_copy_real(mg); | 1156 | avoid ? avoid_copy(mg) : issue_copy(mg); |
1080 | } | 1157 | } |
1081 | 1158 | ||
1082 | static void complete_migration(struct dm_cache_migration *mg) | 1159 | static void complete_migration(struct dm_cache_migration *mg) |
@@ -1161,6 +1238,7 @@ static void promote(struct cache *cache, struct prealloc *structs, | |||
1161 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1238 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1162 | 1239 | ||
1163 | mg->err = false; | 1240 | mg->err = false; |
1241 | mg->discard = false; | ||
1164 | mg->writeback = false; | 1242 | mg->writeback = false; |
1165 | mg->demote = false; | 1243 | mg->demote = false; |
1166 | mg->promote = true; | 1244 | mg->promote = true; |
@@ -1184,6 +1262,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, | |||
1184 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1262 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1185 | 1263 | ||
1186 | mg->err = false; | 1264 | mg->err = false; |
1265 | mg->discard = false; | ||
1187 | mg->writeback = true; | 1266 | mg->writeback = true; |
1188 | mg->demote = false; | 1267 | mg->demote = false; |
1189 | mg->promote = false; | 1268 | mg->promote = false; |
@@ -1209,6 +1288,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, | |||
1209 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1288 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1210 | 1289 | ||
1211 | mg->err = false; | 1290 | mg->err = false; |
1291 | mg->discard = false; | ||
1212 | mg->writeback = false; | 1292 | mg->writeback = false; |
1213 | mg->demote = true; | 1293 | mg->demote = true; |
1214 | mg->promote = true; | 1294 | mg->promote = true; |
@@ -1237,6 +1317,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs, | |||
1237 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1317 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1238 | 1318 | ||
1239 | mg->err = false; | 1319 | mg->err = false; |
1320 | mg->discard = false; | ||
1240 | mg->writeback = false; | 1321 | mg->writeback = false; |
1241 | mg->demote = true; | 1322 | mg->demote = true; |
1242 | mg->promote = false; | 1323 | mg->promote = false; |
@@ -1253,6 +1334,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs, | |||
1253 | quiesce_migration(mg); | 1334 | quiesce_migration(mg); |
1254 | } | 1335 | } |
1255 | 1336 | ||
1337 | static void discard(struct cache *cache, struct prealloc *structs, | ||
1338 | struct dm_bio_prison_cell *cell) | ||
1339 | { | ||
1340 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | ||
1341 | |||
1342 | mg->err = false; | ||
1343 | mg->discard = true; | ||
1344 | mg->writeback = false; | ||
1345 | mg->demote = false; | ||
1346 | mg->promote = false; | ||
1347 | mg->requeue_holder = false; | ||
1348 | mg->invalidate = false; | ||
1349 | mg->cache = cache; | ||
1350 | mg->old_ocell = NULL; | ||
1351 | mg->new_ocell = cell; | ||
1352 | mg->start_jiffies = jiffies; | ||
1353 | |||
1354 | quiesce_migration(mg); | ||
1355 | } | ||
1356 | |||
1256 | /*---------------------------------------------------------------- | 1357 | /*---------------------------------------------------------------- |
1257 | * bio processing | 1358 | * bio processing |
1258 | *--------------------------------------------------------------*/ | 1359 | *--------------------------------------------------------------*/ |
@@ -1286,31 +1387,27 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) | |||
1286 | issue(cache, bio); | 1387 | issue(cache, bio); |
1287 | } | 1388 | } |
1288 | 1389 | ||
1289 | /* | 1390 | static void process_discard_bio(struct cache *cache, struct prealloc *structs, |
1290 | * People generally discard large parts of a device, eg, the whole device | 1391 | struct bio *bio) |
1291 | * when formatting. Splitting these large discards up into cache block | ||
1292 | * sized ios and then quiescing (always neccessary for discard) takes too | ||
1293 | * long. | ||
1294 | * | ||
1295 | * We keep it simple, and allow any size of discard to come in, and just | ||
1296 | * mark off blocks on the discard bitset. No passdown occurs! | ||
1297 | * | ||
1298 | * To implement passdown we need to change the bio_prison such that a cell | ||
1299 | * can have a key that spans many blocks. | ||
1300 | */ | ||
1301 | static void process_discard_bio(struct cache *cache, struct bio *bio) | ||
1302 | { | 1392 | { |
1303 | dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, | 1393 | int r; |
1304 | cache->sectors_per_block); | 1394 | dm_dblock_t b, e; |
1305 | dm_block_t end_block = bio_end_sector(bio); | 1395 | struct dm_bio_prison_cell *cell_prealloc, *new_ocell; |
1306 | dm_block_t b; | ||
1307 | 1396 | ||
1308 | end_block = block_div(end_block, cache->sectors_per_block); | 1397 | calc_discard_block_range(cache, bio, &b, &e); |
1398 | if (b == e) { | ||
1399 | bio_endio(bio, 0); | ||
1400 | return; | ||
1401 | } | ||
1309 | 1402 | ||
1310 | for (b = start_block; b < end_block; b++) | 1403 | cell_prealloc = prealloc_get_cell(structs); |
1311 | set_discard(cache, to_oblock(b)); | 1404 | r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc, |
1405 | (cell_free_fn) prealloc_put_cell, | ||
1406 | structs, &new_ocell); | ||
1407 | if (r > 0) | ||
1408 | return; | ||
1312 | 1409 | ||
1313 | bio_endio(bio, 0); | 1410 | discard(cache, structs, new_ocell); |
1314 | } | 1411 | } |
1315 | 1412 | ||
1316 | static bool spare_migration_bandwidth(struct cache *cache) | 1413 | static bool spare_migration_bandwidth(struct cache *cache) |
@@ -1340,9 +1437,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1340 | dm_oblock_t block = get_bio_block(cache, bio); | 1437 | dm_oblock_t block = get_bio_block(cache, bio); |
1341 | struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; | 1438 | struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; |
1342 | struct policy_result lookup_result; | 1439 | struct policy_result lookup_result; |
1343 | bool discarded_block = is_discarded_oblock(cache, block); | ||
1344 | bool passthrough = passthrough_mode(&cache->features); | 1440 | bool passthrough = passthrough_mode(&cache->features); |
1345 | bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); | 1441 | bool discarded_block, can_migrate; |
1346 | 1442 | ||
1347 | /* | 1443 | /* |
1348 | * Check to see if that block is currently migrating. | 1444 | * Check to see if that block is currently migrating. |
@@ -1354,6 +1450,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1354 | if (r > 0) | 1450 | if (r > 0) |
1355 | return; | 1451 | return; |
1356 | 1452 | ||
1453 | discarded_block = is_discarded_oblock(cache, block); | ||
1454 | can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); | ||
1455 | |||
1357 | r = policy_map(cache->policy, block, true, can_migrate, discarded_block, | 1456 | r = policy_map(cache->policy, block, true, can_migrate, discarded_block, |
1358 | bio, &lookup_result); | 1457 | bio, &lookup_result); |
1359 | 1458 | ||
@@ -1500,7 +1599,7 @@ static void process_deferred_bios(struct cache *cache) | |||
1500 | if (bio->bi_rw & REQ_FLUSH) | 1599 | if (bio->bi_rw & REQ_FLUSH) |
1501 | process_flush_bio(cache, bio); | 1600 | process_flush_bio(cache, bio); |
1502 | else if (bio->bi_rw & REQ_DISCARD) | 1601 | else if (bio->bi_rw & REQ_DISCARD) |
1503 | process_discard_bio(cache, bio); | 1602 | process_discard_bio(cache, &structs, bio); |
1504 | else | 1603 | else |
1505 | process_bio(cache, &structs, bio); | 1604 | process_bio(cache, &structs, bio); |
1506 | } | 1605 | } |
@@ -1715,7 +1814,7 @@ static void do_worker(struct work_struct *ws) | |||
1715 | process_invalidation_requests(cache); | 1814 | process_invalidation_requests(cache); |
1716 | } | 1815 | } |
1717 | 1816 | ||
1718 | process_migrations(cache, &cache->quiesced_migrations, issue_copy); | 1817 | process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard); |
1719 | process_migrations(cache, &cache->completed_migrations, complete_migration); | 1818 | process_migrations(cache, &cache->completed_migrations, complete_migration); |
1720 | 1819 | ||
1721 | if (commit_if_needed(cache)) { | 1820 | if (commit_if_needed(cache)) { |
@@ -2180,6 +2279,45 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, | |||
2180 | return 0; | 2279 | return 0; |
2181 | } | 2280 | } |
2182 | 2281 | ||
2282 | /* | ||
2283 | * We want the discard block size to be at least the size of the cache | ||
2284 | * block size and have no more than 2^14 discard blocks across the origin. | ||
2285 | */ | ||
2286 | #define MAX_DISCARD_BLOCKS (1 << 14) | ||
2287 | |||
2288 | static bool too_many_discard_blocks(sector_t discard_block_size, | ||
2289 | sector_t origin_size) | ||
2290 | { | ||
2291 | (void) sector_div(origin_size, discard_block_size); | ||
2292 | |||
2293 | return origin_size > MAX_DISCARD_BLOCKS; | ||
2294 | } | ||
2295 | |||
2296 | static sector_t calculate_discard_block_size(sector_t cache_block_size, | ||
2297 | sector_t origin_size) | ||
2298 | { | ||
2299 | sector_t discard_block_size = cache_block_size; | ||
2300 | |||
2301 | if (origin_size) | ||
2302 | while (too_many_discard_blocks(discard_block_size, origin_size)) | ||
2303 | discard_block_size *= 2; | ||
2304 | |||
2305 | return discard_block_size; | ||
2306 | } | ||
2307 | |||
2308 | static void set_cache_size(struct cache *cache, dm_cblock_t size) | ||
2309 | { | ||
2310 | dm_block_t nr_blocks = from_cblock(size); | ||
2311 | |||
2312 | if (nr_blocks > (1 << 20) && cache->cache_size != size) | ||
2313 | DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n" | ||
2314 | "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n" | ||
2315 | "Please consider increasing the cache block size to reduce the overall cache block count.", | ||
2316 | (unsigned long long) nr_blocks); | ||
2317 | |||
2318 | cache->cache_size = size; | ||
2319 | } | ||
2320 | |||
2183 | #define DEFAULT_MIGRATION_THRESHOLD 2048 | 2321 | #define DEFAULT_MIGRATION_THRESHOLD 2048 |
2184 | 2322 | ||
2185 | static int cache_create(struct cache_args *ca, struct cache **result) | 2323 | static int cache_create(struct cache_args *ca, struct cache **result) |
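calculate_discard_block_size() starts at the cache block size and doubles until the origin is covered by at most 2^14 discard blocks, bounding both the in-core discard bitset and the on-disk metadata. A worked example of the same loop in userspace (the device sizes are made up):

#include <stdint.h>
#include <stdio.h>

#define MAX_DISCARD_BLOCKS (1 << 14)

static int too_many_discard_blocks(uint64_t discard_block_size, uint64_t origin_size)
{
        return origin_size / discard_block_size > MAX_DISCARD_BLOCKS;
}

static uint64_t calculate_discard_block_size(uint64_t cache_block_size,
                                             uint64_t origin_size)
{
        uint64_t discard_block_size = cache_block_size;

        if (origin_size)
                while (too_many_discard_blocks(discard_block_size, origin_size))
                        discard_block_size *= 2;

        return discard_block_size;
}

int main(void)
{
        /* a 1TiB origin (2^31 sectors) with 512-sector (256KiB) cache blocks */
        uint64_t size = calculate_discard_block_size(512, 1ULL << 31);

        /* 2^31 / 2^14 = 2^17 sectors, i.e. 64MiB per discard block */
        printf("discard block size: %llu sectors\n", (unsigned long long)size);
        return 0;
}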
@@ -2204,8 +2342,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2204 | ti->num_discard_bios = 1; | 2342 | ti->num_discard_bios = 1; |
2205 | ti->discards_supported = true; | 2343 | ti->discards_supported = true; |
2206 | ti->discard_zeroes_data_unsupported = true; | 2344 | ti->discard_zeroes_data_unsupported = true; |
2207 | /* Discard bios must be split on a block boundary */ | 2345 | ti->split_discard_bios = false; |
2208 | ti->split_discard_bios = true; | ||
2209 | 2346 | ||
2210 | cache->features = ca->features; | 2347 | cache->features = ca->features; |
2211 | ti->per_bio_data_size = get_per_bio_data_size(cache); | 2348 | ti->per_bio_data_size = get_per_bio_data_size(cache); |
@@ -2235,10 +2372,10 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2235 | 2372 | ||
2236 | cache->sectors_per_block_shift = -1; | 2373 | cache->sectors_per_block_shift = -1; |
2237 | cache_size = block_div(cache_size, ca->block_size); | 2374 | cache_size = block_div(cache_size, ca->block_size); |
2238 | cache->cache_size = to_cblock(cache_size); | 2375 | set_cache_size(cache, to_cblock(cache_size)); |
2239 | } else { | 2376 | } else { |
2240 | cache->sectors_per_block_shift = __ffs(ca->block_size); | 2377 | cache->sectors_per_block_shift = __ffs(ca->block_size); |
2241 | cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); | 2378 | set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift)); |
2242 | } | 2379 | } |
2243 | 2380 | ||
2244 | r = create_cache_policy(cache, ca, error); | 2381 | r = create_cache_policy(cache, ca, error); |
@@ -2303,13 +2440,17 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2303 | } | 2440 | } |
2304 | clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); | 2441 | clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); |
2305 | 2442 | ||
2306 | cache->discard_nr_blocks = cache->origin_blocks; | 2443 | cache->discard_block_size = |
2307 | cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks)); | 2444 | calculate_discard_block_size(cache->sectors_per_block, |
2445 | cache->origin_sectors); | ||
2446 | cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors, | ||
2447 | cache->discard_block_size)); | ||
2448 | cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); | ||
2308 | if (!cache->discard_bitset) { | 2449 | if (!cache->discard_bitset) { |
2309 | *error = "could not allocate discard bitset"; | 2450 | *error = "could not allocate discard bitset"; |
2310 | goto bad; | 2451 | goto bad; |
2311 | } | 2452 | } |
2312 | clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks)); | 2453 | clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); |
2313 | 2454 | ||
2314 | cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); | 2455 | cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); |
2315 | if (IS_ERR(cache->copier)) { | 2456 | if (IS_ERR(cache->copier)) { |
@@ -2327,7 +2468,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2327 | INIT_DELAYED_WORK(&cache->waker, do_waker); | 2468 | INIT_DELAYED_WORK(&cache->waker, do_waker); |
2328 | cache->last_commit_jiffies = jiffies; | 2469 | cache->last_commit_jiffies = jiffies; |
2329 | 2470 | ||
2330 | cache->prison = dm_bio_prison_create(PRISON_CELLS); | 2471 | cache->prison = dm_bio_prison_create(); |
2331 | if (!cache->prison) { | 2472 | if (!cache->prison) { |
2332 | *error = "could not create bio prison"; | 2473 | *error = "could not create bio prison"; |
2333 | goto bad; | 2474 | goto bad; |
@@ -2549,11 +2690,11 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso | |||
2549 | static int cache_map(struct dm_target *ti, struct bio *bio) | 2690 | static int cache_map(struct dm_target *ti, struct bio *bio) |
2550 | { | 2691 | { |
2551 | int r; | 2692 | int r; |
2552 | struct dm_bio_prison_cell *cell; | 2693 | struct dm_bio_prison_cell *cell = NULL; |
2553 | struct cache *cache = ti->private; | 2694 | struct cache *cache = ti->private; |
2554 | 2695 | ||
2555 | r = __cache_map(cache, bio, &cell); | 2696 | r = __cache_map(cache, bio, &cell); |
2556 | if (r == DM_MAPIO_REMAPPED) { | 2697 | if (r == DM_MAPIO_REMAPPED && cell) { |
2557 | inc_ds(cache, bio, cell); | 2698 | inc_ds(cache, bio, cell); |
2558 | cell_defer(cache, cell, false); | 2699 | cell_defer(cache, cell, false); |
2559 | } | 2700 | } |
@@ -2599,16 +2740,16 @@ static int write_discard_bitset(struct cache *cache) | |||
2599 | { | 2740 | { |
2600 | unsigned i, r; | 2741 | unsigned i, r; |
2601 | 2742 | ||
2602 | r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block, | 2743 | r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, |
2603 | cache->origin_blocks); | 2744 | cache->discard_nr_blocks); |
2604 | if (r) { | 2745 | if (r) { |
2605 | DMERR("could not resize on-disk discard bitset"); | 2746 | DMERR("could not resize on-disk discard bitset"); |
2606 | return r; | 2747 | return r; |
2607 | } | 2748 | } |
2608 | 2749 | ||
2609 | for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) { | 2750 | for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { |
2610 | r = dm_cache_set_discard(cache->cmd, to_oblock(i), | 2751 | r = dm_cache_set_discard(cache->cmd, to_dblock(i), |
2611 | is_discarded(cache, to_oblock(i))); | 2752 | is_discarded(cache, to_dblock(i))); |
2612 | if (r) | 2753 | if (r) |
2613 | return r; | 2754 | return r; |
2614 | } | 2755 | } |
@@ -2680,15 +2821,86 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, | |||
2680 | return 0; | 2821 | return 0; |
2681 | } | 2822 | } |
2682 | 2823 | ||
2824 | /* | ||
2825 | * The discard block size in the on disk metadata is not | ||
2826 | * necessarily the same as we're currently using. So we have to | ||
2827 | * be careful to only set the discarded attribute if we know it | ||
2828 | * covers a complete block of the new size. | ||
2829 | */ | ||
2830 | struct discard_load_info { | ||
2831 | struct cache *cache; | ||
2832 | |||
2833 | /* | ||
2834 | * These blocks are sized using the on disk dblock size, rather | ||
2835 | * than the current one. | ||
2836 | */ | ||
2837 | dm_block_t block_size; | ||
2838 | dm_block_t discard_begin, discard_end; | ||
2839 | }; | ||
2840 | |||
2841 | static void discard_load_info_init(struct cache *cache, | ||
2842 | struct discard_load_info *li) | ||
2843 | { | ||
2844 | li->cache = cache; | ||
2845 | li->discard_begin = li->discard_end = 0; | ||
2846 | } | ||
2847 | |||
2848 | static void set_discard_range(struct discard_load_info *li) | ||
2849 | { | ||
2850 | sector_t b, e; | ||
2851 | |||
2852 | if (li->discard_begin == li->discard_end) | ||
2853 | return; | ||
2854 | |||
2855 | /* | ||
2856 | * Convert to sectors. | ||
2857 | */ | ||
2858 | b = li->discard_begin * li->block_size; | ||
2859 | e = li->discard_end * li->block_size; | ||
2860 | |||
2861 | /* | ||
2862 | * Then convert back to the current dblock size. | ||
2863 | */ | ||
2864 | b = dm_sector_div_up(b, li->cache->discard_block_size); | ||
2865 | sector_div(e, li->cache->discard_block_size); | ||
2866 | |||
2867 | /* | ||
2868 | * The origin may have shrunk, so we need to check we're still in | ||
2869 | * bounds. | ||
2870 | */ | ||
2871 | if (e > from_dblock(li->cache->discard_nr_blocks)) | ||
2872 | e = from_dblock(li->cache->discard_nr_blocks); | ||
2873 | |||
2874 | for (; b < e; b++) | ||
2875 | set_discard(li->cache, to_dblock(b)); | ||
2876 | } | ||
2877 | |||
2683 | static int load_discard(void *context, sector_t discard_block_size, | 2878 | static int load_discard(void *context, sector_t discard_block_size, |
2684 | dm_oblock_t oblock, bool discard) | 2879 | dm_dblock_t dblock, bool discard) |
2685 | { | 2880 | { |
2686 | struct cache *cache = context; | 2881 | struct discard_load_info *li = context; |
2687 | 2882 | ||
2688 | if (discard) | 2883 | li->block_size = discard_block_size; |
2689 | set_discard(cache, oblock); | 2884 | |
2690 | else | 2885 | if (discard) { |
2691 | clear_discard(cache, oblock); | 2886 | if (from_dblock(dblock) == li->discard_end) |
2887 | /* | ||
2888 | * We're already in a discard range, just extend it. | ||
2889 | */ | ||
2890 | li->discard_end = li->discard_end + 1ULL; | ||
2891 | |||
2892 | else { | ||
2893 | /* | ||
2894 | * Emit the old range and start a new one. | ||
2895 | */ | ||
2896 | set_discard_range(li); | ||
2897 | li->discard_begin = from_dblock(dblock); | ||
2898 | li->discard_end = li->discard_begin + 1ULL; | ||
2899 | } | ||
2900 | } else { | ||
2901 | set_discard_range(li); | ||
2902 | li->discard_begin = li->discard_end = 0; | ||
2903 | } | ||
2692 | 2904 | ||
2693 | return 0; | 2905 | return 0; |
2694 | } | 2906 | } |
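load_discard() coalesces consecutive discarded dblocks into [discard_begin, discard_end) runs and only converts a run when it closes; set_discard_range() then goes via sectors, rounding the start up and the end down, so a block in the current geometry is only marked when old-format discards cover it completely. A compact sketch of the run coalescing (the callback shape and the sample bitmap are simplified for illustration):

#include <stdint.h>
#include <stdio.h>

struct run_state {
        uint64_t begin, end;    /* current run of discarded blocks, half-open */
};

static void emit_run(struct run_state *rs)
{
        if (rs->begin != rs->end)
                printf("discarded run: [%llu, %llu)\n",
                       (unsigned long long)rs->begin,
                       (unsigned long long)rs->end);
}

/* Feed one (block, discarded?) pair, in ascending block order. */
static void feed(struct run_state *rs, uint64_t block, int discarded)
{
        if (discarded) {
                if (block == rs->end) {
                        rs->end++;              /* extend the current run */
                } else {
                        emit_run(rs);           /* flush and start a new run */
                        rs->begin = block;
                        rs->end = block + 1;
                }
        } else {
                emit_run(rs);
                rs->begin = rs->end = 0;
        }
}

int main(void)
{
        struct run_state rs = { 0, 0 };
        int bits[] = { 1, 1, 1, 0, 0, 1, 1 };
        uint64_t b;

        for (b = 0; b < sizeof(bits) / sizeof(bits[0]); b++)
                feed(&rs, b, bits[b]);
        emit_run(&rs);                          /* flush the final run */
        return 0;
}

This prints the runs [0, 3) and [5, 7); in the driver each emitted run is what set_discard_range() translates into the current discard block size.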
@@ -2730,7 +2942,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) | |||
2730 | return r; | 2942 | return r; |
2731 | } | 2943 | } |
2732 | 2944 | ||
2733 | cache->cache_size = new_size; | 2945 | set_cache_size(cache, new_size); |
2734 | 2946 | ||
2735 | return 0; | 2947 | return 0; |
2736 | } | 2948 | } |
@@ -2772,11 +2984,22 @@ static int cache_preresume(struct dm_target *ti) | |||
2772 | } | 2984 | } |
2773 | 2985 | ||
2774 | if (!cache->loaded_discards) { | 2986 | if (!cache->loaded_discards) { |
2775 | r = dm_cache_load_discards(cache->cmd, load_discard, cache); | 2987 | struct discard_load_info li; |
2988 | |||
2989 | /* | ||
2990 | * The discard bitset could have been resized, or the | ||
2991 | * discard block size changed. To be safe we start by | ||
2992 | * setting every dblock to not discarded. | ||
2993 | */ | ||
2994 | clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); | ||
2995 | |||
2996 | discard_load_info_init(cache, &li); | ||
2997 | r = dm_cache_load_discards(cache->cmd, load_discard, &li); | ||
2776 | if (r) { | 2998 | if (r) { |
2777 | DMERR("could not load origin discards"); | 2999 | DMERR("could not load origin discards"); |
2778 | return r; | 3000 | return r; |
2779 | } | 3001 | } |
3002 | set_discard_range(&li); | ||
2780 | 3003 | ||
2781 | cache->loaded_discards = true; | 3004 | cache->loaded_discards = true; |
2782 | } | 3005 | } |
@@ -3079,8 +3302,9 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) | |||
3079 | /* | 3302 | /* |
3080 | * FIXME: these limits may be incompatible with the cache device | 3303 | * FIXME: these limits may be incompatible with the cache device |
3081 | */ | 3304 | */ |
3082 | limits->max_discard_sectors = cache->sectors_per_block; | 3305 | limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, |
3083 | limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT; | 3306 | cache->origin_sectors); |
3307 | limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; | ||
3084 | } | 3308 | } |
3085 | 3309 | ||
3086 | static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | 3310 | static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) |
@@ -3104,7 +3328,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3104 | 3328 | ||
3105 | static struct target_type cache_target = { | 3329 | static struct target_type cache_target = { |
3106 | .name = "cache", | 3330 | .name = "cache", |
3107 | .version = {1, 5, 0}, | 3331 | .version = {1, 6, 0}, |
3108 | .module = THIS_MODULE, | 3332 | .module = THIS_MODULE, |
3109 | .ctr = cache_ctr, | 3333 | .ctr = cache_ctr, |
3110 | .dtr = cache_dtr, | 3334 | .dtr = cache_dtr, |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index fc93b9330af4..08981be7baa1 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -705,7 +705,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, | |||
705 | for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++) | 705 | for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++) |
706 | crypto_xor(data + i * 8, buf, 8); | 706 | crypto_xor(data + i * 8, buf, 8); |
707 | out: | 707 | out: |
708 | memset(buf, 0, sizeof(buf)); | 708 | memzero_explicit(buf, sizeof(buf)); |
709 | return r; | 709 | return r; |
710 | } | 710 | } |
711 | 711 | ||
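The dm-crypt hunk replaces memset() with memzero_explicit() when scrubbing the on-stack whitening buffer: a memset of a local that is never read again is a dead store the compiler may delete, whereas memzero_explicit() is guaranteed to survive optimisation. A userspace-flavoured sketch of the same idea; the barrier-based helper below is an illustration, not the kernel's implementation:

#include <string.h>

/*
 * Keep the zeroing from being optimised away by telling the compiler
 * the buffer may still be read through the barrier.
 */
static void zero_explicit(void *p, size_t n)
{
        memset(p, 0, n);
        __asm__ __volatile__("" : : "r"(p) : "memory");
}

static void use_secret(void)
{
        unsigned char key[16] = "0123456789abcde";

        /* ... derive/whiten with key ... */

        zero_explicit(key, sizeof(key));        /* a bare memset() could be elided here */
}

int main(void)
{
        use_secret();
        return 0;
}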
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0be9381365d7..73f791bb9ea4 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) | |||
684 | int srcu_idx; | 684 | int srcu_idx; |
685 | 685 | ||
686 | param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | | 686 | param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | |
687 | DM_ACTIVE_PRESENT_FLAG); | 687 | DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG); |
688 | 688 | ||
689 | if (dm_suspended_md(md)) | 689 | if (dm_suspended_md(md)) |
690 | param->flags |= DM_SUSPEND_FLAG; | 690 | param->flags |= DM_SUSPEND_FLAG; |
691 | 691 | ||
692 | if (dm_suspended_internally_md(md)) | ||
693 | param->flags |= DM_INTERNAL_SUSPEND_FLAG; | ||
694 | |||
692 | if (dm_test_deferred_remove_flag(md)) | 695 | if (dm_test_deferred_remove_flag(md)) |
693 | param->flags |= DM_DEFERRED_REMOVE; | 696 | param->flags |= DM_DEFERRED_REMOVE; |
694 | 697 | ||
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 87f86c77b094..f478a4c96d2f 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md, | |||
824 | return 1; | 824 | return 1; |
825 | 825 | ||
826 | id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data, | 826 | id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data, |
827 | dm_internal_suspend, dm_internal_resume, md); | 827 | dm_internal_suspend_fast, dm_internal_resume_fast, md); |
828 | if (id < 0) | 828 | if (id < 0) |
829 | return id; | 829 | return id; |
830 | 830 | ||
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index b2bd1ebf4562..3afae9e062f8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1521,18 +1521,32 @@ fmode_t dm_table_get_mode(struct dm_table *t) | |||
1521 | } | 1521 | } |
1522 | EXPORT_SYMBOL(dm_table_get_mode); | 1522 | EXPORT_SYMBOL(dm_table_get_mode); |
1523 | 1523 | ||
1524 | static void suspend_targets(struct dm_table *t, unsigned postsuspend) | 1524 | enum suspend_mode { |
1525 | PRESUSPEND, | ||
1526 | PRESUSPEND_UNDO, | ||
1527 | POSTSUSPEND, | ||
1528 | }; | ||
1529 | |||
1530 | static void suspend_targets(struct dm_table *t, enum suspend_mode mode) | ||
1525 | { | 1531 | { |
1526 | int i = t->num_targets; | 1532 | int i = t->num_targets; |
1527 | struct dm_target *ti = t->targets; | 1533 | struct dm_target *ti = t->targets; |
1528 | 1534 | ||
1529 | while (i--) { | 1535 | while (i--) { |
1530 | if (postsuspend) { | 1536 | switch (mode) { |
1537 | case PRESUSPEND: | ||
1538 | if (ti->type->presuspend) | ||
1539 | ti->type->presuspend(ti); | ||
1540 | break; | ||
1541 | case PRESUSPEND_UNDO: | ||
1542 | if (ti->type->presuspend_undo) | ||
1543 | ti->type->presuspend_undo(ti); | ||
1544 | break; | ||
1545 | case POSTSUSPEND: | ||
1531 | if (ti->type->postsuspend) | 1546 | if (ti->type->postsuspend) |
1532 | ti->type->postsuspend(ti); | 1547 | ti->type->postsuspend(ti); |
1533 | } else if (ti->type->presuspend) | 1548 | break; |
1534 | ti->type->presuspend(ti); | 1549 | } |
1535 | |||
1536 | ti++; | 1550 | ti++; |
1537 | } | 1551 | } |
1538 | } | 1552 | } |
@@ -1542,7 +1556,15 @@ void dm_table_presuspend_targets(struct dm_table *t) | |||
1542 | if (!t) | 1556 | if (!t) |
1543 | return; | 1557 | return; |
1544 | 1558 | ||
1545 | suspend_targets(t, 0); | 1559 | suspend_targets(t, PRESUSPEND); |
1560 | } | ||
1561 | |||
1562 | void dm_table_presuspend_undo_targets(struct dm_table *t) | ||
1563 | { | ||
1564 | if (!t) | ||
1565 | return; | ||
1566 | |||
1567 | suspend_targets(t, PRESUSPEND_UNDO); | ||
1546 | } | 1568 | } |
1547 | 1569 | ||
1548 | void dm_table_postsuspend_targets(struct dm_table *t) | 1570 | void dm_table_postsuspend_targets(struct dm_table *t) |
@@ -1550,7 +1572,7 @@ void dm_table_postsuspend_targets(struct dm_table *t) | |||
1550 | if (!t) | 1572 | if (!t) |
1551 | return; | 1573 | return; |
1552 | 1574 | ||
1553 | suspend_targets(t, 1); | 1575 | suspend_targets(t, POSTSUSPEND); |
1554 | } | 1576 | } |
1555 | 1577 | ||
1556 | int dm_table_resume_targets(struct dm_table *t) | 1578 | int dm_table_resume_targets(struct dm_table *t) |
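Splitting suspend_targets() over an explicit enum adds a PRESUSPEND_UNDO step so a failed suspend can be rolled back: presuspend every target, and if the device cannot then be quiesced, let each target undo its presuspend work instead of leaving it half-suspended. A minimal sketch of that call pattern, assuming a simplified target_ops layout rather than the real dm core API:

#include <stdbool.h>
#include <stddef.h>

struct target_ops {
        void (*presuspend)(void *ctx);
        void (*presuspend_undo)(void *ctx);
        void (*postsuspend)(void *ctx);
};

struct target {
        const struct target_ops *ops;
        void *ctx;
};

/* Stand-in for freezing/flushing the device after presuspend. */
static bool quiesce(void)
{
        return true;
}

static bool suspend_all(struct target *t, size_t n)
{
        size_t i;

        for (i = 0; i < n; i++)
                if (t[i].ops->presuspend)
                        t[i].ops->presuspend(t[i].ctx);

        if (!quiesce()) {
                /* Roll back: give every target its presuspend_undo hook. */
                for (i = 0; i < n; i++)
                        if (t[i].ops->presuspend_undo)
                                t[i].ops->presuspend_undo(t[i].ctx);
                return false;
        }

        for (i = 0; i < n; i++)
                if (t[i].ops->postsuspend)
                        t[i].ops->postsuspend(t[i].ctx);
        return true;
}

int main(void)
{
        struct target_ops noop = { 0 };
        struct target t[2] = { { &noop, NULL }, { &noop, NULL } };

        return suspend_all(t, 2) ? 0 : 1;
}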
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index e9d33ad59df5..43adbb863f5a 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1384,42 +1384,38 @@ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) | |||
1384 | } | 1384 | } |
1385 | 1385 | ||
1386 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, | 1386 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, |
1387 | int can_block, struct dm_thin_lookup_result *result) | 1387 | int can_issue_io, struct dm_thin_lookup_result *result) |
1388 | { | 1388 | { |
1389 | int r = -EINVAL; | 1389 | int r; |
1390 | uint64_t block_time = 0; | ||
1391 | __le64 value; | 1390 | __le64 value; |
1392 | struct dm_pool_metadata *pmd = td->pmd; | 1391 | struct dm_pool_metadata *pmd = td->pmd; |
1393 | dm_block_t keys[2] = { td->id, block }; | 1392 | dm_block_t keys[2] = { td->id, block }; |
1394 | struct dm_btree_info *info; | 1393 | struct dm_btree_info *info; |
1395 | 1394 | ||
1396 | if (can_block) { | ||
1397 | down_read(&pmd->root_lock); | ||
1398 | info = &pmd->info; | ||
1399 | } else if (down_read_trylock(&pmd->root_lock)) | ||
1400 | info = &pmd->nb_info; | ||
1401 | else | ||
1402 | return -EWOULDBLOCK; | ||
1403 | |||
1404 | if (pmd->fail_io) | 1395 | if (pmd->fail_io) |
1405 | goto out; | 1396 | return -EINVAL; |
1406 | 1397 | ||
1407 | r = dm_btree_lookup(info, pmd->root, keys, &value); | 1398 | down_read(&pmd->root_lock); |
1408 | if (!r) | ||
1409 | block_time = le64_to_cpu(value); | ||
1410 | 1399 | ||
1411 | out: | 1400 | if (can_issue_io) { |
1412 | up_read(&pmd->root_lock); | 1401 | info = &pmd->info; |
1402 | } else | ||
1403 | info = &pmd->nb_info; | ||
1413 | 1404 | ||
1405 | r = dm_btree_lookup(info, pmd->root, keys, &value); | ||
1414 | if (!r) { | 1406 | if (!r) { |
1407 | uint64_t block_time = 0; | ||
1415 | dm_block_t exception_block; | 1408 | dm_block_t exception_block; |
1416 | uint32_t exception_time; | 1409 | uint32_t exception_time; |
1410 | |||
1411 | block_time = le64_to_cpu(value); | ||
1417 | unpack_block_time(block_time, &exception_block, | 1412 | unpack_block_time(block_time, &exception_block, |
1418 | &exception_time); | 1413 | &exception_time); |
1419 | result->block = exception_block; | 1414 | result->block = exception_block; |
1420 | result->shared = __snapshotted_since(td, exception_time); | 1415 | result->shared = __snapshotted_since(td, exception_time); |
1421 | } | 1416 | } |
1422 | 1417 | ||
1418 | up_read(&pmd->root_lock); | ||
1423 | return r; | 1419 | return r; |
1424 | } | 1420 | } |
1425 | 1421 | ||
@@ -1813,3 +1809,8 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) | |||
1813 | 1809 | ||
1814 | return needs_check; | 1810 | return needs_check; |
1815 | } | 1811 | } |
1812 | |||
1813 | void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd) | ||
1814 | { | ||
1815 | dm_tm_issue_prefetches(pmd->tm); | ||
1816 | } | ||
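dm_thin_find_block() decodes each mapping from a single 64-bit block_time value that packs both the data block and the snapshot time, via unpack_block_time(). A standalone sketch of that style of packing; the 24-bit time field below is an assumption for illustration, not necessarily the exact on-disk layout:

#include <stdint.h>
#include <stdio.h>

#define TIME_BITS 24    /* assumed split: low 24 bits time, remaining bits block */
#define TIME_MASK ((1u << TIME_BITS) - 1)

static uint64_t pack_block_time(uint64_t block, uint32_t time)
{
        return (block << TIME_BITS) | (time & TIME_MASK);
}

static void unpack_block_time(uint64_t v, uint64_t *block, uint32_t *time)
{
        *block = v >> TIME_BITS;
        *time = v & TIME_MASK;
}

int main(void)
{
        uint64_t block;
        uint32_t time;

        unpack_block_time(pack_block_time(123456, 42), &block, &time);
        printf("block=%llu time=%u\n", (unsigned long long)block, time);
        return 0;
}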
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index e3c857db195a..921d15ee56a0 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -139,12 +139,12 @@ struct dm_thin_lookup_result { | |||
139 | 139 | ||
140 | /* | 140 | /* |
141 | * Returns: | 141 | * Returns: |
142 | * -EWOULDBLOCK iff @can_block is set and would block. | 142 | * -EWOULDBLOCK iff @can_issue_io is set and would issue IO |
143 | * -ENODATA iff that mapping is not present. | 143 | * -ENODATA iff that mapping is not present. |
144 | * 0 success | 144 | * 0 success |
145 | */ | 145 | */ |
146 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, | 146 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, |
147 | int can_block, struct dm_thin_lookup_result *result); | 147 | int can_issue_io, struct dm_thin_lookup_result *result); |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * Obtain an unused block. | 150 | * Obtain an unused block. |
@@ -213,6 +213,11 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, | |||
213 | int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd); | 213 | int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd); |
214 | bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd); | 214 | bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd); |
215 | 215 | ||
216 | /* | ||
217 | * Issue any prefetches that may be useful. | ||
218 | */ | ||
219 | void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd); | ||
220 | |||
216 | /*----------------------------------------------------------------*/ | 221 | /*----------------------------------------------------------------*/ |
217 | 222 | ||
218 | #endif | 223 | #endif |
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 0f86d802b533..8735543eacdb 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -11,11 +11,13 @@ | |||
11 | #include <linux/device-mapper.h> | 11 | #include <linux/device-mapper.h> |
12 | #include <linux/dm-io.h> | 12 | #include <linux/dm-io.h> |
13 | #include <linux/dm-kcopyd.h> | 13 | #include <linux/dm-kcopyd.h> |
14 | #include <linux/log2.h> | ||
14 | #include <linux/list.h> | 15 | #include <linux/list.h> |
15 | #include <linux/rculist.h> | 16 | #include <linux/rculist.h> |
16 | #include <linux/init.h> | 17 | #include <linux/init.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/sort.h> | ||
19 | #include <linux/rbtree.h> | 21 | #include <linux/rbtree.h> |
20 | 22 | ||
21 | #define DM_MSG_PREFIX "thin" | 23 | #define DM_MSG_PREFIX "thin" |
@@ -25,7 +27,6 @@ | |||
25 | */ | 27 | */ |
26 | #define ENDIO_HOOK_POOL_SIZE 1024 | 28 | #define ENDIO_HOOK_POOL_SIZE 1024 |
27 | #define MAPPING_POOL_SIZE 1024 | 29 | #define MAPPING_POOL_SIZE 1024 |
28 | #define PRISON_CELLS 1024 | ||
29 | #define COMMIT_PERIOD HZ | 30 | #define COMMIT_PERIOD HZ |
30 | #define NO_SPACE_TIMEOUT_SECS 60 | 31 | #define NO_SPACE_TIMEOUT_SECS 60 |
31 | 32 | ||
@@ -114,7 +115,8 @@ static void build_data_key(struct dm_thin_device *td, | |||
114 | { | 115 | { |
115 | key->virtual = 0; | 116 | key->virtual = 0; |
116 | key->dev = dm_thin_dev_id(td); | 117 | key->dev = dm_thin_dev_id(td); |
117 | key->block = b; | 118 | key->block_begin = b; |
119 | key->block_end = b + 1ULL; | ||
118 | } | 120 | } |
119 | 121 | ||
120 | static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, | 122 | static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, |
@@ -122,7 +124,55 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, | |||
122 | { | 124 | { |
123 | key->virtual = 1; | 125 | key->virtual = 1; |
124 | key->dev = dm_thin_dev_id(td); | 126 | key->dev = dm_thin_dev_id(td); |
125 | key->block = b; | 127 | key->block_begin = b; |
128 | key->block_end = b + 1ULL; | ||
129 | } | ||
130 | |||
131 | /*----------------------------------------------------------------*/ | ||
132 | |||
133 | #define THROTTLE_THRESHOLD (1 * HZ) | ||
134 | |||
135 | struct throttle { | ||
136 | struct rw_semaphore lock; | ||
137 | unsigned long threshold; | ||
138 | bool throttle_applied; | ||
139 | }; | ||
140 | |||
141 | static void throttle_init(struct throttle *t) | ||
142 | { | ||
143 | init_rwsem(&t->lock); | ||
144 | t->throttle_applied = false; | ||
145 | } | ||
146 | |||
147 | static void throttle_work_start(struct throttle *t) | ||
148 | { | ||
149 | t->threshold = jiffies + THROTTLE_THRESHOLD; | ||
150 | } | ||
151 | |||
152 | static void throttle_work_update(struct throttle *t) | ||
153 | { | ||
154 | if (!t->throttle_applied && jiffies > t->threshold) { | ||
155 | down_write(&t->lock); | ||
156 | t->throttle_applied = true; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | static void throttle_work_complete(struct throttle *t) | ||
161 | { | ||
162 | if (t->throttle_applied) { | ||
163 | t->throttle_applied = false; | ||
164 | up_write(&t->lock); | ||
165 | } | ||
166 | } | ||
167 | |||
168 | static void throttle_lock(struct throttle *t) | ||
169 | { | ||
170 | down_read(&t->lock); | ||
171 | } | ||
172 | |||
173 | static void throttle_unlock(struct throttle *t) | ||
174 | { | ||
175 | up_read(&t->lock); | ||
126 | } | 176 | } |
127 | 177 | ||
128 | /*----------------------------------------------------------------*/ | 178 | /*----------------------------------------------------------------*/ |
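The new throttle is an rwsem plus a deadline: bio submitters take it for read, and once the worker has been running longer than THROTTLE_THRESHOLD it takes the lock for write, stalling new submitters until the current pass finishes. A userspace analogue using pthread rwlocks, mirroring the helpers above (the one-second threshold and the lock type are simplifications):

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

#define THROTTLE_THRESHOLD_SEC 1

struct uthrottle {
        pthread_rwlock_t lock;
        time_t threshold;
        bool throttle_applied;
};

static void uthrottle_init(struct uthrottle *t)
{
        pthread_rwlock_init(&t->lock, NULL);
        t->throttle_applied = false;
}

/* Worker: call at the start of each pass over the deferred work... */
static void uthrottle_work_start(struct uthrottle *t)
{
        t->threshold = time(NULL) + THROTTLE_THRESHOLD_SEC;
}

/* ...repeatedly while working: past the deadline, block new submitters... */
static void uthrottle_work_update(struct uthrottle *t)
{
        if (!t->throttle_applied && time(NULL) > t->threshold) {
                pthread_rwlock_wrlock(&t->lock);
                t->throttle_applied = true;
        }
}

/* ...and when the pass completes, let submitters back in. */
static void uthrottle_work_complete(struct uthrottle *t)
{
        if (t->throttle_applied) {
                t->throttle_applied = false;
                pthread_rwlock_unlock(&t->lock);
        }
}

/* Submitters wrap their queueing of deferred work in the read lock. */
static void uthrottle_lock(struct uthrottle *t)
{
        pthread_rwlock_rdlock(&t->lock);
}

static void uthrottle_unlock(struct uthrottle *t)
{
        pthread_rwlock_unlock(&t->lock);
}

The intended interleaving (how the pool code later in this patch wires it up is only sketched here): the worker brackets each pass with throttle_work_start()/throttle_work_complete() and calls throttle_work_update() as it goes, while bio submitters hold throttle_lock() only around adding to the deferred lists, so they are delayed only once the worker has been busy past the threshold.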
@@ -155,8 +205,11 @@ struct pool_features { | |||
155 | 205 | ||
156 | struct thin_c; | 206 | struct thin_c; |
157 | typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio); | 207 | typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio); |
208 | typedef void (*process_cell_fn)(struct thin_c *tc, struct dm_bio_prison_cell *cell); | ||
158 | typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m); | 209 | typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m); |
159 | 210 | ||
211 | #define CELL_SORT_ARRAY_SIZE 8192 | ||
212 | |||
160 | struct pool { | 213 | struct pool { |
161 | struct list_head list; | 214 | struct list_head list; |
162 | struct dm_target *ti; /* Only set if a pool target is bound */ | 215 | struct dm_target *ti; /* Only set if a pool target is bound */ |
@@ -171,11 +224,13 @@ struct pool { | |||
171 | 224 | ||
172 | struct pool_features pf; | 225 | struct pool_features pf; |
173 | bool low_water_triggered:1; /* A dm event has been sent */ | 226 | bool low_water_triggered:1; /* A dm event has been sent */ |
227 | bool suspended:1; | ||
174 | 228 | ||
175 | struct dm_bio_prison *prison; | 229 | struct dm_bio_prison *prison; |
176 | struct dm_kcopyd_client *copier; | 230 | struct dm_kcopyd_client *copier; |
177 | 231 | ||
178 | struct workqueue_struct *wq; | 232 | struct workqueue_struct *wq; |
233 | struct throttle throttle; | ||
179 | struct work_struct worker; | 234 | struct work_struct worker; |
180 | struct delayed_work waker; | 235 | struct delayed_work waker; |
181 | struct delayed_work no_space_timeout; | 236 | struct delayed_work no_space_timeout; |
@@ -198,8 +253,13 @@ struct pool { | |||
198 | process_bio_fn process_bio; | 253 | process_bio_fn process_bio; |
199 | process_bio_fn process_discard; | 254 | process_bio_fn process_discard; |
200 | 255 | ||
256 | process_cell_fn process_cell; | ||
257 | process_cell_fn process_discard_cell; | ||
258 | |||
201 | process_mapping_fn process_prepared_mapping; | 259 | process_mapping_fn process_prepared_mapping; |
202 | process_mapping_fn process_prepared_discard; | 260 | process_mapping_fn process_prepared_discard; |
261 | |||
262 | struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE]; | ||
203 | }; | 263 | }; |
204 | 264 | ||
205 | static enum pool_mode get_pool_mode(struct pool *pool); | 265 | static enum pool_mode get_pool_mode(struct pool *pool); |
@@ -232,8 +292,11 @@ struct thin_c { | |||
232 | 292 | ||
233 | struct pool *pool; | 293 | struct pool *pool; |
234 | struct dm_thin_device *td; | 294 | struct dm_thin_device *td; |
295 | struct mapped_device *thin_md; | ||
296 | |||
235 | bool requeue_mode:1; | 297 | bool requeue_mode:1; |
236 | spinlock_t lock; | 298 | spinlock_t lock; |
299 | struct list_head deferred_cells; | ||
237 | struct bio_list deferred_bio_list; | 300 | struct bio_list deferred_bio_list; |
238 | struct bio_list retry_on_resume_list; | 301 | struct bio_list retry_on_resume_list; |
239 | struct rb_root sort_bio_list; /* sorted list of deferred bios */ | 302 | struct rb_root sort_bio_list; /* sorted list of deferred bios */ |
@@ -290,6 +353,15 @@ static void cell_release(struct pool *pool, | |||
290 | dm_bio_prison_free_cell(pool->prison, cell); | 353 | dm_bio_prison_free_cell(pool->prison, cell); |
291 | } | 354 | } |
292 | 355 | ||
356 | static void cell_visit_release(struct pool *pool, | ||
357 | void (*fn)(void *, struct dm_bio_prison_cell *), | ||
358 | void *context, | ||
359 | struct dm_bio_prison_cell *cell) | ||
360 | { | ||
361 | dm_cell_visit_release(pool->prison, fn, context, cell); | ||
362 | dm_bio_prison_free_cell(pool->prison, cell); | ||
363 | } | ||
364 | |||
293 | static void cell_release_no_holder(struct pool *pool, | 365 | static void cell_release_no_holder(struct pool *pool, |
294 | struct dm_bio_prison_cell *cell, | 366 | struct dm_bio_prison_cell *cell, |
295 | struct bio_list *bios) | 367 | struct bio_list *bios) |
@@ -298,19 +370,6 @@ static void cell_release_no_holder(struct pool *pool, | |||
298 | dm_bio_prison_free_cell(pool->prison, cell); | 370 | dm_bio_prison_free_cell(pool->prison, cell); |
299 | } | 371 | } |
300 | 372 | ||
301 | static void cell_defer_no_holder_no_free(struct thin_c *tc, | ||
302 | struct dm_bio_prison_cell *cell) | ||
303 | { | ||
304 | struct pool *pool = tc->pool; | ||
305 | unsigned long flags; | ||
306 | |||
307 | spin_lock_irqsave(&tc->lock, flags); | ||
308 | dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list); | ||
309 | spin_unlock_irqrestore(&tc->lock, flags); | ||
310 | |||
311 | wake_worker(pool); | ||
312 | } | ||
313 | |||
314 | static void cell_error_with_code(struct pool *pool, | 373 | static void cell_error_with_code(struct pool *pool, |
315 | struct dm_bio_prison_cell *cell, int error_code) | 374 | struct dm_bio_prison_cell *cell, int error_code) |
316 | { | 375 | { |
@@ -323,6 +382,16 @@ static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell) | |||
323 | cell_error_with_code(pool, cell, -EIO); | 382 | cell_error_with_code(pool, cell, -EIO); |
324 | } | 383 | } |
325 | 384 | ||
385 | static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell) | ||
386 | { | ||
387 | cell_error_with_code(pool, cell, 0); | ||
388 | } | ||
389 | |||
390 | static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell) | ||
391 | { | ||
392 | cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE); | ||
393 | } | ||
394 | |||
326 | /*----------------------------------------------------------------*/ | 395 | /*----------------------------------------------------------------*/ |
327 | 396 | ||
328 | /* | 397 | /* |
@@ -393,44 +462,65 @@ struct dm_thin_endio_hook { | |||
393 | struct rb_node rb_node; | 462 | struct rb_node rb_node; |
394 | }; | 463 | }; |
395 | 464 | ||
396 | static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) | 465 | static void __merge_bio_list(struct bio_list *bios, struct bio_list *master) |
466 | { | ||
467 | bio_list_merge(bios, master); | ||
468 | bio_list_init(master); | ||
469 | } | ||
470 | |||
471 | static void error_bio_list(struct bio_list *bios, int error) | ||
397 | { | 472 | { |
398 | struct bio *bio; | 473 | struct bio *bio; |
474 | |||
475 | while ((bio = bio_list_pop(bios))) | ||
476 | bio_endio(bio, error); | ||
477 | } | ||
478 | |||
479 | static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error) | ||
480 | { | ||
399 | struct bio_list bios; | 481 | struct bio_list bios; |
400 | unsigned long flags; | 482 | unsigned long flags; |
401 | 483 | ||
402 | bio_list_init(&bios); | 484 | bio_list_init(&bios); |
403 | 485 | ||
404 | spin_lock_irqsave(&tc->lock, flags); | 486 | spin_lock_irqsave(&tc->lock, flags); |
405 | bio_list_merge(&bios, master); | 487 | __merge_bio_list(&bios, master); |
406 | bio_list_init(master); | ||
407 | spin_unlock_irqrestore(&tc->lock, flags); | 488 | spin_unlock_irqrestore(&tc->lock, flags); |
408 | 489 | ||
409 | while ((bio = bio_list_pop(&bios))) | 490 | error_bio_list(&bios, error); |
410 | bio_endio(bio, DM_ENDIO_REQUEUE); | ||
411 | } | 491 | } |
412 | 492 | ||
413 | static void requeue_io(struct thin_c *tc) | 493 | static void requeue_deferred_cells(struct thin_c *tc) |
414 | { | 494 | { |
415 | requeue_bio_list(tc, &tc->deferred_bio_list); | 495 | struct pool *pool = tc->pool; |
416 | requeue_bio_list(tc, &tc->retry_on_resume_list); | 496 | unsigned long flags; |
497 | struct list_head cells; | ||
498 | struct dm_bio_prison_cell *cell, *tmp; | ||
499 | |||
500 | INIT_LIST_HEAD(&cells); | ||
501 | |||
502 | spin_lock_irqsave(&tc->lock, flags); | ||
503 | list_splice_init(&tc->deferred_cells, &cells); | ||
504 | spin_unlock_irqrestore(&tc->lock, flags); | ||
505 | |||
506 | list_for_each_entry_safe(cell, tmp, &cells, user_list) | ||
507 | cell_requeue(pool, cell); | ||
417 | } | 508 | } |
418 | 509 | ||
419 | static void error_thin_retry_list(struct thin_c *tc) | 510 | static void requeue_io(struct thin_c *tc) |
420 | { | 511 | { |
421 | struct bio *bio; | ||
422 | unsigned long flags; | ||
423 | struct bio_list bios; | 512 | struct bio_list bios; |
513 | unsigned long flags; | ||
424 | 514 | ||
425 | bio_list_init(&bios); | 515 | bio_list_init(&bios); |
426 | 516 | ||
427 | spin_lock_irqsave(&tc->lock, flags); | 517 | spin_lock_irqsave(&tc->lock, flags); |
428 | bio_list_merge(&bios, &tc->retry_on_resume_list); | 518 | __merge_bio_list(&bios, &tc->deferred_bio_list); |
429 | bio_list_init(&tc->retry_on_resume_list); | 519 | __merge_bio_list(&bios, &tc->retry_on_resume_list); |
430 | spin_unlock_irqrestore(&tc->lock, flags); | 520 | spin_unlock_irqrestore(&tc->lock, flags); |
431 | 521 | ||
432 | while ((bio = bio_list_pop(&bios))) | 522 | error_bio_list(&bios, DM_ENDIO_REQUEUE); |
433 | bio_io_error(bio); | 523 | requeue_deferred_cells(tc); |
434 | } | 524 | } |
435 | 525 | ||
436 | static void error_retry_list(struct pool *pool) | 526 | static void error_retry_list(struct pool *pool) |
@@ -439,7 +529,7 @@ static void error_retry_list(struct pool *pool) | |||
439 | 529 | ||
440 | rcu_read_lock(); | 530 | rcu_read_lock(); |
441 | list_for_each_entry_rcu(tc, &pool->active_thins, list) | 531 | list_for_each_entry_rcu(tc, &pool->active_thins, list) |
442 | error_thin_retry_list(tc); | 532 | error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO); |
443 | rcu_read_unlock(); | 533 | rcu_read_unlock(); |
444 | } | 534 | } |
445 | 535 | ||
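The refactor above funnels the requeue and error paths through __merge_bio_list() and error_bio_list(), so the shared lists are always drained the same way: splice under tc->lock, then complete the bios with the lock dropped. A minimal userspace sketch of that pattern, with a plain linked list and a pthread mutex standing in for the kernel's bio_list and spinlock (the names here are illustrative stand-ins, not the dm API):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_bio { struct fake_bio *next; int id; };

struct fake_thin {
	pthread_mutex_t lock;          /* models tc->lock */
	struct fake_bio *retry_list;   /* models tc->retry_on_resume_list */
};

/* Move the whole shared list onto a private head and leave it empty. */
static void merge_and_reinit(struct fake_bio **priv, struct fake_bio **master)
{
	*priv = *master;
	*master = NULL;
}

static void error_all(struct fake_thin *tc, int error)
{
	struct fake_bio *bios = NULL, *bio;

	pthread_mutex_lock(&tc->lock);
	merge_and_reinit(&bios, &tc->retry_list);
	pthread_mutex_unlock(&tc->lock);

	/* Completion happens with no lock held, as in error_bio_list(). */
	while ((bio = bios)) {
		bios = bio->next;
		printf("bio %d completed with error %d\n", bio->id, error);
		free(bio);
	}
}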
@@ -629,33 +719,75 @@ static void overwrite_endio(struct bio *bio, int err) | |||
629 | */ | 719 | */ |
630 | 720 | ||
631 | /* | 721 | /* |
632 | * This sends the bios in the cell back to the deferred_bios list. | 722 | * This sends the bios in the cell, except the original holder, back |
723 | * to the deferred_bios list. | ||
633 | */ | 724 | */ |
634 | static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) | 725 | static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) |
635 | { | 726 | { |
636 | struct pool *pool = tc->pool; | 727 | struct pool *pool = tc->pool; |
637 | unsigned long flags; | 728 | unsigned long flags; |
638 | 729 | ||
639 | spin_lock_irqsave(&tc->lock, flags); | 730 | spin_lock_irqsave(&tc->lock, flags); |
640 | cell_release(pool, cell, &tc->deferred_bio_list); | 731 | cell_release_no_holder(pool, cell, &tc->deferred_bio_list); |
641 | spin_unlock_irqrestore(&tc->lock, flags); | 732 | spin_unlock_irqrestore(&tc->lock, flags); |
642 | 733 | ||
643 | wake_worker(pool); | 734 | wake_worker(pool); |
644 | } | 735 | } |
645 | 736 | ||
646 | /* | 737 | static void thin_defer_bio(struct thin_c *tc, struct bio *bio); |
647 | * Same as cell_defer above, except it omits the original holder of the cell. | 738 | |
648 | */ | 739 | struct remap_info { |
649 | static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) | 740 | struct thin_c *tc; |
741 | struct bio_list defer_bios; | ||
742 | struct bio_list issue_bios; | ||
743 | }; | ||
744 | |||
745 | static void __inc_remap_and_issue_cell(void *context, | ||
746 | struct dm_bio_prison_cell *cell) | ||
650 | { | 747 | { |
651 | struct pool *pool = tc->pool; | 748 | struct remap_info *info = context; |
652 | unsigned long flags; | 749 | struct bio *bio; |
653 | 750 | ||
654 | spin_lock_irqsave(&tc->lock, flags); | 751 | while ((bio = bio_list_pop(&cell->bios))) { |
655 | cell_release_no_holder(pool, cell, &tc->deferred_bio_list); | 752 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) |
656 | spin_unlock_irqrestore(&tc->lock, flags); | 753 | bio_list_add(&info->defer_bios, bio); |
754 | else { | ||
755 | inc_all_io_entry(info->tc->pool, bio); | ||
657 | 756 | ||
658 | wake_worker(pool); | 757 | /* |
758 | * We can't issue the bios with the bio prison lock | ||
759 | * held, so we add them to a list to issue on | ||
760 | * return from this function. | ||
761 | */ | ||
762 | bio_list_add(&info->issue_bios, bio); | ||
763 | } | ||
764 | } | ||
765 | } | ||
766 | |||
767 | static void inc_remap_and_issue_cell(struct thin_c *tc, | ||
768 | struct dm_bio_prison_cell *cell, | ||
769 | dm_block_t block) | ||
770 | { | ||
771 | struct bio *bio; | ||
772 | struct remap_info info; | ||
773 | |||
774 | info.tc = tc; | ||
775 | bio_list_init(&info.defer_bios); | ||
776 | bio_list_init(&info.issue_bios); | ||
777 | |||
778 | /* | ||
779 | * We have to be careful to inc any bios we're about to issue | ||
780 | * before the cell is released, and avoid a race with new bios | ||
781 | * being added to the cell. | ||
782 | */ | ||
783 | cell_visit_release(tc->pool, __inc_remap_and_issue_cell, | ||
784 | &info, cell); | ||
785 | |||
786 | while ((bio = bio_list_pop(&info.defer_bios))) | ||
787 | thin_defer_bio(tc, bio); | ||
788 | |||
789 | while ((bio = bio_list_pop(&info.issue_bios))) | ||
790 | remap_and_issue(info.tc, bio, block); | ||
659 | } | 791 | } |
660 | 792 | ||
661 | static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) | 793 | static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) |
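inc_remap_and_issue_cell() shows the shape of the new cell_visit_release() usage: while the prison still holds the cell, the callback only sorts bios into defer/issue lists, and the actual submission happens once the cell has been released. A small classify-then-act sketch of that shape in plain C (the cell, bio, and callback types are simplified assumptions, not the dm-bio-prison API):

#include <stdbool.h>

struct item { struct item *next; bool needs_deferral; };

struct split {
	struct item *defer;   /* models info->defer_bios */
	struct item *issue;   /* models info->issue_bios */
};

static void push(struct item **list, struct item *it)
{
	it->next = *list;
	*list = it;
}

/*
 * Runs while the "cell" is still locked: only classify, never act,
 * mirroring __inc_remap_and_issue_cell().
 */
static void classify(struct split *out, struct item *cell_items)
{
	struct item *it, *next;

	for (it = cell_items; it; it = next) {
		next = it->next;
		if (it->needs_deferral)
			push(&out->defer, it);
		else
			push(&out->issue, it);
	}
}

/* After release: now it is safe to actually process both lists. */
static void act(struct split *out,
		void (*defer_fn)(struct item *), void (*issue_fn)(struct item *))
{
	struct item *it;

	while ((it = out->defer)) {
		out->defer = it->next;
		defer_fn(it);
	}
	while ((it = out->issue)) {
		out->issue = it->next;
		issue_fn(it);
	}
}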
@@ -706,10 +838,13 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) | |||
706 | * the bios in the cell. | 838 | * the bios in the cell. |
707 | */ | 839 | */ |
708 | if (bio) { | 840 | if (bio) { |
709 | cell_defer_no_holder(tc, m->cell); | 841 | inc_remap_and_issue_cell(tc, m->cell, m->data_block); |
710 | bio_endio(bio, 0); | 842 | bio_endio(bio, 0); |
711 | } else | 843 | } else { |
712 | cell_defer(tc, m->cell); | 844 | inc_all_io_entry(tc->pool, m->cell->holder); |
845 | remap_and_issue(tc, m->cell->holder, m->data_block); | ||
846 | inc_remap_and_issue_cell(tc, m->cell, m->data_block); | ||
847 | } | ||
713 | 848 | ||
714 | out: | 849 | out: |
715 | list_del(&m->list); | 850 | list_del(&m->list); |
@@ -842,6 +977,20 @@ static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, | |||
842 | } | 977 | } |
843 | } | 978 | } |
844 | 979 | ||
980 | static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio, | ||
981 | dm_block_t data_block, | ||
982 | struct dm_thin_new_mapping *m) | ||
983 | { | ||
984 | struct pool *pool = tc->pool; | ||
985 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | ||
986 | |||
987 | h->overwrite_mapping = m; | ||
988 | m->bio = bio; | ||
989 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); | ||
990 | inc_all_io_entry(pool, bio); | ||
991 | remap_and_issue(tc, bio, data_block); | ||
992 | } | ||
993 | |||
845 | /* | 994 | /* |
846 | * A partial copy also needs to zero the uncopied region. | 995 | * A partial copy also needs to zero the uncopied region. |
847 | */ | 996 | */ |
@@ -876,15 +1025,9 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
876 | * If the whole block of data is being overwritten, we can issue the | 1025 | * If the whole block of data is being overwritten, we can issue the |
877 | * bio immediately. Otherwise we use kcopyd to clone the data first. | 1026 | * bio immediately. Otherwise we use kcopyd to clone the data first. |
878 | */ | 1027 | */ |
879 | if (io_overwrites_block(pool, bio)) { | 1028 | if (io_overwrites_block(pool, bio)) |
880 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 1029 | remap_and_issue_overwrite(tc, bio, data_dest, m); |
881 | 1030 | else { | |
882 | h->overwrite_mapping = m; | ||
883 | m->bio = bio; | ||
884 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); | ||
885 | inc_all_io_entry(pool, bio); | ||
886 | remap_and_issue(tc, bio, data_dest); | ||
887 | } else { | ||
888 | struct dm_io_region from, to; | 1031 | struct dm_io_region from, to; |
889 | 1032 | ||
890 | from.bdev = origin->bdev; | 1033 | from.bdev = origin->bdev; |
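Both schedule_copy() and schedule_zero() now route the fast path through remap_and_issue_overwrite(): when a write already covers the whole pool block, the kcopyd copy or zero step can be skipped and the bio issued directly. io_overwrites_block() itself is defined earlier in dm-thin.c and is not shown in this hunk, so the predicate below is only an illustrative approximation of the idea:

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustrative only: a write can take the overwrite fast path when it
 * spans exactly one whole block, i.e. it is block-sized and block-aligned.
 */
static bool covers_whole_block(uint64_t start_sector, uint32_t nr_sectors,
			       uint32_t sectors_per_block)
{
	return nr_sectors == sectors_per_block &&
	       (start_sector % sectors_per_block) == 0;
}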
@@ -953,16 +1096,10 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
953 | if (!pool->pf.zero_new_blocks) | 1096 | if (!pool->pf.zero_new_blocks) |
954 | process_prepared_mapping(m); | 1097 | process_prepared_mapping(m); |
955 | 1098 | ||
956 | else if (io_overwrites_block(pool, bio)) { | 1099 | else if (io_overwrites_block(pool, bio)) |
957 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 1100 | remap_and_issue_overwrite(tc, bio, data_block, m); |
958 | |||
959 | h->overwrite_mapping = m; | ||
960 | m->bio = bio; | ||
961 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); | ||
962 | inc_all_io_entry(pool, bio); | ||
963 | remap_and_issue(tc, bio, data_block); | ||
964 | 1101 | ||
965 | } else | 1102 | else |
966 | ll_zero(tc, m, | 1103 | ll_zero(tc, m, |
967 | data_block * pool->sectors_per_block, | 1104 | data_block * pool->sectors_per_block, |
968 | (data_block + 1) * pool->sectors_per_block); | 1105 | (data_block + 1) * pool->sectors_per_block); |
@@ -1134,29 +1271,25 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c | |||
1134 | bio_list_init(&bios); | 1271 | bio_list_init(&bios); |
1135 | cell_release(pool, cell, &bios); | 1272 | cell_release(pool, cell, &bios); |
1136 | 1273 | ||
1137 | error = should_error_unserviceable_bio(pool); | 1274 | while ((bio = bio_list_pop(&bios))) |
1138 | if (error) | 1275 | retry_on_resume(bio); |
1139 | while ((bio = bio_list_pop(&bios))) | ||
1140 | bio_endio(bio, error); | ||
1141 | else | ||
1142 | while ((bio = bio_list_pop(&bios))) | ||
1143 | retry_on_resume(bio); | ||
1144 | } | 1276 | } |
1145 | 1277 | ||
1146 | static void process_discard(struct thin_c *tc, struct bio *bio) | 1278 | static void process_discard_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) |
1147 | { | 1279 | { |
1148 | int r; | 1280 | int r; |
1149 | unsigned long flags; | 1281 | struct bio *bio = cell->holder; |
1150 | struct pool *pool = tc->pool; | 1282 | struct pool *pool = tc->pool; |
1151 | struct dm_bio_prison_cell *cell, *cell2; | 1283 | struct dm_bio_prison_cell *cell2; |
1152 | struct dm_cell_key key, key2; | 1284 | struct dm_cell_key key2; |
1153 | dm_block_t block = get_bio_block(tc, bio); | 1285 | dm_block_t block = get_bio_block(tc, bio); |
1154 | struct dm_thin_lookup_result lookup_result; | 1286 | struct dm_thin_lookup_result lookup_result; |
1155 | struct dm_thin_new_mapping *m; | 1287 | struct dm_thin_new_mapping *m; |
1156 | 1288 | ||
1157 | build_virtual_key(tc->td, block, &key); | 1289 | if (tc->requeue_mode) { |
1158 | if (bio_detain(tc->pool, &key, bio, &cell)) | 1290 | cell_requeue(pool, cell); |
1159 | return; | 1291 | return; |
1292 | } | ||
1160 | 1293 | ||
1161 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); | 1294 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); |
1162 | switch (r) { | 1295 | switch (r) { |
@@ -1187,12 +1320,9 @@ static void process_discard(struct thin_c *tc, struct bio *bio) | |||
1187 | m->cell2 = cell2; | 1320 | m->cell2 = cell2; |
1188 | m->bio = bio; | 1321 | m->bio = bio; |
1189 | 1322 | ||
1190 | if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) { | 1323 | if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) |
1191 | spin_lock_irqsave(&pool->lock, flags); | 1324 | pool->process_prepared_discard(m); |
1192 | list_add_tail(&m->list, &pool->prepared_discards); | 1325 | |
1193 | spin_unlock_irqrestore(&pool->lock, flags); | ||
1194 | wake_worker(pool); | ||
1195 | } | ||
1196 | } else { | 1326 | } else { |
1197 | inc_all_io_entry(pool, bio); | 1327 | inc_all_io_entry(pool, bio); |
1198 | cell_defer_no_holder(tc, cell); | 1328 | cell_defer_no_holder(tc, cell); |
@@ -1227,6 +1357,19 @@ static void process_discard(struct thin_c *tc, struct bio *bio) | |||
1227 | } | 1357 | } |
1228 | } | 1358 | } |
1229 | 1359 | ||
1360 | static void process_discard_bio(struct thin_c *tc, struct bio *bio) | ||
1361 | { | ||
1362 | struct dm_bio_prison_cell *cell; | ||
1363 | struct dm_cell_key key; | ||
1364 | dm_block_t block = get_bio_block(tc, bio); | ||
1365 | |||
1366 | build_virtual_key(tc->td, block, &key); | ||
1367 | if (bio_detain(tc->pool, &key, bio, &cell)) | ||
1368 | return; | ||
1369 | |||
1370 | process_discard_cell(tc, cell); | ||
1371 | } | ||
1372 | |||
1230 | static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, | 1373 | static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, |
1231 | struct dm_cell_key *key, | 1374 | struct dm_cell_key *key, |
1232 | struct dm_thin_lookup_result *lookup_result, | 1375 | struct dm_thin_lookup_result *lookup_result, |
@@ -1255,11 +1398,53 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, | |||
1255 | } | 1398 | } |
1256 | } | 1399 | } |
1257 | 1400 | ||
1401 | static void __remap_and_issue_shared_cell(void *context, | ||
1402 | struct dm_bio_prison_cell *cell) | ||
1403 | { | ||
1404 | struct remap_info *info = context; | ||
1405 | struct bio *bio; | ||
1406 | |||
1407 | while ((bio = bio_list_pop(&cell->bios))) { | ||
1408 | if ((bio_data_dir(bio) == WRITE) || | ||
1409 | (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))) | ||
1410 | bio_list_add(&info->defer_bios, bio); | ||
1411 | else { | ||
1412 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | ||
1413 | |||
1414 | h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds); | ||
1415 | inc_all_io_entry(info->tc->pool, bio); | ||
1416 | bio_list_add(&info->issue_bios, bio); | ||
1417 | } | ||
1418 | } | ||
1419 | } | ||
1420 | |||
1421 | static void remap_and_issue_shared_cell(struct thin_c *tc, | ||
1422 | struct dm_bio_prison_cell *cell, | ||
1423 | dm_block_t block) | ||
1424 | { | ||
1425 | struct bio *bio; | ||
1426 | struct remap_info info; | ||
1427 | |||
1428 | info.tc = tc; | ||
1429 | bio_list_init(&info.defer_bios); | ||
1430 | bio_list_init(&info.issue_bios); | ||
1431 | |||
1432 | cell_visit_release(tc->pool, __remap_and_issue_shared_cell, | ||
1433 | &info, cell); | ||
1434 | |||
1435 | while ((bio = bio_list_pop(&info.defer_bios))) | ||
1436 | thin_defer_bio(tc, bio); | ||
1437 | |||
1438 | while ((bio = bio_list_pop(&info.issue_bios))) | ||
1439 | remap_and_issue(tc, bio, block); | ||
1440 | } | ||
1441 | |||
1258 | static void process_shared_bio(struct thin_c *tc, struct bio *bio, | 1442 | static void process_shared_bio(struct thin_c *tc, struct bio *bio, |
1259 | dm_block_t block, | 1443 | dm_block_t block, |
1260 | struct dm_thin_lookup_result *lookup_result) | 1444 | struct dm_thin_lookup_result *lookup_result, |
1445 | struct dm_bio_prison_cell *virt_cell) | ||
1261 | { | 1446 | { |
1262 | struct dm_bio_prison_cell *cell; | 1447 | struct dm_bio_prison_cell *data_cell; |
1263 | struct pool *pool = tc->pool; | 1448 | struct pool *pool = tc->pool; |
1264 | struct dm_cell_key key; | 1449 | struct dm_cell_key key; |
1265 | 1450 | ||
@@ -1268,19 +1453,23 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, | |||
1268 | * of being broken so we have nothing further to do here. | 1453 | * of being broken so we have nothing further to do here. |
1269 | */ | 1454 | */ |
1270 | build_data_key(tc->td, lookup_result->block, &key); | 1455 | build_data_key(tc->td, lookup_result->block, &key); |
1271 | if (bio_detain(pool, &key, bio, &cell)) | 1456 | if (bio_detain(pool, &key, bio, &data_cell)) { |
1457 | cell_defer_no_holder(tc, virt_cell); | ||
1272 | return; | 1458 | return; |
1459 | } | ||
1273 | 1460 | ||
1274 | if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size) | 1461 | if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size) { |
1275 | break_sharing(tc, bio, block, &key, lookup_result, cell); | 1462 | break_sharing(tc, bio, block, &key, lookup_result, data_cell); |
1276 | else { | 1463 | cell_defer_no_holder(tc, virt_cell); |
1464 | } else { | ||
1277 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 1465 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); |
1278 | 1466 | ||
1279 | h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); | 1467 | h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); |
1280 | inc_all_io_entry(pool, bio); | 1468 | inc_all_io_entry(pool, bio); |
1281 | cell_defer_no_holder(tc, cell); | ||
1282 | |||
1283 | remap_and_issue(tc, bio, lookup_result->block); | 1469 | remap_and_issue(tc, bio, lookup_result->block); |
1470 | |||
1471 | remap_and_issue_shared_cell(tc, data_cell, lookup_result->block); | ||
1472 | remap_and_issue_shared_cell(tc, virt_cell, lookup_result->block); | ||
1284 | } | 1473 | } |
1285 | } | 1474 | } |
1286 | 1475 | ||
@@ -1333,34 +1522,28 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block | |||
1333 | } | 1522 | } |
1334 | } | 1523 | } |
1335 | 1524 | ||
1336 | static void process_bio(struct thin_c *tc, struct bio *bio) | 1525 | static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) |
1337 | { | 1526 | { |
1338 | int r; | 1527 | int r; |
1339 | struct pool *pool = tc->pool; | 1528 | struct pool *pool = tc->pool; |
1529 | struct bio *bio = cell->holder; | ||
1340 | dm_block_t block = get_bio_block(tc, bio); | 1530 | dm_block_t block = get_bio_block(tc, bio); |
1341 | struct dm_bio_prison_cell *cell; | ||
1342 | struct dm_cell_key key; | ||
1343 | struct dm_thin_lookup_result lookup_result; | 1531 | struct dm_thin_lookup_result lookup_result; |
1344 | 1532 | ||
1345 | /* | 1533 | if (tc->requeue_mode) { |
1346 | * If cell is already occupied, then the block is already | 1534 | cell_requeue(pool, cell); |
1347 | * being provisioned so we have nothing further to do here. | ||
1348 | */ | ||
1349 | build_virtual_key(tc->td, block, &key); | ||
1350 | if (bio_detain(pool, &key, bio, &cell)) | ||
1351 | return; | 1535 | return; |
1536 | } | ||
1352 | 1537 | ||
1353 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); | 1538 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); |
1354 | switch (r) { | 1539 | switch (r) { |
1355 | case 0: | 1540 | case 0: |
1356 | if (lookup_result.shared) { | 1541 | if (lookup_result.shared) |
1357 | process_shared_bio(tc, bio, block, &lookup_result); | 1542 | process_shared_bio(tc, bio, block, &lookup_result, cell); |
1358 | cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */ | 1543 | else { |
1359 | } else { | ||
1360 | inc_all_io_entry(pool, bio); | 1544 | inc_all_io_entry(pool, bio); |
1361 | cell_defer_no_holder(tc, cell); | ||
1362 | |||
1363 | remap_and_issue(tc, bio, lookup_result.block); | 1545 | remap_and_issue(tc, bio, lookup_result.block); |
1546 | inc_remap_and_issue_cell(tc, cell, lookup_result.block); | ||
1364 | } | 1547 | } |
1365 | break; | 1548 | break; |
1366 | 1549 | ||
@@ -1394,7 +1577,26 @@ static void process_bio(struct thin_c *tc, struct bio *bio) | |||
1394 | } | 1577 | } |
1395 | } | 1578 | } |
1396 | 1579 | ||
1397 | static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | 1580 | static void process_bio(struct thin_c *tc, struct bio *bio) |
1581 | { | ||
1582 | struct pool *pool = tc->pool; | ||
1583 | dm_block_t block = get_bio_block(tc, bio); | ||
1584 | struct dm_bio_prison_cell *cell; | ||
1585 | struct dm_cell_key key; | ||
1586 | |||
1587 | /* | ||
1588 | * If cell is already occupied, then the block is already | ||
1589 | * being provisioned so we have nothing further to do here. | ||
1590 | */ | ||
1591 | build_virtual_key(tc->td, block, &key); | ||
1592 | if (bio_detain(pool, &key, bio, &cell)) | ||
1593 | return; | ||
1594 | |||
1595 | process_cell(tc, cell); | ||
1596 | } | ||
1597 | |||
1598 | static void __process_bio_read_only(struct thin_c *tc, struct bio *bio, | ||
1599 | struct dm_bio_prison_cell *cell) | ||
1398 | { | 1600 | { |
1399 | int r; | 1601 | int r; |
1400 | int rw = bio_data_dir(bio); | 1602 | int rw = bio_data_dir(bio); |
@@ -1404,15 +1606,21 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | |||
1404 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); | 1606 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); |
1405 | switch (r) { | 1607 | switch (r) { |
1406 | case 0: | 1608 | case 0: |
1407 | if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size) | 1609 | if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size) { |
1408 | handle_unserviceable_bio(tc->pool, bio); | 1610 | handle_unserviceable_bio(tc->pool, bio); |
1409 | else { | 1611 | if (cell) |
1612 | cell_defer_no_holder(tc, cell); | ||
1613 | } else { | ||
1410 | inc_all_io_entry(tc->pool, bio); | 1614 | inc_all_io_entry(tc->pool, bio); |
1411 | remap_and_issue(tc, bio, lookup_result.block); | 1615 | remap_and_issue(tc, bio, lookup_result.block); |
1616 | if (cell) | ||
1617 | inc_remap_and_issue_cell(tc, cell, lookup_result.block); | ||
1412 | } | 1618 | } |
1413 | break; | 1619 | break; |
1414 | 1620 | ||
1415 | case -ENODATA: | 1621 | case -ENODATA: |
1622 | if (cell) | ||
1623 | cell_defer_no_holder(tc, cell); | ||
1416 | if (rw != READ) { | 1624 | if (rw != READ) { |
1417 | handle_unserviceable_bio(tc->pool, bio); | 1625 | handle_unserviceable_bio(tc->pool, bio); |
1418 | break; | 1626 | break; |
@@ -1431,11 +1639,23 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | |||
1431 | default: | 1639 | default: |
1432 | DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", | 1640 | DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", |
1433 | __func__, r); | 1641 | __func__, r); |
1642 | if (cell) | ||
1643 | cell_defer_no_holder(tc, cell); | ||
1434 | bio_io_error(bio); | 1644 | bio_io_error(bio); |
1435 | break; | 1645 | break; |
1436 | } | 1646 | } |
1437 | } | 1647 | } |
1438 | 1648 | ||
1649 | static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | ||
1650 | { | ||
1651 | __process_bio_read_only(tc, bio, NULL); | ||
1652 | } | ||
1653 | |||
1654 | static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell *cell) | ||
1655 | { | ||
1656 | __process_bio_read_only(tc, cell->holder, cell); | ||
1657 | } | ||
1658 | |||
1439 | static void process_bio_success(struct thin_c *tc, struct bio *bio) | 1659 | static void process_bio_success(struct thin_c *tc, struct bio *bio) |
1440 | { | 1660 | { |
1441 | bio_endio(bio, 0); | 1661 | bio_endio(bio, 0); |
@@ -1446,6 +1666,16 @@ static void process_bio_fail(struct thin_c *tc, struct bio *bio) | |||
1446 | bio_io_error(bio); | 1666 | bio_io_error(bio); |
1447 | } | 1667 | } |
1448 | 1668 | ||
1669 | static void process_cell_success(struct thin_c *tc, struct dm_bio_prison_cell *cell) | ||
1670 | { | ||
1671 | cell_success(tc->pool, cell); | ||
1672 | } | ||
1673 | |||
1674 | static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell) | ||
1675 | { | ||
1676 | cell_error(tc->pool, cell); | ||
1677 | } | ||
1678 | |||
1449 | /* | 1679 | /* |
1450 | * FIXME: should we also commit due to size of transaction, measured in | 1680 | * FIXME: should we also commit due to size of transaction, measured in |
1451 | * metadata blocks? | 1681 | * metadata blocks? |
@@ -1527,9 +1757,10 @@ static void process_thin_deferred_bios(struct thin_c *tc) | |||
1527 | struct bio *bio; | 1757 | struct bio *bio; |
1528 | struct bio_list bios; | 1758 | struct bio_list bios; |
1529 | struct blk_plug plug; | 1759 | struct blk_plug plug; |
1760 | unsigned count = 0; | ||
1530 | 1761 | ||
1531 | if (tc->requeue_mode) { | 1762 | if (tc->requeue_mode) { |
1532 | requeue_bio_list(tc, &tc->deferred_bio_list); | 1763 | error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE); |
1533 | return; | 1764 | return; |
1534 | } | 1765 | } |
1535 | 1766 | ||
@@ -1568,10 +1799,97 @@ static void process_thin_deferred_bios(struct thin_c *tc) | |||
1568 | pool->process_discard(tc, bio); | 1799 | pool->process_discard(tc, bio); |
1569 | else | 1800 | else |
1570 | pool->process_bio(tc, bio); | 1801 | pool->process_bio(tc, bio); |
1802 | |||
1803 | if ((count++ & 127) == 0) { | ||
1804 | throttle_work_update(&pool->throttle); | ||
1805 | dm_pool_issue_prefetches(pool->pmd); | ||
1806 | } | ||
1571 | } | 1807 | } |
1572 | blk_finish_plug(&plug); | 1808 | blk_finish_plug(&plug); |
1573 | } | 1809 | } |
1574 | 1810 | ||
1811 | static int cmp_cells(const void *lhs, const void *rhs) | ||
1812 | { | ||
1813 | struct dm_bio_prison_cell *lhs_cell = *((struct dm_bio_prison_cell **) lhs); | ||
1814 | struct dm_bio_prison_cell *rhs_cell = *((struct dm_bio_prison_cell **) rhs); | ||
1815 | |||
1816 | BUG_ON(!lhs_cell->holder); | ||
1817 | BUG_ON(!rhs_cell->holder); | ||
1818 | |||
1819 | if (lhs_cell->holder->bi_iter.bi_sector < rhs_cell->holder->bi_iter.bi_sector) | ||
1820 | return -1; | ||
1821 | |||
1822 | if (lhs_cell->holder->bi_iter.bi_sector > rhs_cell->holder->bi_iter.bi_sector) | ||
1823 | return 1; | ||
1824 | |||
1825 | return 0; | ||
1826 | } | ||
1827 | |||
1828 | static unsigned sort_cells(struct pool *pool, struct list_head *cells) | ||
1829 | { | ||
1830 | unsigned count = 0; | ||
1831 | struct dm_bio_prison_cell *cell, *tmp; | ||
1832 | |||
1833 | list_for_each_entry_safe(cell, tmp, cells, user_list) { | ||
1834 | if (count >= CELL_SORT_ARRAY_SIZE) | ||
1835 | break; | ||
1836 | |||
1837 | pool->cell_sort_array[count++] = cell; | ||
1838 | list_del(&cell->user_list); | ||
1839 | } | ||
1840 | |||
1841 | sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL); | ||
1842 | |||
1843 | return count; | ||
1844 | } | ||
1845 | |||
1846 | static void process_thin_deferred_cells(struct thin_c *tc) | ||
1847 | { | ||
1848 | struct pool *pool = tc->pool; | ||
1849 | unsigned long flags; | ||
1850 | struct list_head cells; | ||
1851 | struct dm_bio_prison_cell *cell; | ||
1852 | unsigned i, j, count; | ||
1853 | |||
1854 | INIT_LIST_HEAD(&cells); | ||
1855 | |||
1856 | spin_lock_irqsave(&tc->lock, flags); | ||
1857 | list_splice_init(&tc->deferred_cells, &cells); | ||
1858 | spin_unlock_irqrestore(&tc->lock, flags); | ||
1859 | |||
1860 | if (list_empty(&cells)) | ||
1861 | return; | ||
1862 | |||
1863 | do { | ||
1864 | count = sort_cells(tc->pool, &cells); | ||
1865 | |||
1866 | for (i = 0; i < count; i++) { | ||
1867 | cell = pool->cell_sort_array[i]; | ||
1868 | BUG_ON(!cell->holder); | ||
1869 | |||
1870 | /* | ||
1871 | * If we've got no free new_mapping structs, and processing | ||
1872 | * this bio might require one, we pause until there are some | ||
1873 | * prepared mappings to process. | ||
1874 | */ | ||
1875 | if (ensure_next_mapping(pool)) { | ||
1876 | for (j = i; j < count; j++) | ||
1877 | list_add(&pool->cell_sort_array[j]->user_list, &cells); | ||
1878 | |||
1879 | spin_lock_irqsave(&tc->lock, flags); | ||
1880 | list_splice(&cells, &tc->deferred_cells); | ||
1881 | spin_unlock_irqrestore(&tc->lock, flags); | ||
1882 | return; | ||
1883 | } | ||
1884 | |||
1885 | if (cell->holder->bi_rw & REQ_DISCARD) | ||
1886 | pool->process_discard_cell(tc, cell); | ||
1887 | else | ||
1888 | pool->process_cell(tc, cell); | ||
1889 | } | ||
1890 | } while (!list_empty(&cells)); | ||
1891 | } | ||
1892 | |||
1575 | static void thin_get(struct thin_c *tc); | 1893 | static void thin_get(struct thin_c *tc); |
1576 | static void thin_put(struct thin_c *tc); | 1894 | static void thin_put(struct thin_c *tc); |
1577 | 1895 | ||
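process_thin_deferred_cells() drains deferred cells in batches of at most CELL_SORT_ARRAY_SIZE and sorts each batch by the holder bio's start sector, so the pool tends to process work in ascending disk order; process_thin_deferred_bios() additionally refreshes the throttle and re-issues metadata prefetches every 128 bios via the (count++ & 127) check. A compact userspace model of the batch sort, where the types, sizes, and qsort() stand in for the kernel structures and sort():

#include <stdint.h>
#include <stdlib.h>

struct fake_cell { uint64_t holder_sector; };

static int cmp_fake_cells(const void *lhs, const void *rhs)
{
	const struct fake_cell *l = *(const struct fake_cell *const *)lhs;
	const struct fake_cell *r = *(const struct fake_cell *const *)rhs;

	if (l->holder_sector < r->holder_sector)
		return -1;
	return l->holder_sector > r->holder_sector;
}

/* Sort one batch of cell pointers by holder sector, as sort_cells() does. */
static void sort_batch(struct fake_cell **array, size_t count)
{
	qsort(array, count, sizeof(*array), cmp_fake_cells);
}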
@@ -1620,6 +1938,7 @@ static void process_deferred_bios(struct pool *pool) | |||
1620 | 1938 | ||
1621 | tc = get_first_thin(pool); | 1939 | tc = get_first_thin(pool); |
1622 | while (tc) { | 1940 | while (tc) { |
1941 | process_thin_deferred_cells(tc); | ||
1623 | process_thin_deferred_bios(tc); | 1942 | process_thin_deferred_bios(tc); |
1624 | tc = get_next_thin(pool, tc); | 1943 | tc = get_next_thin(pool, tc); |
1625 | } | 1944 | } |
@@ -1653,9 +1972,15 @@ static void do_worker(struct work_struct *ws) | |||
1653 | { | 1972 | { |
1654 | struct pool *pool = container_of(ws, struct pool, worker); | 1973 | struct pool *pool = container_of(ws, struct pool, worker); |
1655 | 1974 | ||
1975 | throttle_work_start(&pool->throttle); | ||
1976 | dm_pool_issue_prefetches(pool->pmd); | ||
1977 | throttle_work_update(&pool->throttle); | ||
1656 | process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); | 1978 | process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); |
1979 | throttle_work_update(&pool->throttle); | ||
1657 | process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); | 1980 | process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); |
1981 | throttle_work_update(&pool->throttle); | ||
1658 | process_deferred_bios(pool); | 1982 | process_deferred_bios(pool); |
1983 | throttle_work_complete(&pool->throttle); | ||
1659 | } | 1984 | } |
1660 | 1985 | ||
1661 | /* | 1986 | /* |
@@ -1792,6 +2117,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1792 | dm_pool_metadata_read_only(pool->pmd); | 2117 | dm_pool_metadata_read_only(pool->pmd); |
1793 | pool->process_bio = process_bio_fail; | 2118 | pool->process_bio = process_bio_fail; |
1794 | pool->process_discard = process_bio_fail; | 2119 | pool->process_discard = process_bio_fail; |
2120 | pool->process_cell = process_cell_fail; | ||
2121 | pool->process_discard_cell = process_cell_fail; | ||
1795 | pool->process_prepared_mapping = process_prepared_mapping_fail; | 2122 | pool->process_prepared_mapping = process_prepared_mapping_fail; |
1796 | pool->process_prepared_discard = process_prepared_discard_fail; | 2123 | pool->process_prepared_discard = process_prepared_discard_fail; |
1797 | 2124 | ||
@@ -1804,6 +2131,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1804 | dm_pool_metadata_read_only(pool->pmd); | 2131 | dm_pool_metadata_read_only(pool->pmd); |
1805 | pool->process_bio = process_bio_read_only; | 2132 | pool->process_bio = process_bio_read_only; |
1806 | pool->process_discard = process_bio_success; | 2133 | pool->process_discard = process_bio_success; |
2134 | pool->process_cell = process_cell_read_only; | ||
2135 | pool->process_discard_cell = process_cell_success; | ||
1807 | pool->process_prepared_mapping = process_prepared_mapping_fail; | 2136 | pool->process_prepared_mapping = process_prepared_mapping_fail; |
1808 | pool->process_prepared_discard = process_prepared_discard_passdown; | 2137 | pool->process_prepared_discard = process_prepared_discard_passdown; |
1809 | 2138 | ||
@@ -1822,7 +2151,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1822 | if (old_mode != new_mode) | 2151 | if (old_mode != new_mode) |
1823 | notify_of_pool_mode_change(pool, "out-of-data-space"); | 2152 | notify_of_pool_mode_change(pool, "out-of-data-space"); |
1824 | pool->process_bio = process_bio_read_only; | 2153 | pool->process_bio = process_bio_read_only; |
1825 | pool->process_discard = process_discard; | 2154 | pool->process_discard = process_discard_bio; |
2155 | pool->process_cell = process_cell_read_only; | ||
2156 | pool->process_discard_cell = process_discard_cell; | ||
1826 | pool->process_prepared_mapping = process_prepared_mapping; | 2157 | pool->process_prepared_mapping = process_prepared_mapping; |
1827 | pool->process_prepared_discard = process_prepared_discard_passdown; | 2158 | pool->process_prepared_discard = process_prepared_discard_passdown; |
1828 | 2159 | ||
@@ -1835,7 +2166,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1835 | notify_of_pool_mode_change(pool, "write"); | 2166 | notify_of_pool_mode_change(pool, "write"); |
1836 | dm_pool_metadata_read_write(pool->pmd); | 2167 | dm_pool_metadata_read_write(pool->pmd); |
1837 | pool->process_bio = process_bio; | 2168 | pool->process_bio = process_bio; |
1838 | pool->process_discard = process_discard; | 2169 | pool->process_discard = process_discard_bio; |
2170 | pool->process_cell = process_cell; | ||
2171 | pool->process_discard_cell = process_discard_cell; | ||
1839 | pool->process_prepared_mapping = process_prepared_mapping; | 2172 | pool->process_prepared_mapping = process_prepared_mapping; |
1840 | pool->process_prepared_discard = process_prepared_discard; | 2173 | pool->process_prepared_discard = process_prepared_discard; |
1841 | break; | 2174 | break; |
@@ -1895,6 +2228,29 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) | |||
1895 | wake_worker(pool); | 2228 | wake_worker(pool); |
1896 | } | 2229 | } |
1897 | 2230 | ||
2231 | static void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio) | ||
2232 | { | ||
2233 | struct pool *pool = tc->pool; | ||
2234 | |||
2235 | throttle_lock(&pool->throttle); | ||
2236 | thin_defer_bio(tc, bio); | ||
2237 | throttle_unlock(&pool->throttle); | ||
2238 | } | ||
2239 | |||
2240 | static void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) | ||
2241 | { | ||
2242 | unsigned long flags; | ||
2243 | struct pool *pool = tc->pool; | ||
2244 | |||
2245 | throttle_lock(&pool->throttle); | ||
2246 | spin_lock_irqsave(&tc->lock, flags); | ||
2247 | list_add_tail(&cell->user_list, &tc->deferred_cells); | ||
2248 | spin_unlock_irqrestore(&tc->lock, flags); | ||
2249 | throttle_unlock(&pool->throttle); | ||
2250 | |||
2251 | wake_worker(pool); | ||
2252 | } | ||
2253 | |||
1898 | static void thin_hook_bio(struct thin_c *tc, struct bio *bio) | 2254 | static void thin_hook_bio(struct thin_c *tc, struct bio *bio) |
1899 | { | 2255 | { |
1900 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 2256 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); |
@@ -1915,8 +2271,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1915 | dm_block_t block = get_bio_block(tc, bio); | 2271 | dm_block_t block = get_bio_block(tc, bio); |
1916 | struct dm_thin_device *td = tc->td; | 2272 | struct dm_thin_device *td = tc->td; |
1917 | struct dm_thin_lookup_result result; | 2273 | struct dm_thin_lookup_result result; |
1918 | struct dm_bio_prison_cell cell1, cell2; | 2274 | struct dm_bio_prison_cell *virt_cell, *data_cell; |
1919 | struct dm_bio_prison_cell *cell_result; | ||
1920 | struct dm_cell_key key; | 2275 | struct dm_cell_key key; |
1921 | 2276 | ||
1922 | thin_hook_bio(tc, bio); | 2277 | thin_hook_bio(tc, bio); |
@@ -1932,7 +2287,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1932 | } | 2287 | } |
1933 | 2288 | ||
1934 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { | 2289 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { |
1935 | thin_defer_bio(tc, bio); | 2290 | thin_defer_bio_with_throttle(tc, bio); |
1936 | return DM_MAPIO_SUBMITTED; | 2291 | return DM_MAPIO_SUBMITTED; |
1937 | } | 2292 | } |
1938 | 2293 | ||
@@ -1941,7 +2296,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1941 | * there's a race with discard. | 2296 | * there's a race with discard. |
1942 | */ | 2297 | */ |
1943 | build_virtual_key(tc->td, block, &key); | 2298 | build_virtual_key(tc->td, block, &key); |
1944 | if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) | 2299 | if (bio_detain(tc->pool, &key, bio, &virt_cell)) |
1945 | return DM_MAPIO_SUBMITTED; | 2300 | return DM_MAPIO_SUBMITTED; |
1946 | 2301 | ||
1947 | r = dm_thin_find_block(td, block, 0, &result); | 2302 | r = dm_thin_find_block(td, block, 0, &result); |
@@ -1966,20 +2321,19 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1966 | * More distant ancestors are irrelevant. The | 2321 | * More distant ancestors are irrelevant. The |
1967 | * shared flag will be set in their case. | 2322 | * shared flag will be set in their case. |
1968 | */ | 2323 | */ |
1969 | thin_defer_bio(tc, bio); | 2324 | thin_defer_cell(tc, virt_cell); |
1970 | cell_defer_no_holder_no_free(tc, &cell1); | ||
1971 | return DM_MAPIO_SUBMITTED; | 2325 | return DM_MAPIO_SUBMITTED; |
1972 | } | 2326 | } |
1973 | 2327 | ||
1974 | build_data_key(tc->td, result.block, &key); | 2328 | build_data_key(tc->td, result.block, &key); |
1975 | if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) { | 2329 | if (bio_detain(tc->pool, &key, bio, &data_cell)) { |
1976 | cell_defer_no_holder_no_free(tc, &cell1); | 2330 | cell_defer_no_holder(tc, virt_cell); |
1977 | return DM_MAPIO_SUBMITTED; | 2331 | return DM_MAPIO_SUBMITTED; |
1978 | } | 2332 | } |
1979 | 2333 | ||
1980 | inc_all_io_entry(tc->pool, bio); | 2334 | inc_all_io_entry(tc->pool, bio); |
1981 | cell_defer_no_holder_no_free(tc, &cell2); | 2335 | cell_defer_no_holder(tc, data_cell); |
1982 | cell_defer_no_holder_no_free(tc, &cell1); | 2336 | cell_defer_no_holder(tc, virt_cell); |
1983 | 2337 | ||
1984 | remap(tc, bio, result.block); | 2338 | remap(tc, bio, result.block); |
1985 | return DM_MAPIO_REMAPPED; | 2339 | return DM_MAPIO_REMAPPED; |
@@ -1991,18 +2345,13 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1991 | * of doing so. | 2345 | * of doing so. |
1992 | */ | 2346 | */ |
1993 | handle_unserviceable_bio(tc->pool, bio); | 2347 | handle_unserviceable_bio(tc->pool, bio); |
1994 | cell_defer_no_holder_no_free(tc, &cell1); | 2348 | cell_defer_no_holder(tc, virt_cell); |
1995 | return DM_MAPIO_SUBMITTED; | 2349 | return DM_MAPIO_SUBMITTED; |
1996 | } | 2350 | } |
1997 | /* fall through */ | 2351 | /* fall through */ |
1998 | 2352 | ||
1999 | case -EWOULDBLOCK: | 2353 | case -EWOULDBLOCK: |
2000 | /* | 2354 | thin_defer_cell(tc, virt_cell); |
2001 | * In future, the failed dm_thin_find_block above could | ||
2002 | * provide the hint to load the metadata into cache. | ||
2003 | */ | ||
2004 | thin_defer_bio(tc, bio); | ||
2005 | cell_defer_no_holder_no_free(tc, &cell1); | ||
2006 | return DM_MAPIO_SUBMITTED; | 2355 | return DM_MAPIO_SUBMITTED; |
2007 | 2356 | ||
2008 | default: | 2357 | default: |
@@ -2012,7 +2361,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
2012 | * pool is switched to fail-io mode. | 2361 | * pool is switched to fail-io mode. |
2013 | */ | 2362 | */ |
2014 | bio_io_error(bio); | 2363 | bio_io_error(bio); |
2015 | cell_defer_no_holder_no_free(tc, &cell1); | 2364 | cell_defer_no_holder(tc, virt_cell); |
2016 | return DM_MAPIO_SUBMITTED; | 2365 | return DM_MAPIO_SUBMITTED; |
2017 | } | 2366 | } |
2018 | } | 2367 | } |
@@ -2193,7 +2542,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2193 | pool->sectors_per_block_shift = __ffs(block_size); | 2542 | pool->sectors_per_block_shift = __ffs(block_size); |
2194 | pool->low_water_blocks = 0; | 2543 | pool->low_water_blocks = 0; |
2195 | pool_features_init(&pool->pf); | 2544 | pool_features_init(&pool->pf); |
2196 | pool->prison = dm_bio_prison_create(PRISON_CELLS); | 2545 | pool->prison = dm_bio_prison_create(); |
2197 | if (!pool->prison) { | 2546 | if (!pool->prison) { |
2198 | *error = "Error creating pool's bio prison"; | 2547 | *error = "Error creating pool's bio prison"; |
2199 | err_p = ERR_PTR(-ENOMEM); | 2548 | err_p = ERR_PTR(-ENOMEM); |
@@ -2219,6 +2568,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2219 | goto bad_wq; | 2568 | goto bad_wq; |
2220 | } | 2569 | } |
2221 | 2570 | ||
2571 | throttle_init(&pool->throttle); | ||
2222 | INIT_WORK(&pool->worker, do_worker); | 2572 | INIT_WORK(&pool->worker, do_worker); |
2223 | INIT_DELAYED_WORK(&pool->waker, do_waker); | 2573 | INIT_DELAYED_WORK(&pool->waker, do_waker); |
2224 | INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); | 2574 | INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); |
@@ -2228,6 +2578,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2228 | INIT_LIST_HEAD(&pool->prepared_discards); | 2578 | INIT_LIST_HEAD(&pool->prepared_discards); |
2229 | INIT_LIST_HEAD(&pool->active_thins); | 2579 | INIT_LIST_HEAD(&pool->active_thins); |
2230 | pool->low_water_triggered = false; | 2580 | pool->low_water_triggered = false; |
2581 | pool->suspended = true; | ||
2231 | 2582 | ||
2232 | pool->shared_read_ds = dm_deferred_set_create(); | 2583 | pool->shared_read_ds = dm_deferred_set_create(); |
2233 | if (!pool->shared_read_ds) { | 2584 | if (!pool->shared_read_ds) { |
@@ -2764,20 +3115,77 @@ static int pool_preresume(struct dm_target *ti) | |||
2764 | return 0; | 3115 | return 0; |
2765 | } | 3116 | } |
2766 | 3117 | ||
3118 | static void pool_suspend_active_thins(struct pool *pool) | ||
3119 | { | ||
3120 | struct thin_c *tc; | ||
3121 | |||
3122 | /* Suspend all active thin devices */ | ||
3123 | tc = get_first_thin(pool); | ||
3124 | while (tc) { | ||
3125 | dm_internal_suspend_noflush(tc->thin_md); | ||
3126 | tc = get_next_thin(pool, tc); | ||
3127 | } | ||
3128 | } | ||
3129 | |||
3130 | static void pool_resume_active_thins(struct pool *pool) | ||
3131 | { | ||
3132 | struct thin_c *tc; | ||
3133 | |||
3134 | /* Resume all active thin devices */ | ||
3135 | tc = get_first_thin(pool); | ||
3136 | while (tc) { | ||
3137 | dm_internal_resume(tc->thin_md); | ||
3138 | tc = get_next_thin(pool, tc); | ||
3139 | } | ||
3140 | } | ||
3141 | |||
2767 | static void pool_resume(struct dm_target *ti) | 3142 | static void pool_resume(struct dm_target *ti) |
2768 | { | 3143 | { |
2769 | struct pool_c *pt = ti->private; | 3144 | struct pool_c *pt = ti->private; |
2770 | struct pool *pool = pt->pool; | 3145 | struct pool *pool = pt->pool; |
2771 | unsigned long flags; | 3146 | unsigned long flags; |
2772 | 3147 | ||
3148 | /* | ||
3149 | * Must requeue active_thins' bios and then resume | ||
3150 | * active_thins _before_ clearing 'suspend' flag. | ||
3151 | */ | ||
3152 | requeue_bios(pool); | ||
3153 | pool_resume_active_thins(pool); | ||
3154 | |||
2773 | spin_lock_irqsave(&pool->lock, flags); | 3155 | spin_lock_irqsave(&pool->lock, flags); |
2774 | pool->low_water_triggered = false; | 3156 | pool->low_water_triggered = false; |
3157 | pool->suspended = false; | ||
2775 | spin_unlock_irqrestore(&pool->lock, flags); | 3158 | spin_unlock_irqrestore(&pool->lock, flags); |
2776 | requeue_bios(pool); | ||
2777 | 3159 | ||
2778 | do_waker(&pool->waker.work); | 3160 | do_waker(&pool->waker.work); |
2779 | } | 3161 | } |
2780 | 3162 | ||
3163 | static void pool_presuspend(struct dm_target *ti) | ||
3164 | { | ||
3165 | struct pool_c *pt = ti->private; | ||
3166 | struct pool *pool = pt->pool; | ||
3167 | unsigned long flags; | ||
3168 | |||
3169 | spin_lock_irqsave(&pool->lock, flags); | ||
3170 | pool->suspended = true; | ||
3171 | spin_unlock_irqrestore(&pool->lock, flags); | ||
3172 | |||
3173 | pool_suspend_active_thins(pool); | ||
3174 | } | ||
3175 | |||
3176 | static void pool_presuspend_undo(struct dm_target *ti) | ||
3177 | { | ||
3178 | struct pool_c *pt = ti->private; | ||
3179 | struct pool *pool = pt->pool; | ||
3180 | unsigned long flags; | ||
3181 | |||
3182 | pool_resume_active_thins(pool); | ||
3183 | |||
3184 | spin_lock_irqsave(&pool->lock, flags); | ||
3185 | pool->suspended = false; | ||
3186 | spin_unlock_irqrestore(&pool->lock, flags); | ||
3187 | } | ||
3188 | |||
2781 | static void pool_postsuspend(struct dm_target *ti) | 3189 | static void pool_postsuspend(struct dm_target *ti) |
2782 | { | 3190 | { |
2783 | struct pool_c *pt = ti->private; | 3191 | struct pool_c *pt = ti->private; |
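The new presuspend/presuspend_undo hooks bracket the pool's 'suspended' flag: presuspend sets it and internally suspends every active thin, presuspend_undo and resume reverse that, and (as seen later in thin_ctr()) creating a thin while the flag is set is refused. A stripped-down model of that check, with a pthread mutex in place of pool->lock and plain names instead of the dm structures:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

struct fake_pool {
	pthread_mutex_t lock;   /* models pool->lock */
	bool suspended;         /* models pool->suspended */
};

static void mark_suspended(struct fake_pool *pool, bool value)
{
	pthread_mutex_lock(&pool->lock);
	pool->suspended = value;
	pthread_mutex_unlock(&pool->lock);
}

/* Refuse to attach a new thin device while the pool is suspended. */
static int try_activate_thin(struct fake_pool *pool)
{
	int r = 0;

	pthread_mutex_lock(&pool->lock);
	if (pool->suspended)
		r = -EINVAL;
	pthread_mutex_unlock(&pool->lock);

	return r;
}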
@@ -2949,7 +3357,6 @@ static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct | |||
2949 | * create_thin <dev_id> | 3357 | * create_thin <dev_id> |
2950 | * create_snap <dev_id> <origin_id> | 3358 | * create_snap <dev_id> <origin_id> |
2951 | * delete <dev_id> | 3359 | * delete <dev_id> |
2952 | * trim <dev_id> <new_size_in_sectors> | ||
2953 | * set_transaction_id <current_trans_id> <new_trans_id> | 3360 | * set_transaction_id <current_trans_id> <new_trans_id> |
2954 | * reserve_metadata_snap | 3361 | * reserve_metadata_snap |
2955 | * release_metadata_snap | 3362 | * release_metadata_snap |
@@ -3177,15 +3584,35 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3177 | { | 3584 | { |
3178 | struct pool_c *pt = ti->private; | 3585 | struct pool_c *pt = ti->private; |
3179 | struct pool *pool = pt->pool; | 3586 | struct pool *pool = pt->pool; |
3180 | uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; | 3587 | sector_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; |
3588 | |||
3589 | /* | ||
3590 | * If max_sectors is smaller than pool->sectors_per_block, adjust it | ||
3591 | * to the highest possible power-of-2 factor of pool->sectors_per_block. | ||
3592 | * This is especially beneficial when the pool's data device is a RAID | ||
3593 | * device whose full stripe width matches pool->sectors_per_block: even | ||
3594 | * though partial RAID stripe-sized IOs will be issued to a single RAID | ||
3595 | * stripe, when aggregated they will end on a full RAID stripe boundary, | ||
3596 | * which avoids additional partial RAID stripe writes cascading. | ||
3597 | */ | ||
3598 | if (limits->max_sectors < pool->sectors_per_block) { | ||
3599 | while (!is_factor(pool->sectors_per_block, limits->max_sectors)) { | ||
3600 | if ((limits->max_sectors & (limits->max_sectors - 1)) == 0) | ||
3601 | limits->max_sectors--; | ||
3602 | limits->max_sectors = rounddown_pow_of_two(limits->max_sectors); | ||
3603 | } | ||
3604 | } | ||
3181 | 3605 | ||
3182 | /* | 3606 | /* |
3183 | * If the system-determined stacked limits are compatible with the | 3607 | * If the system-determined stacked limits are compatible with the |
3184 | * pool's blocksize (io_opt is a factor) do not override them. | 3608 | * pool's blocksize (io_opt is a factor) do not override them. |
3185 | */ | 3609 | */ |
3186 | if (io_opt_sectors < pool->sectors_per_block || | 3610 | if (io_opt_sectors < pool->sectors_per_block || |
3187 | do_div(io_opt_sectors, pool->sectors_per_block)) { | 3611 | !is_factor(io_opt_sectors, pool->sectors_per_block)) { |
3188 | blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); | 3612 | if (is_factor(pool->sectors_per_block, limits->max_sectors)) |
3613 | blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT); | ||
3614 | else | ||
3615 | blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); | ||
3189 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); | 3616 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
3190 | } | 3617 | } |
3191 | 3618 | ||
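A worked userspace example of the max_sectors adjustment above. The helper names mirror the kernel ones but are reimplemented here as illustrative assumptions: is_factor(a, b) is taken to mean "b divides a evenly", and rounddown_pow_of_two() rounds down to a power of two. For instance, with sectors_per_block = 2048 and max_sectors = 1280, 1280 is not a factor of 2048, so it is rounded down to 1024, which is, and the loop stops there.

#include <stdbool.h>
#include <stdint.h>

static bool is_factor_of(uint32_t block, uint32_t n)
{
	return n && (block % n) == 0;
}

static uint32_t round_down_pow2(uint32_t n)
{
	while (n & (n - 1))
		n &= n - 1;	/* clear low bits until one remains */
	return n;
}

static uint32_t limit_max_sectors(uint32_t max_sectors, uint32_t sectors_per_block)
{
	if (max_sectors >= sectors_per_block)
		return max_sectors;

	while (!is_factor_of(sectors_per_block, max_sectors)) {
		if ((max_sectors & (max_sectors - 1)) == 0)
			max_sectors--;	/* a power of two that is still not a factor */
		max_sectors = round_down_pow2(max_sectors);
	}
	return max_sectors;
}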
@@ -3214,11 +3641,13 @@ static struct target_type pool_target = { | |||
3214 | .name = "thin-pool", | 3641 | .name = "thin-pool", |
3215 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 3642 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
3216 | DM_TARGET_IMMUTABLE, | 3643 | DM_TARGET_IMMUTABLE, |
3217 | .version = {1, 13, 0}, | 3644 | .version = {1, 14, 0}, |
3218 | .module = THIS_MODULE, | 3645 | .module = THIS_MODULE, |
3219 | .ctr = pool_ctr, | 3646 | .ctr = pool_ctr, |
3220 | .dtr = pool_dtr, | 3647 | .dtr = pool_dtr, |
3221 | .map = pool_map, | 3648 | .map = pool_map, |
3649 | .presuspend = pool_presuspend, | ||
3650 | .presuspend_undo = pool_presuspend_undo, | ||
3222 | .postsuspend = pool_postsuspend, | 3651 | .postsuspend = pool_postsuspend, |
3223 | .preresume = pool_preresume, | 3652 | .preresume = pool_preresume, |
3224 | .resume = pool_resume, | 3653 | .resume = pool_resume, |
@@ -3248,14 +3677,14 @@ static void thin_dtr(struct dm_target *ti) | |||
3248 | struct thin_c *tc = ti->private; | 3677 | struct thin_c *tc = ti->private; |
3249 | unsigned long flags; | 3678 | unsigned long flags; |
3250 | 3679 | ||
3251 | thin_put(tc); | ||
3252 | wait_for_completion(&tc->can_destroy); | ||
3253 | |||
3254 | spin_lock_irqsave(&tc->pool->lock, flags); | 3680 | spin_lock_irqsave(&tc->pool->lock, flags); |
3255 | list_del_rcu(&tc->list); | 3681 | list_del_rcu(&tc->list); |
3256 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 3682 | spin_unlock_irqrestore(&tc->pool->lock, flags); |
3257 | synchronize_rcu(); | 3683 | synchronize_rcu(); |
3258 | 3684 | ||
3685 | thin_put(tc); | ||
3686 | wait_for_completion(&tc->can_destroy); | ||
3687 | |||
3259 | mutex_lock(&dm_thin_pool_table.mutex); | 3688 | mutex_lock(&dm_thin_pool_table.mutex); |
3260 | 3689 | ||
3261 | __pool_dec(tc->pool); | 3690 | __pool_dec(tc->pool); |
@@ -3302,7 +3731,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3302 | r = -ENOMEM; | 3731 | r = -ENOMEM; |
3303 | goto out_unlock; | 3732 | goto out_unlock; |
3304 | } | 3733 | } |
3734 | tc->thin_md = dm_table_get_md(ti->table); | ||
3305 | spin_lock_init(&tc->lock); | 3735 | spin_lock_init(&tc->lock); |
3736 | INIT_LIST_HEAD(&tc->deferred_cells); | ||
3306 | bio_list_init(&tc->deferred_bio_list); | 3737 | bio_list_init(&tc->deferred_bio_list); |
3307 | bio_list_init(&tc->retry_on_resume_list); | 3738 | bio_list_init(&tc->retry_on_resume_list); |
3308 | tc->sort_bio_list = RB_ROOT; | 3739 | tc->sort_bio_list = RB_ROOT; |
@@ -3347,18 +3778,18 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3347 | if (get_pool_mode(tc->pool) == PM_FAIL) { | 3778 | if (get_pool_mode(tc->pool) == PM_FAIL) { |
3348 | ti->error = "Couldn't open thin device, Pool is in fail mode"; | 3779 | ti->error = "Couldn't open thin device, Pool is in fail mode"; |
3349 | r = -EINVAL; | 3780 | r = -EINVAL; |
3350 | goto bad_thin_open; | 3781 | goto bad_pool; |
3351 | } | 3782 | } |
3352 | 3783 | ||
3353 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); | 3784 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); |
3354 | if (r) { | 3785 | if (r) { |
3355 | ti->error = "Couldn't open thin internal device"; | 3786 | ti->error = "Couldn't open thin internal device"; |
3356 | goto bad_thin_open; | 3787 | goto bad_pool; |
3357 | } | 3788 | } |
3358 | 3789 | ||
3359 | r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); | 3790 | r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); |
3360 | if (r) | 3791 | if (r) |
3361 | goto bad_target_max_io_len; | 3792 | goto bad; |
3362 | 3793 | ||
3363 | ti->num_flush_bios = 1; | 3794 | ti->num_flush_bios = 1; |
3364 | ti->flush_supported = true; | 3795 | ti->flush_supported = true; |
@@ -3373,14 +3804,16 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3373 | ti->split_discard_bios = true; | 3804 | ti->split_discard_bios = true; |
3374 | } | 3805 | } |
3375 | 3806 | ||
3376 | dm_put(pool_md); | ||
3377 | |||
3378 | mutex_unlock(&dm_thin_pool_table.mutex); | 3807 | mutex_unlock(&dm_thin_pool_table.mutex); |
3379 | 3808 | ||
3380 | atomic_set(&tc->refcount, 1); | ||
3381 | init_completion(&tc->can_destroy); | ||
3382 | |||
3383 | spin_lock_irqsave(&tc->pool->lock, flags); | 3809 | spin_lock_irqsave(&tc->pool->lock, flags); |
3810 | if (tc->pool->suspended) { | ||
3811 | spin_unlock_irqrestore(&tc->pool->lock, flags); | ||
3812 | mutex_lock(&dm_thin_pool_table.mutex); /* reacquire for __pool_dec */ | ||
3813 | ti->error = "Unable to activate thin device while pool is suspended"; | ||
3814 | r = -EINVAL; | ||
3815 | goto bad; | ||
3816 | } | ||
3384 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); | 3817 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); |
3385 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 3818 | spin_unlock_irqrestore(&tc->pool->lock, flags); |
3386 | /* | 3819 | /* |
@@ -3391,11 +3824,16 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3391 | */ | 3824 | */ |
3392 | synchronize_rcu(); | 3825 | synchronize_rcu(); |
3393 | 3826 | ||
3827 | dm_put(pool_md); | ||
3828 | |||
3829 | atomic_set(&tc->refcount, 1); | ||
3830 | init_completion(&tc->can_destroy); | ||
3831 | |||
3394 | return 0; | 3832 | return 0; |
3395 | 3833 | ||
3396 | bad_target_max_io_len: | 3834 | bad: |
3397 | dm_pool_close_thin_device(tc->td); | 3835 | dm_pool_close_thin_device(tc->td); |
3398 | bad_thin_open: | 3836 | bad_pool: |
3399 | __pool_dec(tc->pool); | 3837 | __pool_dec(tc->pool); |
3400 | bad_pool_lookup: | 3838 | bad_pool_lookup: |
3401 | dm_put(pool_md); | 3839 | dm_put(pool_md); |
@@ -3541,6 +3979,21 @@ err: | |||
3541 | DMEMIT("Error"); | 3979 | DMEMIT("Error"); |
3542 | } | 3980 | } |
3543 | 3981 | ||
3982 | static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | ||
3983 | struct bio_vec *biovec, int max_size) | ||
3984 | { | ||
3985 | struct thin_c *tc = ti->private; | ||
3986 | struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev); | ||
3987 | |||
3988 | if (!q->merge_bvec_fn) | ||
3989 | return max_size; | ||
3990 | |||
3991 | bvm->bi_bdev = tc->pool_dev->bdev; | ||
3992 | bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector); | ||
3993 | |||
3994 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); | ||
3995 | } | ||
3996 | |||
3544 | static int thin_iterate_devices(struct dm_target *ti, | 3997 | static int thin_iterate_devices(struct dm_target *ti, |
3545 | iterate_devices_callout_fn fn, void *data) | 3998 | iterate_devices_callout_fn fn, void *data) |
3546 | { | 3999 | { |
@@ -3565,7 +4018,7 @@ static int thin_iterate_devices(struct dm_target *ti, | |||
3565 | 4018 | ||
3566 | static struct target_type thin_target = { | 4019 | static struct target_type thin_target = { |
3567 | .name = "thin", | 4020 | .name = "thin", |
3568 | .version = {1, 13, 0}, | 4021 | .version = {1, 14, 0}, |
3569 | .module = THIS_MODULE, | 4022 | .module = THIS_MODULE, |
3570 | .ctr = thin_ctr, | 4023 | .ctr = thin_ctr, |
3571 | .dtr = thin_dtr, | 4024 | .dtr = thin_dtr, |
@@ -3575,6 +4028,7 @@ static struct target_type thin_target = { | |||
3575 | .presuspend = thin_presuspend, | 4028 | .presuspend = thin_presuspend, |
3576 | .postsuspend = thin_postsuspend, | 4029 | .postsuspend = thin_postsuspend, |
3577 | .status = thin_status, | 4030 | .status = thin_status, |
4031 | .merge = thin_merge, | ||
3578 | .iterate_devices = thin_iterate_devices, | 4032 | .iterate_devices = thin_iterate_devices, |
3579 | }; | 4033 | }; |
3580 | 4034 | ||
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 58f3927fd7cc..8f37ed215b19 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/idr.h> | 19 | #include <linux/idr.h> |
20 | #include <linux/hdreg.h> | 20 | #include <linux/hdreg.h> |
21 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
22 | #include <linux/wait.h> | ||
22 | 23 | ||
23 | #include <trace/events/block.h> | 24 | #include <trace/events/block.h> |
24 | 25 | ||
@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
117 | #define DMF_NOFLUSH_SUSPENDING 5 | 118 | #define DMF_NOFLUSH_SUSPENDING 5 |
118 | #define DMF_MERGE_IS_OPTIONAL 6 | 119 | #define DMF_MERGE_IS_OPTIONAL 6 |
119 | #define DMF_DEFERRED_REMOVE 7 | 120 | #define DMF_DEFERRED_REMOVE 7 |
121 | #define DMF_SUSPENDED_INTERNALLY 8 | ||
120 | 122 | ||
121 | /* | 123 | /* |
122 | * A dummy definition to make RCU happy. | 124 | * A dummy definition to make RCU happy. |
@@ -140,7 +142,7 @@ struct mapped_device { | |||
140 | * Use dm_get_live_table{_fast} or take suspend_lock for | 142 | * Use dm_get_live_table{_fast} or take suspend_lock for |
141 | * dereference. | 143 | * dereference. |
142 | */ | 144 | */ |
143 | struct dm_table *map; | 145 | struct dm_table __rcu *map; |
144 | 146 | ||
145 | struct list_head table_devices; | 147 | struct list_head table_devices; |
146 | struct mutex table_devices_lock; | 148 | struct mutex table_devices_lock; |
@@ -525,14 +527,15 @@ retry: | |||
525 | goto out; | 527 | goto out; |
526 | 528 | ||
527 | tgt = dm_table_get_target(map, 0); | 529 | tgt = dm_table_get_target(map, 0); |
530 | if (!tgt->type->ioctl) | ||
531 | goto out; | ||
528 | 532 | ||
529 | if (dm_suspended_md(md)) { | 533 | if (dm_suspended_md(md)) { |
530 | r = -EAGAIN; | 534 | r = -EAGAIN; |
531 | goto out; | 535 | goto out; |
532 | } | 536 | } |
533 | 537 | ||
534 | if (tgt->type->ioctl) | 538 | r = tgt->type->ioctl(tgt, cmd, arg); |
535 | r = tgt->type->ioctl(tgt, cmd, arg); | ||
536 | 539 | ||
537 | out: | 540 | out: |
538 | dm_put_live_table(md, srcu_idx); | 541 | dm_put_live_table(md, srcu_idx); |
@@ -1607,9 +1610,9 @@ static int dm_merge_bvec(struct request_queue *q, | |||
1607 | * Find maximum amount of I/O that won't need splitting | 1610 | * Find maximum amount of I/O that won't need splitting |
1608 | */ | 1611 | */ |
1609 | max_sectors = min(max_io_len(bvm->bi_sector, ti), | 1612 | max_sectors = min(max_io_len(bvm->bi_sector, ti), |
1610 | (sector_t) BIO_MAX_SECTORS); | 1613 | (sector_t) queue_max_sectors(q)); |
1611 | max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; | 1614 | max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; |
1612 | if (max_size < 0) | 1615 | if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */ |
1613 | max_size = 0; | 1616 | max_size = 0; |
1614 | 1617 | ||
1615 | /* | 1618 | /* |
@@ -1621,10 +1624,10 @@ static int dm_merge_bvec(struct request_queue *q, | |||
1621 | max_size = ti->type->merge(ti, bvm, biovec, max_size); | 1624 | max_size = ti->type->merge(ti, bvm, biovec, max_size); |
1622 | /* | 1625 | /* |
1623 | * If the target doesn't support merge method and some of the devices | 1626 | * If the target doesn't support merge method and some of the devices |
1624 | * provided their merge_bvec method (we know this by looking at | 1627 | * provided their merge_bvec method (we know this by looking for the |
1625 | * queue_max_hw_sectors), then we can't allow bios with multiple vector | 1628 | * max_hw_sectors that dm_set_device_limits may set), then we can't |
1626 | * entries. So always set max_size to 0, and the code below allows | 1629 | * allow bios with multiple vector entries. So always set max_size |
1627 | * just one page. | 1630 | * to 0, and the code below allows just one page. |
1628 | */ | 1631 | */ |
1629 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) | 1632 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) |
1630 | max_size = 0; | 1633 | max_size = 0; |
@@ -2332,7 +2335,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
2332 | 2335 | ||
2333 | merge_is_optional = dm_table_merge_is_optional(t); | 2336 | merge_is_optional = dm_table_merge_is_optional(t); |
2334 | 2337 | ||
2335 | old_map = md->map; | 2338 | old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); |
2336 | rcu_assign_pointer(md->map, t); | 2339 | rcu_assign_pointer(md->map, t); |
2337 | md->immutable_target_type = dm_table_get_immutable_target_type(t); | 2340 | md->immutable_target_type = dm_table_get_immutable_target_type(t); |
2338 | 2341 | ||
@@ -2341,7 +2344,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
2341 | set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); | 2344 | set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); |
2342 | else | 2345 | else |
2343 | clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); | 2346 | clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); |
2344 | dm_sync_table(md); | 2347 | if (old_map) |
2348 | dm_sync_table(md); | ||
2345 | 2349 | ||
2346 | return old_map; | 2350 | return old_map; |
2347 | } | 2351 | } |
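Editor's note: the __rcu annotation plus rcu_dereference_protected() makes the locking contract explicit: readers dereference md->map under the io_barrier SRCU read lock, writers replace it under suspend_lock and then synchronize. A self-contained sketch of that pairing with hypothetical names (cfg, live_cfg, cfg_srcu), not the dm structures themselves:

#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>

struct cfg { int value; };

static struct cfg __rcu *live_cfg;	/* the published pointer */
static DEFINE_STATIC_SRCU(cfg_srcu);	/* read-side barrier */
static DEFINE_MUTEX(cfg_lock);		/* writer serialisation */

static int cfg_read(void)
{
	int idx, v;
	struct cfg *c;

	idx = srcu_read_lock(&cfg_srcu);
	c = srcu_dereference(live_cfg, &cfg_srcu);
	v = c ? c->value : -1;
	srcu_read_unlock(&cfg_srcu, idx);
	return v;
}

static struct cfg *cfg_swap(struct cfg *new)
{
	struct cfg *old;

	mutex_lock(&cfg_lock);
	old = rcu_dereference_protected(live_cfg, lockdep_is_held(&cfg_lock));
	rcu_assign_pointer(live_cfg, new);
	if (old)			/* mirrors the __bind change above: */
		synchronize_srcu(&cfg_srcu);	/* no wait on the first bind */
	mutex_unlock(&cfg_lock);
	return old;			/* grace period done; caller may free */
}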
@@ -2351,7 +2355,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
2351 | */ | 2355 | */ |
2352 | static struct dm_table *__unbind(struct mapped_device *md) | 2356 | static struct dm_table *__unbind(struct mapped_device *md) |
2353 | { | 2357 | { |
2354 | struct dm_table *map = md->map; | 2358 | struct dm_table *map = rcu_dereference_protected(md->map, 1); |
2355 | 2359 | ||
2356 | if (!map) | 2360 | if (!map) |
2357 | return NULL; | 2361 | return NULL; |
@@ -2716,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md) | |||
2716 | } | 2720 | } |
2717 | 2721 | ||
2718 | /* | 2722 | /* |
2719 | * We need to be able to change a mapping table under a mounted | 2723 | * If __dm_suspend returns 0, the device is completely quiescent |
2720 | * filesystem. For example we might want to move some data in | 2724 | * now. There is no request-processing activity. All new requests |
2721 | * the background. Before the table can be swapped with | 2725 | * are being added to md->deferred list. |
2722 | * dm_bind_table, dm_suspend must be called to flush any in | ||
2723 | * flight bios and ensure that any further io gets deferred. | ||
2724 | */ | ||
2725 | /* | ||
2726 | * Suspend mechanism in request-based dm. | ||
2727 | * | 2726 | * |
2728 | * 1. Flush all I/Os by lock_fs() if needed. | 2727 | * Caller must hold md->suspend_lock |
2729 | * 2. Stop dispatching any I/O by stopping the request_queue. | ||
2730 | * 3. Wait for all in-flight I/Os to be completed or requeued. | ||
2731 | * | ||
2732 | * To abort suspend, start the request_queue. | ||
2733 | */ | 2728 | */ |
2734 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 2729 | static int __dm_suspend(struct mapped_device *md, struct dm_table *map, |
2730 | unsigned suspend_flags, int interruptible) | ||
2735 | { | 2731 | { |
2736 | struct dm_table *map = NULL; | 2732 | bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; |
2737 | int r = 0; | 2733 | bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; |
2738 | int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; | 2734 | int r; |
2739 | int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; | ||
2740 | |||
2741 | mutex_lock(&md->suspend_lock); | ||
2742 | |||
2743 | if (dm_suspended_md(md)) { | ||
2744 | r = -EINVAL; | ||
2745 | goto out_unlock; | ||
2746 | } | ||
2747 | |||
2748 | map = md->map; | ||
2749 | 2735 | ||
2750 | /* | 2736 | /* |
2751 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. | 2737 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. |
@@ -2754,7 +2740,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2754 | if (noflush) | 2740 | if (noflush) |
2755 | set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 2741 | set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
2756 | 2742 | ||
2757 | /* This does not get reverted if there's an error later. */ | 2743 | /* |
2744 | * This gets reverted if there's an error later and the targets | ||
2745 | * provide the .presuspend_undo hook. | ||
2746 | */ | ||
2758 | dm_table_presuspend_targets(map); | 2747 | dm_table_presuspend_targets(map); |
2759 | 2748 | ||
2760 | /* | 2749 | /* |
@@ -2765,8 +2754,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2765 | */ | 2754 | */ |
2766 | if (!noflush && do_lockfs) { | 2755 | if (!noflush && do_lockfs) { |
2767 | r = lock_fs(md); | 2756 | r = lock_fs(md); |
2768 | if (r) | 2757 | if (r) { |
2769 | goto out_unlock; | 2758 | dm_table_presuspend_undo_targets(map); |
2759 | return r; | ||
2760 | } | ||
2770 | } | 2761 | } |
2771 | 2762 | ||
2772 | /* | 2763 | /* |
@@ -2782,7 +2773,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2782 | * flush_workqueue(md->wq). | 2773 | * flush_workqueue(md->wq). |
2783 | */ | 2774 | */ |
2784 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2775 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2785 | synchronize_srcu(&md->io_barrier); | 2776 | if (map) |
2777 | synchronize_srcu(&md->io_barrier); | ||
2786 | 2778 | ||
2787 | /* | 2779 | /* |
2788 | * Stop md->queue before flushing md->wq in case request-based | 2780 | * Stop md->queue before flushing md->wq in case request-based |
@@ -2798,11 +2790,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2798 | * We call dm_wait_for_completion to wait for all existing requests | 2790 | * We call dm_wait_for_completion to wait for all existing requests |
2799 | * to finish. | 2791 | * to finish. |
2800 | */ | 2792 | */ |
2801 | r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); | 2793 | r = dm_wait_for_completion(md, interruptible); |
2802 | 2794 | ||
2803 | if (noflush) | 2795 | if (noflush) |
2804 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 2796 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
2805 | synchronize_srcu(&md->io_barrier); | 2797 | if (map) |
2798 | synchronize_srcu(&md->io_barrier); | ||
2806 | 2799 | ||
2807 | /* were we interrupted ? */ | 2800 | /* were we interrupted ? */ |
2808 | if (r < 0) { | 2801 | if (r < 0) { |
@@ -2812,14 +2805,56 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2812 | start_queue(md->queue); | 2805 | start_queue(md->queue); |
2813 | 2806 | ||
2814 | unlock_fs(md); | 2807 | unlock_fs(md); |
2815 | goto out_unlock; /* pushback list is already flushed, so skip flush */ | 2808 | dm_table_presuspend_undo_targets(map); |
2809 | /* pushback list is already flushed, so skip flush */ | ||
2816 | } | 2810 | } |
2817 | 2811 | ||
2818 | /* | 2812 | return r; |
2819 | * If dm_wait_for_completion returned 0, the device is completely | 2813 | } |
2820 | * quiescent now. There is no request-processing activity. All new | 2814 | |
2821 | * requests are being added to md->deferred list. | 2815 | /* |
2822 | */ | 2816 | * We need to be able to change a mapping table under a mounted |
2817 | * filesystem. For example we might want to move some data in | ||
2818 | * the background. Before the table can be swapped with | ||
2819 | * dm_bind_table, dm_suspend must be called to flush any in | ||
2820 | * flight bios and ensure that any further io gets deferred. | ||
2821 | */ | ||
2822 | /* | ||
2823 | * Suspend mechanism in request-based dm. | ||
2824 | * | ||
2825 | * 1. Flush all I/Os by lock_fs() if needed. | ||
2826 | * 2. Stop dispatching any I/O by stopping the request_queue. | ||
2827 | * 3. Wait for all in-flight I/Os to be completed or requeued. | ||
2828 | * | ||
2829 | * To abort suspend, start the request_queue. | ||
2830 | */ | ||
2831 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | ||
2832 | { | ||
2833 | struct dm_table *map = NULL; | ||
2834 | int r = 0; | ||
2835 | |||
2836 | retry: | ||
2837 | mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); | ||
2838 | |||
2839 | if (dm_suspended_md(md)) { | ||
2840 | r = -EINVAL; | ||
2841 | goto out_unlock; | ||
2842 | } | ||
2843 | |||
2844 | if (dm_suspended_internally_md(md)) { | ||
2845 | /* already internally suspended, wait for internal resume */ | ||
2846 | mutex_unlock(&md->suspend_lock); | ||
2847 | r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); | ||
2848 | if (r) | ||
2849 | return r; | ||
2850 | goto retry; | ||
2851 | } | ||
2852 | |||
2853 | map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); | ||
2854 | |||
2855 | r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE); | ||
2856 | if (r) | ||
2857 | goto out_unlock; | ||
2823 | 2858 | ||
2824 | set_bit(DMF_SUSPENDED, &md->flags); | 2859 | set_bit(DMF_SUSPENDED, &md->flags); |
2825 | 2860 | ||
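Editor's note: the retry loop above relies on the bit-wait API: dm_suspend() sleeps on DMF_SUSPENDED_INTERNALLY and is woken by the clear_bit()/smp_mb__after_atomic()/wake_up_bit() sequence in __dm_internal_resume() later in this diff. A self-contained sketch of that handshake with a hypothetical flags word and bit number:

#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/sched.h>

#define MY_BUSY_BIT 0

static unsigned long my_flags;

/* Waiter side: returns 0 once the bit is clear, or -EINTR if interrupted. */
static int wait_until_not_busy(void)
{
	return wait_on_bit(&my_flags, MY_BUSY_BIT, TASK_INTERRUPTIBLE);
}

/* Owner side: clear the bit, then wake anyone sleeping in wait_on_bit().
 * The barrier orders the clear before the wakeup, as in __dm_internal_resume(). */
static void mark_not_busy(void)
{
	clear_bit(MY_BUSY_BIT, &my_flags);
	smp_mb__after_atomic();
	wake_up_bit(&my_flags, MY_BUSY_BIT);
}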
@@ -2830,22 +2865,13 @@ out_unlock: | |||
2830 | return r; | 2865 | return r; |
2831 | } | 2866 | } |
2832 | 2867 | ||
2833 | int dm_resume(struct mapped_device *md) | 2868 | static int __dm_resume(struct mapped_device *md, struct dm_table *map) |
2834 | { | 2869 | { |
2835 | int r = -EINVAL; | 2870 | if (map) { |
2836 | struct dm_table *map = NULL; | 2871 | int r = dm_table_resume_targets(map); |
2837 | 2872 | if (r) | |
2838 | mutex_lock(&md->suspend_lock); | 2873 | return r; |
2839 | if (!dm_suspended_md(md)) | 2874 | } |
2840 | goto out; | ||
2841 | |||
2842 | map = md->map; | ||
2843 | if (!map || !dm_table_get_size(map)) | ||
2844 | goto out; | ||
2845 | |||
2846 | r = dm_table_resume_targets(map); | ||
2847 | if (r) | ||
2848 | goto out; | ||
2849 | 2875 | ||
2850 | dm_queue_flush(md); | 2876 | dm_queue_flush(md); |
2851 | 2877 | ||
@@ -2859,6 +2885,37 @@ int dm_resume(struct mapped_device *md) | |||
2859 | 2885 | ||
2860 | unlock_fs(md); | 2886 | unlock_fs(md); |
2861 | 2887 | ||
2888 | return 0; | ||
2889 | } | ||
2890 | |||
2891 | int dm_resume(struct mapped_device *md) | ||
2892 | { | ||
2893 | int r = -EINVAL; | ||
2894 | struct dm_table *map = NULL; | ||
2895 | |||
2896 | retry: | ||
2897 | mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); | ||
2898 | |||
2899 | if (!dm_suspended_md(md)) | ||
2900 | goto out; | ||
2901 | |||
2902 | if (dm_suspended_internally_md(md)) { | ||
2903 | /* already internally suspended, wait for internal resume */ | ||
2904 | mutex_unlock(&md->suspend_lock); | ||
2905 | r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); | ||
2906 | if (r) | ||
2907 | return r; | ||
2908 | goto retry; | ||
2909 | } | ||
2910 | |||
2911 | map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); | ||
2912 | if (!map || !dm_table_get_size(map)) | ||
2913 | goto out; | ||
2914 | |||
2915 | r = __dm_resume(md, map); | ||
2916 | if (r) | ||
2917 | goto out; | ||
2918 | |||
2862 | clear_bit(DMF_SUSPENDED, &md->flags); | 2919 | clear_bit(DMF_SUSPENDED, &md->flags); |
2863 | 2920 | ||
2864 | r = 0; | 2921 | r = 0; |
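Editor's note: the usual caller of these two entry points is the ioctl path, which suspends, binds the pre-loaded table, and resumes. A hedged sketch of that sequence under the declarations used by dm-ioctl.c; error handling and the hash-cell bookkeeping that do_resume() performs are omitted.

#include <linux/device-mapper.h>
#include <linux/err.h>

/* Hedged sketch: swap in a new table on a live device. */
static int swap_table_sketch(struct mapped_device *md, struct dm_table *new_map,
			     unsigned suspend_flags)
{
	struct dm_table *old_map;

	if (!dm_suspended_md(md))
		dm_suspend(md, suspend_flags);

	old_map = dm_swap_table(md, new_map);	/* returns the previous map */
	if (IS_ERR(old_map))
		return PTR_ERR(old_map);

	if (old_map)
		dm_table_destroy(old_map);

	return dm_resume(md);
}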
@@ -2872,15 +2929,80 @@ out: | |||
2872 | * Internal suspend/resume works like userspace-driven suspend. It waits | 2929 | * Internal suspend/resume works like userspace-driven suspend. It waits |
2873 | * until all bios finish and prevents issuing new bios to the target drivers. | 2930 | * until all bios finish and prevents issuing new bios to the target drivers. |
2874 | * It may be used only from the kernel. | 2931 | * It may be used only from the kernel. |
2875 | * | ||
2876 | * Internal suspend holds md->suspend_lock, which prevents interaction with | ||
2877 | * userspace-driven suspend. | ||
2878 | */ | 2932 | */ |
2879 | 2933 | ||
2880 | void dm_internal_suspend(struct mapped_device *md) | 2934 | static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags) |
2881 | { | 2935 | { |
2882 | mutex_lock(&md->suspend_lock); | 2936 | struct dm_table *map = NULL; |
2937 | |||
2938 | if (dm_suspended_internally_md(md)) | ||
2939 | return; /* nested internal suspend */ | ||
2940 | |||
2941 | if (dm_suspended_md(md)) { | ||
2942 | set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); | ||
2943 | return; /* nest suspend */ | ||
2944 | } | ||
2945 | |||
2946 | map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); | ||
2947 | |||
2948 | /* | ||
2949 | * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is | ||
2950 | * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend | ||
2951 | * would require changing .presuspend to return an error -- avoid this | ||
2952 | * until there is a need for more elaborate variants of internal suspend. | ||
2953 | */ | ||
2954 | (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE); | ||
2955 | |||
2956 | set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); | ||
2957 | |||
2958 | dm_table_postsuspend_targets(map); | ||
2959 | } | ||
2960 | |||
2961 | static void __dm_internal_resume(struct mapped_device *md) | ||
2962 | { | ||
2963 | if (!dm_suspended_internally_md(md)) | ||
2964 | return; /* resume from nested internal suspend */ | ||
2965 | |||
2883 | if (dm_suspended_md(md)) | 2966 | if (dm_suspended_md(md)) |
2967 | goto done; /* resume from nested suspend */ | ||
2968 | |||
2969 | /* | ||
2970 | * NOTE: existing callers don't need to call dm_table_resume_targets | ||
2971 | * (which may fail -- so best to avoid it for now by passing NULL map) | ||
2972 | */ | ||
2973 | (void) __dm_resume(md, NULL); | ||
2974 | |||
2975 | done: | ||
2976 | clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); | ||
2977 | smp_mb__after_atomic(); | ||
2978 | wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY); | ||
2979 | } | ||
2980 | |||
2981 | void dm_internal_suspend_noflush(struct mapped_device *md) | ||
2982 | { | ||
2983 | mutex_lock(&md->suspend_lock); | ||
2984 | __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG); | ||
2985 | mutex_unlock(&md->suspend_lock); | ||
2986 | } | ||
2987 | EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush); | ||
2988 | |||
2989 | void dm_internal_resume(struct mapped_device *md) | ||
2990 | { | ||
2991 | mutex_lock(&md->suspend_lock); | ||
2992 | __dm_internal_resume(md); | ||
2993 | mutex_unlock(&md->suspend_lock); | ||
2994 | } | ||
2995 | EXPORT_SYMBOL_GPL(dm_internal_resume); | ||
2996 | |||
2997 | /* | ||
2998 | * Fast variants of internal suspend/resume hold md->suspend_lock, | ||
2999 | * which prevents interaction with userspace-driven suspend. | ||
3000 | */ | ||
3001 | |||
3002 | void dm_internal_suspend_fast(struct mapped_device *md) | ||
3003 | { | ||
3004 | mutex_lock(&md->suspend_lock); | ||
3005 | if (dm_suspended_md(md) || dm_suspended_internally_md(md)) | ||
2884 | return; | 3006 | return; |
2885 | 3007 | ||
2886 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 3008 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
@@ -2889,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md) | |||
2889 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | 3011 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
2890 | } | 3012 | } |
2891 | 3013 | ||
2892 | void dm_internal_resume(struct mapped_device *md) | 3014 | void dm_internal_resume_fast(struct mapped_device *md) |
2893 | { | 3015 | { |
2894 | if (dm_suspended_md(md)) | 3016 | if (dm_suspended_md(md) || dm_suspended_internally_md(md)) |
2895 | goto done; | 3017 | goto done; |
2896 | 3018 | ||
2897 | dm_queue_flush(md); | 3019 | dm_queue_flush(md); |
@@ -2977,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md) | |||
2977 | return test_bit(DMF_SUSPENDED, &md->flags); | 3099 | return test_bit(DMF_SUSPENDED, &md->flags); |
2978 | } | 3100 | } |
2979 | 3101 | ||
3102 | int dm_suspended_internally_md(struct mapped_device *md) | ||
3103 | { | ||
3104 | return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); | ||
3105 | } | ||
3106 | |||
2980 | int dm_test_deferred_remove_flag(struct mapped_device *md) | 3107 | int dm_test_deferred_remove_flag(struct mapped_device *md) |
2981 | { | 3108 | { |
2982 | return test_bit(DMF_DEFERRED_REMOVE, &md->flags); | 3109 | return test_bit(DMF_DEFERRED_REMOVE, &md->flags); |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 988c7fb7b145..84b0f9e4ba6c 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -65,6 +65,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | |||
65 | struct queue_limits *limits); | 65 | struct queue_limits *limits); |
66 | struct list_head *dm_table_get_devices(struct dm_table *t); | 66 | struct list_head *dm_table_get_devices(struct dm_table *t); |
67 | void dm_table_presuspend_targets(struct dm_table *t); | 67 | void dm_table_presuspend_targets(struct dm_table *t); |
68 | void dm_table_presuspend_undo_targets(struct dm_table *t); | ||
68 | void dm_table_postsuspend_targets(struct dm_table *t); | 69 | void dm_table_postsuspend_targets(struct dm_table *t); |
69 | int dm_table_resume_targets(struct dm_table *t); | 70 | int dm_table_resume_targets(struct dm_table *t); |
70 | int dm_table_any_congested(struct dm_table *t, int bdi_bits); | 71 | int dm_table_any_congested(struct dm_table *t, int bdi_bits); |
@@ -129,6 +130,15 @@ int dm_deleting_md(struct mapped_device *md); | |||
129 | int dm_suspended_md(struct mapped_device *md); | 130 | int dm_suspended_md(struct mapped_device *md); |
130 | 131 | ||
131 | /* | 132 | /* |
133 | * Internal suspend and resume methods. | ||
134 | */ | ||
135 | int dm_suspended_internally_md(struct mapped_device *md); | ||
136 | void dm_internal_suspend_fast(struct mapped_device *md); | ||
137 | void dm_internal_resume_fast(struct mapped_device *md); | ||
138 | void dm_internal_suspend_noflush(struct mapped_device *md); | ||
139 | void dm_internal_resume(struct mapped_device *md); | ||
140 | |||
141 | /* | ||
132 | * Test if the device is scheduled for deferred remove. | 142 | * Test if the device is scheduled for deferred remove. |
133 | */ | 143 | */ |
134 | int dm_test_deferred_remove_flag(struct mapped_device *md); | 144 | int dm_test_deferred_remove_flag(struct mapped_device *md); |
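Editor's note: the new declarations split internal suspend into two flavours: the noflush variant goes through the full __dm_suspend()/__dm_resume() machinery and nests against userspace-driven suspend, while the _fast variants only quiesce bio submission under suspend_lock. A hedged usage sketch for an in-kernel caller; pool_dev and critical_op are hypothetical names, and the declarations above from drivers/md/dm.h are assumed to be in scope.

/* Hedged sketch: quiesce a mapped_device from inside the kernel around a
 * critical metadata operation, then let I/O flow again. */
static void with_device_quiesced(struct mapped_device *pool_dev,
				 void (*critical_op)(void *), void *context)
{
	dm_internal_suspend_noflush(pool_dev);	/* waits for in-flight bios */

	critical_op(context);			/* device is quiescent here */

	dm_internal_resume(pool_dev);		/* re-queues deferred bios */
}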
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 1d75b1dc1e2e..e64b61ad0ef3 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c | |||
@@ -645,8 +645,10 @@ static int array_resize(struct dm_array_info *info, dm_block_t root, | |||
645 | int r; | 645 | int r; |
646 | struct resize resize; | 646 | struct resize resize; |
647 | 647 | ||
648 | if (old_size == new_size) | 648 | if (old_size == new_size) { |
649 | *new_root = root; | ||
649 | return 0; | 650 | return 0; |
651 | } | ||
650 | 652 | ||
651 | resize.info = info; | 653 | resize.info = info; |
652 | resize.root = root; | 654 | resize.root = root; |
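Editor's note: the one-line fix matters because callers treat *new_root as unconditionally valid after a successful resize. A minimal caller sketch under that contract; info and value are whatever the caller already set up, and dm_array_resize()'s signature follows dm-array.h.

/* Resize an array in place, keeping 'root' pointing at the current
 * (possibly unchanged) array root.  Before the fix above, an equal-size
 * "resize" left new_root -- and therefore *root -- uninitialised. */
static int grow_array(struct dm_array_info *info, dm_block_t *root,
		      uint32_t old_size, uint32_t new_size, const void *value)
{
	dm_block_t new_root;
	int r;

	r = dm_array_resize(info, *root, old_size, new_size, value, &new_root);
	if (r)
		return r;

	*root = new_root;
	return 0;
}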
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 786b689bdfc7..e8a904298887 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c | |||
@@ -564,7 +564,9 @@ static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count | |||
564 | { | 564 | { |
565 | struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); | 565 | struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); |
566 | 566 | ||
567 | return smm->ll.nr_blocks; | 567 | *count = smm->ll.nr_blocks; |
568 | |||
569 | return 0; | ||
568 | } | 570 | } |
569 | 571 | ||
570 | static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) | 572 | static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) |
@@ -581,7 +583,9 @@ static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b, | |||
581 | { | 583 | { |
582 | struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); | 584 | struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); |
583 | 585 | ||
584 | return b < smm->begin ? 1 : 0; | 586 | *result = (b < smm->begin) ? 1 : 0; |
587 | |||
588 | return 0; | ||
585 | } | 589 | } |
586 | 590 | ||
587 | static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, | 591 | static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, |
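Editor's note: these two bootstrap methods were returning the value as the function's return code, but every dm_space_map operation follows the out-parameter convention: 0 or a negative errno as the return value, the result through the pointer. A caller sketch that depends on this, assuming the dm_sm_* inline wrappers from dm-space-map.h:

/* Report how full a space map is; only correct if the get_nr_* methods
 * honour the "errno return, value via out-parameter" convention that the
 * hunks above restore for the bootstrap space map. */
static int sm_report_usage(struct dm_space_map *sm,
			   dm_block_t *used, dm_block_t *total)
{
	dm_block_t nr_free;
	int r;

	r = dm_sm_get_nr_blocks(sm, total);
	if (r)
		return r;

	r = dm_sm_get_nr_free(sm, &nr_free);
	if (r)
		return r;

	*used = *total - nr_free;
	return 0;
}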
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 3bc30a0ae3d6..9cb797d800cf 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c | |||
@@ -10,6 +10,8 @@ | |||
10 | #include "dm-persistent-data-internal.h" | 10 | #include "dm-persistent-data-internal.h" |
11 | 11 | ||
12 | #include <linux/export.h> | 12 | #include <linux/export.h> |
13 | #include <linux/mutex.h> | ||
14 | #include <linux/hash.h> | ||
13 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
14 | #include <linux/device-mapper.h> | 16 | #include <linux/device-mapper.h> |
15 | 17 | ||
@@ -17,6 +19,61 @@ | |||
17 | 19 | ||
18 | /*----------------------------------------------------------------*/ | 20 | /*----------------------------------------------------------------*/ |
19 | 21 | ||
22 | #define PREFETCH_SIZE 128 | ||
23 | #define PREFETCH_BITS 7 | ||
24 | #define PREFETCH_SENTINEL ((dm_block_t) -1ULL) | ||
25 | |||
26 | struct prefetch_set { | ||
27 | struct mutex lock; | ||
28 | dm_block_t blocks[PREFETCH_SIZE]; | ||
29 | }; | ||
30 | |||
31 | static unsigned prefetch_hash(dm_block_t b) | ||
32 | { | ||
33 | return hash_64(b, PREFETCH_BITS); | ||
34 | } | ||
35 | |||
36 | static void prefetch_wipe(struct prefetch_set *p) | ||
37 | { | ||
38 | unsigned i; | ||
39 | for (i = 0; i < PREFETCH_SIZE; i++) | ||
40 | p->blocks[i] = PREFETCH_SENTINEL; | ||
41 | } | ||
42 | |||
43 | static void prefetch_init(struct prefetch_set *p) | ||
44 | { | ||
45 | mutex_init(&p->lock); | ||
46 | prefetch_wipe(p); | ||
47 | } | ||
48 | |||
49 | static void prefetch_add(struct prefetch_set *p, dm_block_t b) | ||
50 | { | ||
51 | unsigned h = prefetch_hash(b); | ||
52 | |||
53 | mutex_lock(&p->lock); | ||
54 | if (p->blocks[h] == PREFETCH_SENTINEL) | ||
55 | p->blocks[h] = b; | ||
56 | |||
57 | mutex_unlock(&p->lock); | ||
58 | } | ||
59 | |||
60 | static void prefetch_issue(struct prefetch_set *p, struct dm_block_manager *bm) | ||
61 | { | ||
62 | unsigned i; | ||
63 | |||
64 | mutex_lock(&p->lock); | ||
65 | |||
66 | for (i = 0; i < PREFETCH_SIZE; i++) | ||
67 | if (p->blocks[i] != PREFETCH_SENTINEL) { | ||
68 | dm_bm_prefetch(bm, p->blocks[i]); | ||
69 | p->blocks[i] = PREFETCH_SENTINEL; | ||
70 | } | ||
71 | |||
72 | mutex_unlock(&p->lock); | ||
73 | } | ||
74 | |||
75 | /*----------------------------------------------------------------*/ | ||
76 | |||
20 | struct shadow_info { | 77 | struct shadow_info { |
21 | struct hlist_node hlist; | 78 | struct hlist_node hlist; |
22 | dm_block_t where; | 79 | dm_block_t where; |
@@ -37,6 +94,8 @@ struct dm_transaction_manager { | |||
37 | 94 | ||
38 | spinlock_t lock; | 95 | spinlock_t lock; |
39 | struct hlist_head buckets[DM_HASH_SIZE]; | 96 | struct hlist_head buckets[DM_HASH_SIZE]; |
97 | |||
98 | struct prefetch_set prefetches; | ||
40 | }; | 99 | }; |
41 | 100 | ||
42 | /*----------------------------------------------------------------*/ | 101 | /*----------------------------------------------------------------*/ |
@@ -117,6 +176,8 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm, | |||
117 | for (i = 0; i < DM_HASH_SIZE; i++) | 176 | for (i = 0; i < DM_HASH_SIZE; i++) |
118 | INIT_HLIST_HEAD(tm->buckets + i); | 177 | INIT_HLIST_HEAD(tm->buckets + i); |
119 | 178 | ||
179 | prefetch_init(&tm->prefetches); | ||
180 | |||
120 | return tm; | 181 | return tm; |
121 | } | 182 | } |
122 | 183 | ||
@@ -268,8 +329,14 @@ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, | |||
268 | struct dm_block_validator *v, | 329 | struct dm_block_validator *v, |
269 | struct dm_block **blk) | 330 | struct dm_block **blk) |
270 | { | 331 | { |
271 | if (tm->is_clone) | 332 | if (tm->is_clone) { |
272 | return dm_bm_read_try_lock(tm->real->bm, b, v, blk); | 333 | int r = dm_bm_read_try_lock(tm->real->bm, b, v, blk); |
334 | |||
335 | if (r == -EWOULDBLOCK) | ||
336 | prefetch_add(&tm->real->prefetches, b); | ||
337 | |||
338 | return r; | ||
339 | } | ||
273 | 340 | ||
274 | return dm_bm_read_lock(tm->bm, b, v, blk); | 341 | return dm_bm_read_lock(tm->bm, b, v, blk); |
275 | } | 342 | } |
@@ -317,6 +384,12 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) | |||
317 | return tm->bm; | 384 | return tm->bm; |
318 | } | 385 | } |
319 | 386 | ||
387 | void dm_tm_issue_prefetches(struct dm_transaction_manager *tm) | ||
388 | { | ||
389 | prefetch_issue(&tm->prefetches, tm->bm); | ||
390 | } | ||
391 | EXPORT_SYMBOL_GPL(dm_tm_issue_prefetches); | ||
392 | |||
320 | /*----------------------------------------------------------------*/ | 393 | /*----------------------------------------------------------------*/ |
321 | 394 | ||
322 | static int dm_tm_create_internal(struct dm_block_manager *bm, | 395 | static int dm_tm_create_internal(struct dm_block_manager *bm, |
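Editor's note: putting the new pieces together, a non-blocking clone records every block it could not lock (dm_bm_read_try_lock() returns -EWOULDBLOCK and dm_tm_read_lock() adds the block to the prefetch set), and the owner of the real transaction manager later kicks off those reads in one batch so a retry finds the blocks in core. A hedged sketch of the two halves; probe_blocks() and its caller are hypothetical.

/* Hedged sketch: walk a set of metadata blocks through a non-blocking
 * clone, then issue prefetches on the real transaction manager. */
static int probe_blocks(struct dm_transaction_manager *nb_tm,	/* clone   */
			struct dm_transaction_manager *tm,	/* real tm */
			struct dm_block_validator *v,
			dm_block_t *blocks, unsigned count)
{
	unsigned i, misses = 0;

	for (i = 0; i < count; i++) {
		struct dm_block *blk;
		int r = dm_tm_read_lock(nb_tm, blocks[i], v, &blk);

		if (r == -EWOULDBLOCK) {	/* remembered for prefetch */
			misses++;
			continue;
		}
		if (r)
			return r;

		/* ... inspect the block ... */
		dm_tm_unlock(nb_tm, blk);
	}

	if (misses)
		dm_tm_issue_prefetches(tm);	/* start the reads now */

	return misses ? -EWOULDBLOCK : 0;	/* caller retries later */
}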
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 2772ed2a781a..2e0d4d66fb1b 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h | |||
@@ -109,6 +109,13 @@ int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, | |||
109 | struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); | 109 | struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * If you're using a non-blocking clone the tm will build up a list of | ||
113 | * requested blocks that weren't in core. This call will request those | ||
114 | * blocks to be prefetched. | ||
115 | */ | ||
116 | void dm_tm_issue_prefetches(struct dm_transaction_manager *tm); | ||
117 | |||
118 | /* | ||
112 | * A little utility that ties the knot by producing a transaction manager | 119 | * A little utility that ties the knot by producing a transaction manager |
113 | * that has a space map managed by the transaction manager... | 120 | * that has a space map managed by the transaction manager... |
114 | * | 121 | * |