aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-25 18:12:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-25 18:12:46 -0400
commite93dd910b906d2bb881f334685eb03431fd3fa48 (patch)
treeae8527dcdc9237e1bd7a65490d98e77fbe006307
parentb4820416dd92bc3df33f261c60ec21b2c4481bec (diff)
parente8603136cb04ec2d0c9b4b5be7a071fc003cb399 (diff)
Merge tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device-mapper fixes from Mike Snitzer: "A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error handling fixes for dm-multipath. A fix for the thin provisioning target to not expose non-zero discard limits if discards are disabled. Lastly, add two DM module parameters which allow users to tune the emergency memory reserves that DM mainatins per device -- this helps fix a long-standing issue for dm-multipath. The conservative default reserve for request-based dm-multipath devices (256) has proven problematic for users with many multipathed SCSI devices but relatively little memory. To responsibly select a smaller value users should use the new nr_bios tracepoint info (via commit 75afb352 "block: Add nr_bios to block_rq_remap tracepoint") to determine the peak number of bios their workloads create" * tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm: add reserved_bio_based_ios module parameter dm: add reserved_rq_based_ios module parameter dm: lower bio-based mempool reservation dm thin: do not expose non-zero discard limits if discards disabled dm mpath: disable WRITE SAME if it fails dm-snapshot: fix performance degradation due to small hash size dm snapshot: workaround for a false positive lockdep warning dm stats: fix possible counter corruption on 32-bit systems dm mpath: do not fail path on -ENOSPC
-rw-r--r--drivers/md/dm-io.c7
-rw-r--r--drivers/md/dm-mpath.c18
-rw-r--r--drivers/md/dm-snap-persistent.c2
-rw-r--r--drivers/md/dm-snap.c5
-rw-r--r--drivers/md/dm-stats.c23
-rw-r--r--drivers/md/dm-thin.c14
-rw-r--r--drivers/md/dm.c71
-rw-r--r--drivers/md/dm.h3
-rw-r--r--include/linux/device-mapper.h3
9 files changed, 120 insertions, 26 deletions
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea49834377c8..2a20986a2fec 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -19,8 +19,6 @@
19#define DM_MSG_PREFIX "io" 19#define DM_MSG_PREFIX "io"
20 20
21#define DM_IO_MAX_REGIONS BITS_PER_LONG 21#define DM_IO_MAX_REGIONS BITS_PER_LONG
22#define MIN_IOS 16
23#define MIN_BIOS 16
24 22
25struct dm_io_client { 23struct dm_io_client {
26 mempool_t *pool; 24 mempool_t *pool;
@@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
50struct dm_io_client *dm_io_client_create(void) 48struct dm_io_client *dm_io_client_create(void)
51{ 49{
52 struct dm_io_client *client; 50 struct dm_io_client *client;
51 unsigned min_ios = dm_get_reserved_bio_based_ios();
53 52
54 client = kmalloc(sizeof(*client), GFP_KERNEL); 53 client = kmalloc(sizeof(*client), GFP_KERNEL);
55 if (!client) 54 if (!client)
56 return ERR_PTR(-ENOMEM); 55 return ERR_PTR(-ENOMEM);
57 56
58 client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); 57 client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
59 if (!client->pool) 58 if (!client->pool)
60 goto bad; 59 goto bad;
61 60
62 client->bios = bioset_create(MIN_BIOS, 0); 61 client->bios = bioset_create(min_ios, 0);
63 if (!client->bios) 62 if (!client->bios)
64 goto bad; 63 goto bad;
65 64
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index b759a127f9c3..de570a558764 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/device-mapper.h> 8#include <linux/device-mapper.h>
9 9
10#include "dm.h"
10#include "dm-path-selector.h" 11#include "dm-path-selector.h"
11#include "dm-uevent.h" 12#include "dm-uevent.h"
12 13
@@ -116,8 +117,6 @@ struct dm_mpath_io {
116 117
117typedef int (*action_fn) (struct pgpath *pgpath); 118typedef int (*action_fn) (struct pgpath *pgpath);
118 119
119#define MIN_IOS 256 /* Mempool size */
120
121static struct kmem_cache *_mpio_cache; 120static struct kmem_cache *_mpio_cache;
122 121
123static struct workqueue_struct *kmultipathd, *kmpath_handlerd; 122static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
190static struct multipath *alloc_multipath(struct dm_target *ti) 189static struct multipath *alloc_multipath(struct dm_target *ti)
191{ 190{
192 struct multipath *m; 191 struct multipath *m;
192 unsigned min_ios = dm_get_reserved_rq_based_ios();
193 193
194 m = kzalloc(sizeof(*m), GFP_KERNEL); 194 m = kzalloc(sizeof(*m), GFP_KERNEL);
195 if (m) { 195 if (m) {
@@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
202 INIT_WORK(&m->trigger_event, trigger_event); 202 INIT_WORK(&m->trigger_event, trigger_event);
203 init_waitqueue_head(&m->pg_init_wait); 203 init_waitqueue_head(&m->pg_init_wait);
204 mutex_init(&m->work_mutex); 204 mutex_init(&m->work_mutex);
205 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); 205 m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
206 if (!m->mpio_pool) { 206 if (!m->mpio_pool) {
207 kfree(m); 207 kfree(m);
208 return NULL; 208 return NULL;
@@ -1268,6 +1268,7 @@ static int noretry_error(int error)
1268 case -EREMOTEIO: 1268 case -EREMOTEIO:
1269 case -EILSEQ: 1269 case -EILSEQ:
1270 case -ENODATA: 1270 case -ENODATA:
1271 case -ENOSPC:
1271 return 1; 1272 return 1;
1272 } 1273 }
1273 1274
@@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
1298 if (!error && !clone->errors) 1299 if (!error && !clone->errors)
1299 return 0; /* I/O complete */ 1300 return 0; /* I/O complete */
1300 1301
1301 if (noretry_error(error)) 1302 if (noretry_error(error)) {
1303 if ((clone->cmd_flags & REQ_WRITE_SAME) &&
1304 !clone->q->limits.max_write_same_sectors) {
1305 struct queue_limits *limits;
1306
1307 /* device doesn't really support WRITE SAME, disable it */
1308 limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
1309 limits->max_write_same_sectors = 0;
1310 }
1302 return error; 1311 return error;
1312 }
1303 1313
1304 if (mpio->pgpath) 1314 if (mpio->pgpath)
1305 fail_path(mpio->pgpath); 1315 fail_path(mpio->pgpath);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 3ac415675b6c..4caa8e6d59d7 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
256 */ 256 */
257 INIT_WORK_ONSTACK(&req.work, do_metadata); 257 INIT_WORK_ONSTACK(&req.work, do_metadata);
258 queue_work(ps->metadata_wq, &req.work); 258 queue_work(ps->metadata_wq, &req.work);
259 flush_work(&req.work); 259 flush_workqueue(ps->metadata_wq);
260 260
261 return req.result; 261 return req.result;
262} 262}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c434e5aab2df..aec57d76db5d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -725,17 +725,16 @@ static int calc_max_buckets(void)
725 */ 725 */
726static int init_hash_tables(struct dm_snapshot *s) 726static int init_hash_tables(struct dm_snapshot *s)
727{ 727{
728 sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 728 sector_t hash_size, cow_dev_size, max_buckets;
729 729
730 /* 730 /*
731 * Calculate based on the size of the original volume or 731 * Calculate based on the size of the original volume or
732 * the COW volume... 732 * the COW volume...
733 */ 733 */
734 cow_dev_size = get_dev_size(s->cow->bdev); 734 cow_dev_size = get_dev_size(s->cow->bdev);
735 origin_dev_size = get_dev_size(s->origin->bdev);
736 max_buckets = calc_max_buckets(); 735 max_buckets = calc_max_buckets();
737 736
738 hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 737 hash_size = cow_dev_size >> s->store->chunk_shift;
739 hash_size = min(hash_size, max_buckets); 738 hash_size = min(hash_size, max_buckets);
740 739
741 if (hash_size < 64) 740 if (hash_size < 64)
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 8ae31e8d3d64..3d404c1371ed 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
451 struct dm_stat_percpu *p; 451 struct dm_stat_percpu *p;
452 452
453 /* 453 /*
454 * For strict correctness we should use local_irq_disable/enable 454 * For strict correctness we should use local_irq_save/restore
455 * instead of preempt_disable/enable. 455 * instead of preempt_disable/enable.
456 * 456 *
457 * This is racy if the driver finishes bios from non-interrupt 457 * preempt_disable/enable is racy if the driver finishes bios
458 * context as well as from interrupt context or from more different 458 * from non-interrupt context as well as from interrupt context
459 * interrupts. 459 * or from more different interrupts.
460 * 460 *
461 * However, the race only results in not counting some events, 461 * On 64-bit architectures the race only results in not counting some
462 * so it is acceptable. 462 * events, so it is acceptable. On 32-bit architectures the race could
463 * cause the counter going off by 2^32, so we need to do proper locking
464 * there.
463 * 465 *
464 * part_stat_lock()/part_stat_unlock() have this race too. 466 * part_stat_lock()/part_stat_unlock() have this race too.
465 */ 467 */
468#if BITS_PER_LONG == 32
469 unsigned long flags;
470 local_irq_save(flags);
471#else
466 preempt_disable(); 472 preempt_disable();
473#endif
467 p = &s->stat_percpu[smp_processor_id()][entry]; 474 p = &s->stat_percpu[smp_processor_id()][entry];
468 475
469 if (!end) { 476 if (!end) {
@@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
478 p->ticks[idx] += duration; 485 p->ticks[idx] += duration;
479 } 486 }
480 487
488#if BITS_PER_LONG == 32
489 local_irq_restore(flags);
490#else
481 preempt_enable(); 491 preempt_enable();
492#endif
482} 493}
483 494
484static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw, 495static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ed063427d676..2c0cf511ec23 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
2095 * them down to the data device. The thin device's discard 2095 * them down to the data device. The thin device's discard
2096 * processing will cause mappings to be removed from the btree. 2096 * processing will cause mappings to be removed from the btree.
2097 */ 2097 */
2098 ti->discard_zeroes_data_unsupported = true;
2098 if (pf.discard_enabled && pf.discard_passdown) { 2099 if (pf.discard_enabled && pf.discard_passdown) {
2099 ti->num_discard_bios = 1; 2100 ti->num_discard_bios = 1;
2100 2101
@@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
2104 * thin devices' discard limits consistent). 2105 * thin devices' discard limits consistent).
2105 */ 2106 */
2106 ti->discards_supported = true; 2107 ti->discards_supported = true;
2107 ti->discard_zeroes_data_unsupported = true;
2108 } 2108 }
2109 ti->private = pt; 2109 ti->private = pt;
2110 2110
@@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
2689 * They get transferred to the live pool in bind_control_target() 2689 * They get transferred to the live pool in bind_control_target()
2690 * called from pool_preresume(). 2690 * called from pool_preresume().
2691 */ 2691 */
2692 if (!pt->adjusted_pf.discard_enabled) 2692 if (!pt->adjusted_pf.discard_enabled) {
2693 /*
2694 * Must explicitly disallow stacking discard limits otherwise the
2695 * block layer will stack them if pool's data device has support.
2696 * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
2697 * user to see that, so make sure to set all discard limits to 0.
2698 */
2699 limits->discard_granularity = 0;
2693 return; 2700 return;
2701 }
2694 2702
2695 disable_passdown_if_not_supported(pt); 2703 disable_passdown_if_not_supported(pt);
2696 2704
@@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
2826 ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); 2834 ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
2827 2835
2828 /* In case the pool supports discards, pass them on. */ 2836 /* In case the pool supports discards, pass them on. */
2837 ti->discard_zeroes_data_unsupported = true;
2829 if (tc->pool->pf.discard_enabled) { 2838 if (tc->pool->pf.discard_enabled) {
2830 ti->discards_supported = true; 2839 ti->discards_supported = true;
2831 ti->num_discard_bios = 1; 2840 ti->num_discard_bios = 1;
2832 ti->discard_zeroes_data_unsupported = true;
2833 /* Discard bios must be split on a block boundary */ 2841 /* Discard bios must be split on a block boundary */
2834 ti->split_discard_bios = true; 2842 ti->split_discard_bios = true;
2835 } 2843 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6a5e9ed2fcc3..b3e26c7d1417 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -211,10 +211,55 @@ struct dm_md_mempools {
211 struct bio_set *bs; 211 struct bio_set *bs;
212}; 212};
213 213
214#define MIN_IOS 256 214#define RESERVED_BIO_BASED_IOS 16
215#define RESERVED_REQUEST_BASED_IOS 256
216#define RESERVED_MAX_IOS 1024
215static struct kmem_cache *_io_cache; 217static struct kmem_cache *_io_cache;
216static struct kmem_cache *_rq_tio_cache; 218static struct kmem_cache *_rq_tio_cache;
217 219
220/*
221 * Bio-based DM's mempools' reserved IOs set by the user.
222 */
223static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
224
225/*
226 * Request-based DM's mempools' reserved IOs set by the user.
227 */
228static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
229
230static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
231 unsigned def, unsigned max)
232{
233 unsigned ios = ACCESS_ONCE(*reserved_ios);
234 unsigned modified_ios = 0;
235
236 if (!ios)
237 modified_ios = def;
238 else if (ios > max)
239 modified_ios = max;
240
241 if (modified_ios) {
242 (void)cmpxchg(reserved_ios, ios, modified_ios);
243 ios = modified_ios;
244 }
245
246 return ios;
247}
248
249unsigned dm_get_reserved_bio_based_ios(void)
250{
251 return __dm_get_reserved_ios(&reserved_bio_based_ios,
252 RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
253}
254EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
255
256unsigned dm_get_reserved_rq_based_ios(void)
257{
258 return __dm_get_reserved_ios(&reserved_rq_based_ios,
259 RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
260}
261EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
262
218static int __init local_init(void) 263static int __init local_init(void)
219{ 264{
220 int r = -ENOMEM; 265 int r = -ENOMEM;
@@ -2278,6 +2323,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
2278} 2323}
2279 2324
2280/* 2325/*
2326 * The queue_limits are only valid as long as you have a reference
2327 * count on 'md'.
2328 */
2329struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
2330{
2331 BUG_ON(!atomic_read(&md->holders));
2332 return &md->queue->limits;
2333}
2334EXPORT_SYMBOL_GPL(dm_get_queue_limits);
2335
2336/*
2281 * Fully initialize a request-based queue (->elevator, ->request_fn, etc). 2337 * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
2282 */ 2338 */
2283static int dm_init_request_based_queue(struct mapped_device *md) 2339static int dm_init_request_based_queue(struct mapped_device *md)
@@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
2862 2918
2863 if (type == DM_TYPE_BIO_BASED) { 2919 if (type == DM_TYPE_BIO_BASED) {
2864 cachep = _io_cache; 2920 cachep = _io_cache;
2865 pool_size = 16; 2921 pool_size = dm_get_reserved_bio_based_ios();
2866 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 2922 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
2867 } else if (type == DM_TYPE_REQUEST_BASED) { 2923 } else if (type == DM_TYPE_REQUEST_BASED) {
2868 cachep = _rq_tio_cache; 2924 cachep = _rq_tio_cache;
2869 pool_size = MIN_IOS; 2925 pool_size = dm_get_reserved_rq_based_ios();
2870 front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 2926 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
2871 /* per_bio_data_size is not used. See __bind_mempools(). */ 2927 /* per_bio_data_size is not used. See __bind_mempools(). */
2872 WARN_ON(per_bio_data_size != 0); 2928 WARN_ON(per_bio_data_size != 0);
2873 } else 2929 } else
2874 goto out; 2930 goto out;
2875 2931
2876 pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep); 2932 pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
2877 if (!pools->io_pool) 2933 if (!pools->io_pool)
2878 goto out; 2934 goto out;
2879 2935
@@ -2924,6 +2980,13 @@ module_exit(dm_exit);
2924 2980
2925module_param(major, uint, 0); 2981module_param(major, uint, 0);
2926MODULE_PARM_DESC(major, "The major number of the device mapper"); 2982MODULE_PARM_DESC(major, "The major number of the device mapper");
2983
2984module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
2985MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
2986
2987module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
2988MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
2989
2927MODULE_DESCRIPTION(DM_NAME " driver"); 2990MODULE_DESCRIPTION(DM_NAME " driver");
2928MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 2991MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
2929MODULE_LICENSE("GPL"); 2992MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 5e604cc7b4aa..1d1ad7b7e527 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
184/* 184/*
185 * Helpers that are used by DM core 185 * Helpers that are used by DM core
186 */ 186 */
187unsigned dm_get_reserved_bio_based_ios(void);
188unsigned dm_get_reserved_rq_based_ios(void);
189
187static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen) 190static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
188{ 191{
189 return !maxlen || strlen(result) + 1 >= maxlen; 192 return !maxlen || strlen(result) + 1 >= maxlen;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 653073de09e3..ed419c62dde1 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti);
406union map_info *dm_get_mapinfo(struct bio *bio); 406union map_info *dm_get_mapinfo(struct bio *bio);
407union map_info *dm_get_rq_mapinfo(struct request *rq); 407union map_info *dm_get_rq_mapinfo(struct request *rq);
408 408
409struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
410
409/* 411/*
410 * Geometry functions. 412 * Geometry functions.
411 */ 413 */
412int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo); 414int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
413int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); 415int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
414 416
415
416/*----------------------------------------------------------------- 417/*-----------------------------------------------------------------
417 * Functions for manipulating device-mapper tables. 418 * Functions for manipulating device-mapper tables.
418 *---------------------------------------------------------------*/ 419 *---------------------------------------------------------------*/