diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Makefile | 6 | ||||
-rw-r--r-- | drivers/md/dm-emc.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.c | 10 | ||||
-rw-r--r-- | drivers/md/dm-io.c | 38 | ||||
-rw-r--r-- | drivers/md/dm-io.h | 79 | ||||
-rw-r--r-- | drivers/md/dm-kcopyd.c (renamed from drivers/md/kcopyd.c) | 298 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 254 | ||||
-rw-r--r-- | drivers/md/dm-log.h | 131 | ||||
-rw-r--r-- | drivers/md/dm-mpath-hp-sw.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-mpath-rdac.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 132 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 22 | ||||
-rw-r--r-- | drivers/md/dm-snap.h | 4 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 47 | ||||
-rw-r--r-- | drivers/md/dm-uevent.c | 22 | ||||
-rw-r--r-- | drivers/md/dm.c | 16 | ||||
-rw-r--r-- | drivers/md/dm.h | 98 | ||||
-rw-r--r-- | drivers/md/kcopyd.h | 42 | ||||
-rw-r--r-- | drivers/md/md.c | 129 | ||||
-rw-r--r-- | drivers/md/multipath.c | 3 | ||||
-rw-r--r-- | drivers/md/raid1.c | 31 | ||||
-rw-r--r-- | drivers/md/raid10.c | 33 | ||||
-rw-r--r-- | drivers/md/raid5.c | 191 | ||||
-rw-r--r-- | drivers/md/raid6algos.c | 3 |
24 files changed, 748 insertions, 845 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index d9aa7edb8780..7be09eeea293 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -3,10 +3,10 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ | 5 | dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ |
6 | dm-ioctl.o dm-io.o kcopyd.o | 6 | dm-ioctl.o dm-io.o dm-kcopyd.o |
7 | dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o | 7 | dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o |
8 | dm-snapshot-objs := dm-snap.o dm-exception-store.o | 8 | dm-snapshot-objs := dm-snap.o dm-exception-store.o |
9 | dm-mirror-objs := dm-log.o dm-raid1.o | 9 | dm-mirror-objs := dm-raid1.o |
10 | dm-rdac-objs := dm-mpath-rdac.o | 10 | dm-rdac-objs := dm-mpath-rdac.o |
11 | dm-hp-sw-objs := dm-mpath-hp-sw.o | 11 | dm-hp-sw-objs := dm-mpath-hp-sw.o |
12 | md-mod-objs := md.o bitmap.o | 12 | md-mod-objs := md.o bitmap.o |
@@ -39,7 +39,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o | |||
39 | obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o | 39 | obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o |
40 | obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o | 40 | obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o |
41 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | 41 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
42 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o | 42 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o |
43 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 43 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
44 | 44 | ||
45 | quiet_cmd_unroll = UNROLL $@ | 45 | quiet_cmd_unroll = UNROLL $@ |
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c index 6b91b9ab1d41..3ea5ad4b7805 100644 --- a/drivers/md/dm-emc.c +++ b/drivers/md/dm-emc.c | |||
@@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h, | |||
110 | memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); | 110 | memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); |
111 | rq->sense_len = 0; | 111 | rq->sense_len = 0; |
112 | 112 | ||
113 | memset(&rq->cmd, 0, BLK_MAX_CDB); | ||
114 | |||
115 | rq->timeout = EMC_FAILOVER_TIMEOUT; | 113 | rq->timeout = EMC_FAILOVER_TIMEOUT; |
116 | rq->cmd_type = REQ_TYPE_BLOCK_PC; | 114 | rq->cmd_type = REQ_TYPE_BLOCK_PC; |
117 | rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; | 115 | rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; |
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 5bbce29f143a..41f408068a7c 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c | |||
@@ -9,13 +9,13 @@ | |||
9 | 9 | ||
10 | #include "dm.h" | 10 | #include "dm.h" |
11 | #include "dm-snap.h" | 11 | #include "dm-snap.h" |
12 | #include "dm-io.h" | ||
13 | #include "kcopyd.h" | ||
14 | 12 | ||
15 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
16 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
17 | #include <linux/vmalloc.h> | 15 | #include <linux/vmalloc.h> |
18 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/dm-io.h> | ||
18 | #include <linux/dm-kcopyd.h> | ||
19 | 19 | ||
20 | #define DM_MSG_PREFIX "snapshots" | 20 | #define DM_MSG_PREFIX "snapshots" |
21 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ | 21 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ |
@@ -131,7 +131,7 @@ struct pstore { | |||
131 | 131 | ||
132 | static unsigned sectors_to_pages(unsigned sectors) | 132 | static unsigned sectors_to_pages(unsigned sectors) |
133 | { | 133 | { |
134 | return sectors / (PAGE_SIZE >> 9); | 134 | return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); |
135 | } | 135 | } |
136 | 136 | ||
137 | static int alloc_area(struct pstore *ps) | 137 | static int alloc_area(struct pstore *ps) |
@@ -159,7 +159,7 @@ static void free_area(struct pstore *ps) | |||
159 | } | 159 | } |
160 | 160 | ||
161 | struct mdata_req { | 161 | struct mdata_req { |
162 | struct io_region *where; | 162 | struct dm_io_region *where; |
163 | struct dm_io_request *io_req; | 163 | struct dm_io_request *io_req; |
164 | struct work_struct work; | 164 | struct work_struct work; |
165 | int result; | 165 | int result; |
@@ -177,7 +177,7 @@ static void do_metadata(struct work_struct *work) | |||
177 | */ | 177 | */ |
178 | static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) | 178 | static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) |
179 | { | 179 | { |
180 | struct io_region where = { | 180 | struct dm_io_region where = { |
181 | .bdev = ps->snap->cow->bdev, | 181 | .bdev = ps->snap->cow->bdev, |
182 | .sector = ps->snap->chunk_size * chunk, | 182 | .sector = ps->snap->chunk_size * chunk, |
183 | .count = ps->snap->chunk_size, | 183 | .count = ps->snap->chunk_size, |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 8f25f628ef16..4789c42d9a3a 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c | |||
@@ -5,13 +5,14 @@ | |||
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "dm-io.h" | 8 | #include "dm.h" |
9 | 9 | ||
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/mempool.h> | 11 | #include <linux/mempool.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/dm-io.h> | ||
15 | 16 | ||
16 | struct dm_io_client { | 17 | struct dm_io_client { |
17 | mempool_t *pool; | 18 | mempool_t *pool; |
@@ -20,7 +21,7 @@ struct dm_io_client { | |||
20 | 21 | ||
21 | /* FIXME: can we shrink this ? */ | 22 | /* FIXME: can we shrink this ? */ |
22 | struct io { | 23 | struct io { |
23 | unsigned long error; | 24 | unsigned long error_bits; |
24 | atomic_t count; | 25 | atomic_t count; |
25 | struct task_struct *sleeper; | 26 | struct task_struct *sleeper; |
26 | struct dm_io_client *client; | 27 | struct dm_io_client *client; |
@@ -107,14 +108,14 @@ static inline unsigned bio_get_region(struct bio *bio) | |||
107 | static void dec_count(struct io *io, unsigned int region, int error) | 108 | static void dec_count(struct io *io, unsigned int region, int error) |
108 | { | 109 | { |
109 | if (error) | 110 | if (error) |
110 | set_bit(region, &io->error); | 111 | set_bit(region, &io->error_bits); |
111 | 112 | ||
112 | if (atomic_dec_and_test(&io->count)) { | 113 | if (atomic_dec_and_test(&io->count)) { |
113 | if (io->sleeper) | 114 | if (io->sleeper) |
114 | wake_up_process(io->sleeper); | 115 | wake_up_process(io->sleeper); |
115 | 116 | ||
116 | else { | 117 | else { |
117 | unsigned long r = io->error; | 118 | unsigned long r = io->error_bits; |
118 | io_notify_fn fn = io->callback; | 119 | io_notify_fn fn = io->callback; |
119 | void *context = io->context; | 120 | void *context = io->context; |
120 | 121 | ||
@@ -271,7 +272,7 @@ static void km_dp_init(struct dpages *dp, void *data) | |||
271 | /*----------------------------------------------------------------- | 272 | /*----------------------------------------------------------------- |
272 | * IO routines that accept a list of pages. | 273 | * IO routines that accept a list of pages. |
273 | *---------------------------------------------------------------*/ | 274 | *---------------------------------------------------------------*/ |
274 | static void do_region(int rw, unsigned int region, struct io_region *where, | 275 | static void do_region(int rw, unsigned region, struct dm_io_region *where, |
275 | struct dpages *dp, struct io *io) | 276 | struct dpages *dp, struct io *io) |
276 | { | 277 | { |
277 | struct bio *bio; | 278 | struct bio *bio; |
@@ -320,7 +321,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where, | |||
320 | } | 321 | } |
321 | 322 | ||
322 | static void dispatch_io(int rw, unsigned int num_regions, | 323 | static void dispatch_io(int rw, unsigned int num_regions, |
323 | struct io_region *where, struct dpages *dp, | 324 | struct dm_io_region *where, struct dpages *dp, |
324 | struct io *io, int sync) | 325 | struct io *io, int sync) |
325 | { | 326 | { |
326 | int i; | 327 | int i; |
@@ -347,17 +348,17 @@ static void dispatch_io(int rw, unsigned int num_regions, | |||
347 | } | 348 | } |
348 | 349 | ||
349 | static int sync_io(struct dm_io_client *client, unsigned int num_regions, | 350 | static int sync_io(struct dm_io_client *client, unsigned int num_regions, |
350 | struct io_region *where, int rw, struct dpages *dp, | 351 | struct dm_io_region *where, int rw, struct dpages *dp, |
351 | unsigned long *error_bits) | 352 | unsigned long *error_bits) |
352 | { | 353 | { |
353 | struct io io; | 354 | struct io io; |
354 | 355 | ||
355 | if (num_regions > 1 && rw != WRITE) { | 356 | if (num_regions > 1 && (rw & RW_MASK) != WRITE) { |
356 | WARN_ON(1); | 357 | WARN_ON(1); |
357 | return -EIO; | 358 | return -EIO; |
358 | } | 359 | } |
359 | 360 | ||
360 | io.error = 0; | 361 | io.error_bits = 0; |
361 | atomic_set(&io.count, 1); /* see dispatch_io() */ | 362 | atomic_set(&io.count, 1); /* see dispatch_io() */ |
362 | io.sleeper = current; | 363 | io.sleeper = current; |
363 | io.client = client; | 364 | io.client = client; |
@@ -378,25 +379,25 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, | |||
378 | return -EINTR; | 379 | return -EINTR; |
379 | 380 | ||
380 | if (error_bits) | 381 | if (error_bits) |
381 | *error_bits = io.error; | 382 | *error_bits = io.error_bits; |
382 | 383 | ||
383 | return io.error ? -EIO : 0; | 384 | return io.error_bits ? -EIO : 0; |
384 | } | 385 | } |
385 | 386 | ||
386 | static int async_io(struct dm_io_client *client, unsigned int num_regions, | 387 | static int async_io(struct dm_io_client *client, unsigned int num_regions, |
387 | struct io_region *where, int rw, struct dpages *dp, | 388 | struct dm_io_region *where, int rw, struct dpages *dp, |
388 | io_notify_fn fn, void *context) | 389 | io_notify_fn fn, void *context) |
389 | { | 390 | { |
390 | struct io *io; | 391 | struct io *io; |
391 | 392 | ||
392 | if (num_regions > 1 && rw != WRITE) { | 393 | if (num_regions > 1 && (rw & RW_MASK) != WRITE) { |
393 | WARN_ON(1); | 394 | WARN_ON(1); |
394 | fn(1, context); | 395 | fn(1, context); |
395 | return -EIO; | 396 | return -EIO; |
396 | } | 397 | } |
397 | 398 | ||
398 | io = mempool_alloc(client->pool, GFP_NOIO); | 399 | io = mempool_alloc(client->pool, GFP_NOIO); |
399 | io->error = 0; | 400 | io->error_bits = 0; |
400 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 401 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
401 | io->sleeper = NULL; | 402 | io->sleeper = NULL; |
402 | io->client = client; | 403 | io->client = client; |
@@ -435,10 +436,15 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) | |||
435 | } | 436 | } |
436 | 437 | ||
437 | /* | 438 | /* |
438 | * New collapsed (a)synchronous interface | 439 | * New collapsed (a)synchronous interface. |
440 | * | ||
441 | * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug | ||
442 | * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in | ||
443 | * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to | ||
444 | * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. | ||
439 | */ | 445 | */ |
440 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, | 446 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, |
441 | struct io_region *where, unsigned long *sync_error_bits) | 447 | struct dm_io_region *where, unsigned long *sync_error_bits) |
442 | { | 448 | { |
443 | int r; | 449 | int r; |
444 | struct dpages dp; | 450 | struct dpages dp; |
diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h deleted file mode 100644 index f647e2cceaa6..000000000000 --- a/drivers/md/dm-io.h +++ /dev/null | |||
@@ -1,79 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2003 Sistina Software | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef _DM_IO_H | ||
8 | #define _DM_IO_H | ||
9 | |||
10 | #include "dm.h" | ||
11 | |||
12 | struct io_region { | ||
13 | struct block_device *bdev; | ||
14 | sector_t sector; | ||
15 | sector_t count; /* If this is zero the region is ignored. */ | ||
16 | }; | ||
17 | |||
18 | struct page_list { | ||
19 | struct page_list *next; | ||
20 | struct page *page; | ||
21 | }; | ||
22 | |||
23 | typedef void (*io_notify_fn)(unsigned long error, void *context); | ||
24 | |||
25 | enum dm_io_mem_type { | ||
26 | DM_IO_PAGE_LIST,/* Page list */ | ||
27 | DM_IO_BVEC, /* Bio vector */ | ||
28 | DM_IO_VMA, /* Virtual memory area */ | ||
29 | DM_IO_KMEM, /* Kernel memory */ | ||
30 | }; | ||
31 | |||
32 | struct dm_io_memory { | ||
33 | enum dm_io_mem_type type; | ||
34 | |||
35 | union { | ||
36 | struct page_list *pl; | ||
37 | struct bio_vec *bvec; | ||
38 | void *vma; | ||
39 | void *addr; | ||
40 | } ptr; | ||
41 | |||
42 | unsigned offset; | ||
43 | }; | ||
44 | |||
45 | struct dm_io_notify { | ||
46 | io_notify_fn fn; /* Callback for asynchronous requests */ | ||
47 | void *context; /* Passed to callback */ | ||
48 | }; | ||
49 | |||
50 | /* | ||
51 | * IO request structure | ||
52 | */ | ||
53 | struct dm_io_client; | ||
54 | struct dm_io_request { | ||
55 | int bi_rw; /* READ|WRITE - not READA */ | ||
56 | struct dm_io_memory mem; /* Memory to use for io */ | ||
57 | struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ | ||
58 | struct dm_io_client *client; /* Client memory handler */ | ||
59 | }; | ||
60 | |||
61 | /* | ||
62 | * For async io calls, users can alternatively use the dm_io() function below | ||
63 | * and dm_io_client_create() to create private mempools for the client. | ||
64 | * | ||
65 | * Create/destroy may block. | ||
66 | */ | ||
67 | struct dm_io_client *dm_io_client_create(unsigned num_pages); | ||
68 | int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); | ||
69 | void dm_io_client_destroy(struct dm_io_client *client); | ||
70 | |||
71 | /* | ||
72 | * IO interface using private per-client pools. | ||
73 | * Each bit in the optional 'sync_error_bits' bitset indicates whether an | ||
74 | * error occurred doing io to the corresponding region. | ||
75 | */ | ||
76 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, | ||
77 | struct io_region *region, unsigned long *sync_error_bits); | ||
78 | |||
79 | #endif | ||
diff --git a/drivers/md/kcopyd.c b/drivers/md/dm-kcopyd.c index e76b52ade690..996802b8a452 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/dm-kcopyd.c | |||
@@ -9,9 +9,8 @@ | |||
9 | * completion notification. | 9 | * completion notification. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <asm/types.h> | 12 | #include <linux/types.h> |
13 | #include <asm/atomic.h> | 13 | #include <asm/atomic.h> |
14 | |||
15 | #include <linux/blkdev.h> | 14 | #include <linux/blkdev.h> |
16 | #include <linux/fs.h> | 15 | #include <linux/fs.h> |
17 | #include <linux/init.h> | 16 | #include <linux/init.h> |
@@ -23,24 +22,15 @@ | |||
23 | #include <linux/vmalloc.h> | 22 | #include <linux/vmalloc.h> |
24 | #include <linux/workqueue.h> | 23 | #include <linux/workqueue.h> |
25 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
25 | #include <linux/dm-kcopyd.h> | ||
26 | 26 | ||
27 | #include "kcopyd.h" | 27 | #include "dm.h" |
28 | |||
29 | static struct workqueue_struct *_kcopyd_wq; | ||
30 | static struct work_struct _kcopyd_work; | ||
31 | |||
32 | static void wake(void) | ||
33 | { | ||
34 | queue_work(_kcopyd_wq, &_kcopyd_work); | ||
35 | } | ||
36 | 28 | ||
37 | /*----------------------------------------------------------------- | 29 | /*----------------------------------------------------------------- |
38 | * Each kcopyd client has its own little pool of preallocated | 30 | * Each kcopyd client has its own little pool of preallocated |
39 | * pages for kcopyd io. | 31 | * pages for kcopyd io. |
40 | *---------------------------------------------------------------*/ | 32 | *---------------------------------------------------------------*/ |
41 | struct kcopyd_client { | 33 | struct dm_kcopyd_client { |
42 | struct list_head list; | ||
43 | |||
44 | spinlock_t lock; | 34 | spinlock_t lock; |
45 | struct page_list *pages; | 35 | struct page_list *pages; |
46 | unsigned int nr_pages; | 36 | unsigned int nr_pages; |
@@ -50,8 +40,32 @@ struct kcopyd_client { | |||
50 | 40 | ||
51 | wait_queue_head_t destroyq; | 41 | wait_queue_head_t destroyq; |
52 | atomic_t nr_jobs; | 42 | atomic_t nr_jobs; |
43 | |||
44 | mempool_t *job_pool; | ||
45 | |||
46 | struct workqueue_struct *kcopyd_wq; | ||
47 | struct work_struct kcopyd_work; | ||
48 | |||
49 | /* | ||
50 | * We maintain three lists of jobs: | ||
51 | * | ||
52 | * i) jobs waiting for pages | ||
53 | * ii) jobs that have pages, and are waiting for the io to be issued. | ||
54 | * iii) jobs that have completed. | ||
55 | * | ||
56 | * All three of these are protected by job_lock. | ||
57 | */ | ||
58 | spinlock_t job_lock; | ||
59 | struct list_head complete_jobs; | ||
60 | struct list_head io_jobs; | ||
61 | struct list_head pages_jobs; | ||
53 | }; | 62 | }; |
54 | 63 | ||
64 | static void wake(struct dm_kcopyd_client *kc) | ||
65 | { | ||
66 | queue_work(kc->kcopyd_wq, &kc->kcopyd_work); | ||
67 | } | ||
68 | |||
55 | static struct page_list *alloc_pl(void) | 69 | static struct page_list *alloc_pl(void) |
56 | { | 70 | { |
57 | struct page_list *pl; | 71 | struct page_list *pl; |
@@ -75,7 +89,7 @@ static void free_pl(struct page_list *pl) | |||
75 | kfree(pl); | 89 | kfree(pl); |
76 | } | 90 | } |
77 | 91 | ||
78 | static int kcopyd_get_pages(struct kcopyd_client *kc, | 92 | static int kcopyd_get_pages(struct dm_kcopyd_client *kc, |
79 | unsigned int nr, struct page_list **pages) | 93 | unsigned int nr, struct page_list **pages) |
80 | { | 94 | { |
81 | struct page_list *pl; | 95 | struct page_list *pl; |
@@ -98,7 +112,7 @@ static int kcopyd_get_pages(struct kcopyd_client *kc, | |||
98 | return 0; | 112 | return 0; |
99 | } | 113 | } |
100 | 114 | ||
101 | static void kcopyd_put_pages(struct kcopyd_client *kc, struct page_list *pl) | 115 | static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) |
102 | { | 116 | { |
103 | struct page_list *cursor; | 117 | struct page_list *cursor; |
104 | 118 | ||
@@ -126,7 +140,7 @@ static void drop_pages(struct page_list *pl) | |||
126 | } | 140 | } |
127 | } | 141 | } |
128 | 142 | ||
129 | static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr) | 143 | static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) |
130 | { | 144 | { |
131 | unsigned int i; | 145 | unsigned int i; |
132 | struct page_list *pl = NULL, *next; | 146 | struct page_list *pl = NULL, *next; |
@@ -147,7 +161,7 @@ static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr) | |||
147 | return 0; | 161 | return 0; |
148 | } | 162 | } |
149 | 163 | ||
150 | static void client_free_pages(struct kcopyd_client *kc) | 164 | static void client_free_pages(struct dm_kcopyd_client *kc) |
151 | { | 165 | { |
152 | BUG_ON(kc->nr_free_pages != kc->nr_pages); | 166 | BUG_ON(kc->nr_free_pages != kc->nr_pages); |
153 | drop_pages(kc->pages); | 167 | drop_pages(kc->pages); |
@@ -161,7 +175,7 @@ static void client_free_pages(struct kcopyd_client *kc) | |||
161 | * ever having to do io (which could cause a deadlock). | 175 | * ever having to do io (which could cause a deadlock). |
162 | *---------------------------------------------------------------*/ | 176 | *---------------------------------------------------------------*/ |
163 | struct kcopyd_job { | 177 | struct kcopyd_job { |
164 | struct kcopyd_client *kc; | 178 | struct dm_kcopyd_client *kc; |
165 | struct list_head list; | 179 | struct list_head list; |
166 | unsigned long flags; | 180 | unsigned long flags; |
167 | 181 | ||
@@ -175,13 +189,13 @@ struct kcopyd_job { | |||
175 | * Either READ or WRITE | 189 | * Either READ or WRITE |
176 | */ | 190 | */ |
177 | int rw; | 191 | int rw; |
178 | struct io_region source; | 192 | struct dm_io_region source; |
179 | 193 | ||
180 | /* | 194 | /* |
181 | * The destinations for the transfer. | 195 | * The destinations for the transfer. |
182 | */ | 196 | */ |
183 | unsigned int num_dests; | 197 | unsigned int num_dests; |
184 | struct io_region dests[KCOPYD_MAX_REGIONS]; | 198 | struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; |
185 | 199 | ||
186 | sector_t offset; | 200 | sector_t offset; |
187 | unsigned int nr_pages; | 201 | unsigned int nr_pages; |
@@ -191,7 +205,7 @@ struct kcopyd_job { | |||
191 | * Set this to ensure you are notified when the job has | 205 | * Set this to ensure you are notified when the job has |
192 | * completed. 'context' is for callback to use. | 206 | * completed. 'context' is for callback to use. |
193 | */ | 207 | */ |
194 | kcopyd_notify_fn fn; | 208 | dm_kcopyd_notify_fn fn; |
195 | void *context; | 209 | void *context; |
196 | 210 | ||
197 | /* | 211 | /* |
@@ -207,47 +221,19 @@ struct kcopyd_job { | |||
207 | #define MIN_JOBS 512 | 221 | #define MIN_JOBS 512 |
208 | 222 | ||
209 | static struct kmem_cache *_job_cache; | 223 | static struct kmem_cache *_job_cache; |
210 | static mempool_t *_job_pool; | ||
211 | 224 | ||
212 | /* | 225 | int __init dm_kcopyd_init(void) |
213 | * We maintain three lists of jobs: | ||
214 | * | ||
215 | * i) jobs waiting for pages | ||
216 | * ii) jobs that have pages, and are waiting for the io to be issued. | ||
217 | * iii) jobs that have completed. | ||
218 | * | ||
219 | * All three of these are protected by job_lock. | ||
220 | */ | ||
221 | static DEFINE_SPINLOCK(_job_lock); | ||
222 | |||
223 | static LIST_HEAD(_complete_jobs); | ||
224 | static LIST_HEAD(_io_jobs); | ||
225 | static LIST_HEAD(_pages_jobs); | ||
226 | |||
227 | static int jobs_init(void) | ||
228 | { | 226 | { |
229 | _job_cache = KMEM_CACHE(kcopyd_job, 0); | 227 | _job_cache = KMEM_CACHE(kcopyd_job, 0); |
230 | if (!_job_cache) | 228 | if (!_job_cache) |
231 | return -ENOMEM; | 229 | return -ENOMEM; |
232 | 230 | ||
233 | _job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); | ||
234 | if (!_job_pool) { | ||
235 | kmem_cache_destroy(_job_cache); | ||
236 | return -ENOMEM; | ||
237 | } | ||
238 | |||
239 | return 0; | 231 | return 0; |
240 | } | 232 | } |
241 | 233 | ||
242 | static void jobs_exit(void) | 234 | void dm_kcopyd_exit(void) |
243 | { | 235 | { |
244 | BUG_ON(!list_empty(&_complete_jobs)); | ||
245 | BUG_ON(!list_empty(&_io_jobs)); | ||
246 | BUG_ON(!list_empty(&_pages_jobs)); | ||
247 | |||
248 | mempool_destroy(_job_pool); | ||
249 | kmem_cache_destroy(_job_cache); | 236 | kmem_cache_destroy(_job_cache); |
250 | _job_pool = NULL; | ||
251 | _job_cache = NULL; | 237 | _job_cache = NULL; |
252 | } | 238 | } |
253 | 239 | ||
@@ -255,18 +241,19 @@ static void jobs_exit(void) | |||
255 | * Functions to push and pop a job onto the head of a given job | 241 | * Functions to push and pop a job onto the head of a given job |
256 | * list. | 242 | * list. |
257 | */ | 243 | */ |
258 | static struct kcopyd_job *pop(struct list_head *jobs) | 244 | static struct kcopyd_job *pop(struct list_head *jobs, |
245 | struct dm_kcopyd_client *kc) | ||
259 | { | 246 | { |
260 | struct kcopyd_job *job = NULL; | 247 | struct kcopyd_job *job = NULL; |
261 | unsigned long flags; | 248 | unsigned long flags; |
262 | 249 | ||
263 | spin_lock_irqsave(&_job_lock, flags); | 250 | spin_lock_irqsave(&kc->job_lock, flags); |
264 | 251 | ||
265 | if (!list_empty(jobs)) { | 252 | if (!list_empty(jobs)) { |
266 | job = list_entry(jobs->next, struct kcopyd_job, list); | 253 | job = list_entry(jobs->next, struct kcopyd_job, list); |
267 | list_del(&job->list); | 254 | list_del(&job->list); |
268 | } | 255 | } |
269 | spin_unlock_irqrestore(&_job_lock, flags); | 256 | spin_unlock_irqrestore(&kc->job_lock, flags); |
270 | 257 | ||
271 | return job; | 258 | return job; |
272 | } | 259 | } |
@@ -274,10 +261,11 @@ static struct kcopyd_job *pop(struct list_head *jobs) | |||
274 | static void push(struct list_head *jobs, struct kcopyd_job *job) | 261 | static void push(struct list_head *jobs, struct kcopyd_job *job) |
275 | { | 262 | { |
276 | unsigned long flags; | 263 | unsigned long flags; |
264 | struct dm_kcopyd_client *kc = job->kc; | ||
277 | 265 | ||
278 | spin_lock_irqsave(&_job_lock, flags); | 266 | spin_lock_irqsave(&kc->job_lock, flags); |
279 | list_add_tail(&job->list, jobs); | 267 | list_add_tail(&job->list, jobs); |
280 | spin_unlock_irqrestore(&_job_lock, flags); | 268 | spin_unlock_irqrestore(&kc->job_lock, flags); |
281 | } | 269 | } |
282 | 270 | ||
283 | /* | 271 | /* |
@@ -294,11 +282,11 @@ static int run_complete_job(struct kcopyd_job *job) | |||
294 | void *context = job->context; | 282 | void *context = job->context; |
295 | int read_err = job->read_err; | 283 | int read_err = job->read_err; |
296 | unsigned long write_err = job->write_err; | 284 | unsigned long write_err = job->write_err; |
297 | kcopyd_notify_fn fn = job->fn; | 285 | dm_kcopyd_notify_fn fn = job->fn; |
298 | struct kcopyd_client *kc = job->kc; | 286 | struct dm_kcopyd_client *kc = job->kc; |
299 | 287 | ||
300 | kcopyd_put_pages(kc, job->pages); | 288 | kcopyd_put_pages(kc, job->pages); |
301 | mempool_free(job, _job_pool); | 289 | mempool_free(job, kc->job_pool); |
302 | fn(read_err, write_err, context); | 290 | fn(read_err, write_err, context); |
303 | 291 | ||
304 | if (atomic_dec_and_test(&kc->nr_jobs)) | 292 | if (atomic_dec_and_test(&kc->nr_jobs)) |
@@ -310,6 +298,7 @@ static int run_complete_job(struct kcopyd_job *job) | |||
310 | static void complete_io(unsigned long error, void *context) | 298 | static void complete_io(unsigned long error, void *context) |
311 | { | 299 | { |
312 | struct kcopyd_job *job = (struct kcopyd_job *) context; | 300 | struct kcopyd_job *job = (struct kcopyd_job *) context; |
301 | struct dm_kcopyd_client *kc = job->kc; | ||
313 | 302 | ||
314 | if (error) { | 303 | if (error) { |
315 | if (job->rw == WRITE) | 304 | if (job->rw == WRITE) |
@@ -317,22 +306,22 @@ static void complete_io(unsigned long error, void *context) | |||
317 | else | 306 | else |
318 | job->read_err = 1; | 307 | job->read_err = 1; |
319 | 308 | ||
320 | if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { | 309 | if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { |
321 | push(&_complete_jobs, job); | 310 | push(&kc->complete_jobs, job); |
322 | wake(); | 311 | wake(kc); |
323 | return; | 312 | return; |
324 | } | 313 | } |
325 | } | 314 | } |
326 | 315 | ||
327 | if (job->rw == WRITE) | 316 | if (job->rw == WRITE) |
328 | push(&_complete_jobs, job); | 317 | push(&kc->complete_jobs, job); |
329 | 318 | ||
330 | else { | 319 | else { |
331 | job->rw = WRITE; | 320 | job->rw = WRITE; |
332 | push(&_io_jobs, job); | 321 | push(&kc->io_jobs, job); |
333 | } | 322 | } |
334 | 323 | ||
335 | wake(); | 324 | wake(kc); |
336 | } | 325 | } |
337 | 326 | ||
338 | /* | 327 | /* |
@@ -343,7 +332,7 @@ static int run_io_job(struct kcopyd_job *job) | |||
343 | { | 332 | { |
344 | int r; | 333 | int r; |
345 | struct dm_io_request io_req = { | 334 | struct dm_io_request io_req = { |
346 | .bi_rw = job->rw, | 335 | .bi_rw = job->rw | (1 << BIO_RW_SYNC), |
347 | .mem.type = DM_IO_PAGE_LIST, | 336 | .mem.type = DM_IO_PAGE_LIST, |
348 | .mem.ptr.pl = job->pages, | 337 | .mem.ptr.pl = job->pages, |
349 | .mem.offset = job->offset, | 338 | .mem.offset = job->offset, |
@@ -369,7 +358,7 @@ static int run_pages_job(struct kcopyd_job *job) | |||
369 | r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); | 358 | r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); |
370 | if (!r) { | 359 | if (!r) { |
371 | /* this job is ready for io */ | 360 | /* this job is ready for io */ |
372 | push(&_io_jobs, job); | 361 | push(&job->kc->io_jobs, job); |
373 | return 0; | 362 | return 0; |
374 | } | 363 | } |
375 | 364 | ||
@@ -384,12 +373,13 @@ static int run_pages_job(struct kcopyd_job *job) | |||
384 | * Run through a list for as long as possible. Returns the count | 373 | * Run through a list for as long as possible. Returns the count |
385 | * of successful jobs. | 374 | * of successful jobs. |
386 | */ | 375 | */ |
387 | static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) | 376 | static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, |
377 | int (*fn) (struct kcopyd_job *)) | ||
388 | { | 378 | { |
389 | struct kcopyd_job *job; | 379 | struct kcopyd_job *job; |
390 | int r, count = 0; | 380 | int r, count = 0; |
391 | 381 | ||
392 | while ((job = pop(jobs))) { | 382 | while ((job = pop(jobs, kc))) { |
393 | 383 | ||
394 | r = fn(job); | 384 | r = fn(job); |
395 | 385 | ||
@@ -399,7 +389,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) | |||
399 | job->write_err = (unsigned long) -1L; | 389 | job->write_err = (unsigned long) -1L; |
400 | else | 390 | else |
401 | job->read_err = 1; | 391 | job->read_err = 1; |
402 | push(&_complete_jobs, job); | 392 | push(&kc->complete_jobs, job); |
403 | break; | 393 | break; |
404 | } | 394 | } |
405 | 395 | ||
@@ -421,8 +411,11 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) | |||
421 | /* | 411 | /* |
422 | * kcopyd does this every time it's woken up. | 412 | * kcopyd does this every time it's woken up. |
423 | */ | 413 | */ |
424 | static void do_work(struct work_struct *ignored) | 414 | static void do_work(struct work_struct *work) |
425 | { | 415 | { |
416 | struct dm_kcopyd_client *kc = container_of(work, | ||
417 | struct dm_kcopyd_client, kcopyd_work); | ||
418 | |||
426 | /* | 419 | /* |
427 | * The order that these are called is *very* important. | 420 | * The order that these are called is *very* important. |
428 | * complete jobs can free some pages for pages jobs. | 421 | * complete jobs can free some pages for pages jobs. |
@@ -430,9 +423,9 @@ static void do_work(struct work_struct *ignored) | |||
430 | * list. io jobs call wake when they complete and it all | 423 | * list. io jobs call wake when they complete and it all |
431 | * starts again. | 424 | * starts again. |
432 | */ | 425 | */ |
433 | process_jobs(&_complete_jobs, run_complete_job); | 426 | process_jobs(&kc->complete_jobs, kc, run_complete_job); |
434 | process_jobs(&_pages_jobs, run_pages_job); | 427 | process_jobs(&kc->pages_jobs, kc, run_pages_job); |
435 | process_jobs(&_io_jobs, run_io_job); | 428 | process_jobs(&kc->io_jobs, kc, run_io_job); |
436 | } | 429 | } |
437 | 430 | ||
438 | /* | 431 | /* |
@@ -442,9 +435,10 @@ static void do_work(struct work_struct *ignored) | |||
442 | */ | 435 | */ |
443 | static void dispatch_job(struct kcopyd_job *job) | 436 | static void dispatch_job(struct kcopyd_job *job) |
444 | { | 437 | { |
445 | atomic_inc(&job->kc->nr_jobs); | 438 | struct dm_kcopyd_client *kc = job->kc; |
446 | push(&_pages_jobs, job); | 439 | atomic_inc(&kc->nr_jobs); |
447 | wake(); | 440 | push(&kc->pages_jobs, job); |
441 | wake(kc); | ||
448 | } | 442 | } |
449 | 443 | ||
450 | #define SUB_JOB_SIZE 128 | 444 | #define SUB_JOB_SIZE 128 |
@@ -469,7 +463,7 @@ static void segment_complete(int read_err, unsigned long write_err, | |||
469 | * Only dispatch more work if there hasn't been an error. | 463 | * Only dispatch more work if there hasn't been an error. |
470 | */ | 464 | */ |
471 | if ((!job->read_err && !job->write_err) || | 465 | if ((!job->read_err && !job->write_err) || |
472 | test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { | 466 | test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { |
473 | /* get the next chunk of work */ | 467 | /* get the next chunk of work */ |
474 | progress = job->progress; | 468 | progress = job->progress; |
475 | count = job->source.count - progress; | 469 | count = job->source.count - progress; |
@@ -484,7 +478,8 @@ static void segment_complete(int read_err, unsigned long write_err, | |||
484 | 478 | ||
485 | if (count) { | 479 | if (count) { |
486 | int i; | 480 | int i; |
487 | struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO); | 481 | struct kcopyd_job *sub_job = mempool_alloc(job->kc->job_pool, |
482 | GFP_NOIO); | ||
488 | 483 | ||
489 | *sub_job = *job; | 484 | *sub_job = *job; |
490 | sub_job->source.sector += progress; | 485 | sub_job->source.sector += progress; |
@@ -508,7 +503,7 @@ static void segment_complete(int read_err, unsigned long write_err, | |||
508 | * after we've completed. | 503 | * after we've completed. |
509 | */ | 504 | */ |
510 | job->fn(read_err, write_err, job->context); | 505 | job->fn(read_err, write_err, job->context); |
511 | mempool_free(job, _job_pool); | 506 | mempool_free(job, job->kc->job_pool); |
512 | } | 507 | } |
513 | } | 508 | } |
514 | 509 | ||
@@ -526,16 +521,16 @@ static void split_job(struct kcopyd_job *job) | |||
526 | segment_complete(0, 0u, job); | 521 | segment_complete(0, 0u, job); |
527 | } | 522 | } |
528 | 523 | ||
529 | int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, | 524 | int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, |
530 | unsigned int num_dests, struct io_region *dests, | 525 | unsigned int num_dests, struct dm_io_region *dests, |
531 | unsigned int flags, kcopyd_notify_fn fn, void *context) | 526 | unsigned int flags, dm_kcopyd_notify_fn fn, void *context) |
532 | { | 527 | { |
533 | struct kcopyd_job *job; | 528 | struct kcopyd_job *job; |
534 | 529 | ||
535 | /* | 530 | /* |
536 | * Allocate a new job. | 531 | * Allocate a new job. |
537 | */ | 532 | */ |
538 | job = mempool_alloc(_job_pool, GFP_NOIO); | 533 | job = mempool_alloc(kc->job_pool, GFP_NOIO); |
539 | 534 | ||
540 | /* | 535 | /* |
541 | * set up for the read. | 536 | * set up for the read. |
@@ -569,6 +564,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, | |||
569 | 564 | ||
570 | return 0; | 565 | return 0; |
571 | } | 566 | } |
567 | EXPORT_SYMBOL(dm_kcopyd_copy); | ||
572 | 568 | ||
573 | /* | 569 | /* |
574 | * Cancels a kcopyd job, eg. someone might be deactivating a | 570 | * Cancels a kcopyd job, eg. someone might be deactivating a |
@@ -583,126 +579,76 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) | |||
583 | #endif /* 0 */ | 579 | #endif /* 0 */ |
584 | 580 | ||
585 | /*----------------------------------------------------------------- | 581 | /*----------------------------------------------------------------- |
586 | * Unit setup | 582 | * Client setup |
587 | *---------------------------------------------------------------*/ | 583 | *---------------------------------------------------------------*/ |
588 | static DEFINE_MUTEX(_client_lock); | 584 | int dm_kcopyd_client_create(unsigned int nr_pages, |
589 | static LIST_HEAD(_clients); | 585 | struct dm_kcopyd_client **result) |
590 | |||
591 | static void client_add(struct kcopyd_client *kc) | ||
592 | { | 586 | { |
593 | mutex_lock(&_client_lock); | 587 | int r = -ENOMEM; |
594 | list_add(&kc->list, &_clients); | 588 | struct dm_kcopyd_client *kc; |
595 | mutex_unlock(&_client_lock); | ||
596 | } | ||
597 | |||
598 | static void client_del(struct kcopyd_client *kc) | ||
599 | { | ||
600 | mutex_lock(&_client_lock); | ||
601 | list_del(&kc->list); | ||
602 | mutex_unlock(&_client_lock); | ||
603 | } | ||
604 | |||
605 | static DEFINE_MUTEX(kcopyd_init_lock); | ||
606 | static int kcopyd_clients = 0; | ||
607 | 589 | ||
608 | static int kcopyd_init(void) | 590 | kc = kmalloc(sizeof(*kc), GFP_KERNEL); |
609 | { | 591 | if (!kc) |
610 | int r; | ||
611 | |||
612 | mutex_lock(&kcopyd_init_lock); | ||
613 | |||
614 | if (kcopyd_clients) { | ||
615 | /* Already initialized. */ | ||
616 | kcopyd_clients++; | ||
617 | mutex_unlock(&kcopyd_init_lock); | ||
618 | return 0; | ||
619 | } | ||
620 | |||
621 | r = jobs_init(); | ||
622 | if (r) { | ||
623 | mutex_unlock(&kcopyd_init_lock); | ||
624 | return r; | ||
625 | } | ||
626 | |||
627 | _kcopyd_wq = create_singlethread_workqueue("kcopyd"); | ||
628 | if (!_kcopyd_wq) { | ||
629 | jobs_exit(); | ||
630 | mutex_unlock(&kcopyd_init_lock); | ||
631 | return -ENOMEM; | 592 | return -ENOMEM; |
632 | } | ||
633 | |||
634 | kcopyd_clients++; | ||
635 | INIT_WORK(&_kcopyd_work, do_work); | ||
636 | mutex_unlock(&kcopyd_init_lock); | ||
637 | return 0; | ||
638 | } | ||
639 | 593 | ||
640 | static void kcopyd_exit(void) | 594 | spin_lock_init(&kc->lock); |
641 | { | 595 | spin_lock_init(&kc->job_lock); |
642 | mutex_lock(&kcopyd_init_lock); | 596 | INIT_LIST_HEAD(&kc->complete_jobs); |
643 | kcopyd_clients--; | 597 | INIT_LIST_HEAD(&kc->io_jobs); |
644 | if (!kcopyd_clients) { | 598 | INIT_LIST_HEAD(&kc->pages_jobs); |
645 | jobs_exit(); | ||
646 | destroy_workqueue(_kcopyd_wq); | ||
647 | _kcopyd_wq = NULL; | ||
648 | } | ||
649 | mutex_unlock(&kcopyd_init_lock); | ||
650 | } | ||
651 | |||
652 | int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) | ||
653 | { | ||
654 | int r = 0; | ||
655 | struct kcopyd_client *kc; | ||
656 | 599 | ||
657 | r = kcopyd_init(); | 600 | kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); |
658 | if (r) | 601 | if (!kc->job_pool) |
659 | return r; | 602 | goto bad_slab; |
660 | 603 | ||
661 | kc = kmalloc(sizeof(*kc), GFP_KERNEL); | 604 | INIT_WORK(&kc->kcopyd_work, do_work); |
662 | if (!kc) { | 605 | kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); |
663 | kcopyd_exit(); | 606 | if (!kc->kcopyd_wq) |
664 | return -ENOMEM; | 607 | goto bad_workqueue; |
665 | } | ||
666 | 608 | ||
667 | spin_lock_init(&kc->lock); | ||
668 | kc->pages = NULL; | 609 | kc->pages = NULL; |
669 | kc->nr_pages = kc->nr_free_pages = 0; | 610 | kc->nr_pages = kc->nr_free_pages = 0; |
670 | r = client_alloc_pages(kc, nr_pages); | 611 | r = client_alloc_pages(kc, nr_pages); |
671 | if (r) { | 612 | if (r) |
672 | kfree(kc); | 613 | goto bad_client_pages; |
673 | kcopyd_exit(); | ||
674 | return r; | ||
675 | } | ||
676 | 614 | ||
677 | kc->io_client = dm_io_client_create(nr_pages); | 615 | kc->io_client = dm_io_client_create(nr_pages); |
678 | if (IS_ERR(kc->io_client)) { | 616 | if (IS_ERR(kc->io_client)) { |
679 | r = PTR_ERR(kc->io_client); | 617 | r = PTR_ERR(kc->io_client); |
680 | client_free_pages(kc); | 618 | goto bad_io_client; |
681 | kfree(kc); | ||
682 | kcopyd_exit(); | ||
683 | return r; | ||
684 | } | 619 | } |
685 | 620 | ||
686 | init_waitqueue_head(&kc->destroyq); | 621 | init_waitqueue_head(&kc->destroyq); |
687 | atomic_set(&kc->nr_jobs, 0); | 622 | atomic_set(&kc->nr_jobs, 0); |
688 | 623 | ||
689 | client_add(kc); | ||
690 | *result = kc; | 624 | *result = kc; |
691 | return 0; | 625 | return 0; |
626 | |||
627 | bad_io_client: | ||
628 | client_free_pages(kc); | ||
629 | bad_client_pages: | ||
630 | destroy_workqueue(kc->kcopyd_wq); | ||
631 | bad_workqueue: | ||
632 | mempool_destroy(kc->job_pool); | ||
633 | bad_slab: | ||
634 | kfree(kc); | ||
635 | |||
636 | return r; | ||
692 | } | 637 | } |
638 | EXPORT_SYMBOL(dm_kcopyd_client_create); | ||
693 | 639 | ||
694 | void kcopyd_client_destroy(struct kcopyd_client *kc) | 640 | void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) |
695 | { | 641 | { |
696 | /* Wait for completion of all jobs submitted by this client. */ | 642 | /* Wait for completion of all jobs submitted by this client. */ |
697 | wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); | 643 | wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); |
698 | 644 | ||
645 | BUG_ON(!list_empty(&kc->complete_jobs)); | ||
646 | BUG_ON(!list_empty(&kc->io_jobs)); | ||
647 | BUG_ON(!list_empty(&kc->pages_jobs)); | ||
648 | destroy_workqueue(kc->kcopyd_wq); | ||
699 | dm_io_client_destroy(kc->io_client); | 649 | dm_io_client_destroy(kc->io_client); |
700 | client_free_pages(kc); | 650 | client_free_pages(kc); |
701 | client_del(kc); | 651 | mempool_destroy(kc->job_pool); |
702 | kfree(kc); | 652 | kfree(kc); |
703 | kcopyd_exit(); | ||
704 | } | 653 | } |
705 | 654 | EXPORT_SYMBOL(dm_kcopyd_client_destroy); | |
706 | EXPORT_SYMBOL(kcopyd_client_create); | ||
707 | EXPORT_SYMBOL(kcopyd_client_destroy); | ||
708 | EXPORT_SYMBOL(kcopyd_copy); | ||
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 2a74b2142f50..67a6f31b7fc3 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2003 Sistina Software | 2 | * Copyright (C) 2003 Sistina Software |
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | ||
3 | * | 4 | * |
4 | * This file is released under the LGPL. | 5 | * This file is released under the LGPL. |
5 | */ | 6 | */ |
@@ -8,64 +9,58 @@ | |||
8 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
9 | #include <linux/module.h> | 10 | #include <linux/module.h> |
10 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
12 | #include <linux/dm-io.h> | ||
13 | #include <linux/dm-dirty-log.h> | ||
11 | 14 | ||
12 | #include "dm-log.h" | 15 | #include "dm.h" |
13 | #include "dm-io.h" | ||
14 | 16 | ||
15 | #define DM_MSG_PREFIX "mirror log" | 17 | #define DM_MSG_PREFIX "dirty region log" |
16 | 18 | ||
17 | static LIST_HEAD(_log_types); | 19 | struct dm_dirty_log_internal { |
18 | static DEFINE_SPINLOCK(_lock); | 20 | struct dm_dirty_log_type *type; |
19 | 21 | ||
20 | int dm_register_dirty_log_type(struct dirty_log_type *type) | 22 | struct list_head list; |
21 | { | 23 | long use; |
22 | spin_lock(&_lock); | 24 | }; |
23 | type->use_count = 0; | ||
24 | list_add(&type->list, &_log_types); | ||
25 | spin_unlock(&_lock); | ||
26 | 25 | ||
27 | return 0; | 26 | static LIST_HEAD(_log_types); |
28 | } | 27 | static DEFINE_SPINLOCK(_lock); |
29 | 28 | ||
30 | int dm_unregister_dirty_log_type(struct dirty_log_type *type) | 29 | static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name) |
31 | { | 30 | { |
32 | spin_lock(&_lock); | 31 | struct dm_dirty_log_internal *log_type; |
33 | |||
34 | if (type->use_count) | ||
35 | DMWARN("Attempt to unregister a log type that is still in use"); | ||
36 | else | ||
37 | list_del(&type->list); | ||
38 | 32 | ||
39 | spin_unlock(&_lock); | 33 | list_for_each_entry(log_type, &_log_types, list) |
34 | if (!strcmp(name, log_type->type->name)) | ||
35 | return log_type; | ||
40 | 36 | ||
41 | return 0; | 37 | return NULL; |
42 | } | 38 | } |
43 | 39 | ||
44 | static struct dirty_log_type *_get_type(const char *type_name) | 40 | static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name) |
45 | { | 41 | { |
46 | struct dirty_log_type *type; | 42 | struct dm_dirty_log_internal *log_type; |
47 | 43 | ||
48 | spin_lock(&_lock); | 44 | spin_lock(&_lock); |
49 | list_for_each_entry (type, &_log_types, list) | 45 | |
50 | if (!strcmp(type_name, type->name)) { | 46 | log_type = __find_dirty_log_type(name); |
51 | if (!type->use_count && !try_module_get(type->module)){ | 47 | if (log_type) { |
52 | spin_unlock(&_lock); | 48 | if (!log_type->use && !try_module_get(log_type->type->module)) |
53 | return NULL; | 49 | log_type = NULL; |
54 | } | 50 | else |
55 | type->use_count++; | 51 | log_type->use++; |
56 | spin_unlock(&_lock); | 52 | } |
57 | return type; | ||
58 | } | ||
59 | 53 | ||
60 | spin_unlock(&_lock); | 54 | spin_unlock(&_lock); |
61 | return NULL; | 55 | |
56 | return log_type; | ||
62 | } | 57 | } |
63 | 58 | ||
64 | /* | 59 | /* |
65 | * get_type | 60 | * get_type |
66 | * @type_name | 61 | * @type_name |
67 | * | 62 | * |
68 | * Attempt to retrieve the dirty_log_type by name. If not already | 63 | * Attempt to retrieve the dm_dirty_log_type by name. If not already |
69 | * available, attempt to load the appropriate module. | 64 | * available, attempt to load the appropriate module. |
70 | * | 65 | * |
71 | * Log modules are named "dm-log-" followed by the 'type_name'. | 66 | * Log modules are named "dm-log-" followed by the 'type_name'. |
@@ -78,14 +73,17 @@ static struct dirty_log_type *_get_type(const char *type_name) | |||
78 | * | 73 | * |
79 | * Returns: dirty_log_type* on success, NULL on failure | 74 | * Returns: dirty_log_type* on success, NULL on failure |
80 | */ | 75 | */ |
81 | static struct dirty_log_type *get_type(const char *type_name) | 76 | static struct dm_dirty_log_type *get_type(const char *type_name) |
82 | { | 77 | { |
83 | char *p, *type_name_dup; | 78 | char *p, *type_name_dup; |
84 | struct dirty_log_type *type; | 79 | struct dm_dirty_log_internal *log_type; |
80 | |||
81 | if (!type_name) | ||
82 | return NULL; | ||
85 | 83 | ||
86 | type = _get_type(type_name); | 84 | log_type = _get_dirty_log_type(type_name); |
87 | if (type) | 85 | if (log_type) |
88 | return type; | 86 | return log_type->type; |
89 | 87 | ||
90 | type_name_dup = kstrdup(type_name, GFP_KERNEL); | 88 | type_name_dup = kstrdup(type_name, GFP_KERNEL); |
91 | if (!type_name_dup) { | 89 | if (!type_name_dup) { |
@@ -95,34 +93,106 @@ static struct dirty_log_type *get_type(const char *type_name) | |||
95 | } | 93 | } |
96 | 94 | ||
97 | while (request_module("dm-log-%s", type_name_dup) || | 95 | while (request_module("dm-log-%s", type_name_dup) || |
98 | !(type = _get_type(type_name))) { | 96 | !(log_type = _get_dirty_log_type(type_name))) { |
99 | p = strrchr(type_name_dup, '-'); | 97 | p = strrchr(type_name_dup, '-'); |
100 | if (!p) | 98 | if (!p) |
101 | break; | 99 | break; |
102 | p[0] = '\0'; | 100 | p[0] = '\0'; |
103 | } | 101 | } |
104 | 102 | ||
105 | if (!type) | 103 | if (!log_type) |
106 | DMWARN("Module for logging type \"%s\" not found.", type_name); | 104 | DMWARN("Module for logging type \"%s\" not found.", type_name); |
107 | 105 | ||
108 | kfree(type_name_dup); | 106 | kfree(type_name_dup); |
109 | 107 | ||
110 | return type; | 108 | return log_type ? log_type->type : NULL; |
111 | } | 109 | } |
112 | 110 | ||
113 | static void put_type(struct dirty_log_type *type) | 111 | static void put_type(struct dm_dirty_log_type *type) |
114 | { | 112 | { |
113 | struct dm_dirty_log_internal *log_type; | ||
114 | |||
115 | if (!type) | ||
116 | return; | ||
117 | |||
115 | spin_lock(&_lock); | 118 | spin_lock(&_lock); |
116 | if (!--type->use_count) | 119 | log_type = __find_dirty_log_type(type->name); |
120 | if (!log_type) | ||
121 | goto out; | ||
122 | |||
123 | if (!--log_type->use) | ||
117 | module_put(type->module); | 124 | module_put(type->module); |
125 | |||
126 | BUG_ON(log_type->use < 0); | ||
127 | |||
128 | out: | ||
118 | spin_unlock(&_lock); | 129 | spin_unlock(&_lock); |
119 | } | 130 | } |
120 | 131 | ||
121 | struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, | 132 | static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type) |
122 | unsigned int argc, char **argv) | ||
123 | { | 133 | { |
124 | struct dirty_log_type *type; | 134 | struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type), |
125 | struct dirty_log *log; | 135 | GFP_KERNEL); |
136 | |||
137 | if (log_type) | ||
138 | log_type->type = type; | ||
139 | |||
140 | return log_type; | ||
141 | } | ||
142 | |||
143 | int dm_dirty_log_type_register(struct dm_dirty_log_type *type) | ||
144 | { | ||
145 | struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type); | ||
146 | int r = 0; | ||
147 | |||
148 | if (!log_type) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | spin_lock(&_lock); | ||
152 | if (!__find_dirty_log_type(type->name)) | ||
153 | list_add(&log_type->list, &_log_types); | ||
154 | else { | ||
155 | kfree(log_type); | ||
156 | r = -EEXIST; | ||
157 | } | ||
158 | spin_unlock(&_lock); | ||
159 | |||
160 | return r; | ||
161 | } | ||
162 | EXPORT_SYMBOL(dm_dirty_log_type_register); | ||
163 | |||
164 | int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) | ||
165 | { | ||
166 | struct dm_dirty_log_internal *log_type; | ||
167 | |||
168 | spin_lock(&_lock); | ||
169 | |||
170 | log_type = __find_dirty_log_type(type->name); | ||
171 | if (!log_type) { | ||
172 | spin_unlock(&_lock); | ||
173 | return -EINVAL; | ||
174 | } | ||
175 | |||
176 | if (log_type->use) { | ||
177 | spin_unlock(&_lock); | ||
178 | return -ETXTBSY; | ||
179 | } | ||
180 | |||
181 | list_del(&log_type->list); | ||
182 | |||
183 | spin_unlock(&_lock); | ||
184 | kfree(log_type); | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | EXPORT_SYMBOL(dm_dirty_log_type_unregister); | ||
189 | |||
190 | struct dm_dirty_log *dm_dirty_log_create(const char *type_name, | ||
191 | struct dm_target *ti, | ||
192 | unsigned int argc, char **argv) | ||
193 | { | ||
194 | struct dm_dirty_log_type *type; | ||
195 | struct dm_dirty_log *log; | ||
126 | 196 | ||
127 | log = kmalloc(sizeof(*log), GFP_KERNEL); | 197 | log = kmalloc(sizeof(*log), GFP_KERNEL); |
128 | if (!log) | 198 | if (!log) |
@@ -143,13 +213,15 @@ struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *t | |||
143 | 213 | ||
144 | return log; | 214 | return log; |
145 | } | 215 | } |
216 | EXPORT_SYMBOL(dm_dirty_log_create); | ||
146 | 217 | ||
147 | void dm_destroy_dirty_log(struct dirty_log *log) | 218 | void dm_dirty_log_destroy(struct dm_dirty_log *log) |
148 | { | 219 | { |
149 | log->type->dtr(log); | 220 | log->type->dtr(log); |
150 | put_type(log->type); | 221 | put_type(log->type); |
151 | kfree(log); | 222 | kfree(log); |
152 | } | 223 | } |
224 | EXPORT_SYMBOL(dm_dirty_log_destroy); | ||
153 | 225 | ||
154 | /*----------------------------------------------------------------- | 226 | /*----------------------------------------------------------------- |
155 | * Persistent and core logs share a lot of their implementation. | 227 | * Persistent and core logs share a lot of their implementation. |
@@ -207,7 +279,7 @@ struct log_c { | |||
207 | struct dm_dev *log_dev; | 279 | struct dm_dev *log_dev; |
208 | struct log_header header; | 280 | struct log_header header; |
209 | 281 | ||
210 | struct io_region header_location; | 282 | struct dm_io_region header_location; |
211 | struct log_header *disk_header; | 283 | struct log_header *disk_header; |
212 | }; | 284 | }; |
213 | 285 | ||
@@ -215,7 +287,7 @@ struct log_c { | |||
215 | * The touched member needs to be updated every time we access | 287 | * The touched member needs to be updated every time we access |
216 | * one of the bitsets. | 288 | * one of the bitsets. |
217 | */ | 289 | */ |
218 | static inline int log_test_bit(uint32_t *bs, unsigned bit) | 290 | static inline int log_test_bit(uint32_t *bs, unsigned bit) |
219 | { | 291 | { |
220 | return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0; | 292 | return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0; |
221 | } | 293 | } |
@@ -302,7 +374,7 @@ static inline int write_header(struct log_c *log) | |||
302 | * argv contains region_size followed optionally by [no]sync | 374 | * argv contains region_size followed optionally by [no]sync |
303 | *--------------------------------------------------------------*/ | 375 | *--------------------------------------------------------------*/ |
304 | #define BYTE_SHIFT 3 | 376 | #define BYTE_SHIFT 3 |
305 | static int create_log_context(struct dirty_log *log, struct dm_target *ti, | 377 | static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, |
306 | unsigned int argc, char **argv, | 378 | unsigned int argc, char **argv, |
307 | struct dm_dev *dev) | 379 | struct dm_dev *dev) |
308 | { | 380 | { |
@@ -315,7 +387,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, | |||
315 | int r; | 387 | int r; |
316 | 388 | ||
317 | if (argc < 1 || argc > 2) { | 389 | if (argc < 1 || argc > 2) { |
318 | DMWARN("wrong number of arguments to mirror log"); | 390 | DMWARN("wrong number of arguments to dirty region log"); |
319 | return -EINVAL; | 391 | return -EINVAL; |
320 | } | 392 | } |
321 | 393 | ||
@@ -325,8 +397,8 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, | |||
325 | else if (!strcmp(argv[1], "nosync")) | 397 | else if (!strcmp(argv[1], "nosync")) |
326 | sync = NOSYNC; | 398 | sync = NOSYNC; |
327 | else { | 399 | else { |
328 | DMWARN("unrecognised sync argument to mirror log: %s", | 400 | DMWARN("unrecognised sync argument to " |
329 | argv[1]); | 401 | "dirty region log: %s", argv[1]); |
330 | return -EINVAL; | 402 | return -EINVAL; |
331 | } | 403 | } |
332 | } | 404 | } |
@@ -434,7 +506,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, | |||
434 | return 0; | 506 | return 0; |
435 | } | 507 | } |
436 | 508 | ||
437 | static int core_ctr(struct dirty_log *log, struct dm_target *ti, | 509 | static int core_ctr(struct dm_dirty_log *log, struct dm_target *ti, |
438 | unsigned int argc, char **argv) | 510 | unsigned int argc, char **argv) |
439 | { | 511 | { |
440 | return create_log_context(log, ti, argc, argv, NULL); | 512 | return create_log_context(log, ti, argc, argv, NULL); |
@@ -447,7 +519,7 @@ static void destroy_log_context(struct log_c *lc) | |||
447 | kfree(lc); | 519 | kfree(lc); |
448 | } | 520 | } |
449 | 521 | ||
450 | static void core_dtr(struct dirty_log *log) | 522 | static void core_dtr(struct dm_dirty_log *log) |
451 | { | 523 | { |
452 | struct log_c *lc = (struct log_c *) log->context; | 524 | struct log_c *lc = (struct log_c *) log->context; |
453 | 525 | ||
@@ -460,14 +532,14 @@ static void core_dtr(struct dirty_log *log) | |||
460 | * | 532 | * |
461 | * argv contains log_device region_size followed optionally by [no]sync | 533 | * argv contains log_device region_size followed optionally by [no]sync |
462 | *--------------------------------------------------------------*/ | 534 | *--------------------------------------------------------------*/ |
463 | static int disk_ctr(struct dirty_log *log, struct dm_target *ti, | 535 | static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti, |
464 | unsigned int argc, char **argv) | 536 | unsigned int argc, char **argv) |
465 | { | 537 | { |
466 | int r; | 538 | int r; |
467 | struct dm_dev *dev; | 539 | struct dm_dev *dev; |
468 | 540 | ||
469 | if (argc < 2 || argc > 3) { | 541 | if (argc < 2 || argc > 3) { |
470 | DMWARN("wrong number of arguments to disk mirror log"); | 542 | DMWARN("wrong number of arguments to disk dirty region log"); |
471 | return -EINVAL; | 543 | return -EINVAL; |
472 | } | 544 | } |
473 | 545 | ||
@@ -485,7 +557,7 @@ static int disk_ctr(struct dirty_log *log, struct dm_target *ti, | |||
485 | return 0; | 557 | return 0; |
486 | } | 558 | } |
487 | 559 | ||
488 | static void disk_dtr(struct dirty_log *log) | 560 | static void disk_dtr(struct dm_dirty_log *log) |
489 | { | 561 | { |
490 | struct log_c *lc = (struct log_c *) log->context; | 562 | struct log_c *lc = (struct log_c *) log->context; |
491 | 563 | ||
@@ -514,7 +586,7 @@ static void fail_log_device(struct log_c *lc) | |||
514 | dm_table_event(lc->ti->table); | 586 | dm_table_event(lc->ti->table); |
515 | } | 587 | } |
516 | 588 | ||
517 | static int disk_resume(struct dirty_log *log) | 589 | static int disk_resume(struct dm_dirty_log *log) |
518 | { | 590 | { |
519 | int r; | 591 | int r; |
520 | unsigned i; | 592 | unsigned i; |
@@ -524,7 +596,7 @@ static int disk_resume(struct dirty_log *log) | |||
524 | /* read the disk header */ | 596 | /* read the disk header */ |
525 | r = read_header(lc); | 597 | r = read_header(lc); |
526 | if (r) { | 598 | if (r) { |
527 | DMWARN("%s: Failed to read header on mirror log device", | 599 | DMWARN("%s: Failed to read header on dirty region log device", |
528 | lc->log_dev->name); | 600 | lc->log_dev->name); |
529 | fail_log_device(lc); | 601 | fail_log_device(lc); |
530 | /* | 602 | /* |
@@ -562,7 +634,7 @@ static int disk_resume(struct dirty_log *log) | |||
562 | /* write the new header */ | 634 | /* write the new header */ |
563 | r = write_header(lc); | 635 | r = write_header(lc); |
564 | if (r) { | 636 | if (r) { |
565 | DMWARN("%s: Failed to write header on mirror log device", | 637 | DMWARN("%s: Failed to write header on dirty region log device", |
566 | lc->log_dev->name); | 638 | lc->log_dev->name); |
567 | fail_log_device(lc); | 639 | fail_log_device(lc); |
568 | } | 640 | } |
@@ -570,38 +642,38 @@ static int disk_resume(struct dirty_log *log) | |||
570 | return r; | 642 | return r; |
571 | } | 643 | } |
572 | 644 | ||
573 | static uint32_t core_get_region_size(struct dirty_log *log) | 645 | static uint32_t core_get_region_size(struct dm_dirty_log *log) |
574 | { | 646 | { |
575 | struct log_c *lc = (struct log_c *) log->context; | 647 | struct log_c *lc = (struct log_c *) log->context; |
576 | return lc->region_size; | 648 | return lc->region_size; |
577 | } | 649 | } |
578 | 650 | ||
579 | static int core_resume(struct dirty_log *log) | 651 | static int core_resume(struct dm_dirty_log *log) |
580 | { | 652 | { |
581 | struct log_c *lc = (struct log_c *) log->context; | 653 | struct log_c *lc = (struct log_c *) log->context; |
582 | lc->sync_search = 0; | 654 | lc->sync_search = 0; |
583 | return 0; | 655 | return 0; |
584 | } | 656 | } |
585 | 657 | ||
586 | static int core_is_clean(struct dirty_log *log, region_t region) | 658 | static int core_is_clean(struct dm_dirty_log *log, region_t region) |
587 | { | 659 | { |
588 | struct log_c *lc = (struct log_c *) log->context; | 660 | struct log_c *lc = (struct log_c *) log->context; |
589 | return log_test_bit(lc->clean_bits, region); | 661 | return log_test_bit(lc->clean_bits, region); |
590 | } | 662 | } |
591 | 663 | ||
592 | static int core_in_sync(struct dirty_log *log, region_t region, int block) | 664 | static int core_in_sync(struct dm_dirty_log *log, region_t region, int block) |
593 | { | 665 | { |
594 | struct log_c *lc = (struct log_c *) log->context; | 666 | struct log_c *lc = (struct log_c *) log->context; |
595 | return log_test_bit(lc->sync_bits, region); | 667 | return log_test_bit(lc->sync_bits, region); |
596 | } | 668 | } |
597 | 669 | ||
598 | static int core_flush(struct dirty_log *log) | 670 | static int core_flush(struct dm_dirty_log *log) |
599 | { | 671 | { |
600 | /* no op */ | 672 | /* no op */ |
601 | return 0; | 673 | return 0; |
602 | } | 674 | } |
603 | 675 | ||
604 | static int disk_flush(struct dirty_log *log) | 676 | static int disk_flush(struct dm_dirty_log *log) |
605 | { | 677 | { |
606 | int r; | 678 | int r; |
607 | struct log_c *lc = (struct log_c *) log->context; | 679 | struct log_c *lc = (struct log_c *) log->context; |
@@ -619,19 +691,19 @@ static int disk_flush(struct dirty_log *log) | |||
619 | return r; | 691 | return r; |
620 | } | 692 | } |
621 | 693 | ||
622 | static void core_mark_region(struct dirty_log *log, region_t region) | 694 | static void core_mark_region(struct dm_dirty_log *log, region_t region) |
623 | { | 695 | { |
624 | struct log_c *lc = (struct log_c *) log->context; | 696 | struct log_c *lc = (struct log_c *) log->context; |
625 | log_clear_bit(lc, lc->clean_bits, region); | 697 | log_clear_bit(lc, lc->clean_bits, region); |
626 | } | 698 | } |
627 | 699 | ||
628 | static void core_clear_region(struct dirty_log *log, region_t region) | 700 | static void core_clear_region(struct dm_dirty_log *log, region_t region) |
629 | { | 701 | { |
630 | struct log_c *lc = (struct log_c *) log->context; | 702 | struct log_c *lc = (struct log_c *) log->context; |
631 | log_set_bit(lc, lc->clean_bits, region); | 703 | log_set_bit(lc, lc->clean_bits, region); |
632 | } | 704 | } |
633 | 705 | ||
634 | static int core_get_resync_work(struct dirty_log *log, region_t *region) | 706 | static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) |
635 | { | 707 | { |
636 | struct log_c *lc = (struct log_c *) log->context; | 708 | struct log_c *lc = (struct log_c *) log->context; |
637 | 709 | ||
@@ -654,7 +726,7 @@ static int core_get_resync_work(struct dirty_log *log, region_t *region) | |||
654 | return 1; | 726 | return 1; |
655 | } | 727 | } |
656 | 728 | ||
657 | static void core_set_region_sync(struct dirty_log *log, region_t region, | 729 | static void core_set_region_sync(struct dm_dirty_log *log, region_t region, |
658 | int in_sync) | 730 | int in_sync) |
659 | { | 731 | { |
660 | struct log_c *lc = (struct log_c *) log->context; | 732 | struct log_c *lc = (struct log_c *) log->context; |
@@ -669,7 +741,7 @@ static void core_set_region_sync(struct dirty_log *log, region_t region, | |||
669 | } | 741 | } |
670 | } | 742 | } |
671 | 743 | ||
672 | static region_t core_get_sync_count(struct dirty_log *log) | 744 | static region_t core_get_sync_count(struct dm_dirty_log *log) |
673 | { | 745 | { |
674 | struct log_c *lc = (struct log_c *) log->context; | 746 | struct log_c *lc = (struct log_c *) log->context; |
675 | 747 | ||
@@ -680,7 +752,7 @@ static region_t core_get_sync_count(struct dirty_log *log) | |||
680 | if (lc->sync != DEFAULTSYNC) \ | 752 | if (lc->sync != DEFAULTSYNC) \ |
681 | DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") | 753 | DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") |
682 | 754 | ||
683 | static int core_status(struct dirty_log *log, status_type_t status, | 755 | static int core_status(struct dm_dirty_log *log, status_type_t status, |
684 | char *result, unsigned int maxlen) | 756 | char *result, unsigned int maxlen) |
685 | { | 757 | { |
686 | int sz = 0; | 758 | int sz = 0; |
@@ -700,7 +772,7 @@ static int core_status(struct dirty_log *log, status_type_t status, | |||
700 | return sz; | 772 | return sz; |
701 | } | 773 | } |
702 | 774 | ||
703 | static int disk_status(struct dirty_log *log, status_type_t status, | 775 | static int disk_status(struct dm_dirty_log *log, status_type_t status, |
704 | char *result, unsigned int maxlen) | 776 | char *result, unsigned int maxlen) |
705 | { | 777 | { |
706 | int sz = 0; | 778 | int sz = 0; |
@@ -722,7 +794,7 @@ static int disk_status(struct dirty_log *log, status_type_t status, | |||
722 | return sz; | 794 | return sz; |
723 | } | 795 | } |
724 | 796 | ||
725 | static struct dirty_log_type _core_type = { | 797 | static struct dm_dirty_log_type _core_type = { |
726 | .name = "core", | 798 | .name = "core", |
727 | .module = THIS_MODULE, | 799 | .module = THIS_MODULE, |
728 | .ctr = core_ctr, | 800 | .ctr = core_ctr, |
@@ -740,7 +812,7 @@ static struct dirty_log_type _core_type = { | |||
740 | .status = core_status, | 812 | .status = core_status, |
741 | }; | 813 | }; |
742 | 814 | ||
743 | static struct dirty_log_type _disk_type = { | 815 | static struct dm_dirty_log_type _disk_type = { |
744 | .name = "disk", | 816 | .name = "disk", |
745 | .module = THIS_MODULE, | 817 | .module = THIS_MODULE, |
746 | .ctr = disk_ctr, | 818 | .ctr = disk_ctr, |
@@ -763,26 +835,28 @@ int __init dm_dirty_log_init(void) | |||
763 | { | 835 | { |
764 | int r; | 836 | int r; |
765 | 837 | ||
766 | r = dm_register_dirty_log_type(&_core_type); | 838 | r = dm_dirty_log_type_register(&_core_type); |
767 | if (r) | 839 | if (r) |
768 | DMWARN("couldn't register core log"); | 840 | DMWARN("couldn't register core log"); |
769 | 841 | ||
770 | r = dm_register_dirty_log_type(&_disk_type); | 842 | r = dm_dirty_log_type_register(&_disk_type); |
771 | if (r) { | 843 | if (r) { |
772 | DMWARN("couldn't register disk type"); | 844 | DMWARN("couldn't register disk type"); |
773 | dm_unregister_dirty_log_type(&_core_type); | 845 | dm_dirty_log_type_unregister(&_core_type); |
774 | } | 846 | } |
775 | 847 | ||
776 | return r; | 848 | return r; |
777 | } | 849 | } |
778 | 850 | ||
779 | void dm_dirty_log_exit(void) | 851 | void __exit dm_dirty_log_exit(void) |
780 | { | 852 | { |
781 | dm_unregister_dirty_log_type(&_disk_type); | 853 | dm_dirty_log_type_unregister(&_disk_type); |
782 | dm_unregister_dirty_log_type(&_core_type); | 854 | dm_dirty_log_type_unregister(&_core_type); |
783 | } | 855 | } |
784 | 856 | ||
785 | EXPORT_SYMBOL(dm_register_dirty_log_type); | 857 | module_init(dm_dirty_log_init); |
786 | EXPORT_SYMBOL(dm_unregister_dirty_log_type); | 858 | module_exit(dm_dirty_log_exit); |
787 | EXPORT_SYMBOL(dm_create_dirty_log); | 859 | |
788 | EXPORT_SYMBOL(dm_destroy_dirty_log); | 860 | MODULE_DESCRIPTION(DM_NAME " dirty region log"); |
861 | MODULE_AUTHOR("Joe Thornber, Heinz Mauelshagen <dm-devel@redhat.com>"); | ||
862 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h deleted file mode 100644 index 3fae87eb5963..000000000000 --- a/drivers/md/dm-log.h +++ /dev/null | |||
@@ -1,131 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2003 Sistina Software | ||
3 | * | ||
4 | * This file is released under the LGPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef DM_DIRTY_LOG | ||
8 | #define DM_DIRTY_LOG | ||
9 | |||
10 | #include "dm.h" | ||
11 | |||
12 | typedef sector_t region_t; | ||
13 | |||
14 | struct dirty_log_type; | ||
15 | |||
16 | struct dirty_log { | ||
17 | struct dirty_log_type *type; | ||
18 | void *context; | ||
19 | }; | ||
20 | |||
21 | struct dirty_log_type { | ||
22 | struct list_head list; | ||
23 | const char *name; | ||
24 | struct module *module; | ||
25 | unsigned int use_count; | ||
26 | |||
27 | int (*ctr)(struct dirty_log *log, struct dm_target *ti, | ||
28 | unsigned int argc, char **argv); | ||
29 | void (*dtr)(struct dirty_log *log); | ||
30 | |||
31 | /* | ||
32 | * There are times when we don't want the log to touch | ||
33 | * the disk. | ||
34 | */ | ||
35 | int (*presuspend)(struct dirty_log *log); | ||
36 | int (*postsuspend)(struct dirty_log *log); | ||
37 | int (*resume)(struct dirty_log *log); | ||
38 | |||
39 | /* | ||
40 | * Retrieves the smallest size of region that the log can | ||
41 | * deal with. | ||
42 | */ | ||
43 | uint32_t (*get_region_size)(struct dirty_log *log); | ||
44 | |||
45 | /* | ||
46 | * A predicate to say whether a region is clean or not. | ||
47 | * May block. | ||
48 | */ | ||
49 | int (*is_clean)(struct dirty_log *log, region_t region); | ||
50 | |||
51 | /* | ||
52 | * Returns: 0, 1, -EWOULDBLOCK, < 0 | ||
53 | * | ||
54 | * A predicate function to check the area given by | ||
55 | * [sector, sector + len) is in sync. | ||
56 | * | ||
57 | * If -EWOULDBLOCK is returned the state of the region is | ||
58 | * unknown, typically this will result in a read being | ||
59 | * passed to a daemon to deal with, since a daemon is | ||
60 | * allowed to block. | ||
61 | */ | ||
62 | int (*in_sync)(struct dirty_log *log, region_t region, int can_block); | ||
63 | |||
64 | /* | ||
65 | * Flush the current log state (eg, to disk). This | ||
66 | * function may block. | ||
67 | */ | ||
68 | int (*flush)(struct dirty_log *log); | ||
69 | |||
70 | /* | ||
71 | * Mark an area as clean or dirty. These functions may | ||
72 | * block, though for performance reasons blocking should | ||
73 | * be extremely rare (eg, allocating another chunk of | ||
74 | * memory for some reason). | ||
75 | */ | ||
76 | void (*mark_region)(struct dirty_log *log, region_t region); | ||
77 | void (*clear_region)(struct dirty_log *log, region_t region); | ||
78 | |||
79 | /* | ||
80 | * Returns: <0 (error), 0 (no region), 1 (region) | ||
81 | * | ||
82 | * The mirrord will need perform recovery on regions of | ||
83 | * the mirror that are in the NOSYNC state. This | ||
84 | * function asks the log to tell the caller about the | ||
85 | * next region that this machine should recover. | ||
86 | * | ||
87 | * Do not confuse this function with 'in_sync()', one | ||
88 | * tells you if an area is synchronised, the other | ||
89 | * assigns recovery work. | ||
90 | */ | ||
91 | int (*get_resync_work)(struct dirty_log *log, region_t *region); | ||
92 | |||
93 | /* | ||
94 | * This notifies the log that the resync status of a region | ||
95 | * has changed. It also clears the region from the recovering | ||
96 | * list (if present). | ||
97 | */ | ||
98 | void (*set_region_sync)(struct dirty_log *log, | ||
99 | region_t region, int in_sync); | ||
100 | |||
101 | /* | ||
102 | * Returns the number of regions that are in sync. | ||
103 | */ | ||
104 | region_t (*get_sync_count)(struct dirty_log *log); | ||
105 | |||
106 | /* | ||
107 | * Support function for mirror status requests. | ||
108 | */ | ||
109 | int (*status)(struct dirty_log *log, status_type_t status_type, | ||
110 | char *result, unsigned int maxlen); | ||
111 | }; | ||
112 | |||
113 | int dm_register_dirty_log_type(struct dirty_log_type *type); | ||
114 | int dm_unregister_dirty_log_type(struct dirty_log_type *type); | ||
115 | |||
116 | |||
117 | /* | ||
118 | * Make sure you use these two functions, rather than calling | ||
119 | * type->constructor/destructor() directly. | ||
120 | */ | ||
121 | struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, | ||
122 | unsigned int argc, char **argv); | ||
123 | void dm_destroy_dirty_log(struct dirty_log *log); | ||
124 | |||
125 | /* | ||
126 | * init/exit functions. | ||
127 | */ | ||
128 | int dm_dirty_log_init(void); | ||
129 | void dm_dirty_log_exit(void); | ||
130 | |||
131 | #endif | ||
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c index 204bf42c9449..b63a0ab37c53 100644 --- a/drivers/md/dm-mpath-hp-sw.c +++ b/drivers/md/dm-mpath-hp-sw.c | |||
@@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path) | |||
137 | req->sense = h->sense; | 137 | req->sense = h->sense; |
138 | memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); | 138 | memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); |
139 | 139 | ||
140 | memset(&req->cmd, 0, BLK_MAX_CDB); | ||
141 | req->cmd[0] = START_STOP; | 140 | req->cmd[0] = START_STOP; |
142 | req->cmd[4] = 1; | 141 | req->cmd[4] = 1; |
143 | req->cmd_len = COMMAND_SIZE(req->cmd[0]); | 142 | req->cmd_len = COMMAND_SIZE(req->cmd[0]); |
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c index e04eb5c697fb..95e77734880a 100644 --- a/drivers/md/dm-mpath-rdac.c +++ b/drivers/md/dm-mpath-rdac.c | |||
@@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h, | |||
284 | return NULL; | 284 | return NULL; |
285 | } | 285 | } |
286 | 286 | ||
287 | memset(&rq->cmd, 0, BLK_MAX_CDB); | ||
288 | rq->sense = h->sense; | 287 | rq->sense = h->sense; |
289 | memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); | 288 | memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); |
290 | rq->sense_len = 0; | 289 | rq->sense_len = 0; |
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 762cb086bb7f..ff05fe893083 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -7,9 +7,6 @@ | |||
7 | #include "dm.h" | 7 | #include "dm.h" |
8 | #include "dm-bio-list.h" | 8 | #include "dm-bio-list.h" |
9 | #include "dm-bio-record.h" | 9 | #include "dm-bio-record.h" |
10 | #include "dm-io.h" | ||
11 | #include "dm-log.h" | ||
12 | #include "kcopyd.h" | ||
13 | 10 | ||
14 | #include <linux/ctype.h> | 11 | #include <linux/ctype.h> |
15 | #include <linux/init.h> | 12 | #include <linux/init.h> |
@@ -22,6 +19,9 @@ | |||
22 | #include <linux/workqueue.h> | 19 | #include <linux/workqueue.h> |
23 | #include <linux/log2.h> | 20 | #include <linux/log2.h> |
24 | #include <linux/hardirq.h> | 21 | #include <linux/hardirq.h> |
22 | #include <linux/dm-io.h> | ||
23 | #include <linux/dm-dirty-log.h> | ||
24 | #include <linux/dm-kcopyd.h> | ||
25 | 25 | ||
26 | #define DM_MSG_PREFIX "raid1" | 26 | #define DM_MSG_PREFIX "raid1" |
27 | #define DM_IO_PAGES 64 | 27 | #define DM_IO_PAGES 64 |
@@ -74,7 +74,7 @@ struct region_hash { | |||
74 | unsigned region_shift; | 74 | unsigned region_shift; |
75 | 75 | ||
76 | /* holds persistent region state */ | 76 | /* holds persistent region state */ |
77 | struct dirty_log *log; | 77 | struct dm_dirty_log *log; |
78 | 78 | ||
79 | /* hash table */ | 79 | /* hash table */ |
80 | rwlock_t hash_lock; | 80 | rwlock_t hash_lock; |
@@ -133,7 +133,7 @@ struct mirror_set { | |||
133 | struct dm_target *ti; | 133 | struct dm_target *ti; |
134 | struct list_head list; | 134 | struct list_head list; |
135 | struct region_hash rh; | 135 | struct region_hash rh; |
136 | struct kcopyd_client *kcopyd_client; | 136 | struct dm_kcopyd_client *kcopyd_client; |
137 | uint64_t features; | 137 | uint64_t features; |
138 | 138 | ||
139 | spinlock_t lock; /* protects the lists */ | 139 | spinlock_t lock; /* protects the lists */ |
@@ -154,6 +154,9 @@ struct mirror_set { | |||
154 | 154 | ||
155 | struct workqueue_struct *kmirrord_wq; | 155 | struct workqueue_struct *kmirrord_wq; |
156 | struct work_struct kmirrord_work; | 156 | struct work_struct kmirrord_work; |
157 | struct timer_list timer; | ||
158 | unsigned long timer_pending; | ||
159 | |||
157 | struct work_struct trigger_event; | 160 | struct work_struct trigger_event; |
158 | 161 | ||
159 | unsigned int nr_mirrors; | 162 | unsigned int nr_mirrors; |
@@ -178,13 +181,32 @@ static void wake(struct mirror_set *ms) | |||
178 | queue_work(ms->kmirrord_wq, &ms->kmirrord_work); | 181 | queue_work(ms->kmirrord_wq, &ms->kmirrord_work); |
179 | } | 182 | } |
180 | 183 | ||
184 | static void delayed_wake_fn(unsigned long data) | ||
185 | { | ||
186 | struct mirror_set *ms = (struct mirror_set *) data; | ||
187 | |||
188 | clear_bit(0, &ms->timer_pending); | ||
189 | wake(ms); | ||
190 | } | ||
191 | |||
192 | static void delayed_wake(struct mirror_set *ms) | ||
193 | { | ||
194 | if (test_and_set_bit(0, &ms->timer_pending)) | ||
195 | return; | ||
196 | |||
197 | ms->timer.expires = jiffies + HZ / 5; | ||
198 | ms->timer.data = (unsigned long) ms; | ||
199 | ms->timer.function = delayed_wake_fn; | ||
200 | add_timer(&ms->timer); | ||
201 | } | ||
202 | |||
181 | /* FIXME move this */ | 203 | /* FIXME move this */ |
182 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); | 204 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); |
183 | 205 | ||
184 | #define MIN_REGIONS 64 | 206 | #define MIN_REGIONS 64 |
185 | #define MAX_RECOVERY 1 | 207 | #define MAX_RECOVERY 1 |
186 | static int rh_init(struct region_hash *rh, struct mirror_set *ms, | 208 | static int rh_init(struct region_hash *rh, struct mirror_set *ms, |
187 | struct dirty_log *log, uint32_t region_size, | 209 | struct dm_dirty_log *log, uint32_t region_size, |
188 | region_t nr_regions) | 210 | region_t nr_regions) |
189 | { | 211 | { |
190 | unsigned int nr_buckets, max_buckets; | 212 | unsigned int nr_buckets, max_buckets; |
@@ -249,7 +271,7 @@ static void rh_exit(struct region_hash *rh) | |||
249 | } | 271 | } |
250 | 272 | ||
251 | if (rh->log) | 273 | if (rh->log) |
252 | dm_destroy_dirty_log(rh->log); | 274 | dm_dirty_log_destroy(rh->log); |
253 | if (rh->region_pool) | 275 | if (rh->region_pool) |
254 | mempool_destroy(rh->region_pool); | 276 | mempool_destroy(rh->region_pool); |
255 | vfree(rh->buckets); | 277 | vfree(rh->buckets); |
@@ -405,24 +427,22 @@ static void rh_update_states(struct region_hash *rh) | |||
405 | write_lock_irq(&rh->hash_lock); | 427 | write_lock_irq(&rh->hash_lock); |
406 | spin_lock(&rh->region_lock); | 428 | spin_lock(&rh->region_lock); |
407 | if (!list_empty(&rh->clean_regions)) { | 429 | if (!list_empty(&rh->clean_regions)) { |
408 | list_splice(&rh->clean_regions, &clean); | 430 | list_splice_init(&rh->clean_regions, &clean); |
409 | INIT_LIST_HEAD(&rh->clean_regions); | ||
410 | 431 | ||
411 | list_for_each_entry(reg, &clean, list) | 432 | list_for_each_entry(reg, &clean, list) |
412 | list_del(&reg->hash_list); | 433 | list_del(&reg->hash_list); |
413 | } | 434 | } |
414 | 435 | ||
415 | if (!list_empty(&rh->recovered_regions)) { | 436 | if (!list_empty(&rh->recovered_regions)) { |
416 | list_splice(&rh->recovered_regions, &recovered); | 437 | list_splice_init(&rh->recovered_regions, &recovered); |
417 | INIT_LIST_HEAD(&rh->recovered_regions); | ||
418 | 438 | ||
419 | list_for_each_entry (reg, &recovered, list) | 439 | list_for_each_entry (reg, &recovered, list) |
420 | list_del(&reg->hash_list); | 440 | list_del(&reg->hash_list); |
421 | } | 441 | } |
422 | 442 | ||
423 | if (!list_empty(&rh->failed_recovered_regions)) { | 443 | if (!list_empty(&rh->failed_recovered_regions)) { |
424 | list_splice(&rh->failed_recovered_regions, &failed_recovered); | 444 | list_splice_init(&rh->failed_recovered_regions, |
425 | INIT_LIST_HEAD(&rh->failed_recovered_regions); | 445 | &failed_recovered); |
426 | 446 | ||
427 | list_for_each_entry(reg, &failed_recovered, list) | 447 | list_for_each_entry(reg, &failed_recovered, list) |
428 | list_del(&reg->hash_list); | 448 | list_del(&reg->hash_list); |
@@ -790,7 +810,7 @@ static int recover(struct mirror_set *ms, struct region *reg) | |||
790 | { | 810 | { |
791 | int r; | 811 | int r; |
792 | unsigned int i; | 812 | unsigned int i; |
793 | struct io_region from, to[KCOPYD_MAX_REGIONS], *dest; | 813 | struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; |
794 | struct mirror *m; | 814 | struct mirror *m; |
795 | unsigned long flags = 0; | 815 | unsigned long flags = 0; |
796 | 816 | ||
@@ -822,9 +842,9 @@ static int recover(struct mirror_set *ms, struct region *reg) | |||
822 | } | 842 | } |
823 | 843 | ||
824 | /* hand to kcopyd */ | 844 | /* hand to kcopyd */ |
825 | set_bit(KCOPYD_IGNORE_ERROR, &flags); | 845 | set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); |
826 | r = kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags, | 846 | r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, |
827 | recovery_complete, reg); | 847 | flags, recovery_complete, reg); |
828 | 848 | ||
829 | return r; | 849 | return r; |
830 | } | 850 | } |
@@ -833,7 +853,7 @@ static void do_recovery(struct mirror_set *ms) | |||
833 | { | 853 | { |
834 | int r; | 854 | int r; |
835 | struct region *reg; | 855 | struct region *reg; |
836 | struct dirty_log *log = ms->rh.log; | 856 | struct dm_dirty_log *log = ms->rh.log; |
837 | 857 | ||
838 | /* | 858 | /* |
839 | * Start quiescing some regions. | 859 | * Start quiescing some regions. |
@@ -909,7 +929,7 @@ static void map_bio(struct mirror *m, struct bio *bio) | |||
909 | bio->bi_sector = map_sector(m, bio); | 929 | bio->bi_sector = map_sector(m, bio); |
910 | } | 930 | } |
911 | 931 | ||
912 | static void map_region(struct io_region *io, struct mirror *m, | 932 | static void map_region(struct dm_io_region *io, struct mirror *m, |
913 | struct bio *bio) | 933 | struct bio *bio) |
914 | { | 934 | { |
915 | io->bdev = m->dev->bdev; | 935 | io->bdev = m->dev->bdev; |
@@ -951,7 +971,7 @@ static void read_callback(unsigned long error, void *context) | |||
951 | /* Asynchronous read. */ | 971 | /* Asynchronous read. */ |
952 | static void read_async_bio(struct mirror *m, struct bio *bio) | 972 | static void read_async_bio(struct mirror *m, struct bio *bio) |
953 | { | 973 | { |
954 | struct io_region io; | 974 | struct dm_io_region io; |
955 | struct dm_io_request io_req = { | 975 | struct dm_io_request io_req = { |
956 | .bi_rw = READ, | 976 | .bi_rw = READ, |
957 | .mem.type = DM_IO_BVEC, | 977 | .mem.type = DM_IO_BVEC, |
@@ -1019,7 +1039,7 @@ static void __bio_mark_nosync(struct mirror_set *ms, | |||
1019 | { | 1039 | { |
1020 | unsigned long flags; | 1040 | unsigned long flags; |
1021 | struct region_hash *rh = &ms->rh; | 1041 | struct region_hash *rh = &ms->rh; |
1022 | struct dirty_log *log = ms->rh.log; | 1042 | struct dm_dirty_log *log = ms->rh.log; |
1023 | struct region *reg; | 1043 | struct region *reg; |
1024 | region_t region = bio_to_region(rh, bio); | 1044 | region_t region = bio_to_region(rh, bio); |
1025 | int recovering = 0; | 1045 | int recovering = 0; |
@@ -1107,7 +1127,7 @@ out: | |||
1107 | static void do_write(struct mirror_set *ms, struct bio *bio) | 1127 | static void do_write(struct mirror_set *ms, struct bio *bio) |
1108 | { | 1128 | { |
1109 | unsigned int i; | 1129 | unsigned int i; |
1110 | struct io_region io[ms->nr_mirrors], *dest = io; | 1130 | struct dm_io_region io[ms->nr_mirrors], *dest = io; |
1111 | struct mirror *m; | 1131 | struct mirror *m; |
1112 | struct dm_io_request io_req = { | 1132 | struct dm_io_request io_req = { |
1113 | .bi_rw = WRITE, | 1133 | .bi_rw = WRITE, |
@@ -1182,6 +1202,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
1182 | spin_lock_irq(&ms->lock); | 1202 | spin_lock_irq(&ms->lock); |
1183 | bio_list_merge(&ms->failures, &sync); | 1203 | bio_list_merge(&ms->failures, &sync); |
1184 | spin_unlock_irq(&ms->lock); | 1204 | spin_unlock_irq(&ms->lock); |
1205 | wake(ms); | ||
1185 | } else | 1206 | } else |
1186 | while ((bio = bio_list_pop(&sync))) | 1207 | while ((bio = bio_list_pop(&sync))) |
1187 | do_write(ms, bio); | 1208 | do_write(ms, bio); |
@@ -1241,7 +1262,7 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) | |||
1241 | bio_list_merge(&ms->failures, failures); | 1262 | bio_list_merge(&ms->failures, failures); |
1242 | spin_unlock_irq(&ms->lock); | 1263 | spin_unlock_irq(&ms->lock); |
1243 | 1264 | ||
1244 | wake(ms); | 1265 | delayed_wake(ms); |
1245 | } | 1266 | } |
1246 | 1267 | ||
1247 | static void trigger_event(struct work_struct *work) | 1268 | static void trigger_event(struct work_struct *work) |
@@ -1255,7 +1276,7 @@ static void trigger_event(struct work_struct *work) | |||
1255 | /*----------------------------------------------------------------- | 1276 | /*----------------------------------------------------------------- |
1256 | * kmirrord | 1277 | * kmirrord |
1257 | *---------------------------------------------------------------*/ | 1278 | *---------------------------------------------------------------*/ |
1258 | static int _do_mirror(struct work_struct *work) | 1279 | static void do_mirror(struct work_struct *work) |
1259 | { | 1280 | { |
1260 | struct mirror_set *ms =container_of(work, struct mirror_set, | 1281 | struct mirror_set *ms =container_of(work, struct mirror_set, |
1261 | kmirrord_work); | 1282 | kmirrord_work); |
@@ -1277,23 +1298,7 @@ static int _do_mirror(struct work_struct *work) | |||
1277 | do_writes(ms, &writes); | 1298 | do_writes(ms, &writes); |
1278 | do_failures(ms, &failures); | 1299 | do_failures(ms, &failures); |
1279 | 1300 | ||
1280 | return (ms->failures.head) ? 1 : 0; | 1301 | dm_table_unplug_all(ms->ti->table); |
1281 | } | ||
1282 | |||
1283 | static void do_mirror(struct work_struct *work) | ||
1284 | { | ||
1285 | /* | ||
1286 | * If _do_mirror returns 1, we give it | ||
1287 | * another shot. This helps for cases like | ||
1288 | * 'suspend' where we call flush_workqueue | ||
1289 | * and expect all work to be finished. If | ||
1290 | * a failure happens during a suspend, we | ||
1291 | * couldn't issue a 'wake' because it would | ||
1292 | * not be honored. Therefore, we return '1' | ||
1293 | * from _do_mirror, and retry here. | ||
1294 | */ | ||
1295 | while (_do_mirror(work)) | ||
1296 | schedule(); | ||
1297 | } | 1302 | } |
1298 | 1303 | ||
1299 | 1304 | ||
@@ -1303,7 +1308,7 @@ static void do_mirror(struct work_struct *work) | |||
1303 | static struct mirror_set *alloc_context(unsigned int nr_mirrors, | 1308 | static struct mirror_set *alloc_context(unsigned int nr_mirrors, |
1304 | uint32_t region_size, | 1309 | uint32_t region_size, |
1305 | struct dm_target *ti, | 1310 | struct dm_target *ti, |
1306 | struct dirty_log *dl) | 1311 | struct dm_dirty_log *dl) |
1307 | { | 1312 | { |
1308 | size_t len; | 1313 | size_t len; |
1309 | struct mirror_set *ms = NULL; | 1314 | struct mirror_set *ms = NULL; |
@@ -1403,12 +1408,12 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, | |||
1403 | /* | 1408 | /* |
1404 | * Create dirty log: log_type #log_params <log_params> | 1409 | * Create dirty log: log_type #log_params <log_params> |
1405 | */ | 1410 | */ |
1406 | static struct dirty_log *create_dirty_log(struct dm_target *ti, | 1411 | static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, |
1407 | unsigned int argc, char **argv, | 1412 | unsigned int argc, char **argv, |
1408 | unsigned int *args_used) | 1413 | unsigned int *args_used) |
1409 | { | 1414 | { |
1410 | unsigned int param_count; | 1415 | unsigned int param_count; |
1411 | struct dirty_log *dl; | 1416 | struct dm_dirty_log *dl; |
1412 | 1417 | ||
1413 | if (argc < 2) { | 1418 | if (argc < 2) { |
1414 | ti->error = "Insufficient mirror log arguments"; | 1419 | ti->error = "Insufficient mirror log arguments"; |
@@ -1427,7 +1432,7 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti, | |||
1427 | return NULL; | 1432 | return NULL; |
1428 | } | 1433 | } |
1429 | 1434 | ||
1430 | dl = dm_create_dirty_log(argv[0], ti, param_count, argv + 2); | 1435 | dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2); |
1431 | if (!dl) { | 1436 | if (!dl) { |
1432 | ti->error = "Error creating mirror dirty log"; | 1437 | ti->error = "Error creating mirror dirty log"; |
1433 | return NULL; | 1438 | return NULL; |
@@ -1435,7 +1440,7 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti, | |||
1435 | 1440 | ||
1436 | if (!_check_region_size(ti, dl->type->get_region_size(dl))) { | 1441 | if (!_check_region_size(ti, dl->type->get_region_size(dl))) { |
1437 | ti->error = "Invalid region size"; | 1442 | ti->error = "Invalid region size"; |
1438 | dm_destroy_dirty_log(dl); | 1443 | dm_dirty_log_destroy(dl); |
1439 | return NULL; | 1444 | return NULL; |
1440 | } | 1445 | } |
1441 | 1446 | ||
@@ -1496,7 +1501,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1496 | int r; | 1501 | int r; |
1497 | unsigned int nr_mirrors, m, args_used; | 1502 | unsigned int nr_mirrors, m, args_used; |
1498 | struct mirror_set *ms; | 1503 | struct mirror_set *ms; |
1499 | struct dirty_log *dl; | 1504 | struct dm_dirty_log *dl; |
1500 | 1505 | ||
1501 | dl = create_dirty_log(ti, argc, argv, &args_used); | 1506 | dl = create_dirty_log(ti, argc, argv, &args_used); |
1502 | if (!dl) | 1507 | if (!dl) |
@@ -1506,9 +1511,9 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1506 | argc -= args_used; | 1511 | argc -= args_used; |
1507 | 1512 | ||
1508 | if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || | 1513 | if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || |
1509 | nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) { | 1514 | nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { |
1510 | ti->error = "Invalid number of mirrors"; | 1515 | ti->error = "Invalid number of mirrors"; |
1511 | dm_destroy_dirty_log(dl); | 1516 | dm_dirty_log_destroy(dl); |
1512 | return -EINVAL; | 1517 | return -EINVAL; |
1513 | } | 1518 | } |
1514 | 1519 | ||
@@ -1516,13 +1521,13 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1516 | 1521 | ||
1517 | if (argc < nr_mirrors * 2) { | 1522 | if (argc < nr_mirrors * 2) { |
1518 | ti->error = "Too few mirror arguments"; | 1523 | ti->error = "Too few mirror arguments"; |
1519 | dm_destroy_dirty_log(dl); | 1524 | dm_dirty_log_destroy(dl); |
1520 | return -EINVAL; | 1525 | return -EINVAL; |
1521 | } | 1526 | } |
1522 | 1527 | ||
1523 | ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl); | 1528 | ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl); |
1524 | if (!ms) { | 1529 | if (!ms) { |
1525 | dm_destroy_dirty_log(dl); | 1530 | dm_dirty_log_destroy(dl); |
1526 | return -ENOMEM; | 1531 | return -ENOMEM; |
1527 | } | 1532 | } |
1528 | 1533 | ||
@@ -1547,6 +1552,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1547 | goto err_free_context; | 1552 | goto err_free_context; |
1548 | } | 1553 | } |
1549 | INIT_WORK(&ms->kmirrord_work, do_mirror); | 1554 | INIT_WORK(&ms->kmirrord_work, do_mirror); |
1555 | init_timer(&ms->timer); | ||
1556 | ms->timer_pending = 0; | ||
1550 | INIT_WORK(&ms->trigger_event, trigger_event); | 1557 | INIT_WORK(&ms->trigger_event, trigger_event); |
1551 | 1558 | ||
1552 | r = parse_features(ms, argc, argv, &args_used); | 1559 | r = parse_features(ms, argc, argv, &args_used); |
@@ -1571,7 +1578,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1571 | goto err_destroy_wq; | 1578 | goto err_destroy_wq; |
1572 | } | 1579 | } |
1573 | 1580 | ||
1574 | r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); | 1581 | r = dm_kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); |
1575 | if (r) | 1582 | if (r) |
1576 | goto err_destroy_wq; | 1583 | goto err_destroy_wq; |
1577 | 1584 | ||
@@ -1589,8 +1596,9 @@ static void mirror_dtr(struct dm_target *ti) | |||
1589 | { | 1596 | { |
1590 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1597 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1591 | 1598 | ||
1599 | del_timer_sync(&ms->timer); | ||
1592 | flush_workqueue(ms->kmirrord_wq); | 1600 | flush_workqueue(ms->kmirrord_wq); |
1593 | kcopyd_client_destroy(ms->kcopyd_client); | 1601 | dm_kcopyd_client_destroy(ms->kcopyd_client); |
1594 | destroy_workqueue(ms->kmirrord_wq); | 1602 | destroy_workqueue(ms->kmirrord_wq); |
1595 | free_context(ms, ti, ms->nr_mirrors); | 1603 | free_context(ms, ti, ms->nr_mirrors); |
1596 | } | 1604 | } |
@@ -1734,7 +1742,7 @@ out: | |||
1734 | static void mirror_presuspend(struct dm_target *ti) | 1742 | static void mirror_presuspend(struct dm_target *ti) |
1735 | { | 1743 | { |
1736 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1744 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1737 | struct dirty_log *log = ms->rh.log; | 1745 | struct dm_dirty_log *log = ms->rh.log; |
1738 | 1746 | ||
1739 | atomic_set(&ms->suspend, 1); | 1747 | atomic_set(&ms->suspend, 1); |
1740 | 1748 | ||
@@ -1763,7 +1771,7 @@ static void mirror_presuspend(struct dm_target *ti) | |||
1763 | static void mirror_postsuspend(struct dm_target *ti) | 1771 | static void mirror_postsuspend(struct dm_target *ti) |
1764 | { | 1772 | { |
1765 | struct mirror_set *ms = ti->private; | 1773 | struct mirror_set *ms = ti->private; |
1766 | struct dirty_log *log = ms->rh.log; | 1774 | struct dm_dirty_log *log = ms->rh.log; |
1767 | 1775 | ||
1768 | if (log->type->postsuspend && log->type->postsuspend(log)) | 1776 | if (log->type->postsuspend && log->type->postsuspend(log)) |
1769 | /* FIXME: need better error handling */ | 1777 | /* FIXME: need better error handling */ |
@@ -1773,7 +1781,7 @@ static void mirror_postsuspend(struct dm_target *ti) | |||
1773 | static void mirror_resume(struct dm_target *ti) | 1781 | static void mirror_resume(struct dm_target *ti) |
1774 | { | 1782 | { |
1775 | struct mirror_set *ms = ti->private; | 1783 | struct mirror_set *ms = ti->private; |
1776 | struct dirty_log *log = ms->rh.log; | 1784 | struct dm_dirty_log *log = ms->rh.log; |
1777 | 1785 | ||
1778 | atomic_set(&ms->suspend, 0); | 1786 | atomic_set(&ms->suspend, 0); |
1779 | if (log->type->resume && log->type->resume(log)) | 1787 | if (log->type->resume && log->type->resume(log)) |
@@ -1811,7 +1819,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type, | |||
1811 | { | 1819 | { |
1812 | unsigned int m, sz = 0; | 1820 | unsigned int m, sz = 0; |
1813 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1821 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1814 | struct dirty_log *log = ms->rh.log; | 1822 | struct dm_dirty_log *log = ms->rh.log; |
1815 | char buffer[ms->nr_mirrors + 1]; | 1823 | char buffer[ms->nr_mirrors + 1]; |
1816 | 1824 | ||
1817 | switch (type) { | 1825 | switch (type) { |
@@ -1864,15 +1872,9 @@ static int __init dm_mirror_init(void) | |||
1864 | { | 1872 | { |
1865 | int r; | 1873 | int r; |
1866 | 1874 | ||
1867 | r = dm_dirty_log_init(); | ||
1868 | if (r) | ||
1869 | return r; | ||
1870 | |||
1871 | r = dm_register_target(&mirror_target); | 1875 | r = dm_register_target(&mirror_target); |
1872 | if (r < 0) { | 1876 | if (r < 0) |
1873 | DMERR("Failed to register mirror target"); | 1877 | DMERR("Failed to register mirror target"); |
1874 | dm_dirty_log_exit(); | ||
1875 | } | ||
1876 | 1878 | ||
1877 | return r; | 1879 | return r; |
1878 | } | 1880 | } |
@@ -1884,8 +1886,6 @@ static void __exit dm_mirror_exit(void) | |||
1884 | r = dm_unregister_target(&mirror_target); | 1886 | r = dm_unregister_target(&mirror_target); |
1885 | if (r < 0) | 1887 | if (r < 0) |
1886 | DMERR("unregister failed %d", r); | 1888 | DMERR("unregister failed %d", r); |
1887 | |||
1888 | dm_dirty_log_exit(); | ||
1889 | } | 1889 | } |
1890 | 1890 | ||
1891 | /* Module hooks */ | 1891 | /* Module hooks */ |
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4dc8a43c034b..1ba8a47d61b1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -18,10 +18,10 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/vmalloc.h> | 19 | #include <linux/vmalloc.h> |
20 | #include <linux/log2.h> | 20 | #include <linux/log2.h> |
21 | #include <linux/dm-kcopyd.h> | ||
21 | 22 | ||
22 | #include "dm-snap.h" | 23 | #include "dm-snap.h" |
23 | #include "dm-bio-list.h" | 24 | #include "dm-bio-list.h" |
24 | #include "kcopyd.h" | ||
25 | 25 | ||
26 | #define DM_MSG_PREFIX "snapshots" | 26 | #define DM_MSG_PREFIX "snapshots" |
27 | 27 | ||
@@ -36,9 +36,9 @@ | |||
36 | #define SNAPSHOT_COPY_PRIORITY 2 | 36 | #define SNAPSHOT_COPY_PRIORITY 2 |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Each snapshot reserves this many pages for io | 39 | * Reserve 1MB for each snapshot initially (with minimum of 1 page). |
40 | */ | 40 | */ |
41 | #define SNAPSHOT_PAGES 256 | 41 | #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) |
42 | 42 | ||
43 | static struct workqueue_struct *ksnapd; | 43 | static struct workqueue_struct *ksnapd; |
44 | static void flush_queued_bios(struct work_struct *work); | 44 | static void flush_queued_bios(struct work_struct *work); |
@@ -536,7 +536,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
536 | s->last_percent = 0; | 536 | s->last_percent = 0; |
537 | init_rwsem(&s->lock); | 537 | init_rwsem(&s->lock); |
538 | spin_lock_init(&s->pe_lock); | 538 | spin_lock_init(&s->pe_lock); |
539 | s->table = ti->table; | 539 | s->ti = ti; |
540 | 540 | ||
541 | /* Allocate hash table for COW data */ | 541 | /* Allocate hash table for COW data */ |
542 | if (init_hash_tables(s)) { | 542 | if (init_hash_tables(s)) { |
@@ -558,7 +558,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
558 | goto bad4; | 558 | goto bad4; |
559 | } | 559 | } |
560 | 560 | ||
561 | r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); | 561 | r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); |
562 | if (r) { | 562 | if (r) { |
563 | ti->error = "Could not create kcopyd client"; | 563 | ti->error = "Could not create kcopyd client"; |
564 | goto bad5; | 564 | goto bad5; |
@@ -591,7 +591,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
591 | return 0; | 591 | return 0; |
592 | 592 | ||
593 | bad6: | 593 | bad6: |
594 | kcopyd_client_destroy(s->kcopyd_client); | 594 | dm_kcopyd_client_destroy(s->kcopyd_client); |
595 | 595 | ||
596 | bad5: | 596 | bad5: |
597 | s->store.destroy(&s->store); | 597 | s->store.destroy(&s->store); |
@@ -613,7 +613,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
613 | 613 | ||
614 | static void __free_exceptions(struct dm_snapshot *s) | 614 | static void __free_exceptions(struct dm_snapshot *s) |
615 | { | 615 | { |
616 | kcopyd_client_destroy(s->kcopyd_client); | 616 | dm_kcopyd_client_destroy(s->kcopyd_client); |
617 | s->kcopyd_client = NULL; | 617 | s->kcopyd_client = NULL; |
618 | 618 | ||
619 | exit_exception_table(&s->pending, pending_cache); | 619 | exit_exception_table(&s->pending, pending_cache); |
@@ -699,7 +699,7 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) | |||
699 | 699 | ||
700 | s->valid = 0; | 700 | s->valid = 0; |
701 | 701 | ||
702 | dm_table_event(s->table); | 702 | dm_table_event(s->ti->table); |
703 | } | 703 | } |
704 | 704 | ||
705 | static void get_pending_exception(struct dm_snap_pending_exception *pe) | 705 | static void get_pending_exception(struct dm_snap_pending_exception *pe) |
@@ -824,7 +824,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) | |||
824 | static void start_copy(struct dm_snap_pending_exception *pe) | 824 | static void start_copy(struct dm_snap_pending_exception *pe) |
825 | { | 825 | { |
826 | struct dm_snapshot *s = pe->snap; | 826 | struct dm_snapshot *s = pe->snap; |
827 | struct io_region src, dest; | 827 | struct dm_io_region src, dest; |
828 | struct block_device *bdev = s->origin->bdev; | 828 | struct block_device *bdev = s->origin->bdev; |
829 | sector_t dev_size; | 829 | sector_t dev_size; |
830 | 830 | ||
@@ -839,7 +839,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) | |||
839 | dest.count = src.count; | 839 | dest.count = src.count; |
840 | 840 | ||
841 | /* Hand over to kcopyd */ | 841 | /* Hand over to kcopyd */ |
842 | kcopyd_copy(s->kcopyd_client, | 842 | dm_kcopyd_copy(s->kcopyd_client, |
843 | &src, 1, &dest, 0, copy_callback, pe); | 843 | &src, 1, &dest, 0, copy_callback, pe); |
844 | } | 844 | } |
845 | 845 | ||
@@ -1060,7 +1060,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) | |||
1060 | goto next_snapshot; | 1060 | goto next_snapshot; |
1061 | 1061 | ||
1062 | /* Nothing to do if writing beyond end of snapshot */ | 1062 | /* Nothing to do if writing beyond end of snapshot */ |
1063 | if (bio->bi_sector >= dm_table_get_size(snap->table)) | 1063 | if (bio->bi_sector >= dm_table_get_size(snap->ti->table)) |
1064 | goto next_snapshot; | 1064 | goto next_snapshot; |
1065 | 1065 | ||
1066 | /* | 1066 | /* |
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 93bce5d49742..24f9fb73b982 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h | |||
@@ -132,7 +132,7 @@ struct exception_store { | |||
132 | 132 | ||
133 | struct dm_snapshot { | 133 | struct dm_snapshot { |
134 | struct rw_semaphore lock; | 134 | struct rw_semaphore lock; |
135 | struct dm_table *table; | 135 | struct dm_target *ti; |
136 | 136 | ||
137 | struct dm_dev *origin; | 137 | struct dm_dev *origin; |
138 | struct dm_dev *cow; | 138 | struct dm_dev *cow; |
@@ -169,7 +169,7 @@ struct dm_snapshot { | |||
169 | /* The on disk metadata handler */ | 169 | /* The on disk metadata handler */ |
170 | struct exception_store store; | 170 | struct exception_store store; |
171 | 171 | ||
172 | struct kcopyd_client *kcopyd_client; | 172 | struct dm_kcopyd_client *kcopyd_client; |
173 | 173 | ||
174 | /* Queue of snapshot writes for ksnapd to flush */ | 174 | /* Queue of snapshot writes for ksnapd to flush */ |
175 | struct bio_list queued_bios; | 175 | struct bio_list queued_bios; |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e75b1437b58b..94116eaf4709 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -245,44 +245,6 @@ int dm_table_create(struct dm_table **result, int mode, | |||
245 | return 0; | 245 | return 0; |
246 | } | 246 | } |
247 | 247 | ||
248 | int dm_create_error_table(struct dm_table **result, struct mapped_device *md) | ||
249 | { | ||
250 | struct dm_table *t; | ||
251 | sector_t dev_size = 1; | ||
252 | int r; | ||
253 | |||
254 | /* | ||
255 | * Find current size of device. | ||
256 | * Default to 1 sector if inactive. | ||
257 | */ | ||
258 | t = dm_get_table(md); | ||
259 | if (t) { | ||
260 | dev_size = dm_table_get_size(t); | ||
261 | dm_table_put(t); | ||
262 | } | ||
263 | |||
264 | r = dm_table_create(&t, FMODE_READ, 1, md); | ||
265 | if (r) | ||
266 | return r; | ||
267 | |||
268 | r = dm_table_add_target(t, "error", 0, dev_size, NULL); | ||
269 | if (r) | ||
270 | goto out; | ||
271 | |||
272 | r = dm_table_complete(t); | ||
273 | if (r) | ||
274 | goto out; | ||
275 | |||
276 | *result = t; | ||
277 | |||
278 | out: | ||
279 | if (r) | ||
280 | dm_table_put(t); | ||
281 | |||
282 | return r; | ||
283 | } | ||
284 | EXPORT_SYMBOL_GPL(dm_create_error_table); | ||
285 | |||
286 | static void free_devices(struct list_head *devices) | 248 | static void free_devices(struct list_head *devices) |
287 | { | 249 | { |
288 | struct list_head *tmp, *next; | 250 | struct list_head *tmp, *next; |
@@ -911,10 +873,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) | |||
911 | q->max_hw_sectors = t->limits.max_hw_sectors; | 873 | q->max_hw_sectors = t->limits.max_hw_sectors; |
912 | q->seg_boundary_mask = t->limits.seg_boundary_mask; | 874 | q->seg_boundary_mask = t->limits.seg_boundary_mask; |
913 | q->bounce_pfn = t->limits.bounce_pfn; | 875 | q->bounce_pfn = t->limits.bounce_pfn; |
876 | |||
914 | if (t->limits.no_cluster) | 877 | if (t->limits.no_cluster) |
915 | q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER); | 878 | queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); |
916 | else | 879 | else |
917 | q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER); | 880 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); |
918 | 881 | ||
919 | } | 882 | } |
920 | 883 | ||
@@ -954,7 +917,7 @@ void dm_table_presuspend_targets(struct dm_table *t) | |||
954 | if (!t) | 917 | if (!t) |
955 | return; | 918 | return; |
956 | 919 | ||
957 | return suspend_targets(t, 0); | 920 | suspend_targets(t, 0); |
958 | } | 921 | } |
959 | 922 | ||
960 | void dm_table_postsuspend_targets(struct dm_table *t) | 923 | void dm_table_postsuspend_targets(struct dm_table *t) |
@@ -962,7 +925,7 @@ void dm_table_postsuspend_targets(struct dm_table *t) | |||
962 | if (!t) | 925 | if (!t) |
963 | return; | 926 | return; |
964 | 927 | ||
965 | return suspend_targets(t, 1); | 928 | suspend_targets(t, 1); |
966 | } | 929 | } |
967 | 930 | ||
968 | int dm_table_resume_targets(struct dm_table *t) | 931 | int dm_table_resume_targets(struct dm_table *t) |
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c index 50377e5dc2a3..6f65883aef12 100644 --- a/drivers/md/dm-uevent.c +++ b/drivers/md/dm-uevent.c | |||
@@ -78,7 +78,7 @@ static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md, | |||
78 | 78 | ||
79 | event = dm_uevent_alloc(md); | 79 | event = dm_uevent_alloc(md); |
80 | if (!event) { | 80 | if (!event) { |
81 | DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__); | 81 | DMERR("%s: dm_uevent_alloc() failed", __func__); |
82 | goto err_nomem; | 82 | goto err_nomem; |
83 | } | 83 | } |
84 | 84 | ||
@@ -86,32 +86,32 @@ static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md, | |||
86 | 86 | ||
87 | if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) { | 87 | if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) { |
88 | DMERR("%s: add_uevent_var() for DM_TARGET failed", | 88 | DMERR("%s: add_uevent_var() for DM_TARGET failed", |
89 | __FUNCTION__); | 89 | __func__); |
90 | goto err_add; | 90 | goto err_add; |
91 | } | 91 | } |
92 | 92 | ||
93 | if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) { | 93 | if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) { |
94 | DMERR("%s: add_uevent_var() for DM_ACTION failed", | 94 | DMERR("%s: add_uevent_var() for DM_ACTION failed", |
95 | __FUNCTION__); | 95 | __func__); |
96 | goto err_add; | 96 | goto err_add; |
97 | } | 97 | } |
98 | 98 | ||
99 | if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u", | 99 | if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u", |
100 | dm_next_uevent_seq(md))) { | 100 | dm_next_uevent_seq(md))) { |
101 | DMERR("%s: add_uevent_var() for DM_SEQNUM failed", | 101 | DMERR("%s: add_uevent_var() for DM_SEQNUM failed", |
102 | __FUNCTION__); | 102 | __func__); |
103 | goto err_add; | 103 | goto err_add; |
104 | } | 104 | } |
105 | 105 | ||
106 | if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) { | 106 | if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) { |
107 | DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__); | 107 | DMERR("%s: add_uevent_var() for DM_PATH failed", __func__); |
108 | goto err_add; | 108 | goto err_add; |
109 | } | 109 | } |
110 | 110 | ||
111 | if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d", | 111 | if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d", |
112 | nr_valid_paths)) { | 112 | nr_valid_paths)) { |
113 | DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed", | 113 | DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed", |
114 | __FUNCTION__); | 114 | __func__); |
115 | goto err_add; | 115 | goto err_add; |
116 | } | 116 | } |
117 | 117 | ||
@@ -146,25 +146,25 @@ void dm_send_uevents(struct list_head *events, struct kobject *kobj) | |||
146 | if (dm_copy_name_and_uuid(event->md, event->name, | 146 | if (dm_copy_name_and_uuid(event->md, event->name, |
147 | event->uuid)) { | 147 | event->uuid)) { |
148 | DMERR("%s: dm_copy_name_and_uuid() failed", | 148 | DMERR("%s: dm_copy_name_and_uuid() failed", |
149 | __FUNCTION__); | 149 | __func__); |
150 | goto uevent_free; | 150 | goto uevent_free; |
151 | } | 151 | } |
152 | 152 | ||
153 | if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) { | 153 | if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) { |
154 | DMERR("%s: add_uevent_var() for DM_NAME failed", | 154 | DMERR("%s: add_uevent_var() for DM_NAME failed", |
155 | __FUNCTION__); | 155 | __func__); |
156 | goto uevent_free; | 156 | goto uevent_free; |
157 | } | 157 | } |
158 | 158 | ||
159 | if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) { | 159 | if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) { |
160 | DMERR("%s: add_uevent_var() for DM_UUID failed", | 160 | DMERR("%s: add_uevent_var() for DM_UUID failed", |
161 | __FUNCTION__); | 161 | __func__); |
162 | goto uevent_free; | 162 | goto uevent_free; |
163 | } | 163 | } |
164 | 164 | ||
165 | r = kobject_uevent_env(kobj, event->action, event->ku_env.envp); | 165 | r = kobject_uevent_env(kobj, event->action, event->ku_env.envp); |
166 | if (r) | 166 | if (r) |
167 | DMERR("%s: kobject_uevent_env failed", __FUNCTION__); | 167 | DMERR("%s: kobject_uevent_env failed", __func__); |
168 | uevent_free: | 168 | uevent_free: |
169 | dm_uevent_free(event); | 169 | dm_uevent_free(event); |
170 | } | 170 | } |
@@ -187,7 +187,7 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, | |||
187 | struct dm_uevent *event; | 187 | struct dm_uevent *event; |
188 | 188 | ||
189 | if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { | 189 | if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { |
190 | DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type); | 190 | DMERR("%s: Invalid event_type %d", __func__, event_type); |
191 | goto out; | 191 | goto out; |
192 | } | 192 | } |
193 | 193 | ||
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6617ce4af095..372369b1cc20 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -204,6 +204,7 @@ static int (*_inits[])(void) __initdata = { | |||
204 | dm_target_init, | 204 | dm_target_init, |
205 | dm_linear_init, | 205 | dm_linear_init, |
206 | dm_stripe_init, | 206 | dm_stripe_init, |
207 | dm_kcopyd_init, | ||
207 | dm_interface_init, | 208 | dm_interface_init, |
208 | }; | 209 | }; |
209 | 210 | ||
@@ -212,6 +213,7 @@ static void (*_exits[])(void) = { | |||
212 | dm_target_exit, | 213 | dm_target_exit, |
213 | dm_linear_exit, | 214 | dm_linear_exit, |
214 | dm_stripe_exit, | 215 | dm_stripe_exit, |
216 | dm_kcopyd_exit, | ||
215 | dm_interface_exit, | 217 | dm_interface_exit, |
216 | }; | 218 | }; |
217 | 219 | ||
@@ -922,7 +924,7 @@ static void free_minor(int minor) | |||
922 | /* | 924 | /* |
923 | * See if the device with a specific minor # is free. | 925 | * See if the device with a specific minor # is free. |
924 | */ | 926 | */ |
925 | static int specific_minor(struct mapped_device *md, int minor) | 927 | static int specific_minor(int minor) |
926 | { | 928 | { |
927 | int r, m; | 929 | int r, m; |
928 | 930 | ||
@@ -955,7 +957,7 @@ out: | |||
955 | return r; | 957 | return r; |
956 | } | 958 | } |
957 | 959 | ||
958 | static int next_free_minor(struct mapped_device *md, int *minor) | 960 | static int next_free_minor(int *minor) |
959 | { | 961 | { |
960 | int r, m; | 962 | int r, m; |
961 | 963 | ||
@@ -966,9 +968,8 @@ static int next_free_minor(struct mapped_device *md, int *minor) | |||
966 | spin_lock(&_minor_lock); | 968 | spin_lock(&_minor_lock); |
967 | 969 | ||
968 | r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); | 970 | r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); |
969 | if (r) { | 971 | if (r) |
970 | goto out; | 972 | goto out; |
971 | } | ||
972 | 973 | ||
973 | if (m >= (1 << MINORBITS)) { | 974 | if (m >= (1 << MINORBITS)) { |
974 | idr_remove(&_minor_idr, m); | 975 | idr_remove(&_minor_idr, m); |
@@ -991,7 +992,7 @@ static struct block_device_operations dm_blk_dops; | |||
991 | static struct mapped_device *alloc_dev(int minor) | 992 | static struct mapped_device *alloc_dev(int minor) |
992 | { | 993 | { |
993 | int r; | 994 | int r; |
994 | struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); | 995 | struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); |
995 | void *old_md; | 996 | void *old_md; |
996 | 997 | ||
997 | if (!md) { | 998 | if (!md) { |
@@ -1004,13 +1005,12 @@ static struct mapped_device *alloc_dev(int minor) | |||
1004 | 1005 | ||
1005 | /* get a minor number for the dev */ | 1006 | /* get a minor number for the dev */ |
1006 | if (minor == DM_ANY_MINOR) | 1007 | if (minor == DM_ANY_MINOR) |
1007 | r = next_free_minor(md, &minor); | 1008 | r = next_free_minor(&minor); |
1008 | else | 1009 | else |
1009 | r = specific_minor(md, minor); | 1010 | r = specific_minor(minor); |
1010 | if (r < 0) | 1011 | if (r < 0) |
1011 | goto bad_minor; | 1012 | goto bad_minor; |
1012 | 1013 | ||
1013 | memset(md, 0, sizeof(*md)); | ||
1014 | init_rwsem(&md->io_lock); | 1014 | init_rwsem(&md->io_lock); |
1015 | mutex_init(&md->suspend_lock); | 1015 | mutex_init(&md->suspend_lock); |
1016 | spin_lock_init(&md->pushback_lock); | 1016 | spin_lock_init(&md->pushback_lock); |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b4584a39383b..8c03b634e62e 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -16,67 +16,6 @@ | |||
16 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
17 | #include <linux/hdreg.h> | 17 | #include <linux/hdreg.h> |
18 | 18 | ||
19 | #define DM_NAME "device-mapper" | ||
20 | |||
21 | #define DMERR(f, arg...) \ | ||
22 | printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) | ||
23 | #define DMERR_LIMIT(f, arg...) \ | ||
24 | do { \ | ||
25 | if (printk_ratelimit()) \ | ||
26 | printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \ | ||
27 | f "\n", ## arg); \ | ||
28 | } while (0) | ||
29 | |||
30 | #define DMWARN(f, arg...) \ | ||
31 | printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) | ||
32 | #define DMWARN_LIMIT(f, arg...) \ | ||
33 | do { \ | ||
34 | if (printk_ratelimit()) \ | ||
35 | printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \ | ||
36 | f "\n", ## arg); \ | ||
37 | } while (0) | ||
38 | |||
39 | #define DMINFO(f, arg...) \ | ||
40 | printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) | ||
41 | #define DMINFO_LIMIT(f, arg...) \ | ||
42 | do { \ | ||
43 | if (printk_ratelimit()) \ | ||
44 | printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \ | ||
45 | "\n", ## arg); \ | ||
46 | } while (0) | ||
47 | |||
48 | #ifdef CONFIG_DM_DEBUG | ||
49 | # define DMDEBUG(f, arg...) \ | ||
50 | printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) | ||
51 | # define DMDEBUG_LIMIT(f, arg...) \ | ||
52 | do { \ | ||
53 | if (printk_ratelimit()) \ | ||
54 | printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \ | ||
55 | "\n", ## arg); \ | ||
56 | } while (0) | ||
57 | #else | ||
58 | # define DMDEBUG(f, arg...) do {} while (0) | ||
59 | # define DMDEBUG_LIMIT(f, arg...) do {} while (0) | ||
60 | #endif | ||
61 | |||
62 | #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ | ||
63 | 0 : scnprintf(result + sz, maxlen - sz, x)) | ||
64 | |||
65 | #define SECTOR_SHIFT 9 | ||
66 | |||
67 | /* | ||
68 | * Definitions of return values from target end_io function. | ||
69 | */ | ||
70 | #define DM_ENDIO_INCOMPLETE 1 | ||
71 | #define DM_ENDIO_REQUEUE 2 | ||
72 | |||
73 | /* | ||
74 | * Definitions of return values from target map function. | ||
75 | */ | ||
76 | #define DM_MAPIO_SUBMITTED 0 | ||
77 | #define DM_MAPIO_REMAPPED 1 | ||
78 | #define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE | ||
79 | |||
80 | /* | 19 | /* |
81 | * Suspend feature flags | 20 | * Suspend feature flags |
82 | */ | 21 | */ |
@@ -136,34 +75,6 @@ static inline int array_too_big(unsigned long fixed, unsigned long obj, | |||
136 | return (num > (ULONG_MAX - fixed) / obj); | 75 | return (num > (ULONG_MAX - fixed) / obj); |
137 | } | 76 | } |
138 | 77 | ||
139 | /* | ||
140 | * Ceiling(n / sz) | ||
141 | */ | ||
142 | #define dm_div_up(n, sz) (((n) + (sz) - 1) / (sz)) | ||
143 | |||
144 | #define dm_sector_div_up(n, sz) ( \ | ||
145 | { \ | ||
146 | sector_t _r = ((n) + (sz) - 1); \ | ||
147 | sector_div(_r, (sz)); \ | ||
148 | _r; \ | ||
149 | } \ | ||
150 | ) | ||
151 | |||
152 | /* | ||
153 | * ceiling(n / size) * size | ||
154 | */ | ||
155 | #define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz)) | ||
156 | |||
157 | static inline sector_t to_sector(unsigned long n) | ||
158 | { | ||
159 | return (n >> 9); | ||
160 | } | ||
161 | |||
162 | static inline unsigned long to_bytes(sector_t n) | ||
163 | { | ||
164 | return (n << 9); | ||
165 | } | ||
166 | |||
167 | int dm_split_args(int *argc, char ***argvp, char *input); | 78 | int dm_split_args(int *argc, char ***argvp, char *input); |
168 | 79 | ||
169 | /* | 80 | /* |
@@ -189,4 +100,13 @@ int dm_lock_for_deletion(struct mapped_device *md); | |||
189 | 100 | ||
190 | void dm_kobject_uevent(struct mapped_device *md); | 101 | void dm_kobject_uevent(struct mapped_device *md); |
191 | 102 | ||
103 | /* | ||
104 | * Dirty log | ||
105 | */ | ||
106 | int dm_dirty_log_init(void); | ||
107 | void dm_dirty_log_exit(void); | ||
108 | |||
109 | int dm_kcopyd_init(void); | ||
110 | void dm_kcopyd_exit(void); | ||
111 | |||
192 | #endif | 112 | #endif |
diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h deleted file mode 100644 index 4845f2a0c676..000000000000 --- a/drivers/md/kcopyd.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001 Sistina Software | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | * | ||
6 | * Kcopyd provides a simple interface for copying an area of one | ||
7 | * block-device to one or more other block-devices, with an asynchronous | ||
8 | * completion notification. | ||
9 | */ | ||
10 | |||
11 | #ifndef DM_KCOPYD_H | ||
12 | #define DM_KCOPYD_H | ||
13 | |||
14 | #include "dm-io.h" | ||
15 | |||
16 | /* FIXME: make this configurable */ | ||
17 | #define KCOPYD_MAX_REGIONS 8 | ||
18 | |||
19 | #define KCOPYD_IGNORE_ERROR 1 | ||
20 | |||
21 | /* | ||
22 | * To use kcopyd you must first create a kcopyd client object. | ||
23 | */ | ||
24 | struct kcopyd_client; | ||
25 | int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result); | ||
26 | void kcopyd_client_destroy(struct kcopyd_client *kc); | ||
27 | |||
28 | /* | ||
29 | * Submit a copy job to kcopyd. This is built on top of the | ||
30 | * previous three fns. | ||
31 | * | ||
32 | * read_err is a boolean, | ||
33 | * write_err is a bitset, with 1 bit for each destination region | ||
34 | */ | ||
35 | typedef void (*kcopyd_notify_fn)(int read_err, unsigned long write_err, | ||
36 | void *context); | ||
37 | |||
38 | int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, | ||
39 | unsigned int num_dests, struct io_region *dests, | ||
40 | unsigned int flags, kcopyd_notify_fn fn, void *context); | ||
41 | |||
42 | #endif | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5ebfb4d79901..83eb78b00137 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -276,13 +276,15 @@ static mddev_t * mddev_find(dev_t unit) | |||
276 | init_waitqueue_head(&new->sb_wait); | 276 | init_waitqueue_head(&new->sb_wait); |
277 | new->reshape_position = MaxSector; | 277 | new->reshape_position = MaxSector; |
278 | new->resync_max = MaxSector; | 278 | new->resync_max = MaxSector; |
279 | new->level = LEVEL_NONE; | ||
279 | 280 | ||
280 | new->queue = blk_alloc_queue(GFP_KERNEL); | 281 | new->queue = blk_alloc_queue(GFP_KERNEL); |
281 | if (!new->queue) { | 282 | if (!new->queue) { |
282 | kfree(new); | 283 | kfree(new); |
283 | return NULL; | 284 | return NULL; |
284 | } | 285 | } |
285 | set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); | 286 | /* Can be unlocked because the queue is new: no concurrency */ |
287 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); | ||
286 | 288 | ||
287 | blk_queue_make_request(new->queue, md_fail_request); | 289 | blk_queue_make_request(new->queue, md_fail_request); |
288 | 290 | ||
@@ -731,9 +733,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
731 | else | 733 | else |
732 | rdev->desc_nr = sb->this_disk.number; | 734 | rdev->desc_nr = sb->this_disk.number; |
733 | 735 | ||
734 | if (refdev == 0) | 736 | if (!refdev) { |
735 | ret = 1; | 737 | ret = 1; |
736 | else { | 738 | } else { |
737 | __u64 ev1, ev2; | 739 | __u64 ev1, ev2; |
738 | mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page); | 740 | mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page); |
739 | if (!uuid_equal(refsb, sb)) { | 741 | if (!uuid_equal(refsb, sb)) { |
@@ -1116,9 +1118,9 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1116 | else | 1118 | else |
1117 | rdev->desc_nr = le32_to_cpu(sb->dev_number); | 1119 | rdev->desc_nr = le32_to_cpu(sb->dev_number); |
1118 | 1120 | ||
1119 | if (refdev == 0) | 1121 | if (!refdev) { |
1120 | ret = 1; | 1122 | ret = 1; |
1121 | else { | 1123 | } else { |
1122 | __u64 ev1, ev2; | 1124 | __u64 ev1, ev2; |
1123 | struct mdp_superblock_1 *refsb = | 1125 | struct mdp_superblock_1 *refsb = |
1124 | (struct mdp_superblock_1*)page_address(refdev->sb_page); | 1126 | (struct mdp_superblock_1*)page_address(refdev->sb_page); |
@@ -1368,6 +1370,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1368 | MD_BUG(); | 1370 | MD_BUG(); |
1369 | return -EINVAL; | 1371 | return -EINVAL; |
1370 | } | 1372 | } |
1373 | |||
1374 | /* prevent duplicates */ | ||
1375 | if (find_rdev(mddev, rdev->bdev->bd_dev)) | ||
1376 | return -EEXIST; | ||
1377 | |||
1371 | /* make sure rdev->size exceeds mddev->size */ | 1378 | /* make sure rdev->size exceeds mddev->size */ |
1372 | if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) { | 1379 | if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) { |
1373 | if (mddev->pers) { | 1380 | if (mddev->pers) { |
@@ -1651,6 +1658,8 @@ static void md_update_sb(mddev_t * mddev, int force_change) | |||
1651 | int sync_req; | 1658 | int sync_req; |
1652 | int nospares = 0; | 1659 | int nospares = 0; |
1653 | 1660 | ||
1661 | if (mddev->external) | ||
1662 | return; | ||
1654 | repeat: | 1663 | repeat: |
1655 | spin_lock_irq(&mddev->write_lock); | 1664 | spin_lock_irq(&mddev->write_lock); |
1656 | 1665 | ||
@@ -1819,6 +1828,10 @@ state_show(mdk_rdev_t *rdev, char *page) | |||
1819 | len += sprintf(page+len, "%swrite_mostly",sep); | 1828 | len += sprintf(page+len, "%swrite_mostly",sep); |
1820 | sep = ","; | 1829 | sep = ","; |
1821 | } | 1830 | } |
1831 | if (test_bit(Blocked, &rdev->flags)) { | ||
1832 | len += sprintf(page+len, "%sblocked", sep); | ||
1833 | sep = ","; | ||
1834 | } | ||
1822 | if (!test_bit(Faulty, &rdev->flags) && | 1835 | if (!test_bit(Faulty, &rdev->flags) && |
1823 | !test_bit(In_sync, &rdev->flags)) { | 1836 | !test_bit(In_sync, &rdev->flags)) { |
1824 | len += sprintf(page+len, "%sspare", sep); | 1837 | len += sprintf(page+len, "%sspare", sep); |
@@ -1835,6 +1848,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1835 | * remove - disconnects the device | 1848 | * remove - disconnects the device |
1836 | * writemostly - sets write_mostly | 1849 | * writemostly - sets write_mostly |
1837 | * -writemostly - clears write_mostly | 1850 | * -writemostly - clears write_mostly |
1851 | * blocked - sets the Blocked flag | ||
1852 | * -blocked - clears the Blocked flag | ||
1838 | */ | 1853 | */ |
1839 | int err = -EINVAL; | 1854 | int err = -EINVAL; |
1840 | if (cmd_match(buf, "faulty") && rdev->mddev->pers) { | 1855 | if (cmd_match(buf, "faulty") && rdev->mddev->pers) { |
@@ -1857,6 +1872,16 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1857 | } else if (cmd_match(buf, "-writemostly")) { | 1872 | } else if (cmd_match(buf, "-writemostly")) { |
1858 | clear_bit(WriteMostly, &rdev->flags); | 1873 | clear_bit(WriteMostly, &rdev->flags); |
1859 | err = 0; | 1874 | err = 0; |
1875 | } else if (cmd_match(buf, "blocked")) { | ||
1876 | set_bit(Blocked, &rdev->flags); | ||
1877 | err = 0; | ||
1878 | } else if (cmd_match(buf, "-blocked")) { | ||
1879 | clear_bit(Blocked, &rdev->flags); | ||
1880 | wake_up(&rdev->blocked_wait); | ||
1881 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); | ||
1882 | md_wakeup_thread(rdev->mddev->thread); | ||
1883 | |||
1884 | err = 0; | ||
1860 | } | 1885 | } |
1861 | return err ? err : len; | 1886 | return err ? err : len; |
1862 | } | 1887 | } |
@@ -2096,7 +2121,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, | |||
2096 | rv = -EBUSY; | 2121 | rv = -EBUSY; |
2097 | else | 2122 | else |
2098 | rv = entry->store(rdev, page, length); | 2123 | rv = entry->store(rdev, page, length); |
2099 | mddev_unlock(rdev->mddev); | 2124 | mddev_unlock(mddev); |
2100 | } | 2125 | } |
2101 | return rv; | 2126 | return rv; |
2102 | } | 2127 | } |
@@ -2185,7 +2210,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2185 | goto abort_free; | 2210 | goto abort_free; |
2186 | } | 2211 | } |
2187 | } | 2212 | } |
2213 | |||
2188 | INIT_LIST_HEAD(&rdev->same_set); | 2214 | INIT_LIST_HEAD(&rdev->same_set); |
2215 | init_waitqueue_head(&rdev->blocked_wait); | ||
2189 | 2216 | ||
2190 | return rdev; | 2217 | return rdev; |
2191 | 2218 | ||
@@ -2456,7 +2483,6 @@ resync_start_show(mddev_t *mddev, char *page) | |||
2456 | static ssize_t | 2483 | static ssize_t |
2457 | resync_start_store(mddev_t *mddev, const char *buf, size_t len) | 2484 | resync_start_store(mddev_t *mddev, const char *buf, size_t len) |
2458 | { | 2485 | { |
2459 | /* can only set chunk_size if array is not yet active */ | ||
2460 | char *e; | 2486 | char *e; |
2461 | unsigned long long n = simple_strtoull(buf, &e, 10); | 2487 | unsigned long long n = simple_strtoull(buf, &e, 10); |
2462 | 2488 | ||
@@ -2590,15 +2616,20 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2590 | err = do_md_stop(mddev, 1); | 2616 | err = do_md_stop(mddev, 1); |
2591 | else { | 2617 | else { |
2592 | mddev->ro = 1; | 2618 | mddev->ro = 1; |
2619 | set_disk_ro(mddev->gendisk, 1); | ||
2593 | err = do_md_run(mddev); | 2620 | err = do_md_run(mddev); |
2594 | } | 2621 | } |
2595 | break; | 2622 | break; |
2596 | case read_auto: | 2623 | case read_auto: |
2597 | /* stopping an active array */ | ||
2598 | if (mddev->pers) { | 2624 | if (mddev->pers) { |
2599 | err = do_md_stop(mddev, 1); | 2625 | if (mddev->ro != 1) |
2600 | if (err == 0) | 2626 | err = do_md_stop(mddev, 1); |
2601 | mddev->ro = 2; /* FIXME mark devices writable */ | 2627 | else |
2628 | err = restart_array(mddev); | ||
2629 | if (err == 0) { | ||
2630 | mddev->ro = 2; | ||
2631 | set_disk_ro(mddev->gendisk, 0); | ||
2632 | } | ||
2602 | } else { | 2633 | } else { |
2603 | mddev->ro = 2; | 2634 | mddev->ro = 2; |
2604 | err = do_md_run(mddev); | 2635 | err = do_md_run(mddev); |
@@ -2611,6 +2642,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2611 | if (atomic_read(&mddev->writes_pending) == 0) { | 2642 | if (atomic_read(&mddev->writes_pending) == 0) { |
2612 | if (mddev->in_sync == 0) { | 2643 | if (mddev->in_sync == 0) { |
2613 | mddev->in_sync = 1; | 2644 | mddev->in_sync = 1; |
2645 | if (mddev->safemode == 1) | ||
2646 | mddev->safemode = 0; | ||
2614 | if (mddev->persistent) | 2647 | if (mddev->persistent) |
2615 | set_bit(MD_CHANGE_CLEAN, | 2648 | set_bit(MD_CHANGE_CLEAN, |
2616 | &mddev->flags); | 2649 | &mddev->flags); |
@@ -2634,6 +2667,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2634 | err = 0; | 2667 | err = 0; |
2635 | } else { | 2668 | } else { |
2636 | mddev->ro = 0; | 2669 | mddev->ro = 0; |
2670 | set_disk_ro(mddev->gendisk, 0); | ||
2637 | err = do_md_run(mddev); | 2671 | err = do_md_run(mddev); |
2638 | } | 2672 | } |
2639 | break; | 2673 | break; |
@@ -3711,6 +3745,30 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3711 | mddev->reshape_position = MaxSector; | 3745 | mddev->reshape_position = MaxSector; |
3712 | mddev->external = 0; | 3746 | mddev->external = 0; |
3713 | mddev->persistent = 0; | 3747 | mddev->persistent = 0; |
3748 | mddev->level = LEVEL_NONE; | ||
3749 | mddev->clevel[0] = 0; | ||
3750 | mddev->flags = 0; | ||
3751 | mddev->ro = 0; | ||
3752 | mddev->metadata_type[0] = 0; | ||
3753 | mddev->chunk_size = 0; | ||
3754 | mddev->ctime = mddev->utime = 0; | ||
3755 | mddev->layout = 0; | ||
3756 | mddev->max_disks = 0; | ||
3757 | mddev->events = 0; | ||
3758 | mddev->delta_disks = 0; | ||
3759 | mddev->new_level = LEVEL_NONE; | ||
3760 | mddev->new_layout = 0; | ||
3761 | mddev->new_chunk = 0; | ||
3762 | mddev->curr_resync = 0; | ||
3763 | mddev->resync_mismatches = 0; | ||
3764 | mddev->suspend_lo = mddev->suspend_hi = 0; | ||
3765 | mddev->sync_speed_min = mddev->sync_speed_max = 0; | ||
3766 | mddev->recovery = 0; | ||
3767 | mddev->in_sync = 0; | ||
3768 | mddev->changed = 0; | ||
3769 | mddev->degraded = 0; | ||
3770 | mddev->barriers_work = 0; | ||
3771 | mddev->safemode = 0; | ||
3714 | 3772 | ||
3715 | } else if (mddev->pers) | 3773 | } else if (mddev->pers) |
3716 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3774 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
@@ -4918,6 +4976,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
4918 | 4976 | ||
4919 | if (!rdev || test_bit(Faulty, &rdev->flags)) | 4977 | if (!rdev || test_bit(Faulty, &rdev->flags)) |
4920 | return; | 4978 | return; |
4979 | |||
4980 | if (mddev->external) | ||
4981 | set_bit(Blocked, &rdev->flags); | ||
4921 | /* | 4982 | /* |
4922 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", | 4983 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", |
4923 | mdname(mddev), | 4984 | mdname(mddev), |
@@ -5364,6 +5425,8 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
5364 | md_wakeup_thread(mddev->sync_thread); | 5425 | md_wakeup_thread(mddev->sync_thread); |
5365 | } | 5426 | } |
5366 | atomic_inc(&mddev->writes_pending); | 5427 | atomic_inc(&mddev->writes_pending); |
5428 | if (mddev->safemode == 1) | ||
5429 | mddev->safemode = 0; | ||
5367 | if (mddev->in_sync) { | 5430 | if (mddev->in_sync) { |
5368 | spin_lock_irq(&mddev->write_lock); | 5431 | spin_lock_irq(&mddev->write_lock); |
5369 | if (mddev->in_sync) { | 5432 | if (mddev->in_sync) { |
@@ -5718,7 +5781,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5718 | 5781 | ||
5719 | rdev_for_each(rdev, rtmp, mddev) | 5782 | rdev_for_each(rdev, rtmp, mddev) |
5720 | if (rdev->raid_disk >= 0 && | 5783 | if (rdev->raid_disk >= 0 && |
5721 | !mddev->external && | 5784 | !test_bit(Blocked, &rdev->flags) && |
5722 | (test_bit(Faulty, &rdev->flags) || | 5785 | (test_bit(Faulty, &rdev->flags) || |
5723 | ! test_bit(In_sync, &rdev->flags)) && | 5786 | ! test_bit(In_sync, &rdev->flags)) && |
5724 | atomic_read(&rdev->nr_pending)==0) { | 5787 | atomic_read(&rdev->nr_pending)==0) { |
@@ -5788,7 +5851,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5788 | return; | 5851 | return; |
5789 | 5852 | ||
5790 | if (signal_pending(current)) { | 5853 | if (signal_pending(current)) { |
5791 | if (mddev->pers->sync_request) { | 5854 | if (mddev->pers->sync_request && !mddev->external) { |
5792 | printk(KERN_INFO "md: %s in immediate safe mode\n", | 5855 | printk(KERN_INFO "md: %s in immediate safe mode\n", |
5793 | mdname(mddev)); | 5856 | mdname(mddev)); |
5794 | mddev->safemode = 2; | 5857 | mddev->safemode = 2; |
@@ -5800,7 +5863,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5800 | (mddev->flags && !mddev->external) || | 5863 | (mddev->flags && !mddev->external) || |
5801 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 5864 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
5802 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || | 5865 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
5803 | (mddev->safemode == 1) || | 5866 | (mddev->external == 0 && mddev->safemode == 1) || |
5804 | (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) | 5867 | (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) |
5805 | && !mddev->in_sync && mddev->recovery_cp == MaxSector) | 5868 | && !mddev->in_sync && mddev->recovery_cp == MaxSector) |
5806 | )) | 5869 | )) |
@@ -5809,16 +5872,20 @@ void md_check_recovery(mddev_t *mddev) | |||
5809 | if (mddev_trylock(mddev)) { | 5872 | if (mddev_trylock(mddev)) { |
5810 | int spares = 0; | 5873 | int spares = 0; |
5811 | 5874 | ||
5812 | spin_lock_irq(&mddev->write_lock); | 5875 | if (!mddev->external) { |
5813 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5876 | spin_lock_irq(&mddev->write_lock); |
5814 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 5877 | if (mddev->safemode && |
5815 | mddev->in_sync = 1; | 5878 | !atomic_read(&mddev->writes_pending) && |
5816 | if (mddev->persistent) | 5879 | !mddev->in_sync && |
5817 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 5880 | mddev->recovery_cp == MaxSector) { |
5881 | mddev->in_sync = 1; | ||
5882 | if (mddev->persistent) | ||
5883 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
5884 | } | ||
5885 | if (mddev->safemode == 1) | ||
5886 | mddev->safemode = 0; | ||
5887 | spin_unlock_irq(&mddev->write_lock); | ||
5818 | } | 5888 | } |
5819 | if (mddev->safemode == 1) | ||
5820 | mddev->safemode = 0; | ||
5821 | spin_unlock_irq(&mddev->write_lock); | ||
5822 | 5889 | ||
5823 | if (mddev->flags) | 5890 | if (mddev->flags) |
5824 | md_update_sb(mddev, 0); | 5891 | md_update_sb(mddev, 0); |
@@ -5913,6 +5980,16 @@ void md_check_recovery(mddev_t *mddev) | |||
5913 | } | 5980 | } |
5914 | } | 5981 | } |
5915 | 5982 | ||
5983 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | ||
5984 | { | ||
5985 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
5986 | wait_event_timeout(rdev->blocked_wait, | ||
5987 | !test_bit(Blocked, &rdev->flags), | ||
5988 | msecs_to_jiffies(5000)); | ||
5989 | rdev_dec_pending(rdev, mddev); | ||
5990 | } | ||
5991 | EXPORT_SYMBOL(md_wait_for_blocked_rdev); | ||
5992 | |||
5916 | static int md_notify_reboot(struct notifier_block *this, | 5993 | static int md_notify_reboot(struct notifier_block *this, |
5917 | unsigned long code, void *x) | 5994 | unsigned long code, void *x) |
5918 | { | 5995 | { |
@@ -5947,13 +6024,9 @@ static struct notifier_block md_notifier = { | |||
5947 | 6024 | ||
5948 | static void md_geninit(void) | 6025 | static void md_geninit(void) |
5949 | { | 6026 | { |
5950 | struct proc_dir_entry *p; | ||
5951 | |||
5952 | dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); | 6027 | dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); |
5953 | 6028 | ||
5954 | p = create_proc_entry("mdstat", S_IRUGO, NULL); | 6029 | proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops); |
5955 | if (p) | ||
5956 | p->proc_fops = &md_seq_fops; | ||
5957 | } | 6030 | } |
5958 | 6031 | ||
5959 | static int __init md_init(void) | 6032 | static int __init md_init(void) |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 3f299d835a2b..42ee1a2dc144 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -244,7 +244,8 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) | |||
244 | conf->working_disks--; | 244 | conf->working_disks--; |
245 | mddev->degraded++; | 245 | mddev->degraded++; |
246 | printk(KERN_ALERT "multipath: IO failure on %s," | 246 | printk(KERN_ALERT "multipath: IO failure on %s," |
247 | " disabling IO path. \n Operation continuing" | 247 | " disabling IO path.\n" |
248 | "multipath: Operation continuing" | ||
248 | " on %d IO paths.\n", | 249 | " on %d IO paths.\n", |
249 | bdevname (rdev->bdev,b), | 250 | bdevname (rdev->bdev,b), |
250 | conf->working_disks); | 251 | conf->working_disks); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ff61b309129a..6778b7cb39bd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -773,7 +773,6 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
773 | r1bio_t *r1_bio; | 773 | r1bio_t *r1_bio; |
774 | struct bio *read_bio; | 774 | struct bio *read_bio; |
775 | int i, targets = 0, disks; | 775 | int i, targets = 0, disks; |
776 | mdk_rdev_t *rdev; | ||
777 | struct bitmap *bitmap = mddev->bitmap; | 776 | struct bitmap *bitmap = mddev->bitmap; |
778 | unsigned long flags; | 777 | unsigned long flags; |
779 | struct bio_list bl; | 778 | struct bio_list bl; |
@@ -781,6 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
781 | const int rw = bio_data_dir(bio); | 780 | const int rw = bio_data_dir(bio); |
782 | const int do_sync = bio_sync(bio); | 781 | const int do_sync = bio_sync(bio); |
783 | int do_barriers; | 782 | int do_barriers; |
783 | mdk_rdev_t *blocked_rdev; | ||
784 | 784 | ||
785 | /* | 785 | /* |
786 | * Register the new request and wait if the reconstruction | 786 | * Register the new request and wait if the reconstruction |
@@ -862,10 +862,17 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
862 | first = 0; | 862 | first = 0; |
863 | } | 863 | } |
864 | #endif | 864 | #endif |
865 | retry_write: | ||
866 | blocked_rdev = NULL; | ||
865 | rcu_read_lock(); | 867 | rcu_read_lock(); |
866 | for (i = 0; i < disks; i++) { | 868 | for (i = 0; i < disks; i++) { |
867 | if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL && | 869 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
868 | !test_bit(Faulty, &rdev->flags)) { | 870 | if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { |
871 | atomic_inc(&rdev->nr_pending); | ||
872 | blocked_rdev = rdev; | ||
873 | break; | ||
874 | } | ||
875 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
869 | atomic_inc(&rdev->nr_pending); | 876 | atomic_inc(&rdev->nr_pending); |
870 | if (test_bit(Faulty, &rdev->flags)) { | 877 | if (test_bit(Faulty, &rdev->flags)) { |
871 | rdev_dec_pending(rdev, mddev); | 878 | rdev_dec_pending(rdev, mddev); |
@@ -878,6 +885,20 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
878 | } | 885 | } |
879 | rcu_read_unlock(); | 886 | rcu_read_unlock(); |
880 | 887 | ||
888 | if (unlikely(blocked_rdev)) { | ||
889 | /* Wait for this device to become unblocked */ | ||
890 | int j; | ||
891 | |||
892 | for (j = 0; j < i; j++) | ||
893 | if (r1_bio->bios[j]) | ||
894 | rdev_dec_pending(conf->mirrors[j].rdev, mddev); | ||
895 | |||
896 | allow_barrier(conf); | ||
897 | md_wait_for_blocked_rdev(blocked_rdev, mddev); | ||
898 | wait_barrier(conf); | ||
899 | goto retry_write; | ||
900 | } | ||
901 | |||
881 | BUG_ON(targets == 0); /* we never fail the last device */ | 902 | BUG_ON(targets == 0); /* we never fail the last device */ |
882 | 903 | ||
883 | if (targets < conf->raid_disks) { | 904 | if (targets < conf->raid_disks) { |
@@ -1008,8 +1029,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1008 | } else | 1029 | } else |
1009 | set_bit(Faulty, &rdev->flags); | 1030 | set_bit(Faulty, &rdev->flags); |
1010 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 1031 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
1011 | printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" | 1032 | printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n" |
1012 | " Operation continuing on %d devices\n", | 1033 | "raid1: Operation continuing on %d devices.\n", |
1013 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); | 1034 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); |
1014 | } | 1035 | } |
1015 | 1036 | ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 32389d2f18fc..5938fa962922 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -790,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
790 | const int do_sync = bio_sync(bio); | 790 | const int do_sync = bio_sync(bio); |
791 | struct bio_list bl; | 791 | struct bio_list bl; |
792 | unsigned long flags; | 792 | unsigned long flags; |
793 | mdk_rdev_t *blocked_rdev; | ||
793 | 794 | ||
794 | if (unlikely(bio_barrier(bio))) { | 795 | if (unlikely(bio_barrier(bio))) { |
795 | bio_endio(bio, -EOPNOTSUPP); | 796 | bio_endio(bio, -EOPNOTSUPP); |
@@ -879,17 +880,23 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
879 | /* | 880 | /* |
880 | * WRITE: | 881 | * WRITE: |
881 | */ | 882 | */ |
882 | /* first select target devices under spinlock and | 883 | /* first select target devices under rcu_lock and |
883 | * inc refcount on their rdev. Record them by setting | 884 | * inc refcount on their rdev. Record them by setting |
884 | * bios[x] to bio | 885 | * bios[x] to bio |
885 | */ | 886 | */ |
886 | raid10_find_phys(conf, r10_bio); | 887 | raid10_find_phys(conf, r10_bio); |
888 | retry_write: | ||
889 | blocked_rdev = 0; | ||
887 | rcu_read_lock(); | 890 | rcu_read_lock(); |
888 | for (i = 0; i < conf->copies; i++) { | 891 | for (i = 0; i < conf->copies; i++) { |
889 | int d = r10_bio->devs[i].devnum; | 892 | int d = r10_bio->devs[i].devnum; |
890 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); | 893 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); |
891 | if (rdev && | 894 | if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { |
892 | !test_bit(Faulty, &rdev->flags)) { | 895 | atomic_inc(&rdev->nr_pending); |
896 | blocked_rdev = rdev; | ||
897 | break; | ||
898 | } | ||
899 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
893 | atomic_inc(&rdev->nr_pending); | 900 | atomic_inc(&rdev->nr_pending); |
894 | r10_bio->devs[i].bio = bio; | 901 | r10_bio->devs[i].bio = bio; |
895 | } else { | 902 | } else { |
@@ -899,6 +906,22 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
899 | } | 906 | } |
900 | rcu_read_unlock(); | 907 | rcu_read_unlock(); |
901 | 908 | ||
909 | if (unlikely(blocked_rdev)) { | ||
910 | /* Have to wait for this device to get unblocked, then retry */ | ||
911 | int j; | ||
912 | int d; | ||
913 | |||
914 | for (j = 0; j < i; j++) | ||
915 | if (r10_bio->devs[j].bio) { | ||
916 | d = r10_bio->devs[j].devnum; | ||
917 | rdev_dec_pending(conf->mirrors[d].rdev, mddev); | ||
918 | } | ||
919 | allow_barrier(conf); | ||
920 | md_wait_for_blocked_rdev(blocked_rdev, mddev); | ||
921 | wait_barrier(conf); | ||
922 | goto retry_write; | ||
923 | } | ||
924 | |||
902 | atomic_set(&r10_bio->remaining, 0); | 925 | atomic_set(&r10_bio->remaining, 0); |
903 | 926 | ||
904 | bio_list_init(&bl); | 927 | bio_list_init(&bl); |
@@ -1001,8 +1024,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1001 | } | 1024 | } |
1002 | set_bit(Faulty, &rdev->flags); | 1025 | set_bit(Faulty, &rdev->flags); |
1003 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 1026 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
1004 | printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" | 1027 | printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n" |
1005 | " Operation continuing on %d devices\n", | 1028 | "raid10: Operation continuing on %d devices.\n", |
1006 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); | 1029 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); |
1007 | } | 1030 | } |
1008 | 1031 | ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b162b839a662..087eee0cb809 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #define STRIPE_SHIFT (PAGE_SHIFT - 9) | 63 | #define STRIPE_SHIFT (PAGE_SHIFT - 9) |
64 | #define STRIPE_SECTORS (STRIPE_SIZE>>9) | 64 | #define STRIPE_SECTORS (STRIPE_SIZE>>9) |
65 | #define IO_THRESHOLD 1 | 65 | #define IO_THRESHOLD 1 |
66 | #define BYPASS_THRESHOLD 1 | ||
66 | #define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) | 67 | #define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) |
67 | #define HASH_MASK (NR_HASH - 1) | 68 | #define HASH_MASK (NR_HASH - 1) |
68 | 69 | ||
@@ -398,6 +399,7 @@ static void ops_run_io(struct stripe_head *sh) | |||
398 | 399 | ||
399 | might_sleep(); | 400 | might_sleep(); |
400 | 401 | ||
402 | set_bit(STRIPE_IO_STARTED, &sh->state); | ||
401 | for (i = disks; i--; ) { | 403 | for (i = disks; i--; ) { |
402 | int rw; | 404 | int rw; |
403 | struct bio *bi; | 405 | struct bio *bi; |
@@ -433,7 +435,7 @@ static void ops_run_io(struct stripe_head *sh) | |||
433 | 435 | ||
434 | bi->bi_bdev = rdev->bdev; | 436 | bi->bi_bdev = rdev->bdev; |
435 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", | 437 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", |
436 | __FUNCTION__, (unsigned long long)sh->sector, | 438 | __func__, (unsigned long long)sh->sector, |
437 | bi->bi_rw, i); | 439 | bi->bi_rw, i); |
438 | atomic_inc(&sh->count); | 440 | atomic_inc(&sh->count); |
439 | bi->bi_sector = sh->sector + rdev->data_offset; | 441 | bi->bi_sector = sh->sector + rdev->data_offset; |
@@ -520,7 +522,7 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
520 | raid5_conf_t *conf = sh->raid_conf; | 522 | raid5_conf_t *conf = sh->raid_conf; |
521 | int i; | 523 | int i; |
522 | 524 | ||
523 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 525 | pr_debug("%s: stripe %llu\n", __func__, |
524 | (unsigned long long)sh->sector); | 526 | (unsigned long long)sh->sector); |
525 | 527 | ||
526 | /* clear completed biofills */ | 528 | /* clear completed biofills */ |
@@ -569,7 +571,7 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
569 | raid5_conf_t *conf = sh->raid_conf; | 571 | raid5_conf_t *conf = sh->raid_conf; |
570 | int i; | 572 | int i; |
571 | 573 | ||
572 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 574 | pr_debug("%s: stripe %llu\n", __func__, |
573 | (unsigned long long)sh->sector); | 575 | (unsigned long long)sh->sector); |
574 | 576 | ||
575 | for (i = sh->disks; i--; ) { | 577 | for (i = sh->disks; i--; ) { |
@@ -600,7 +602,7 @@ static void ops_complete_compute5(void *stripe_head_ref) | |||
600 | int target = sh->ops.target; | 602 | int target = sh->ops.target; |
601 | struct r5dev *tgt = &sh->dev[target]; | 603 | struct r5dev *tgt = &sh->dev[target]; |
602 | 604 | ||
603 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 605 | pr_debug("%s: stripe %llu\n", __func__, |
604 | (unsigned long long)sh->sector); | 606 | (unsigned long long)sh->sector); |
605 | 607 | ||
606 | set_bit(R5_UPTODATE, &tgt->flags); | 608 | set_bit(R5_UPTODATE, &tgt->flags); |
@@ -625,7 +627,7 @@ ops_run_compute5(struct stripe_head *sh, unsigned long pending) | |||
625 | int i; | 627 | int i; |
626 | 628 | ||
627 | pr_debug("%s: stripe %llu block: %d\n", | 629 | pr_debug("%s: stripe %llu block: %d\n", |
628 | __FUNCTION__, (unsigned long long)sh->sector, target); | 630 | __func__, (unsigned long long)sh->sector, target); |
629 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | 631 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); |
630 | 632 | ||
631 | for (i = disks; i--; ) | 633 | for (i = disks; i--; ) |
@@ -653,7 +655,7 @@ static void ops_complete_prexor(void *stripe_head_ref) | |||
653 | { | 655 | { |
654 | struct stripe_head *sh = stripe_head_ref; | 656 | struct stripe_head *sh = stripe_head_ref; |
655 | 657 | ||
656 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 658 | pr_debug("%s: stripe %llu\n", __func__, |
657 | (unsigned long long)sh->sector); | 659 | (unsigned long long)sh->sector); |
658 | 660 | ||
659 | set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | 661 | set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); |
@@ -670,7 +672,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
670 | /* existing parity data subtracted */ | 672 | /* existing parity data subtracted */ |
671 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 673 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; |
672 | 674 | ||
673 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 675 | pr_debug("%s: stripe %llu\n", __func__, |
674 | (unsigned long long)sh->sector); | 676 | (unsigned long long)sh->sector); |
675 | 677 | ||
676 | for (i = disks; i--; ) { | 678 | for (i = disks; i--; ) { |
@@ -699,7 +701,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
699 | */ | 701 | */ |
700 | int prexor = test_bit(STRIPE_OP_PREXOR, &pending); | 702 | int prexor = test_bit(STRIPE_OP_PREXOR, &pending); |
701 | 703 | ||
702 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 704 | pr_debug("%s: stripe %llu\n", __func__, |
703 | (unsigned long long)sh->sector); | 705 | (unsigned long long)sh->sector); |
704 | 706 | ||
705 | for (i = disks; i--; ) { | 707 | for (i = disks; i--; ) { |
@@ -744,7 +746,7 @@ static void ops_complete_postxor(void *stripe_head_ref) | |||
744 | { | 746 | { |
745 | struct stripe_head *sh = stripe_head_ref; | 747 | struct stripe_head *sh = stripe_head_ref; |
746 | 748 | ||
747 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 749 | pr_debug("%s: stripe %llu\n", __func__, |
748 | (unsigned long long)sh->sector); | 750 | (unsigned long long)sh->sector); |
749 | 751 | ||
750 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | 752 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); |
@@ -757,7 +759,7 @@ static void ops_complete_write(void *stripe_head_ref) | |||
757 | struct stripe_head *sh = stripe_head_ref; | 759 | struct stripe_head *sh = stripe_head_ref; |
758 | int disks = sh->disks, i, pd_idx = sh->pd_idx; | 760 | int disks = sh->disks, i, pd_idx = sh->pd_idx; |
759 | 761 | ||
760 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 762 | pr_debug("%s: stripe %llu\n", __func__, |
761 | (unsigned long long)sh->sector); | 763 | (unsigned long long)sh->sector); |
762 | 764 | ||
763 | for (i = disks; i--; ) { | 765 | for (i = disks; i--; ) { |
@@ -787,7 +789,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
787 | unsigned long flags; | 789 | unsigned long flags; |
788 | dma_async_tx_callback callback; | 790 | dma_async_tx_callback callback; |
789 | 791 | ||
790 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 792 | pr_debug("%s: stripe %llu\n", __func__, |
791 | (unsigned long long)sh->sector); | 793 | (unsigned long long)sh->sector); |
792 | 794 | ||
793 | /* check if prexor is active which means only process blocks | 795 | /* check if prexor is active which means only process blocks |
@@ -837,7 +839,7 @@ static void ops_complete_check(void *stripe_head_ref) | |||
837 | struct stripe_head *sh = stripe_head_ref; | 839 | struct stripe_head *sh = stripe_head_ref; |
838 | int pd_idx = sh->pd_idx; | 840 | int pd_idx = sh->pd_idx; |
839 | 841 | ||
840 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 842 | pr_debug("%s: stripe %llu\n", __func__, |
841 | (unsigned long long)sh->sector); | 843 | (unsigned long long)sh->sector); |
842 | 844 | ||
843 | if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && | 845 | if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && |
@@ -859,7 +861,7 @@ static void ops_run_check(struct stripe_head *sh) | |||
859 | int count = 0, pd_idx = sh->pd_idx, i; | 861 | int count = 0, pd_idx = sh->pd_idx, i; |
860 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 862 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; |
861 | 863 | ||
862 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 864 | pr_debug("%s: stripe %llu\n", __func__, |
863 | (unsigned long long)sh->sector); | 865 | (unsigned long long)sh->sector); |
864 | 866 | ||
865 | for (i = disks; i--; ) { | 867 | for (i = disks; i--; ) { |
@@ -1260,8 +1262,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1260 | } | 1262 | } |
1261 | set_bit(Faulty, &rdev->flags); | 1263 | set_bit(Faulty, &rdev->flags); |
1262 | printk (KERN_ALERT | 1264 | printk (KERN_ALERT |
1263 | "raid5: Disk failure on %s, disabling device." | 1265 | "raid5: Disk failure on %s, disabling device.\n" |
1264 | " Operation continuing on %d devices\n", | 1266 | "raid5: Operation continuing on %d devices.\n", |
1265 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); | 1267 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); |
1266 | } | 1268 | } |
1267 | } | 1269 | } |
@@ -1720,6 +1722,9 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand) | |||
1720 | locked++; | 1722 | locked++; |
1721 | } | 1723 | } |
1722 | } | 1724 | } |
1725 | if (locked + 1 == disks) | ||
1726 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | ||
1727 | atomic_inc(&sh->raid_conf->pending_full_writes); | ||
1723 | } else { | 1728 | } else { |
1724 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | 1729 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || |
1725 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | 1730 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); |
@@ -1759,7 +1764,7 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand) | |||
1759 | locked++; | 1764 | locked++; |
1760 | 1765 | ||
1761 | pr_debug("%s: stripe %llu locked: %d pending: %lx\n", | 1766 | pr_debug("%s: stripe %llu locked: %d pending: %lx\n", |
1762 | __FUNCTION__, (unsigned long long)sh->sector, | 1767 | __func__, (unsigned long long)sh->sector, |
1763 | locked, sh->ops.pending); | 1768 | locked, sh->ops.pending); |
1764 | 1769 | ||
1765 | return locked; | 1770 | return locked; |
@@ -1947,6 +1952,9 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, | |||
1947 | STRIPE_SECTORS, 0, 0); | 1952 | STRIPE_SECTORS, 0, 0); |
1948 | } | 1953 | } |
1949 | 1954 | ||
1955 | if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) | ||
1956 | if (atomic_dec_and_test(&conf->pending_full_writes)) | ||
1957 | md_wakeup_thread(conf->mddev->thread); | ||
1950 | } | 1958 | } |
1951 | 1959 | ||
1952 | /* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks | 1960 | /* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks |
@@ -2149,6 +2157,10 @@ static void handle_completed_write_requests(raid5_conf_t *conf, | |||
2149 | 0); | 2157 | 0); |
2150 | } | 2158 | } |
2151 | } | 2159 | } |
2160 | |||
2161 | if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) | ||
2162 | if (atomic_dec_and_test(&conf->pending_full_writes)) | ||
2163 | md_wakeup_thread(conf->mddev->thread); | ||
2152 | } | 2164 | } |
2153 | 2165 | ||
2154 | static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | 2166 | static void handle_issuing_new_write_requests5(raid5_conf_t *conf, |
@@ -2333,6 +2345,9 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf, | |||
2333 | s->locked++; | 2345 | s->locked++; |
2334 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | 2346 | set_bit(R5_Wantwrite, &sh->dev[i].flags); |
2335 | } | 2347 | } |
2348 | if (s->locked == disks) | ||
2349 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | ||
2350 | atomic_inc(&conf->pending_full_writes); | ||
2336 | /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ | 2351 | /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ |
2337 | set_bit(STRIPE_INSYNC, &sh->state); | 2352 | set_bit(STRIPE_INSYNC, &sh->state); |
2338 | 2353 | ||
@@ -2592,6 +2607,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2592 | } | 2607 | } |
2593 | } | 2608 | } |
2594 | 2609 | ||
2610 | |||
2595 | /* | 2611 | /* |
2596 | * handle_stripe - do things to a stripe. | 2612 | * handle_stripe - do things to a stripe. |
2597 | * | 2613 | * |
@@ -2617,6 +2633,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2617 | struct stripe_head_state s; | 2633 | struct stripe_head_state s; |
2618 | struct r5dev *dev; | 2634 | struct r5dev *dev; |
2619 | unsigned long pending = 0; | 2635 | unsigned long pending = 0; |
2636 | mdk_rdev_t *blocked_rdev = NULL; | ||
2620 | 2637 | ||
2621 | memset(&s, 0, sizeof(s)); | 2638 | memset(&s, 0, sizeof(s)); |
2622 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " | 2639 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " |
@@ -2676,6 +2693,11 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2676 | if (dev->written) | 2693 | if (dev->written) |
2677 | s.written++; | 2694 | s.written++; |
2678 | rdev = rcu_dereference(conf->disks[i].rdev); | 2695 | rdev = rcu_dereference(conf->disks[i].rdev); |
2696 | if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { | ||
2697 | blocked_rdev = rdev; | ||
2698 | atomic_inc(&rdev->nr_pending); | ||
2699 | break; | ||
2700 | } | ||
2679 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { | 2701 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { |
2680 | /* The ReadError flag will just be confusing now */ | 2702 | /* The ReadError flag will just be confusing now */ |
2681 | clear_bit(R5_ReadError, &dev->flags); | 2703 | clear_bit(R5_ReadError, &dev->flags); |
@@ -2690,6 +2712,11 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2690 | } | 2712 | } |
2691 | rcu_read_unlock(); | 2713 | rcu_read_unlock(); |
2692 | 2714 | ||
2715 | if (unlikely(blocked_rdev)) { | ||
2716 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2717 | goto unlock; | ||
2718 | } | ||
2719 | |||
2693 | if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) | 2720 | if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) |
2694 | sh->ops.count++; | 2721 | sh->ops.count++; |
2695 | 2722 | ||
@@ -2879,8 +2906,13 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2879 | if (sh->ops.count) | 2906 | if (sh->ops.count) |
2880 | pending = get_stripe_work(sh); | 2907 | pending = get_stripe_work(sh); |
2881 | 2908 | ||
2909 | unlock: | ||
2882 | spin_unlock(&sh->lock); | 2910 | spin_unlock(&sh->lock); |
2883 | 2911 | ||
2912 | /* wait for this device to become unblocked */ | ||
2913 | if (unlikely(blocked_rdev)) | ||
2914 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | ||
2915 | |||
2884 | if (pending) | 2916 | if (pending) |
2885 | raid5_run_ops(sh, pending); | 2917 | raid5_run_ops(sh, pending); |
2886 | 2918 | ||
@@ -2897,6 +2929,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2897 | struct stripe_head_state s; | 2929 | struct stripe_head_state s; |
2898 | struct r6_state r6s; | 2930 | struct r6_state r6s; |
2899 | struct r5dev *dev, *pdev, *qdev; | 2931 | struct r5dev *dev, *pdev, *qdev; |
2932 | mdk_rdev_t *blocked_rdev = NULL; | ||
2900 | 2933 | ||
2901 | r6s.qd_idx = raid6_next_disk(pd_idx, disks); | 2934 | r6s.qd_idx = raid6_next_disk(pd_idx, disks); |
2902 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " | 2935 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " |
@@ -2960,6 +2993,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2960 | if (dev->written) | 2993 | if (dev->written) |
2961 | s.written++; | 2994 | s.written++; |
2962 | rdev = rcu_dereference(conf->disks[i].rdev); | 2995 | rdev = rcu_dereference(conf->disks[i].rdev); |
2996 | if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { | ||
2997 | blocked_rdev = rdev; | ||
2998 | atomic_inc(&rdev->nr_pending); | ||
2999 | break; | ||
3000 | } | ||
2963 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { | 3001 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { |
2964 | /* The ReadError flag will just be confusing now */ | 3002 | /* The ReadError flag will just be confusing now */ |
2965 | clear_bit(R5_ReadError, &dev->flags); | 3003 | clear_bit(R5_ReadError, &dev->flags); |
@@ -2974,6 +3012,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2974 | set_bit(R5_Insync, &dev->flags); | 3012 | set_bit(R5_Insync, &dev->flags); |
2975 | } | 3013 | } |
2976 | rcu_read_unlock(); | 3014 | rcu_read_unlock(); |
3015 | |||
3016 | if (unlikely(blocked_rdev)) { | ||
3017 | set_bit(STRIPE_HANDLE, &sh->state); | ||
3018 | goto unlock; | ||
3019 | } | ||
2977 | pr_debug("locked=%d uptodate=%d to_read=%d" | 3020 | pr_debug("locked=%d uptodate=%d to_read=%d" |
2978 | " to_write=%d failed=%d failed_num=%d,%d\n", | 3021 | " to_write=%d failed=%d failed_num=%d,%d\n", |
2979 | s.locked, s.uptodate, s.to_read, s.to_write, s.failed, | 3022 | s.locked, s.uptodate, s.to_read, s.to_write, s.failed, |
@@ -3079,8 +3122,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3079 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) | 3122 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) |
3080 | handle_stripe_expansion(conf, sh, &r6s); | 3123 | handle_stripe_expansion(conf, sh, &r6s); |
3081 | 3124 | ||
3125 | unlock: | ||
3082 | spin_unlock(&sh->lock); | 3126 | spin_unlock(&sh->lock); |
3083 | 3127 | ||
3128 | /* wait for this device to become unblocked */ | ||
3129 | if (unlikely(blocked_rdev)) | ||
3130 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | ||
3131 | |||
3084 | return_io(return_bi); | 3132 | return_io(return_bi); |
3085 | 3133 | ||
3086 | for (i=disks; i-- ;) { | 3134 | for (i=disks; i-- ;) { |
@@ -3094,6 +3142,8 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3094 | else | 3142 | else |
3095 | continue; | 3143 | continue; |
3096 | 3144 | ||
3145 | set_bit(STRIPE_IO_STARTED, &sh->state); | ||
3146 | |||
3097 | bi = &sh->dev[i].req; | 3147 | bi = &sh->dev[i].req; |
3098 | 3148 | ||
3099 | bi->bi_rw = rw; | 3149 | bi->bi_rw = rw; |
@@ -3164,7 +3214,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
3164 | clear_bit(STRIPE_DELAYED, &sh->state); | 3214 | clear_bit(STRIPE_DELAYED, &sh->state); |
3165 | if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 3215 | if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
3166 | atomic_inc(&conf->preread_active_stripes); | 3216 | atomic_inc(&conf->preread_active_stripes); |
3167 | list_add_tail(&sh->lru, &conf->handle_list); | 3217 | list_add_tail(&sh->lru, &conf->hold_list); |
3168 | } | 3218 | } |
3169 | } else | 3219 | } else |
3170 | blk_plug_device(conf->mddev->queue); | 3220 | blk_plug_device(conf->mddev->queue); |
@@ -3442,6 +3492,58 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | |||
3442 | } | 3492 | } |
3443 | } | 3493 | } |
3444 | 3494 | ||
3495 | /* __get_priority_stripe - get the next stripe to process | ||
3496 | * | ||
3497 | * Full stripe writes are allowed to pass preread active stripes up until | ||
3498 | * the bypass_threshold is exceeded. In general the bypass_count | ||
3499 | * increments when the handle_list is handled before the hold_list; however, it | ||
3500 | * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a | ||
3501 | * stripe with in flight i/o. The bypass_count will be reset when the | ||
3502 | * head of the hold_list has changed, i.e. the head was promoted to the | ||
3503 | * handle_list. | ||
3504 | */ | ||
3505 | static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf) | ||
3506 | { | ||
3507 | struct stripe_head *sh; | ||
3508 | |||
3509 | pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n", | ||
3510 | __func__, | ||
3511 | list_empty(&conf->handle_list) ? "empty" : "busy", | ||
3512 | list_empty(&conf->hold_list) ? "empty" : "busy", | ||
3513 | atomic_read(&conf->pending_full_writes), conf->bypass_count); | ||
3514 | |||
3515 | if (!list_empty(&conf->handle_list)) { | ||
3516 | sh = list_entry(conf->handle_list.next, typeof(*sh), lru); | ||
3517 | |||
3518 | if (list_empty(&conf->hold_list)) | ||
3519 | conf->bypass_count = 0; | ||
3520 | else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) { | ||
3521 | if (conf->hold_list.next == conf->last_hold) | ||
3522 | conf->bypass_count++; | ||
3523 | else { | ||
3524 | conf->last_hold = conf->hold_list.next; | ||
3525 | conf->bypass_count -= conf->bypass_threshold; | ||
3526 | if (conf->bypass_count < 0) | ||
3527 | conf->bypass_count = 0; | ||
3528 | } | ||
3529 | } | ||
3530 | } else if (!list_empty(&conf->hold_list) && | ||
3531 | ((conf->bypass_threshold && | ||
3532 | conf->bypass_count > conf->bypass_threshold) || | ||
3533 | atomic_read(&conf->pending_full_writes) == 0)) { | ||
3534 | sh = list_entry(conf->hold_list.next, | ||
3535 | typeof(*sh), lru); | ||
3536 | conf->bypass_count -= conf->bypass_threshold; | ||
3537 | if (conf->bypass_count < 0) | ||
3538 | conf->bypass_count = 0; | ||
3539 | } else | ||
3540 | return NULL; | ||
3541 | |||
3542 | list_del_init(&sh->lru); | ||
3543 | atomic_inc(&sh->count); | ||
3544 | BUG_ON(atomic_read(&sh->count) != 1); | ||
3545 | return sh; | ||
3546 | } | ||
3445 | 3547 | ||
3446 | static int make_request(struct request_queue *q, struct bio * bi) | 3548 | static int make_request(struct request_queue *q, struct bio * bi) |
3447 | { | 3549 | { |
@@ -3914,7 +4016,6 @@ static void raid5d(mddev_t *mddev) | |||
3914 | handled = 0; | 4016 | handled = 0; |
3915 | spin_lock_irq(&conf->device_lock); | 4017 | spin_lock_irq(&conf->device_lock); |
3916 | while (1) { | 4018 | while (1) { |
3917 | struct list_head *first; | ||
3918 | struct bio *bio; | 4019 | struct bio *bio; |
3919 | 4020 | ||
3920 | if (conf->seq_flush != conf->seq_write) { | 4021 | if (conf->seq_flush != conf->seq_write) { |
@@ -3936,17 +4037,12 @@ static void raid5d(mddev_t *mddev) | |||
3936 | handled++; | 4037 | handled++; |
3937 | } | 4038 | } |
3938 | 4039 | ||
3939 | if (list_empty(&conf->handle_list)) { | 4040 | sh = __get_priority_stripe(conf); |
4041 | |||
4042 | if (!sh) { | ||
3940 | async_tx_issue_pending_all(); | 4043 | async_tx_issue_pending_all(); |
3941 | break; | 4044 | break; |
3942 | } | 4045 | } |
3943 | |||
3944 | first = conf->handle_list.next; | ||
3945 | sh = list_entry(first, struct stripe_head, lru); | ||
3946 | |||
3947 | list_del_init(first); | ||
3948 | atomic_inc(&sh->count); | ||
3949 | BUG_ON(atomic_read(&sh->count)!= 1); | ||
3950 | spin_unlock_irq(&conf->device_lock); | 4046 | spin_unlock_irq(&conf->device_lock); |
3951 | 4047 | ||
3952 | handled++; | 4048 | handled++; |
@@ -3978,15 +4074,13 @@ static ssize_t | |||
3978 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | 4074 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) |
3979 | { | 4075 | { |
3980 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4076 | raid5_conf_t *conf = mddev_to_conf(mddev); |
3981 | char *end; | 4077 | unsigned long new; |
3982 | int new; | ||
3983 | if (len >= PAGE_SIZE) | 4078 | if (len >= PAGE_SIZE) |
3984 | return -EINVAL; | 4079 | return -EINVAL; |
3985 | if (!conf) | 4080 | if (!conf) |
3986 | return -ENODEV; | 4081 | return -ENODEV; |
3987 | 4082 | ||
3988 | new = simple_strtoul(page, &end, 10); | 4083 | if (strict_strtoul(page, 10, &new)) |
3989 | if (!*page || (*end && *end != '\n') ) | ||
3990 | return -EINVAL; | 4084 | return -EINVAL; |
3991 | if (new <= 16 || new > 32768) | 4085 | if (new <= 16 || new > 32768) |
3992 | return -EINVAL; | 4086 | return -EINVAL; |
@@ -4011,6 +4105,40 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, | |||
4011 | raid5_store_stripe_cache_size); | 4105 | raid5_store_stripe_cache_size); |
4012 | 4106 | ||
4013 | static ssize_t | 4107 | static ssize_t |
4108 | raid5_show_preread_threshold(mddev_t *mddev, char *page) | ||
4109 | { | ||
4110 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
4111 | if (conf) | ||
4112 | return sprintf(page, "%d\n", conf->bypass_threshold); | ||
4113 | else | ||
4114 | return 0; | ||
4115 | } | ||
4116 | |||
4117 | static ssize_t | ||
4118 | raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) | ||
4119 | { | ||
4120 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
4121 | unsigned long new; | ||
4122 | if (len >= PAGE_SIZE) | ||
4123 | return -EINVAL; | ||
4124 | if (!conf) | ||
4125 | return -ENODEV; | ||
4126 | |||
4127 | if (strict_strtoul(page, 10, &new)) | ||
4128 | return -EINVAL; | ||
4129 | if (new > conf->max_nr_stripes) | ||
4130 | return -EINVAL; | ||
4131 | conf->bypass_threshold = new; | ||
4132 | return len; | ||
4133 | } | ||
4134 | |||
4135 | static struct md_sysfs_entry | ||
4136 | raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, | ||
4137 | S_IRUGO | S_IWUSR, | ||
4138 | raid5_show_preread_threshold, | ||
4139 | raid5_store_preread_threshold); | ||
4140 | |||
4141 | static ssize_t | ||
4014 | stripe_cache_active_show(mddev_t *mddev, char *page) | 4142 | stripe_cache_active_show(mddev_t *mddev, char *page) |
4015 | { | 4143 | { |
4016 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4144 | raid5_conf_t *conf = mddev_to_conf(mddev); |
@@ -4026,6 +4154,7 @@ raid5_stripecache_active = __ATTR_RO(stripe_cache_active); | |||
4026 | static struct attribute *raid5_attrs[] = { | 4154 | static struct attribute *raid5_attrs[] = { |
4027 | &raid5_stripecache_size.attr, | 4155 | &raid5_stripecache_size.attr, |
4028 | &raid5_stripecache_active.attr, | 4156 | &raid5_stripecache_active.attr, |
4157 | &raid5_preread_bypass_threshold.attr, | ||
4029 | NULL, | 4158 | NULL, |
4030 | }; | 4159 | }; |
4031 | static struct attribute_group raid5_attrs_group = { | 4160 | static struct attribute_group raid5_attrs_group = { |
@@ -4130,12 +4259,14 @@ static int run(mddev_t *mddev) | |||
4130 | init_waitqueue_head(&conf->wait_for_stripe); | 4259 | init_waitqueue_head(&conf->wait_for_stripe); |
4131 | init_waitqueue_head(&conf->wait_for_overlap); | 4260 | init_waitqueue_head(&conf->wait_for_overlap); |
4132 | INIT_LIST_HEAD(&conf->handle_list); | 4261 | INIT_LIST_HEAD(&conf->handle_list); |
4262 | INIT_LIST_HEAD(&conf->hold_list); | ||
4133 | INIT_LIST_HEAD(&conf->delayed_list); | 4263 | INIT_LIST_HEAD(&conf->delayed_list); |
4134 | INIT_LIST_HEAD(&conf->bitmap_list); | 4264 | INIT_LIST_HEAD(&conf->bitmap_list); |
4135 | INIT_LIST_HEAD(&conf->inactive_list); | 4265 | INIT_LIST_HEAD(&conf->inactive_list); |
4136 | atomic_set(&conf->active_stripes, 0); | 4266 | atomic_set(&conf->active_stripes, 0); |
4137 | atomic_set(&conf->preread_active_stripes, 0); | 4267 | atomic_set(&conf->preread_active_stripes, 0); |
4138 | atomic_set(&conf->active_aligned_reads, 0); | 4268 | atomic_set(&conf->active_aligned_reads, 0); |
4269 | conf->bypass_threshold = BYPASS_THRESHOLD; | ||
4139 | 4270 | ||
4140 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); | 4271 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); |
4141 | 4272 | ||
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c index 77a6e4bf503d..21987e3dbe6c 100644 --- a/drivers/md/raid6algos.c +++ b/drivers/md/raid6algos.c | |||
@@ -121,7 +121,8 @@ int __init raid6_select_algo(void) | |||
121 | j0 = jiffies; | 121 | j0 = jiffies; |
122 | while ( (j1 = jiffies) == j0 ) | 122 | while ( (j1 = jiffies) == j0 ) |
123 | cpu_relax(); | 123 | cpu_relax(); |
124 | while ( (jiffies-j1) < (1 << RAID6_TIME_JIFFIES_LG2) ) { | 124 | while (time_before(jiffies, |
125 | j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { | ||
125 | (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); | 126 | (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); |
126 | perf++; | 127 | perf++; |
127 | } | 128 | } |