aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-04-09 14:50:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-04-09 14:50:29 -0400
commit 2f4084209adc77f9a1c9f38db3019a509e167882 (patch)
tree 775657114c885505ecc46605e29ea1470e986f76
parent 2f10ffcfb28beb35137d9e86992c771b4a6c5f2a (diff)
parent 3440c49f5c5ecb4f29b0544aa87da71888404f8f (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (34 commits)
  cfq-iosched: Fix the incorrect timeslice accounting with forced_dispatch
  loop: Update mtime when writing using aops
  block: expose the statistics in blkio.time and blkio.sectors for the root cgroup
  backing-dev: Handle class_create() failure
  Block: Fix block/elevator.c elevator_get() off-by-one error
  drbd: lc_element_by_index() never returns NULL
  cciss: unlock on error path
  cfq-iosched: Do not merge queues of BE and IDLE classes
  cfq-iosched: Add additional blktrace log messages in CFQ for easier debugging
  i2o: Remove the dangerous kobj_to_i2o_device macro
  block: remove 16 bytes of padding from struct request on 64bits
  cfq-iosched: fix a kbuild regression
  block: make CONFIG_BLK_CGROUP visible
  Remove GENHD_FL_DRIVERFS
  block: Export max number of segments and max segment size in sysfs
  block: Finalize conversion of block limits functions
  block: Fix overrun in lcm() and move it to lib
  vfs: improve writeback_inodes_wb()
  paride: fix off-by-one test
  drbd: fix al-to-on-disk-bitmap for 4k logical_block_size
  ...
-rw-r--r-- Documentation/DocBook/tracepoint.tmpl 13
-rw-r--r-- Documentation/block/biodoc.txt 4
-rw-r--r-- block/Kconfig 3
-rw-r--r-- block/blk-settings.c 11
-rw-r--r-- block/blk-sysfs.c 25
-rw-r--r-- block/cfq-iosched.c 41
-rw-r--r-- block/elevator.c 2
-rw-r--r-- drivers/block/DAC960.c 1
-rw-r--r-- drivers/block/drbd/drbd_actlog.c 19
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c 10
-rw-r--r-- drivers/block/drbd/drbd_int.h 12
-rw-r--r-- drivers/block/drbd/drbd_main.c 20
-rw-r--r-- drivers/block/drbd/drbd_nl.c 44
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 34
-rw-r--r-- drivers/block/drbd/drbd_worker.c 18
-rw-r--r-- drivers/block/loop.c 2
-rw-r--r-- drivers/block/paride/pcd.c 4
-rw-r--r-- drivers/block/paride/pf.c 4
-rw-r--r-- drivers/block/paride/pt.c 4
-rw-r--r-- drivers/block/virtio_blk.c 5
-rw-r--r-- drivers/scsi/sd.c 2
-rw-r--r-- fs/bio.c 4
-rw-r--r-- fs/fs-writeback.c 133
-rw-r--r-- include/linux/blkdev.h 35
-rw-r--r-- include/linux/drbd.h 2
-rw-r--r-- include/linux/drbd_nl.h 3
-rw-r--r-- include/linux/genhd.h 2
-rw-r--r-- include/linux/i2o.h 1
-rw-r--r-- include/linux/lcm.h 8
-rw-r--r-- include/linux/writeback.h 3
-rw-r--r-- include/trace/events/block.h 164
-rw-r--r-- lib/Makefile 2
-rw-r--r-- lib/lcm.c 15
-rw-r--r-- mm/backing-dev.c 3
34 files changed, 475 insertions, 178 deletions
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index 8bca1d5cec09..e8473eae2a20 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -16,6 +16,15 @@
16 </address> 16 </address>
17 </affiliation> 17 </affiliation>
18 </author> 18 </author>
19 <author>
20 <firstname>William</firstname>
21 <surname>Cohen</surname>
22 <affiliation>
23 <address>
24 <email>wcohen@redhat.com</email>
25 </address>
26 </affiliation>
27 </author>
19 </authorgroup> 28 </authorgroup>
20 29
21 <legalnotice> 30 <legalnotice>
@@ -91,4 +100,8 @@
91!Iinclude/trace/events/signal.h 100!Iinclude/trace/events/signal.h
92 </chapter> 101 </chapter>
93 102
103 <chapter id="block">
104 <title>Block IO</title>
105!Iinclude/trace/events/block.h
106 </chapter>
94</book> 107</book>
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6fab97ea7e6b..508b5b2b0289 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1162,8 +1162,8 @@ where a driver received a request ala this before:
1162 1162
1163As mentioned, there is no virtual mapping of a bio. For DMA, this is 1163As mentioned, there is no virtual mapping of a bio. For DMA, this is
1164not a problem as the driver probably never will need a virtual mapping. 1164not a problem as the driver probably never will need a virtual mapping.
1165Instead it needs a bus mapping (pci_map_page for a single segment or 1165Instead it needs a bus mapping (dma_map_page for a single segment or
1166use blk_rq_map_sg for scatter gather) to be able to ship it to the driver. For 1166use dma_map_sg for scatter gather) to be able to ship it to the driver. For
1167PIO drivers (or drivers that need to revert to PIO transfer once in a 1167PIO drivers (or drivers that need to revert to PIO transfer once in a
1168while (IDE for example)), where the CPU is doing the actual data 1168while (IDE for example)), where the CPU is doing the actual data
1169transfer a virtual mapping is needed. If the driver supports highmem I/O, 1169transfer a virtual mapping is needed. If the driver supports highmem I/O,
diff --git a/block/Kconfig b/block/Kconfig
index 62a5921321cd..f9e89f4d94bb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY
78 Protection. If in doubt, say N. 78 Protection. If in doubt, say N.
79 79
80config BLK_CGROUP 80config BLK_CGROUP
81 tristate 81 tristate "Block cgroup support"
82 depends on CGROUPS 82 depends on CGROUPS
83 depends on CFQ_GROUP_IOSCHED
83 default n 84 default n
84 ---help--- 85 ---help---
85 Generic block IO controller cgroup interface. This is the common 86 Generic block IO controller cgroup interface. This is the common
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d9a9db5f0a2b..f5ed5a1187ba 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -8,6 +8,7 @@
8#include <linux/blkdev.h> 8#include <linux/blkdev.h>
9#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ 9#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
10#include <linux/gcd.h> 10#include <linux/gcd.h>
11#include <linux/lcm.h>
11#include <linux/jiffies.h> 12#include <linux/jiffies.h>
12#include <linux/gfp.h> 13#include <linux/gfp.h>
13 14
@@ -462,16 +463,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
462} 463}
463EXPORT_SYMBOL(blk_queue_stack_limits); 464EXPORT_SYMBOL(blk_queue_stack_limits);
464 465
465static unsigned int lcm(unsigned int a, unsigned int b)
466{
467 if (a && b)
468 return (a * b) / gcd(a, b);
469 else if (b)
470 return b;
471
472 return a;
473}
474
475/** 466/**
476 * blk_stack_limits - adjust queue_limits for stacked devices 467 * blk_stack_limits - adjust queue_limits for stacked devices
477 * @t: the stacking driver limits (top device) 468 * @t: the stacking driver limits (top device)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index c2b821fa324a..306759bbdf1b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -107,6 +107,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
107 return queue_var_show(max_sectors_kb, (page)); 107 return queue_var_show(max_sectors_kb, (page));
108} 108}
109 109
110static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
111{
112 return queue_var_show(queue_max_segments(q), (page));
113}
114
115static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
116{
117 if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
118 return queue_var_show(queue_max_segment_size(q), (page));
119
120 return queue_var_show(PAGE_CACHE_SIZE, (page));
121}
122
110static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) 123static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
111{ 124{
112 return queue_var_show(queue_logical_block_size(q), page); 125 return queue_var_show(queue_logical_block_size(q), page);
@@ -281,6 +294,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
281 .show = queue_max_hw_sectors_show, 294 .show = queue_max_hw_sectors_show,
282}; 295};
283 296
297static struct queue_sysfs_entry queue_max_segments_entry = {
298 .attr = {.name = "max_segments", .mode = S_IRUGO },
299 .show = queue_max_segments_show,
300};
301
302static struct queue_sysfs_entry queue_max_segment_size_entry = {
303 .attr = {.name = "max_segment_size", .mode = S_IRUGO },
304 .show = queue_max_segment_size_show,
305};
306
284static struct queue_sysfs_entry queue_iosched_entry = { 307static struct queue_sysfs_entry queue_iosched_entry = {
285 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 308 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
286 .show = elv_iosched_show, 309 .show = elv_iosched_show,
@@ -356,6 +379,8 @@ static struct attribute *default_attrs[] = {
356 &queue_ra_entry.attr, 379 &queue_ra_entry.attr,
357 &queue_max_hw_sectors_entry.attr, 380 &queue_max_hw_sectors_entry.attr,
358 &queue_max_sectors_entry.attr, 381 &queue_max_sectors_entry.attr,
382 &queue_max_segments_entry.attr,
383 &queue_max_segment_size_entry.attr,
359 &queue_iosched_entry.attr, 384 &queue_iosched_entry.attr,
360 &queue_hw_sector_size_entry.attr, 385 &queue_hw_sector_size_entry.attr,
361 &queue_logical_block_size_entry.attr, 386 &queue_logical_block_size_entry.attr,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fc98a48554fd..838834be115b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -48,6 +48,7 @@ static const int cfq_hist_divisor = 4;
48#define CFQ_SERVICE_SHIFT 12 48#define CFQ_SERVICE_SHIFT 12
49 49
50#define CFQQ_SEEK_THR (sector_t)(8 * 100) 50#define CFQQ_SEEK_THR (sector_t)(8 * 100)
51#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
51#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) 52#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
52#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) 53#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
53 54
@@ -948,6 +949,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
948 unsigned int major, minor; 949 unsigned int major, minor;
949 950
950 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); 951 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
952 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
953 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
954 cfqg->blkg.dev = MKDEV(major, minor);
955 goto done;
956 }
951 if (cfqg || !create) 957 if (cfqg || !create)
952 goto done; 958 goto done;
953 959
@@ -1518,7 +1524,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
1518 struct cfq_queue *cfqq) 1524 struct cfq_queue *cfqq)
1519{ 1525{
1520 if (cfqq) { 1526 if (cfqq) {
1521 cfq_log_cfqq(cfqd, cfqq, "set_active"); 1527 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
1528 cfqd->serving_prio, cfqd->serving_type);
1522 cfqq->slice_start = 0; 1529 cfqq->slice_start = 0;
1523 cfqq->dispatch_start = jiffies; 1530 cfqq->dispatch_start = jiffies;
1524 cfqq->allocated_slice = 0; 1531 cfqq->allocated_slice = 0;
@@ -1661,9 +1668,9 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
1661} 1668}
1662 1669
1663static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1670static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1664 struct request *rq, bool for_preempt) 1671 struct request *rq)
1665{ 1672{
1666 return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR; 1673 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
1667} 1674}
1668 1675
1669static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, 1676static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
@@ -1690,7 +1697,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
1690 * will contain the closest sector. 1697 * will contain the closest sector.
1691 */ 1698 */
1692 __cfqq = rb_entry(parent, struct cfq_queue, p_node); 1699 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
1693 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) 1700 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1694 return __cfqq; 1701 return __cfqq;
1695 1702
1696 if (blk_rq_pos(__cfqq->next_rq) < sector) 1703 if (blk_rq_pos(__cfqq->next_rq) < sector)
@@ -1701,7 +1708,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
1701 return NULL; 1708 return NULL;
1702 1709
1703 __cfqq = rb_entry(node, struct cfq_queue, p_node); 1710 __cfqq = rb_entry(node, struct cfq_queue, p_node);
1704 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) 1711 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1705 return __cfqq; 1712 return __cfqq;
1706 1713
1707 return NULL; 1714 return NULL;
@@ -1722,6 +1729,8 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
1722{ 1729{
1723 struct cfq_queue *cfqq; 1730 struct cfq_queue *cfqq;
1724 1731
1732 if (cfq_class_idle(cur_cfqq))
1733 return NULL;
1725 if (!cfq_cfqq_sync(cur_cfqq)) 1734 if (!cfq_cfqq_sync(cur_cfqq))
1726 return NULL; 1735 return NULL;
1727 if (CFQQ_SEEKY(cur_cfqq)) 1736 if (CFQQ_SEEKY(cur_cfqq))
@@ -1788,7 +1797,11 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1788 * Otherwise, we do only if they are the last ones 1797 * Otherwise, we do only if they are the last ones
1789 * in their service tree. 1798 * in their service tree.
1790 */ 1799 */
1791 return service_tree->count == 1 && cfq_cfqq_sync(cfqq); 1800 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
1801 return 1;
1802 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1803 service_tree->count);
1804 return 0;
1792} 1805}
1793 1806
1794static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1807static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -1833,8 +1846,11 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
1833 * time slice. 1846 * time slice.
1834 */ 1847 */
1835 if (sample_valid(cic->ttime_samples) && 1848 if (sample_valid(cic->ttime_samples) &&
1836 (cfqq->slice_end - jiffies < cic->ttime_mean)) 1849 (cfqq->slice_end - jiffies < cic->ttime_mean)) {
1850 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
1851 cic->ttime_mean);
1837 return; 1852 return;
1853 }
1838 1854
1839 cfq_mark_cfqq_wait_request(cfqq); 1855 cfq_mark_cfqq_wait_request(cfqq);
1840 1856
@@ -2042,6 +2058,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2042 slice = max(slice, 2 * cfqd->cfq_slice_idle); 2058 slice = max(slice, 2 * cfqd->cfq_slice_idle);
2043 2059
2044 slice = max_t(unsigned, slice, CFQ_MIN_TT); 2060 slice = max_t(unsigned, slice, CFQ_MIN_TT);
2061 cfq_log(cfqd, "workload slice:%d", slice);
2045 cfqd->workload_expires = jiffies + slice; 2062 cfqd->workload_expires = jiffies + slice;
2046 cfqd->noidle_tree_requires_idle = false; 2063 cfqd->noidle_tree_requires_idle = false;
2047} 2064}
@@ -2189,10 +2206,13 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
2189 struct cfq_queue *cfqq; 2206 struct cfq_queue *cfqq;
2190 int dispatched = 0; 2207 int dispatched = 0;
2191 2208
2192 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) 2209 /* Expire the timeslice of the current active queue first */
2210 cfq_slice_expired(cfqd, 0);
2211 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
2212 __cfq_set_active_queue(cfqd, cfqq);
2193 dispatched += __cfq_forced_dispatch_cfqq(cfqq); 2213 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
2214 }
2194 2215
2195 cfq_slice_expired(cfqd, 0);
2196 BUG_ON(cfqd->busy_queues); 2216 BUG_ON(cfqd->busy_queues);
2197 2217
2198 cfq_log(cfqd, "forced_dispatch=%d", dispatched); 2218 cfq_log(cfqd, "forced_dispatch=%d", dispatched);
@@ -3104,7 +3124,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3104 * if this request is as-good as one we would expect from the 3124 * if this request is as-good as one we would expect from the
3105 * current cfqq, let it preempt 3125 * current cfqq, let it preempt
3106 */ 3126 */
3107 if (cfq_rq_close(cfqd, cfqq, rq, true)) 3127 if (cfq_rq_close(cfqd, cfqq, rq))
3108 return true; 3128 return true;
3109 3129
3110 return false; 3130 return false;
@@ -3308,6 +3328,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3308 if (cfq_should_wait_busy(cfqd, cfqq)) { 3328 if (cfq_should_wait_busy(cfqd, cfqq)) {
3309 cfqq->slice_end = jiffies + cfqd->cfq_slice_idle; 3329 cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
3310 cfq_mark_cfqq_wait_busy(cfqq); 3330 cfq_mark_cfqq_wait_busy(cfqq);
3331 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
3311 } 3332 }
3312 3333
3313 /* 3334 /*
diff --git a/block/elevator.c b/block/elevator.c
index df75676f6671..76e3702d5381 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -154,7 +154,7 @@ static struct elevator_type *elevator_get(const char *name)
154 154
155 spin_unlock(&elv_list_lock); 155 spin_unlock(&elv_list_lock);
156 156
157 sprintf(elv, "%s-iosched", name); 157 snprintf(elv, sizeof(elv), "%s-iosched", name);
158 158
159 request_module("%s", elv); 159 request_module("%s", elv);
160 spin_lock(&elv_list_lock); 160 spin_lock(&elv_list_lock);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 459f1bc25a7b..c5f22bb0a48e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2533,7 +2533,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
2533 Controller->RequestQueue[n] = RequestQueue; 2533 Controller->RequestQueue[n] = RequestQueue;
2534 blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit); 2534 blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
2535 RequestQueue->queuedata = Controller; 2535 RequestQueue->queuedata = Controller;
2536 blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
2537 blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); 2536 blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
2538 blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); 2537 blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
2539 disk->queue = RequestQueue; 2538 disk->queue = RequestQueue;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 17956ff6a08d..df018990c422 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -536,7 +536,9 @@ static void atodb_endio(struct bio *bio, int error)
536 put_ldev(mdev); 536 put_ldev(mdev);
537} 537}
538 538
539/* sector to word */
539#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) 540#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
541
540/* activity log to on disk bitmap -- prepare bio unless that sector 542/* activity log to on disk bitmap -- prepare bio unless that sector
541 * is already covered by previously prepared bios */ 543 * is already covered by previously prepared bios */
542static int atodb_prepare_unless_covered(struct drbd_conf *mdev, 544static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
@@ -546,13 +548,20 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
546{ 548{
547 struct bio *bio; 549 struct bio *bio;
548 struct page *page; 550 struct page *page;
549 sector_t on_disk_sector = enr + mdev->ldev->md.md_offset 551 sector_t on_disk_sector;
550 + mdev->ldev->md.bm_offset;
551 unsigned int page_offset = PAGE_SIZE; 552 unsigned int page_offset = PAGE_SIZE;
552 int offset; 553 int offset;
553 int i = 0; 554 int i = 0;
554 int err = -ENOMEM; 555 int err = -ENOMEM;
555 556
557 /* We always write aligned, full 4k blocks,
558 * so we can ignore the logical_block_size (for now) */
559 enr &= ~7U;
560 on_disk_sector = enr + mdev->ldev->md.md_offset
561 + mdev->ldev->md.bm_offset;
562
563 D_ASSERT(!(on_disk_sector & 7U));
564
556 /* Check if that enr is already covered by an already created bio. 565 /* Check if that enr is already covered by an already created bio.
557 * Caution, bios[] is not NULL terminated, 566 * Caution, bios[] is not NULL terminated,
558 * but only initialized to all NULL. 567 * but only initialized to all NULL.
@@ -588,7 +597,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
588 597
589 offset = S2W(enr); 598 offset = S2W(enr);
590 drbd_bm_get_lel(mdev, offset, 599 drbd_bm_get_lel(mdev, offset,
591 min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset), 600 min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
592 kmap(page) + page_offset); 601 kmap(page) + page_offset);
593 kunmap(page); 602 kunmap(page);
594 603
@@ -597,7 +606,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
597 bio->bi_bdev = mdev->ldev->md_bdev; 606 bio->bi_bdev = mdev->ldev->md_bdev;
598 bio->bi_sector = on_disk_sector; 607 bio->bi_sector = on_disk_sector;
599 608
600 if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE) 609 if (bio_add_page(bio, page, 4096, page_offset) != 4096)
601 goto out_put_page; 610 goto out_put_page;
602 611
603 atomic_inc(&wc->count); 612 atomic_inc(&wc->count);
@@ -1327,7 +1336,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
1327 /* ok, ->resync is there. */ 1336 /* ok, ->resync is there. */
1328 for (i = 0; i < mdev->resync->nr_elements; i++) { 1337 for (i = 0; i < mdev->resync->nr_elements; i++) {
1329 e = lc_element_by_index(mdev->resync, i); 1338 e = lc_element_by_index(mdev->resync, i);
1330 bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; 1339 bm_ext = lc_entry(e, struct bm_extent, lce);
1331 if (bm_ext->lce.lc_number == LC_FREE) 1340 if (bm_ext->lce.lc_number == LC_FREE)
1332 continue; 1341 continue;
1333 if (bm_ext->lce.lc_number == mdev->resync_wenr) { 1342 if (bm_ext->lce.lc_number == mdev->resync_wenr) {
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 3d6f3d988949..3390716898d5 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -67,7 +67,7 @@ struct drbd_bitmap {
67 size_t bm_words; 67 size_t bm_words;
68 size_t bm_number_of_pages; 68 size_t bm_number_of_pages;
69 sector_t bm_dev_capacity; 69 sector_t bm_dev_capacity;
70 struct semaphore bm_change; /* serializes resize operations */ 70 struct mutex bm_change; /* serializes resize operations */
71 71
72 atomic_t bm_async_io; 72 atomic_t bm_async_io;
73 wait_queue_head_t bm_io_wait; 73 wait_queue_head_t bm_io_wait;
@@ -115,7 +115,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
115 return; 115 return;
116 } 116 }
117 117
118 trylock_failed = down_trylock(&b->bm_change); 118 trylock_failed = !mutex_trylock(&b->bm_change);
119 119
120 if (trylock_failed) { 120 if (trylock_failed) {
121 dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", 121 dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
@@ -126,7 +126,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
126 b->bm_task == mdev->receiver.task ? "receiver" : 126 b->bm_task == mdev->receiver.task ? "receiver" :
127 b->bm_task == mdev->asender.task ? "asender" : 127 b->bm_task == mdev->asender.task ? "asender" :
128 b->bm_task == mdev->worker.task ? "worker" : "?"); 128 b->bm_task == mdev->worker.task ? "worker" : "?");
129 down(&b->bm_change); 129 mutex_lock(&b->bm_change);
130 } 130 }
131 if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) 131 if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
132 dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); 132 dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
@@ -148,7 +148,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
148 148
149 b->bm_why = NULL; 149 b->bm_why = NULL;
150 b->bm_task = NULL; 150 b->bm_task = NULL;
151 up(&b->bm_change); 151 mutex_unlock(&b->bm_change);
152} 152}
153 153
154/* word offset to long pointer */ 154/* word offset to long pointer */
@@ -296,7 +296,7 @@ int drbd_bm_init(struct drbd_conf *mdev)
296 if (!b) 296 if (!b)
297 return -ENOMEM; 297 return -ENOMEM;
298 spin_lock_init(&b->bm_lock); 298 spin_lock_init(&b->bm_lock);
299 init_MUTEX(&b->bm_change); 299 mutex_init(&b->bm_change);
300 init_waitqueue_head(&b->bm_io_wait); 300 init_waitqueue_head(&b->bm_io_wait);
301 301
302 mdev->bitmap = b; 302 mdev->bitmap = b;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d9301e861d9f..e5e86a781820 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -261,6 +261,9 @@ static inline const char *cmdname(enum drbd_packets cmd)
261 [P_OV_REQUEST] = "OVRequest", 261 [P_OV_REQUEST] = "OVRequest",
262 [P_OV_REPLY] = "OVReply", 262 [P_OV_REPLY] = "OVReply",
263 [P_OV_RESULT] = "OVResult", 263 [P_OV_RESULT] = "OVResult",
264 [P_CSUM_RS_REQUEST] = "CsumRSRequest",
265 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
266 [P_COMPRESSED_BITMAP] = "CBitmap",
264 [P_MAX_CMD] = NULL, 267 [P_MAX_CMD] = NULL,
265 }; 268 };
266 269
@@ -443,13 +446,18 @@ struct p_rs_param_89 {
443 char csums_alg[SHARED_SECRET_MAX]; 446 char csums_alg[SHARED_SECRET_MAX];
444} __packed; 447} __packed;
445 448
449enum drbd_conn_flags {
450 CF_WANT_LOSE = 1,
451 CF_DRY_RUN = 2,
452};
453
446struct p_protocol { 454struct p_protocol {
447 struct p_header head; 455 struct p_header head;
448 u32 protocol; 456 u32 protocol;
449 u32 after_sb_0p; 457 u32 after_sb_0p;
450 u32 after_sb_1p; 458 u32 after_sb_1p;
451 u32 after_sb_2p; 459 u32 after_sb_2p;
452 u32 want_lose; 460 u32 conn_flags;
453 u32 two_primaries; 461 u32 two_primaries;
454 462
455 /* Since protocol version 87 and higher. */ 463 /* Since protocol version 87 and higher. */
@@ -791,6 +799,8 @@ enum {
791 * while this is set. */ 799 * while this is set. */
792 RESIZE_PENDING, /* Size change detected locally, waiting for the response from 800 RESIZE_PENDING, /* Size change detected locally, waiting for the response from
793 * the peer, if it changed there as well. */ 801 * the peer, if it changed there as well. */
802 CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
803 GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
794}; 804};
795 805
796struct drbd_bitmap; /* opaque for drbd_conf */ 806struct drbd_bitmap; /* opaque for drbd_conf */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ab871e00ffc5..67e0fc542249 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
1668int drbd_send_protocol(struct drbd_conf *mdev) 1668int drbd_send_protocol(struct drbd_conf *mdev)
1669{ 1669{
1670 struct p_protocol *p; 1670 struct p_protocol *p;
1671 int size, rv; 1671 int size, cf, rv;
1672 1672
1673 size = sizeof(struct p_protocol); 1673 size = sizeof(struct p_protocol);
1674 1674
@@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev)
1685 p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); 1685 p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
1686 p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); 1686 p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
1687 p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); 1687 p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
1688 p->want_lose = cpu_to_be32(mdev->net_conf->want_lose);
1689 p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); 1688 p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
1690 1689
1690 cf = 0;
1691 if (mdev->net_conf->want_lose)
1692 cf |= CF_WANT_LOSE;
1693 if (mdev->net_conf->dry_run) {
1694 if (mdev->agreed_pro_version >= 92)
1695 cf |= CF_DRY_RUN;
1696 else {
1697 dev_err(DEV, "--dry-run is not supported by peer");
1698 return 0;
1699 }
1700 }
1701 p->conn_flags = cpu_to_be32(cf);
1702
1691 if (mdev->agreed_pro_version >= 87) 1703 if (mdev->agreed_pro_version >= 87)
1692 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); 1704 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
1693 1705
@@ -3161,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
3161void drbd_free_sock(struct drbd_conf *mdev) 3173void drbd_free_sock(struct drbd_conf *mdev)
3162{ 3174{
3163 if (mdev->data.socket) { 3175 if (mdev->data.socket) {
3176 mutex_lock(&mdev->data.mutex);
3164 kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); 3177 kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
3165 sock_release(mdev->data.socket); 3178 sock_release(mdev->data.socket);
3166 mdev->data.socket = NULL; 3179 mdev->data.socket = NULL;
3180 mutex_unlock(&mdev->data.mutex);
3167 } 3181 }
3168 if (mdev->meta.socket) { 3182 if (mdev->meta.socket) {
3183 mutex_lock(&mdev->meta.mutex);
3169 kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); 3184 kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
3170 sock_release(mdev->meta.socket); 3185 sock_release(mdev->meta.socket);
3171 mdev->meta.socket = NULL; 3186 mdev->meta.socket = NULL;
3187 mutex_unlock(&mdev->meta.mutex);
3172 } 3188 }
3173} 3189}
3174 3190
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 4df3b40b1057..6429d2b19e06 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -285,8 +285,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
285 } 285 }
286 286
287 if (r == SS_NO_UP_TO_DATE_DISK && force && 287 if (r == SS_NO_UP_TO_DATE_DISK && force &&
288 (mdev->state.disk == D_INCONSISTENT || 288 (mdev->state.disk < D_UP_TO_DATE &&
289 mdev->state.disk == D_OUTDATED)) { 289 mdev->state.disk >= D_INCONSISTENT)) {
290 mask.disk = D_MASK; 290 mask.disk = D_MASK;
291 val.disk = D_UP_TO_DATE; 291 val.disk = D_UP_TO_DATE;
292 forced = 1; 292 forced = 1;
@@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
407 } 407 }
408 408
409 reply->ret_code = 409 reply->ret_code =
410 drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); 410 drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
411 411
412 return 0; 412 return 0;
413} 413}
@@ -941,6 +941,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
941 941
942 drbd_md_set_sector_offsets(mdev, nbc); 942 drbd_md_set_sector_offsets(mdev, nbc);
943 943
944 /* allocate a second IO page if logical_block_size != 512 */
945 logical_block_size = bdev_logical_block_size(nbc->md_bdev);
946 if (logical_block_size == 0)
947 logical_block_size = MD_SECTOR_SIZE;
948
949 if (logical_block_size != MD_SECTOR_SIZE) {
950 if (!mdev->md_io_tmpp) {
951 struct page *page = alloc_page(GFP_NOIO);
952 if (!page)
953 goto force_diskless_dec;
954
955 dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
956 logical_block_size, MD_SECTOR_SIZE);
957 dev_warn(DEV, "Workaround engaged (has performance impact).\n");
958
959 mdev->md_io_tmpp = page;
960 }
961 }
962
944 if (!mdev->bitmap) { 963 if (!mdev->bitmap) {
945 if (drbd_bm_init(mdev)) { 964 if (drbd_bm_init(mdev)) {
946 retcode = ERR_NOMEM; 965 retcode = ERR_NOMEM;
@@ -980,25 +999,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
980 goto force_diskless_dec; 999 goto force_diskless_dec;
981 } 1000 }
982 1001
983 /* allocate a second IO page if logical_block_size != 512 */
984 logical_block_size = bdev_logical_block_size(nbc->md_bdev);
985 if (logical_block_size == 0)
986 logical_block_size = MD_SECTOR_SIZE;
987
988 if (logical_block_size != MD_SECTOR_SIZE) {
989 if (!mdev->md_io_tmpp) {
990 struct page *page = alloc_page(GFP_NOIO);
991 if (!page)
992 goto force_diskless_dec;
993
994 dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
995 logical_block_size, MD_SECTOR_SIZE);
996 dev_warn(DEV, "Workaround engaged (has performance impact).\n");
997
998 mdev->md_io_tmpp = page;
999 }
1000 }
1001
1002 /* Reset the "barriers don't work" bits here, then force meta data to 1002 /* Reset the "barriers don't work" bits here, then force meta data to
1003 * be written, to ensure we determine if barriers are supported. */ 1003 * be written, to ensure we determine if barriers are supported. */
1004 if (nbc->dc.no_md_flush) 1004 if (nbc->dc.no_md_flush)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d065c646b35a..ed9f1de24a71 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2513 } 2513 }
2514 2514
2515 if (hg == -100) { 2515 if (hg == -100) {
2516 /* FIXME this log message is not correct if we end up here
2517 * after an attempted attach on a diskless node.
2518 * We just refuse to attach -- well, we drop the "connection"
2519 * to that disk, in a way... */
2516 dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); 2520 dev_alert(DEV, "Split-Brain detected, dropping connection!\n");
2517 drbd_khelper(mdev, "split-brain"); 2521 drbd_khelper(mdev, "split-brain");
2518 return C_MASK; 2522 return C_MASK;
@@ -2538,6 +2542,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2538 } 2542 }
2539 } 2543 }
2540 2544
2545 if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
2546 if (hg == 0)
2547 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2548 else
2549 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2550 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2551 abs(hg) >= 2 ? "full" : "bit-map based");
2552 return C_MASK;
2553 }
2554
2541 if (abs(hg) >= 2) { 2555 if (abs(hg) >= 2) {
2542 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); 2556 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
2543 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) 2557 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
@@ -2585,7 +2599,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
2585 struct p_protocol *p = (struct p_protocol *)h; 2599 struct p_protocol *p = (struct p_protocol *)h;
2586 int header_size, data_size; 2600 int header_size, data_size;
2587 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; 2601 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
2588 int p_want_lose, p_two_primaries; 2602 int p_want_lose, p_two_primaries, cf;
2589 char p_integrity_alg[SHARED_SECRET_MAX] = ""; 2603 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2590 2604
2591 header_size = sizeof(*p) - sizeof(*h); 2605 header_size = sizeof(*p) - sizeof(*h);
@@ -2598,8 +2612,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
2598 p_after_sb_0p = be32_to_cpu(p->after_sb_0p); 2612 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2599 p_after_sb_1p = be32_to_cpu(p->after_sb_1p); 2613 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2600 p_after_sb_2p = be32_to_cpu(p->after_sb_2p); 2614 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
2601 p_want_lose = be32_to_cpu(p->want_lose);
2602 p_two_primaries = be32_to_cpu(p->two_primaries); 2615 p_two_primaries = be32_to_cpu(p->two_primaries);
2616 cf = be32_to_cpu(p->conn_flags);
2617 p_want_lose = cf & CF_WANT_LOSE;
2618
2619 clear_bit(CONN_DRY_RUN, &mdev->flags);
2620
2621 if (cf & CF_DRY_RUN)
2622 set_bit(CONN_DRY_RUN, &mdev->flags);
2603 2623
2604 if (p_proto != mdev->net_conf->wire_protocol) { 2624 if (p_proto != mdev->net_conf->wire_protocol) {
2605 dev_err(DEV, "incompatible communication protocols\n"); 2625 dev_err(DEV, "incompatible communication protocols\n");
@@ -3118,13 +3138,16 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
3118 3138
3119 put_ldev(mdev); 3139 put_ldev(mdev);
3120 if (nconn == C_MASK) { 3140 if (nconn == C_MASK) {
3141 nconn = C_CONNECTED;
3121 if (mdev->state.disk == D_NEGOTIATING) { 3142 if (mdev->state.disk == D_NEGOTIATING) {
3122 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 3143 drbd_force_state(mdev, NS(disk, D_DISKLESS));
3123 nconn = C_CONNECTED;
3124 } else if (peer_state.disk == D_NEGOTIATING) { 3144 } else if (peer_state.disk == D_NEGOTIATING) {
3125 dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); 3145 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3126 peer_state.disk = D_DISKLESS; 3146 peer_state.disk = D_DISKLESS;
3147 real_peer_disk = D_DISKLESS;
3127 } else { 3148 } else {
3149 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
3150 return FALSE;
3128 D_ASSERT(oconn == C_WF_REPORT_PARAMS); 3151 D_ASSERT(oconn == C_WF_REPORT_PARAMS);
3129 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3152 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3130 return FALSE; 3153 return FALSE;
@@ -3594,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3594 3617
3595 /* asender does not clean up anything. it must not interfere, either */ 3618 /* asender does not clean up anything. it must not interfere, either */
3596 drbd_thread_stop(&mdev->asender); 3619 drbd_thread_stop(&mdev->asender);
3597
3598 mutex_lock(&mdev->data.mutex);
3599 drbd_free_sock(mdev); 3620 drbd_free_sock(mdev);
3600 mutex_unlock(&mdev->data.mutex);
3601 3621
3602 spin_lock_irq(&mdev->req_lock); 3622 spin_lock_irq(&mdev->req_lock);
3603 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 3623 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
@@ -4054,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
4054{ 4074{
4055 /* restore idle timeout */ 4075 /* restore idle timeout */
4056 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; 4076 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
4077 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4078 wake_up(&mdev->misc_wait);
4057 4079
4058 return TRUE; 4080 return TRUE;
4059} 4081}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b453c2bca3be..44bf6d11197e 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -938,7 +938,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
938 938
939 if (eq) { 939 if (eq) {
940 drbd_set_in_sync(mdev, e->sector, e->size); 940 drbd_set_in_sync(mdev, e->sector, e->size);
941 mdev->rs_same_csum++; 941 /* rs_same_csums unit is BM_BLOCK_SIZE */
942 mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
942 ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); 943 ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
943 } else { 944 } else {
944 inc_rs_pending(mdev); 945 inc_rs_pending(mdev);
@@ -1288,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
1288 return retcode; 1289 return retcode;
1289} 1290}
1290 1291
1292static void ping_peer(struct drbd_conf *mdev)
1293{
1294 clear_bit(GOT_PING_ACK, &mdev->flags);
1295 request_ping(mdev);
1296 wait_event(mdev->misc_wait,
1297 test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
1298}
1299
1291/** 1300/**
1292 * drbd_start_resync() - Start the resync process 1301 * drbd_start_resync() - Start the resync process
1293 * @mdev: DRBD device. 1302 * @mdev: DRBD device.
@@ -1371,7 +1380,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1371 _drbd_pause_after(mdev); 1380 _drbd_pause_after(mdev);
1372 } 1381 }
1373 write_unlock_irq(&global_state_lock); 1382 write_unlock_irq(&global_state_lock);
1374 drbd_state_unlock(mdev);
1375 put_ldev(mdev); 1383 put_ldev(mdev);
1376 1384
1377 if (r == SS_SUCCESS) { 1385 if (r == SS_SUCCESS) {
@@ -1382,11 +1390,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1382 1390
1383 if (mdev->rs_total == 0) { 1391 if (mdev->rs_total == 0) {
1384 /* Peer still reachable? Beware of failing before-resync-target handlers! */ 1392 /* Peer still reachable? Beware of failing before-resync-target handlers! */
1385 request_ping(mdev); 1393 ping_peer(mdev);
1386 __set_current_state(TASK_INTERRUPTIBLE);
1387 schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */
1388 drbd_resync_finished(mdev); 1394 drbd_resync_finished(mdev);
1389 return;
1390 } 1395 }
1391 1396
1392 /* ns.conn may already be != mdev->state.conn, 1397 /* ns.conn may already be != mdev->state.conn,
@@ -1398,6 +1403,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1398 1403
1399 drbd_md_sync(mdev); 1404 drbd_md_sync(mdev);
1400 } 1405 }
1406 drbd_state_unlock(mdev);
1401} 1407}
1402 1408
1403int drbd_worker(struct drbd_thread *thi) 1409int drbd_worker(struct drbd_thread *thi)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cb69929d917a..8546d123b9a7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -237,6 +237,8 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
237 if (ret) 237 if (ret)
238 goto fail; 238 goto fail;
239 239
240 file_update_time(file);
241
240 transfer_result = lo_do_transfer(lo, WRITE, page, offset, 242 transfer_result = lo_do_transfer(lo, WRITE, page, offset,
241 bvec->bv_page, bv_offs, size, IV); 243 bvec->bv_page, bv_offs, size, IV);
242 copied = size; 244 copied = size;
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 8866ca369d5e..71acf4e53356 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -341,11 +341,11 @@ static int pcd_wait(struct pcd_unit *cd, int go, int stop, char *fun, char *msg)
341 && (j++ < PCD_SPIN)) 341 && (j++ < PCD_SPIN))
342 udelay(PCD_DELAY); 342 udelay(PCD_DELAY);
343 343
344 if ((r & (IDE_ERR & stop)) || (j >= PCD_SPIN)) { 344 if ((r & (IDE_ERR & stop)) || (j > PCD_SPIN)) {
345 s = read_reg(cd, 7); 345 s = read_reg(cd, 7);
346 e = read_reg(cd, 1); 346 e = read_reg(cd, 1);
347 p = read_reg(cd, 2); 347 p = read_reg(cd, 2);
348 if (j >= PCD_SPIN) 348 if (j > PCD_SPIN)
349 e |= 0x100; 349 e |= 0x100;
350 if (fun) 350 if (fun)
351 printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x" 351 printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index ddb4f9abd480..c059aab3006b 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -391,11 +391,11 @@ static int pf_wait(struct pf_unit *pf, int go, int stop, char *fun, char *msg)
391 && (j++ < PF_SPIN)) 391 && (j++ < PF_SPIN))
392 udelay(PF_SPIN_DEL); 392 udelay(PF_SPIN_DEL);
393 393
394 if ((r & (STAT_ERR & stop)) || (j >= PF_SPIN)) { 394 if ((r & (STAT_ERR & stop)) || (j > PF_SPIN)) {
395 s = read_reg(pf, 7); 395 s = read_reg(pf, 7);
396 e = read_reg(pf, 1); 396 e = read_reg(pf, 1);
397 p = read_reg(pf, 2); 397 p = read_reg(pf, 2);
398 if (j >= PF_SPIN) 398 if (j > PF_SPIN)
399 e |= 0x100; 399 e |= 0x100;
400 if (fun) 400 if (fun)
401 printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x" 401 printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 1e4006e18f03..bc5825fdeaab 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -274,11 +274,11 @@ static int pt_wait(struct pt_unit *tape, int go, int stop, char *fun, char *msg)
274 && (j++ < PT_SPIN)) 274 && (j++ < PT_SPIN))
275 udelay(PT_SPIN_DEL); 275 udelay(PT_SPIN_DEL);
276 276
277 if ((r & (STAT_ERR & stop)) || (j >= PT_SPIN)) { 277 if ((r & (STAT_ERR & stop)) || (j > PT_SPIN)) {
278 s = read_reg(pi, 7); 278 s = read_reg(pi, 7);
279 e = read_reg(pi, 1); 279 e = read_reg(pi, 1);
280 p = read_reg(pi, 2); 280 p = read_reg(pi, 2);
281 if (j >= PT_SPIN) 281 if (j > PT_SPIN)
282 e |= 0x100; 282 e |= 0x100;
283 if (fun) 283 if (fun)
284 printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x" 284 printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4b12b820c9a6..2138a7ae050c 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -348,14 +348,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
348 set_capacity(vblk->disk, cap); 348 set_capacity(vblk->disk, cap);
349 349
350 /* We can handle whatever the host told us to handle. */ 350 /* We can handle whatever the host told us to handle. */
351 blk_queue_max_phys_segments(q, vblk->sg_elems-2); 351 blk_queue_max_segments(q, vblk->sg_elems-2);
352 blk_queue_max_hw_segments(q, vblk->sg_elems-2);
353 352
354 /* No need to bounce any requests */ 353 /* No need to bounce any requests */
355 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); 354 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
356 355
357 /* No real sector limit. */ 356 /* No real sector limit. */
358 blk_queue_max_sectors(q, -1U); 357 blk_queue_max_hw_sectors(q, -1U);
359 358
360 /* Host can optionally specify maximum segment size and number of 359 /* Host can optionally specify maximum segment size and number of
361 * segments. */ 360 * segments. */
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 58c62ff42ab3..8b827f37b03e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2186,7 +2186,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
2186 blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); 2186 blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
2187 2187
2188 gd->driverfs_dev = &sdp->sdev_gendev; 2188 gd->driverfs_dev = &sdp->sdev_gendev;
2189 gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; 2189 gd->flags = GENHD_FL_EXT_DEVT;
2190 if (sdp->removable) 2190 if (sdp->removable)
2191 gd->flags |= GENHD_FL_REMOVABLE; 2191 gd->flags |= GENHD_FL_REMOVABLE;
2192 2192
diff --git a/fs/bio.c b/fs/bio.c
index e1f922184b45..e7bf6ca64dcf 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -554,7 +554,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
554 .bi_rw = bio->bi_rw, 554 .bi_rw = bio->bi_rw,
555 }; 555 };
556 556
557 if (q->merge_bvec_fn(q, &bvm, prev) < len) { 557 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
558 prev->bv_len -= len; 558 prev->bv_len -= len;
559 return 0; 559 return 0;
560 } 560 }
@@ -607,7 +607,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
607 * merge_bvec_fn() returns number of bytes it can accept 607 * merge_bvec_fn() returns number of bytes it can accept
608 * at this offset 608 * at this offset
609 */ 609 */
610 if (q->merge_bvec_fn(q, &bvm, bvec) < len) { 610 if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
611 bvec->bv_page = NULL; 611 bvec->bv_page = NULL;
612 bvec->bv_len = 0; 612 bvec->bv_len = 0;
613 bvec->bv_offset = 0; 613 bvec->bv_offset = 0;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 781a322ccb45..4b37f7cea4dd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -554,108 +554,85 @@ select_queue:
554 return ret; 554 return ret;
555} 555}
556 556
557static void unpin_sb_for_writeback(struct super_block **psb) 557static void unpin_sb_for_writeback(struct super_block *sb)
558{ 558{
559 struct super_block *sb = *psb; 559 up_read(&sb->s_umount);
560 560 put_super(sb);
561 if (sb) {
562 up_read(&sb->s_umount);
563 put_super(sb);
564 *psb = NULL;
565 }
566} 561}
567 562
563enum sb_pin_state {
564 SB_PINNED,
565 SB_NOT_PINNED,
566 SB_PIN_FAILED
567};
568
568/* 569/*
569 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned 570 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
570 * before calling writeback. So make sure that we do pin it, so it doesn't 571 * before calling writeback. So make sure that we do pin it, so it doesn't
571 * go away while we are writing inodes from it. 572 * go away while we are writing inodes from it.
572 *
573 * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
574 * 1 if we failed.
575 */ 573 */
576static int pin_sb_for_writeback(struct writeback_control *wbc, 574static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
577 struct inode *inode, struct super_block **psb) 575 struct super_block *sb)
578{ 576{
579 struct super_block *sb = inode->i_sb;
580
581 /*
582 * If this sb is already pinned, nothing more to do. If not and
583 * *psb is non-NULL, unpin the old one first
584 */
585 if (sb == *psb)
586 return 0;
587 else if (*psb)
588 unpin_sb_for_writeback(psb);
589
590 /* 577 /*
591 * Caller must already hold the ref for this 578 * Caller must already hold the ref for this
592 */ 579 */
593 if (wbc->sync_mode == WB_SYNC_ALL) { 580 if (wbc->sync_mode == WB_SYNC_ALL) {
594 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 581 WARN_ON(!rwsem_is_locked(&sb->s_umount));
595 return 0; 582 return SB_NOT_PINNED;
596 } 583 }
597
598 spin_lock(&sb_lock); 584 spin_lock(&sb_lock);
599 sb->s_count++; 585 sb->s_count++;
600 if (down_read_trylock(&sb->s_umount)) { 586 if (down_read_trylock(&sb->s_umount)) {
601 if (sb->s_root) { 587 if (sb->s_root) {
602 spin_unlock(&sb_lock); 588 spin_unlock(&sb_lock);
603 goto pinned; 589 return SB_PINNED;
604 } 590 }
605 /* 591 /*
606 * umounted, drop rwsem again and fall through to failure 592 * umounted, drop rwsem again and fall through to failure
607 */ 593 */
608 up_read(&sb->s_umount); 594 up_read(&sb->s_umount);
609 } 595 }
610
611 sb->s_count--; 596 sb->s_count--;
612 spin_unlock(&sb_lock); 597 spin_unlock(&sb_lock);
613 return 1; 598 return SB_PIN_FAILED;
614pinned:
615 *psb = sb;
616 return 0;
617} 599}
618 600
619static void writeback_inodes_wb(struct bdi_writeback *wb, 601/*
620 struct writeback_control *wbc) 602 * Write a portion of b_io inodes which belong to @sb.
603 * If @wbc->sb != NULL, then find and write all such
604 * inodes. Otherwise write only ones which go sequentially
605 * in reverse order.
606 * Return 1, if the caller writeback routine should be
607 * interrupted. Otherwise return 0.
608 */
609static int writeback_sb_inodes(struct super_block *sb,
610 struct bdi_writeback *wb,
611 struct writeback_control *wbc)
621{ 612{
622 struct super_block *sb = wbc->sb, *pin_sb = NULL;
623 const unsigned long start = jiffies; /* livelock avoidance */
624
625 spin_lock(&inode_lock);
626
627 if (!wbc->for_kupdate || list_empty(&wb->b_io))
628 queue_io(wb, wbc->older_than_this);
629
630 while (!list_empty(&wb->b_io)) { 613 while (!list_empty(&wb->b_io)) {
631 struct inode *inode = list_entry(wb->b_io.prev,
632 struct inode, i_list);
633 long pages_skipped; 614 long pages_skipped;
634 615 struct inode *inode = list_entry(wb->b_io.prev,
635 /* 616 struct inode, i_list);
636 * super block given and doesn't match, skip this inode 617 if (wbc->sb && sb != inode->i_sb) {
637 */ 618 /* super block given and doesn't
638 if (sb && sb != inode->i_sb) { 619 match, skip this inode */
639 redirty_tail(inode); 620 redirty_tail(inode);
640 continue; 621 continue;
641 } 622 }
642 623 if (sb != inode->i_sb)
624 /* finish with this superblock */
625 return 0;
643 if (inode->i_state & (I_NEW | I_WILL_FREE)) { 626 if (inode->i_state & (I_NEW | I_WILL_FREE)) {
644 requeue_io(inode); 627 requeue_io(inode);
645 continue; 628 continue;
646 } 629 }
647
648 /* 630 /*
649 * Was this inode dirtied after sync_sb_inodes was called? 631 * Was this inode dirtied after sync_sb_inodes was called?
650 * This keeps sync from extra jobs and livelock. 632 * This keeps sync from extra jobs and livelock.
651 */ 633 */
652 if (inode_dirtied_after(inode, start)) 634 if (inode_dirtied_after(inode, wbc->wb_start))
653 break; 635 return 1;
654
655 if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
656 requeue_io(inode);
657 continue;
658 }
659 636
660 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 637 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
661 __iget(inode); 638 __iget(inode);
@@ -674,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
674 spin_lock(&inode_lock); 651 spin_lock(&inode_lock);
675 if (wbc->nr_to_write <= 0) { 652 if (wbc->nr_to_write <= 0) {
676 wbc->more_io = 1; 653 wbc->more_io = 1;
677 break; 654 return 1;
678 } 655 }
679 if (!list_empty(&wb->b_more_io)) 656 if (!list_empty(&wb->b_more_io))
680 wbc->more_io = 1; 657 wbc->more_io = 1;
681 } 658 }
659 /* b_io is empty */
660 return 1;
661}
662
663static void writeback_inodes_wb(struct bdi_writeback *wb,
664 struct writeback_control *wbc)
665{
666 int ret = 0;
682 667
683 unpin_sb_for_writeback(&pin_sb); 668 wbc->wb_start = jiffies; /* livelock avoidance */
669 spin_lock(&inode_lock);
670 if (!wbc->for_kupdate || list_empty(&wb->b_io))
671 queue_io(wb, wbc->older_than_this);
672
673 while (!list_empty(&wb->b_io)) {
674 struct inode *inode = list_entry(wb->b_io.prev,
675 struct inode, i_list);
676 struct super_block *sb = inode->i_sb;
677 enum sb_pin_state state;
678
679 if (wbc->sb && sb != wbc->sb) {
680 /* super block given and doesn't
681 match, skip this inode */
682 redirty_tail(inode);
683 continue;
684 }
685 state = pin_sb_for_writeback(wbc, sb);
686
687 if (state == SB_PIN_FAILED) {
688 requeue_io(inode);
689 continue;
690 }
691 ret = writeback_sb_inodes(sb, wb, wbc);
684 692
693 if (state == SB_PINNED)
694 unpin_sb_for_writeback(sb);
695 if (ret)
696 break;
697 }
685 spin_unlock(&inode_lock); 698 spin_unlock(&inode_lock);
686 /* Leave any unwritten inodes on b_io */ 699 /* Leave any unwritten inodes on b_io */
687} 700}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebd22dbed861..6690e8bae7bb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -158,7 +158,6 @@ enum rq_flag_bits {
158struct request { 158struct request {
159 struct list_head queuelist; 159 struct list_head queuelist;
160 struct call_single_data csd; 160 struct call_single_data csd;
161 int cpu;
162 161
163 struct request_queue *q; 162 struct request_queue *q;
164 163
@@ -166,9 +165,11 @@ struct request {
166 enum rq_cmd_type_bits cmd_type; 165 enum rq_cmd_type_bits cmd_type;
167 unsigned long atomic_flags; 166 unsigned long atomic_flags;
168 167
168 int cpu;
169
169 /* the following two fields are internal, NEVER access directly */ 170 /* the following two fields are internal, NEVER access directly */
170 sector_t __sector; /* sector cursor */
171 unsigned int __data_len; /* total data len */ 171 unsigned int __data_len; /* total data len */
172 sector_t __sector; /* sector cursor */
172 173
173 struct bio *bio; 174 struct bio *bio;
174 struct bio *biotail; 175 struct bio *biotail;
@@ -201,20 +202,20 @@ struct request {
201 202
202 unsigned short ioprio; 203 unsigned short ioprio;
203 204
205 int ref_count;
206
204 void *special; /* opaque pointer available for LLD use */ 207 void *special; /* opaque pointer available for LLD use */
205 char *buffer; /* kaddr of the current segment if available */ 208 char *buffer; /* kaddr of the current segment if available */
206 209
207 int tag; 210 int tag;
208 int errors; 211 int errors;
209 212
210 int ref_count;
211
212 /* 213 /*
213 * when request is used as a packet command carrier 214 * when request is used as a packet command carrier
214 */ 215 */
215 unsigned short cmd_len;
216 unsigned char __cmd[BLK_MAX_CDB]; 216 unsigned char __cmd[BLK_MAX_CDB];
217 unsigned char *cmd; 217 unsigned char *cmd;
218 unsigned short cmd_len;
218 219
219 unsigned int extra_len; /* length of alignment and padding */ 220 unsigned int extra_len; /* length of alignment and padding */
220 unsigned int sense_len; 221 unsigned int sense_len;
@@ -921,26 +922,7 @@ extern void blk_cleanup_queue(struct request_queue *);
921extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 922extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
922extern void blk_queue_bounce_limit(struct request_queue *, u64); 923extern void blk_queue_bounce_limit(struct request_queue *, u64);
923extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); 924extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
924
925/* Temporary compatibility wrapper */
926static inline void blk_queue_max_sectors(struct request_queue *q, unsigned int max)
927{
928 blk_queue_max_hw_sectors(q, max);
929}
930
931extern void blk_queue_max_segments(struct request_queue *, unsigned short); 925extern void blk_queue_max_segments(struct request_queue *, unsigned short);
932
933static inline void blk_queue_max_phys_segments(struct request_queue *q, unsigned short max)
934{
935 blk_queue_max_segments(q, max);
936}
937
938static inline void blk_queue_max_hw_segments(struct request_queue *q, unsigned short max)
939{
940 blk_queue_max_segments(q, max);
941}
942
943
944extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 926extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
945extern void blk_queue_max_discard_sectors(struct request_queue *q, 927extern void blk_queue_max_discard_sectors(struct request_queue *q,
946 unsigned int max_discard_sectors); 928 unsigned int max_discard_sectors);
@@ -1030,11 +1012,6 @@ static inline int sb_issue_discard(struct super_block *sb,
1030 1012
1031extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 1013extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
1032 1014
1033#define MAX_PHYS_SEGMENTS 128
1034#define MAX_HW_SEGMENTS 128
1035#define SAFE_MAX_SECTORS 255
1036#define MAX_SEGMENT_SIZE 65536
1037
1038enum blk_default_limits { 1015enum blk_default_limits {
1039 BLK_MAX_SEGMENTS = 128, 1016 BLK_MAX_SEGMENTS = 128,
1040 BLK_SAFE_MAX_SECTORS = 255, 1017 BLK_SAFE_MAX_SECTORS = 255,
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 78962272338a..4341b1a97a34 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
56#define REL_VERSION "8.3.7" 56#define REL_VERSION "8.3.7"
57#define API_VERSION 88 57#define API_VERSION 88
58#define PRO_VERSION_MIN 86 58#define PRO_VERSION_MIN 86
59#define PRO_VERSION_MAX 91 59#define PRO_VERSION_MAX 92
60 60
61 61
62enum drbd_io_error_p { 62enum drbd_io_error_p {
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index a4d82f895994..f7431a4ca608 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -12,7 +12,7 @@
12#endif 12#endif
13 13
14NL_PACKET(primary, 1, 14NL_PACKET(primary, 1,
15 NL_BIT( 1, T_MAY_IGNORE, overwrite_peer) 15 NL_BIT( 1, T_MAY_IGNORE, primary_force)
16) 16)
17 17
18NL_PACKET(secondary, 2, ) 18NL_PACKET(secondary, 2, )
@@ -63,6 +63,7 @@ NL_PACKET(net_conf, 5,
63 NL_BIT( 41, T_MAY_IGNORE, always_asbp) 63 NL_BIT( 41, T_MAY_IGNORE, always_asbp)
64 NL_BIT( 61, T_MAY_IGNORE, no_cork) 64 NL_BIT( 61, T_MAY_IGNORE, no_cork)
65 NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) 65 NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
66 NL_BIT( 70, T_MANDATORY, dry_run)
66) 67)
67 68
68NL_PACKET(disconnect, 6, ) 69NL_PACKET(disconnect, 6, )
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 56b50514ab25..5f2f4c4d8fb0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -109,7 +109,7 @@ struct hd_struct {
109}; 109};
110 110
111#define GENHD_FL_REMOVABLE 1 111#define GENHD_FL_REMOVABLE 1
112#define GENHD_FL_DRIVERFS 2 112/* 2 is unused */
113#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4 113#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4
114#define GENHD_FL_CD 8 114#define GENHD_FL_CD 8
115#define GENHD_FL_UP 16 115#define GENHD_FL_UP 16
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 87018dc5527d..9e7a12d6385d 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -782,7 +782,6 @@ extern int i2o_exec_lct_get(struct i2o_controller *);
782#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver) 782#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
783#define to_i2o_device(dev) container_of(dev, struct i2o_device, device) 783#define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
784#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device) 784#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device)
785#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj))
786 785
787/** 786/**
788 * i2o_out_to_virt - Turn an I2O message to a virtual address 787 * i2o_out_to_virt - Turn an I2O message to a virtual address
diff --git a/include/linux/lcm.h b/include/linux/lcm.h
new file mode 100644
index 000000000000..7bf01d779b45
--- /dev/null
+++ b/include/linux/lcm.h
@@ -0,0 +1,8 @@
1#ifndef _LCM_H
2#define _LCM_H
3
4#include <linux/compiler.h>
5
6unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
7
8#endif /* _LCM_H */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 76e8903cd204..36520ded3e06 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -34,6 +34,9 @@ struct writeback_control {
34 enum writeback_sync_modes sync_mode; 34 enum writeback_sync_modes sync_mode;
35 unsigned long *older_than_this; /* If !NULL, only write back inodes 35 unsigned long *older_than_this; /* If !NULL, only write back inodes
36 older than this */ 36 older than this */
37 unsigned long wb_start; /* Time writeback_inodes_wb was
38 called. This is needed to avoid
39 extra jobs and livelock */
37 long nr_to_write; /* Write this many pages, and decrement 40 long nr_to_write; /* Write this many pages, and decrement
38 this for each page written */ 41 this for each page written */
39 long pages_skipped; /* Pages which were not written */ 42 long pages_skipped; /* Pages which were not written */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 5fb72733331e..d870a918559c 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -40,6 +40,16 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
40 __entry->nr_sector, __entry->errors) 40 __entry->nr_sector, __entry->errors)
41); 41);
42 42
43/**
44 * block_rq_abort - abort block operation request
45 * @q: queue containing the block operation request
46 * @rq: block IO operation request
47 *
48 * Called immediately after pending block IO operation request @rq in
49 * queue @q is aborted. The fields in the operation request @rq
50 * can be examined to determine which device and sectors the pending
51 * operation would access.
52 */
43DEFINE_EVENT(block_rq_with_error, block_rq_abort, 53DEFINE_EVENT(block_rq_with_error, block_rq_abort,
44 54
45 TP_PROTO(struct request_queue *q, struct request *rq), 55 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -47,6 +57,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_abort,
47 TP_ARGS(q, rq) 57 TP_ARGS(q, rq)
48); 58);
49 59
60/**
61 * block_rq_requeue - place block IO request back on a queue
62 * @q: queue holding operation
63 * @rq: block IO operation request
64 *
65 * The block operation request @rq is being placed back into queue
66 * @q. For some reason the request was not completed and needs to be
67 * put back in the queue.
68 */
50DEFINE_EVENT(block_rq_with_error, block_rq_requeue, 69DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
51 70
52 TP_PROTO(struct request_queue *q, struct request *rq), 71 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -54,6 +73,17 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
54 TP_ARGS(q, rq) 73 TP_ARGS(q, rq)
55); 74);
56 75
76/**
77 * block_rq_complete - block IO operation completed by device driver
78 * @q: queue containing the block operation request
79 * @rq: block operations request
80 *
81 * The block_rq_complete tracepoint event indicates that some portion
82 * of operation request has been completed by the device driver. If
83 * the @rq->bio is %NULL, then there is absolutely no additional work to
84 * do for the request. If @rq->bio is non-NULL then there is
85 * additional work required to complete the request.
86 */
57DEFINE_EVENT(block_rq_with_error, block_rq_complete, 87DEFINE_EVENT(block_rq_with_error, block_rq_complete,
58 88
59 TP_PROTO(struct request_queue *q, struct request *rq), 89 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -95,6 +125,16 @@ DECLARE_EVENT_CLASS(block_rq,
95 __entry->nr_sector, __entry->comm) 125 __entry->nr_sector, __entry->comm)
96); 126);
97 127
128/**
129 * block_rq_insert - insert block operation request into queue
130 * @q: target queue
131 * @rq: block IO operation request
132 *
133 * Called immediately before block operation request @rq is inserted
134 * into queue @q. The fields in the operation request @rq struct can
135 * be examined to determine which device and sectors the pending
136 * operation would access.
137 */
98DEFINE_EVENT(block_rq, block_rq_insert, 138DEFINE_EVENT(block_rq, block_rq_insert,
99 139
100 TP_PROTO(struct request_queue *q, struct request *rq), 140 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -102,6 +142,14 @@ DEFINE_EVENT(block_rq, block_rq_insert,
102 TP_ARGS(q, rq) 142 TP_ARGS(q, rq)
103); 143);
104 144
145/**
146 * block_rq_issue - issue pending block IO request operation to device driver
147 * @q: queue holding operation
148 * @rq: block IO operation request
149 *
150 * Called when block operation request @rq from queue @q is sent to a
151 * device driver for processing.
152 */
105DEFINE_EVENT(block_rq, block_rq_issue, 153DEFINE_EVENT(block_rq, block_rq_issue,
106 154
107 TP_PROTO(struct request_queue *q, struct request *rq), 155 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -109,6 +157,17 @@ DEFINE_EVENT(block_rq, block_rq_issue,
109 TP_ARGS(q, rq) 157 TP_ARGS(q, rq)
110); 158);
111 159
160/**
161 * block_bio_bounce - used bounce buffer when processing block operation
162 * @q: queue holding the block operation
163 * @bio: block operation
164 *
165 * A bounce buffer was used to handle the block operation @bio in @q.
166 * This occurs when hardware limitations prevent a direct transfer of
167 * data between the @bio data memory area and the IO device. Use of a
168 * bounce buffer requires extra copying of data and decreases
169 * performance.
170 */
112TRACE_EVENT(block_bio_bounce, 171TRACE_EVENT(block_bio_bounce,
113 172
114 TP_PROTO(struct request_queue *q, struct bio *bio), 173 TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -138,6 +197,14 @@ TRACE_EVENT(block_bio_bounce,
138 __entry->nr_sector, __entry->comm) 197 __entry->nr_sector, __entry->comm)
139); 198);
140 199
200/**
201 * block_bio_complete - completed all work on the block operation
202 * @q: queue holding the block operation
203 * @bio: block operation completed
204 *
205 * This tracepoint indicates there is no further work to do on this
206 * block IO operation @bio.
207 */
141TRACE_EVENT(block_bio_complete, 208TRACE_EVENT(block_bio_complete,
142 209
143 TP_PROTO(struct request_queue *q, struct bio *bio), 210 TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -193,6 +260,14 @@ DECLARE_EVENT_CLASS(block_bio,
193 __entry->nr_sector, __entry->comm) 260 __entry->nr_sector, __entry->comm)
194); 261);
195 262
263/**
264 * block_bio_backmerge - merging block operation to the end of an existing operation
265 * @q: queue holding operation
266 * @bio: new block operation to merge
267 *
268 * Merging block request @bio to the end of an existing block request
269 * in queue @q.
270 */
196DEFINE_EVENT(block_bio, block_bio_backmerge, 271DEFINE_EVENT(block_bio, block_bio_backmerge,
197 272
198 TP_PROTO(struct request_queue *q, struct bio *bio), 273 TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -200,6 +275,14 @@ DEFINE_EVENT(block_bio, block_bio_backmerge,
200 TP_ARGS(q, bio) 275 TP_ARGS(q, bio)
201); 276);
202 277
278/**
279 * block_bio_frontmerge - merging block operation to the beginning of an existing operation
280 * @q: queue holding operation
281 * @bio: new block operation to merge
282 *
283 * Merging block IO operation @bio to the beginning of an existing block
284 * operation in queue @q.
285 */
203DEFINE_EVENT(block_bio, block_bio_frontmerge, 286DEFINE_EVENT(block_bio, block_bio_frontmerge,
204 287
205 TP_PROTO(struct request_queue *q, struct bio *bio), 288 TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -207,6 +290,13 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge,
207 TP_ARGS(q, bio) 290 TP_ARGS(q, bio)
208); 291);
209 292
293/**
294 * block_bio_queue - putting new block IO operation in queue
295 * @q: queue holding operation
296 * @bio: new block operation
297 *
298 * About to place the block IO operation @bio into queue @q.
299 */
210DEFINE_EVENT(block_bio, block_bio_queue, 300DEFINE_EVENT(block_bio, block_bio_queue,
211 301
212 TP_PROTO(struct request_queue *q, struct bio *bio), 302 TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -243,6 +333,15 @@ DECLARE_EVENT_CLASS(block_get_rq,
243 __entry->nr_sector, __entry->comm) 333 __entry->nr_sector, __entry->comm)
244); 334);
245 335
336/**
337 * block_getrq - get a free request entry in queue for block IO operations
338 * @q: queue for operations
339 * @bio: pending block IO operation
340 * @rw: low bit indicates a read (%0) or a write (%1)
341 *
342 * A request struct for queue @q has been allocated to handle the
343 * block IO operation @bio.
344 */
246DEFINE_EVENT(block_get_rq, block_getrq, 345DEFINE_EVENT(block_get_rq, block_getrq,
247 346
248 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 347 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
@@ -250,6 +349,17 @@ DEFINE_EVENT(block_get_rq, block_getrq,
250 TP_ARGS(q, bio, rw) 349 TP_ARGS(q, bio, rw)
251); 350);
252 351
352/**
353 * block_sleeprq - waiting to get a free request entry in queue for block IO operation
354 * @q: queue for operation
355 * @bio: pending block IO operation
356 * @rw: low bit indicates a read (%0) or a write (%1)
357 *
358 * In the case where a request struct cannot be provided for queue @q
359 * the process needs to wait for a request struct to become
360 * available. This tracepoint event is generated each time the
361 * process goes to sleep waiting for a request struct to become available.
362 */
253DEFINE_EVENT(block_get_rq, block_sleeprq, 363DEFINE_EVENT(block_get_rq, block_sleeprq,
254 364
255 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 365 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
@@ -257,6 +367,14 @@ DEFINE_EVENT(block_get_rq, block_sleeprq,
257 TP_ARGS(q, bio, rw) 367 TP_ARGS(q, bio, rw)
258); 368);
259 369
370/**
371 * block_plug - keep operations requests in request queue
372 * @q: request queue to plug
373 *
374 * Plug the request queue @q. Do not allow block operation requests
375 * to be sent to the device driver. Instead, accumulate requests in
376 * the queue to improve throughput performance of the block device.
377 */
260TRACE_EVENT(block_plug, 378TRACE_EVENT(block_plug,
261 379
262 TP_PROTO(struct request_queue *q), 380 TP_PROTO(struct request_queue *q),
@@ -293,6 +411,13 @@ DECLARE_EVENT_CLASS(block_unplug,
293 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) 411 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
294); 412);
295 413
414/**
415 * block_unplug_timer - timed release of operations requests in queue to device driver
416 * @q: request queue to unplug
417 *
418 * Unplug the request queue @q because a timer expired and allow block
419 * operation requests to be sent to the device driver.
420 */
296DEFINE_EVENT(block_unplug, block_unplug_timer, 421DEFINE_EVENT(block_unplug, block_unplug_timer,
297 422
298 TP_PROTO(struct request_queue *q), 423 TP_PROTO(struct request_queue *q),
@@ -300,6 +425,13 @@ DEFINE_EVENT(block_unplug, block_unplug_timer,
300 TP_ARGS(q) 425 TP_ARGS(q)
301); 426);
302 427
428/**
429 * block_unplug_io - release of operations requests in request queue
430 * @q: request queue to unplug
431 *
432 * Unplug request queue @q because device driver is scheduled to work
433 * on elements in the request queue.
434 */
303DEFINE_EVENT(block_unplug, block_unplug_io, 435DEFINE_EVENT(block_unplug, block_unplug_io,
304 436
305 TP_PROTO(struct request_queue *q), 437 TP_PROTO(struct request_queue *q),
@@ -307,6 +439,17 @@ DEFINE_EVENT(block_unplug, block_unplug_io,
307 TP_ARGS(q) 439 TP_ARGS(q)
308); 440);
309 441
442/**
443 * block_split - split a single bio struct into two bio structs
444 * @q: queue containing the bio
445 * @bio: block operation being split
446 * @new_sector: The starting sector for the new bio
447 *
448 * The bio request @bio in request queue @q needs to be split into two
449 * bio requests. The newly created @bio request starts at
450 * @new_sector. This split may be required due to a hardware limitation,
451 * such as an operation crossing device boundaries in a RAID system.
452 */
310TRACE_EVENT(block_split, 453TRACE_EVENT(block_split,
311 454
312 TP_PROTO(struct request_queue *q, struct bio *bio, 455 TP_PROTO(struct request_queue *q, struct bio *bio,
@@ -337,6 +480,16 @@ TRACE_EVENT(block_split,
337 __entry->comm) 480 __entry->comm)
338); 481);
339 482
483/**
484 * block_remap - map request for a partition to the raw device
485 * @q: queue holding the operation
486 * @bio: revised operation
487 * @dev: device for the operation
488 * @from: original sector for the operation
489 *
490 * An operation for a partition on a block device has been mapped to the
491 * raw block device.
492 */
340TRACE_EVENT(block_remap, 493TRACE_EVENT(block_remap,
341 494
342 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, 495 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
@@ -370,6 +523,17 @@ TRACE_EVENT(block_remap,
370 (unsigned long long)__entry->old_sector) 523 (unsigned long long)__entry->old_sector)
371); 524);
372 525
526/**
527 * block_rq_remap - map request for a block operation request
528 * @q: queue holding the operation
529 * @rq: block IO operation request
530 * @dev: device for the operation
531 * @from: original sector for the operation
532 *
533 * The block operation request @rq in @q has been remapped. The block
534 * operation request @rq holds the current information and @from holds
535 * the original sector.
536 */
373TRACE_EVENT(block_rq_remap, 537TRACE_EVENT(block_rq_remap,
374 538
375 TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, 539 TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
diff --git a/lib/Makefile b/lib/Makefile
index 2e152aed7198..0d4015205c64 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
21 21
22obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ 22obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
23 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ 23 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
24 string_helpers.o gcd.o list_sort.o 24 string_helpers.o gcd.o lcm.o list_sort.o
25 25
26ifeq ($(CONFIG_DEBUG_KOBJECT),y) 26ifeq ($(CONFIG_DEBUG_KOBJECT),y)
27CFLAGS_kobject.o += -DDEBUG 27CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 000000000000..157cd88a6ffc
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
1#include <linux/kernel.h>
2#include <linux/gcd.h>
3#include <linux/module.h>
4
5/* Lowest common multiple */
6unsigned long lcm(unsigned long a, unsigned long b)
7{
8 if (a && b)
9 return (a * b) / gcd(a, b);
10 else if (b)
11 return b;
12
13 return a;
14}
15EXPORT_SYMBOL_GPL(lcm);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca0347707..f13e067e1467 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -227,6 +227,9 @@ static struct device_attribute bdi_dev_attrs[] = {
227static __init int bdi_class_init(void) 227static __init int bdi_class_init(void)
228{ 228{
229 bdi_class = class_create(THIS_MODULE, "bdi"); 229 bdi_class = class_create(THIS_MODULE, "bdi");
230 if (IS_ERR(bdi_class))
231 return PTR_ERR(bdi_class);
232
230 bdi_class->dev_attrs = bdi_dev_attrs; 233 bdi_class->dev_attrs = bdi_dev_attrs;
231 bdi_debug_init(); 234 bdi_debug_init();
232 return 0; 235 return 0;