author    Linus Torvalds <torvalds@linux-foundation.org>  2009-01-30 11:46:42 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-01-30 11:46:42 -0500
commit    ae704e9f92f87b12c5938b07245792857c7c9c14 (patch)
tree      b30f065b6bc815a0c0ce7ccb7d1c8a74b7e14b08
parent    dbeb17016e4d0affccfa07f4e8f61feac75c5a18 (diff)
parent    3a9a3f6cc55418dd1525e636dccbbe13c394f652 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cfq-iosched: Allow RT requests to pre-empt ongoing BE timeslice
  block: add sysfs file for controlling io stats accounting
  Mark mandatory elevator functions in the biodoc.txt
  include/linux: Add bsg.h to the Kernel exported headers
  block: silently error an unsupported barrier bio
  block: Fix documentation for blkdev_issue_flush()
  block: add bio_rw_flagged() for testing bio->bi_rw
  block: seperate bio/request unplug and sync bits
  block: export SSD/non-rotational queue flag through sysfs
  Fix small typo in bio.h's documentation
  block: get rid of the manual directory counting in blktrace
  block: Allow empty integrity profile
  block: Remove obsolete BUG_ON
  block: Don't verify integrity metadata on read error
-rw-r--r--  Documentation/block/biodoc.txt  |   6
-rw-r--r--  block/blk-barrier.c             |   2
-rw-r--r--  block/blk-core.c                | 100
-rw-r--r--  block/blk-integrity.c           |  25
-rw-r--r--  block/blk-sysfs.c               |  58
-rw-r--r--  block/blktrace.c                |  72
-rw-r--r--  block/cfq-iosched.c             |  39
-rw-r--r--  fs/bio-integrity.c              |  26
-rw-r--r--  include/linux/Kbuild            |   1
-rw-r--r--  include/linux/bio.h             |  45
-rw-r--r--  include/linux/blkdev.h          |   8
11 files changed, 249 insertions(+), 133 deletions(-)
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 5d2480d33b43..ecad6ee75705 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -954,14 +954,14 @@ elevator_allow_merge_fn called whenever the block layer determines
                                results in some sort of conflict internally,
                                this hook allows it to do that.
 
-elevator_dispatch_fn            fills the dispatch queue with ready requests.
+elevator_dispatch_fn*           fills the dispatch queue with ready requests.
                                 I/O schedulers are free to postpone requests by
                                 not filling the dispatch queue unless @force
                                 is non-zero. Once dispatched, I/O schedulers
                                 are not allowed to manipulate the requests -
                                 they belong to generic dispatch queue.
 
-elevator_add_req_fn             called to add a new request into the scheduler
+elevator_add_req_fn*            called to add a new request into the scheduler
 
 elevator_queue_empty_fn         returns true if the merge queue is empty.
                                 Drivers shouldn't use this, but rather check
@@ -991,7 +991,7 @@ elevator_activate_req_fn Called when device driver first sees a request.
 elevator_deactivate_req_fn      Called when device driver decides to delay
                                 a request by requeueing it.
 
-elevator_init_fn
+elevator_init_fn*
 elevator_exit_fn                Allocate and free any elevator specific storage
                                 for a queue.
 
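The '*' markers flag the elevator hooks that the commit title calls mandatory. For orientation only, a minimal sketch of how an I/O scheduler of this era wires those hooks into its elevator_type; the my_* callbacks are hypothetical and their bodies (and exact signatures) are elided, so this is an assumption-laden sketch rather than code from the patch:

    static struct elevator_type elevator_myiosched = {
            .ops = {
                    .elevator_dispatch_fn    = my_dispatch,     /* mandatory: move requests to the dispatch queue */
                    .elevator_add_req_fn     = my_add_request,  /* mandatory: accept a new request */
                    .elevator_queue_empty_fn = my_queue_empty,
                    .elevator_init_fn        = my_init_queue,   /* mandatory pair: allocate ...   */
                    .elevator_exit_fn        = my_exit_queue,   /* ... and free per-queue storage */
            },
            .elevator_name  = "myiosched",
            .elevator_owner = THIS_MODULE,
    };
    /* registered from module init with elv_register(&elevator_myiosched) */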
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8eba4e43bb0c..f7dae57e6cab 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -302,7 +302,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
  * Description:
  *    Issue a flush for the block device in question. Caller can supply
  *    room for storing the error offset in case of a flush error, if they
- *    wish to. Caller must run wait_for_completion() on its own.
+ *    wish to.
  */
 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 {
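The deleted sentence was the documentation bug: the function already waits for the flush to complete before returning. A hedged sketch of a caller, assuming bdev is an already-opened struct block_device pointer:

    sector_t error_sector;
    int err;

    /* issues an empty barrier bio and waits for it internally */
    err = blkdev_issue_flush(bdev, &error_sector);
    if (err == -EOPNOTSUPP)
            ; /* device has no flush/barrier support; usually ignored */
    else if (err)
            printk(KERN_ERR "cache flush failed near sector %llu\n",
                   (unsigned long long)error_sector);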
diff --git a/block/blk-core.c b/block/blk-core.c
index a824e49c0d0a..ca69f3d94100 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -64,11 +64,12 @@ static struct workqueue_struct *kblockd_workqueue;
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
+	struct gendisk *disk = rq->rq_disk;
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !rq->rq_disk)
+	if (!blk_fs_request(rq) || !disk || !blk_queue_io_stat(disk->queue))
 		return;
 
 	cpu = part_stat_lock();
@@ -599,8 +600,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	q->request_fn = rfn;
 	q->prep_rq_fn = NULL;
 	q->unplug_fn = generic_unplug_device;
-	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER |
-			  1 << QUEUE_FLAG_STACKABLE);
+	q->queue_flags = QUEUE_FLAG_DEFAULT;
 	q->queue_lock = lock;
 
 	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
@@ -1125,6 +1125,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
+	if (bio_unplug(bio))
+		req->cmd_flags |= REQ_UNPLUG;
 	if (bio_rw_meta(bio))
 		req->cmd_flags |= REQ_RW_META;
 
@@ -1141,6 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	int el_ret, nr_sectors;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
+	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
 	nr_sectors = bio_sectors(bio);
@@ -1244,7 +1247,7 @@ get_rq:
 		blk_plug_device(q);
 	add_request(q, req);
 out:
-	if (sync || blk_queue_nonrot(q))
+	if (unplug || blk_queue_nonrot(q))
 		__generic_unplug_device(q);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
@@ -1448,6 +1451,11 @@ static inline void __generic_make_request(struct bio *bio)
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
+		if (bio_barrier(bio) && bio_has_data(bio) &&
+		    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+			err = -EOPNOTSUPP;
+			goto end_io;
+		}
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
@@ -1655,6 +1663,55 @@ void blkdev_dequeue_request(struct request *req)
 }
 EXPORT_SYMBOL(blkdev_dequeue_request);
 
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
+{
+	struct gendisk *disk = req->rq_disk;
+
+	if (!disk || !blk_queue_io_stat(disk->queue))
+		return;
+
+	if (blk_fs_request(req)) {
+		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_unlock();
+	}
+}
+
+static void blk_account_io_done(struct request *req)
+{
+	struct gendisk *disk = req->rq_disk;
+
+	if (!disk || !blk_queue_io_stat(disk->queue))
+		return;
+
+	/*
+	 * Account IO completion. bar_rq isn't accounted as a normal
+	 * IO on queueing nor completion. Accounting the containing
+	 * request is enough.
+	 */
+	if (blk_fs_request(req) && req != &req->q->bar_rq) {
+		unsigned long duration = jiffies - req->start_time;
+		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
+	}
+}
+
 /**
  * __end_that_request_first - end I/O on a request
  * @req: the request being processed
@@ -1690,16 +1747,7 @@ static int __end_that_request_first(struct request *req, int error,
 				(unsigned long long)req->sector);
 	}
 
-	if (blk_fs_request(req) && req->rq_disk) {
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
-		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
-		part_stat_unlock();
-	}
+	blk_account_io_completion(req, nr_bytes);
 
 	total_bytes = bio_nbytes = 0;
 	while ((bio = req->bio) != NULL) {
@@ -1779,8 +1827,6 @@ static int __end_that_request_first(struct request *req, int error,
  */
 static void end_that_request_last(struct request *req, int error)
 {
-	struct gendisk *disk = req->rq_disk;
-
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1792,27 +1838,7 @@ static void end_that_request_last(struct request *req, int error)
 
 	blk_delete_timer(req);
 
-	/*
-	 * Account IO completion. bar_rq isn't accounted as a normal
-	 * IO on queueing nor completion. Accounting the containing
-	 * request is enough.
-	 */
-	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
-		unsigned long duration = jiffies - req->start_time;
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(disk, req->sector);
-
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
-		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
-
-		part_stat_unlock();
-	}
+	blk_account_io_done(req);
 
 	if (req->end_io)
 		req->end_io(req, error);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 61a8e2f8fdd0..91fa8e06b6a5 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -309,24 +309,24 @@ static struct kobj_type integrity_ktype = {
 /**
  * blk_integrity_register - Register a gendisk as being integrity-capable
  * @disk: struct gendisk pointer to make integrity-aware
- * @template: integrity profile
+ * @template: optional integrity profile to register
  *
  * Description: When a device needs to advertise itself as being able
  * to send/receive integrity metadata it must use this function to
  * register the capability with the block layer. The template is a
  * blk_integrity struct with values appropriate for the underlying
- * hardware. See Documentation/block/data-integrity.txt.
+ * hardware. If template is NULL the new profile is allocated but
+ * not filled out. See Documentation/block/data-integrity.txt.
  */
 int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 {
 	struct blk_integrity *bi;
 
 	BUG_ON(disk == NULL);
-	BUG_ON(template == NULL);
 
 	if (disk->integrity == NULL) {
 		bi = kmem_cache_alloc(integrity_cachep,
 				      GFP_KERNEL | __GFP_ZERO);
 		if (!bi)
 			return -1;
 
@@ -346,13 +346,16 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 	bi = disk->integrity;
 
 	/* Use the provided profile as template */
-	bi->name = template->name;
-	bi->generate_fn = template->generate_fn;
-	bi->verify_fn = template->verify_fn;
-	bi->tuple_size = template->tuple_size;
-	bi->set_tag_fn = template->set_tag_fn;
-	bi->get_tag_fn = template->get_tag_fn;
-	bi->tag_size = template->tag_size;
+	if (template != NULL) {
+		bi->name = template->name;
+		bi->generate_fn = template->generate_fn;
+		bi->verify_fn = template->verify_fn;
+		bi->tuple_size = template->tuple_size;
+		bi->set_tag_fn = template->set_tag_fn;
+		bi->get_tag_fn = template->get_tag_fn;
+		bi->tag_size = template->tag_size;
+	} else
+		bi->name = "unsupported";
 
 	return 0;
 }
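The template argument is now optional: passing NULL allocates the profile but leaves it unfilled, named "unsupported". A hedged sketch of both call styles, with disk assumed to be a live struct gendisk pointer and the my_* callbacks hypothetical and elided:

    /* as before: full registration with a filled-in template */
    static struct blk_integrity my_integrity_tpl = {
            .name        = "MY-DIF-FMT",    /* hypothetical format name */
            .generate_fn = my_generate_fn,  /* hypothetical callbacks   */
            .verify_fn   = my_verify_fn,
            .tuple_size  = 8,
    };
    blk_integrity_register(disk, &my_integrity_tpl);

    /* new with this merge: register an empty profile, fill it in later */
    blk_integrity_register(disk, NULL);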
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a29cb788e408..e29ddfc73cf4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -130,6 +130,27 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(!blk_queue_nonrot(q), page);
+}
+
+static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
+				  size_t count)
+{
+	unsigned long nm;
+	ssize_t ret = queue_var_store(&nm, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	if (nm)
+		queue_flag_clear(QUEUE_FLAG_NONROT, q);
+	else
+		queue_flag_set(QUEUE_FLAG_NONROT, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(blk_queue_nomerges(q), page);
@@ -146,8 +167,8 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 		queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	else
 		queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
-
 	spin_unlock_irq(q->queue_lock);
+
 	return ret;
 }
 
@@ -176,6 +197,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 	return ret;
 }
 
+static ssize_t queue_iostats_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_io_stat(q), page);
+}
+
+static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
+				   size_t count)
+{
+	unsigned long stats;
+	ssize_t ret = queue_var_store(&stats, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	if (stats)
+		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -210,6 +252,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.show = queue_hw_sector_size_show,
 };
 
+static struct queue_sysfs_entry queue_nonrot_entry = {
+	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_nonrot_show,
+	.store = queue_nonrot_store,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nomerges_show,
@@ -222,6 +270,12 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = {
 	.store = queue_rq_affinity_store,
 };
 
+static struct queue_sysfs_entry queue_iostats_entry = {
+	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_iostats_show,
+	.store = queue_iostats_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -229,8 +283,10 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
+	&queue_iostats_entry.attr,
 	NULL,
 };
 
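The two new attributes surface as /sys/block/<dev>/queue/rotational and /sys/block/<dev>/queue/iostats. A hedged userspace sketch (the device name sda is an assumption) that reads both and notes what writing them does:

    #include <stdio.h>

    int main(void)
    {
            FILE *f;
            int rotational = -1, iostats = -1;

            /* 1 = rotating disk, 0 = SSD (QUEUE_FLAG_NONROT set) */
            if ((f = fopen("/sys/block/sda/queue/rotational", "r"))) {
                    fscanf(f, "%d", &rotational);
                    fclose(f);
            }
            /* 1 = per-partition accounting (QUEUE_FLAG_IO_STAT) enabled */
            if ((f = fopen("/sys/block/sda/queue/iostats", "r"))) {
                    fscanf(f, "%d", &iostats);
                    fclose(f);
            }
            printf("rotational=%d iostats=%d\n", rotational, iostats);

            /* Writing (as root) flips the flags: "echo 0 > rotational" marks the
             * device non-rotational, "echo 0 > iostats" disables the accounting
             * added by this merge. */
            return 0;
    }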
diff --git a/block/blktrace.c b/block/blktrace.c
index b0a2cae886db..39cc3bfe56e4 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -187,59 +187,12 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 
 static struct dentry *blk_tree_root;
 static DEFINE_MUTEX(blk_tree_mutex);
-static unsigned int root_users;
-
-static inline void blk_remove_root(void)
-{
-	if (blk_tree_root) {
-		debugfs_remove(blk_tree_root);
-		blk_tree_root = NULL;
-	}
-}
-
-static void blk_remove_tree(struct dentry *dir)
-{
-	mutex_lock(&blk_tree_mutex);
-	debugfs_remove(dir);
-	if (--root_users == 0)
-		blk_remove_root();
-	mutex_unlock(&blk_tree_mutex);
-}
-
-static struct dentry *blk_create_tree(const char *blk_name)
-{
-	struct dentry *dir = NULL;
-	int created = 0;
-
-	mutex_lock(&blk_tree_mutex);
-
-	if (!blk_tree_root) {
-		blk_tree_root = debugfs_create_dir("block", NULL);
-		if (!blk_tree_root)
-			goto err;
-		created = 1;
-	}
-
-	dir = debugfs_create_dir(blk_name, blk_tree_root);
-	if (dir)
-		root_users++;
-	else {
-		/* Delete root only if we created it */
-		if (created)
-			blk_remove_root();
-	}
-
-err:
-	mutex_unlock(&blk_tree_mutex);
-	return dir;
-}
 
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
-	relay_close(bt->rchan);
 	debugfs_remove(bt->msg_file);
 	debugfs_remove(bt->dropped_file);
-	blk_remove_tree(bt->dir);
+	relay_close(bt->rchan);
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
 	kfree(bt);
@@ -346,7 +299,18 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
 
 static int blk_remove_buf_file_callback(struct dentry *dentry)
 {
+	struct dentry *parent = dentry->d_parent;
 	debugfs_remove(dentry);
+
+	/*
+	 * this will fail for all but the last file, but that is ok. what we
+	 * care about is the top level buts->name directory going away, when
+	 * the last trace file is gone. Then we don't have to rmdir() that
+	 * manually on trace stop, so it nicely solves the issue with
+	 * force killing of running traces.
+	 */
+
+	debugfs_remove(parent);
 	return 0;
 }
 
@@ -404,7 +368,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 		goto err;
 
 	ret = -ENOENT;
-	dir = blk_create_tree(buts->name);
+
+	if (!blk_tree_root) {
+		blk_tree_root = debugfs_create_dir("block", NULL);
+		if (!blk_tree_root)
+			return -ENOMEM;
+	}
+
+	dir = debugfs_create_dir(buts->name, blk_tree_root);
+
 	if (!dir)
 		goto err;
 
@@ -458,8 +430,6 @@ probe_err:
 	atomic_dec(&blk_probes_ref);
 	mutex_unlock(&blk_probe_mutex);
 err:
-	if (dir)
-		blk_remove_tree(dir);
 	if (bt) {
 		if (bt->msg_file)
 			debugfs_remove(bt->msg_file);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e8525fa72823..664ebfd092ec 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -84,6 +84,11 @@ struct cfq_data {
 	 */
 	struct cfq_rb_root service_tree;
 	unsigned int busy_queues;
+	/*
+	 * Used to track any pending rt requests so we can pre-empt current
+	 * non-RT cfqq in service when this value is non-zero.
+	 */
+	unsigned int busy_rt_queues;
 
 	int rq_in_driver;
 	int sync_flight;
@@ -562,6 +567,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -581,6 +588,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues--;
 }
 
 /*
@@ -1005,6 +1014,20 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto expire;
 
 	/*
+	 * If we have a RT cfqq waiting, then we pre-empt the current non-rt
+	 * cfqq.
+	 */
+	if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
+		/*
+		 * We simulate this as cfqq timed out so that it gets to bank
+		 * the remaining of its time slice.
+		 */
+		cfq_log_cfqq(cfqd, cfqq, "preempt");
+		cfq_slice_expired(cfqd, 1);
+		goto new_queue;
+	}
+
+	/*
 	 * The active queue has requests and isn't expired, allow it to
 	 * dispatch.
 	 */
@@ -1067,6 +1090,13 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (RB_EMPTY_ROOT(&cfqq->sort_list))
 			break;
 
+		/*
+		 * If there is a non-empty RT cfqq waiting for current
+		 * cfqq's timeslice to complete, pre-empt this cfqq
+		 */
+		if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues)
+			break;
+
 	} while (dispatched < max_dispatch);
 
 	/*
@@ -1801,6 +1831,12 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (rq_is_meta(rq) && !cfqq->meta_pending)
 		return 1;
 
+	/*
+	 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+	 */
+	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+		return 1;
+
 	if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
 		return 0;
 
@@ -1870,7 +1906,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * not the active queue - expire current slice if it is
 		 * idle and has expired it's mean thinktime or this new queue
-		 * has some old slice time left and is of higher priority
+		 * has some old slice time left and is of higher priority or
+		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
 		cfq_mark_cfqq_must_dispatch(cfqq);
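A cfqq counts as "RT" when its owning task selected the real-time I/O class with ioprio_set(2). A hedged userspace sketch of switching a task to that class so its requests get the new pre-emption behaviour; the IOPRIO_* constants are copied by hand as assumptions, since glibc shipped no wrapper at the time:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #define IOPRIO_CLASS_RT     1
    #define IOPRIO_CLASS_SHIFT  13
    #define IOPRIO_WHO_PROCESS  1

    int main(void)
    {
            /* real-time class, priority level 4 inside the class */
            int ioprio = (IOPRIO_CLASS_RT << IOPRIO_CLASS_SHIFT) | 4;

            /* needs CAP_SYS_ADMIN; who == 0 means the calling process */
            if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, ioprio) < 0) {
                    perror("ioprio_set");
                    return 1;
            }
            /* I/O issued from here on lands in an RT cfqq and, with this patch,
             * pre-empts a running best-effort timeslice instead of waiting. */
            return 0;
    }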
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 77ebc3c263d6..549b0144da11 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -140,7 +140,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 
 	iv = bip_vec_idx(bip, bip->bip_vcnt);
 	BUG_ON(iv == NULL);
-	BUG_ON(iv->bv_page != NULL);
 
 	iv->bv_page = page;
 	iv->bv_len = len;
@@ -465,7 +464,7 @@ static int bio_integrity_verify(struct bio *bio)
 
 		if (ret) {
 			kunmap_atomic(kaddr, KM_USER0);
-			break;
+			return ret;
 		}
 
 		sectors = bv->bv_len / bi->sector_size;
@@ -493,18 +492,13 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio_integrity_payload *bip =
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
-	int error = bip->bip_error;
+	int error;
 
-	if (bio_integrity_verify(bio)) {
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-		error = -EIO;
-	}
+	error = bio_integrity_verify(bio);
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
-
-	if (bio->bi_end_io)
-		bio->bi_end_io(bio, error);
+	bio_endio(bio, error);
 }
 
 /**
@@ -525,7 +519,17 @@ void bio_integrity_endio(struct bio *bio, int error)
 
 	BUG_ON(bip->bip_bio != bio);
 
-	bip->bip_error = error;
+	/* In case of an I/O error there is no point in verifying the
+	 * integrity metadata. Restore original bio end_io handler
+	 * and run it.
+	 */
+	if (error) {
+		bio->bi_end_io = bip->bip_end_io;
+		bio_endio(bio, error);
+
+		return;
+	}
+
 	INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
 	queue_work(kintegrityd_wq, &bip->bip_work);
 }
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 12e9a2957caf..2124c063a7ef 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -41,6 +41,7 @@ header-y += baycom.h
 header-y += bfs_fs.h
 header-y += blkpg.h
 header-y += bpqether.h
+header-y += bsg.h
 header-y += can.h
 header-y += cdk.h
 header-y += chio.h
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 18462c5b8fff..0942765cf8c0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -144,7 +144,7 @@ struct bio {
  * bit 1 -- rw-ahead when set
  * bit 2 -- barrier
  *	Insert a serialization point in the IO queue, forcing previously
- *	submitted IO to be completed before this oen is issued.
+ *	submitted IO to be completed before this one is issued.
  * bit 3 -- synchronous I/O hint: the block layer will unplug immediately
  *	Note that this does NOT indicate that the IO itself is sync, just
  *	that the block layer will not postpone issue of this IO by plugging.
@@ -163,12 +163,33 @@ struct bio {
 #define BIO_RW		0	/* Must match RW in req flags (blkdev.h) */
 #define BIO_RW_AHEAD	1	/* Must match FAILFAST in req flags */
 #define BIO_RW_BARRIER	2
-#define BIO_RW_SYNC	3
-#define BIO_RW_META	4
-#define BIO_RW_DISCARD	5
-#define BIO_RW_FAILFAST_DEV	6
-#define BIO_RW_FAILFAST_TRANSPORT	7
-#define BIO_RW_FAILFAST_DRIVER	8
+#define BIO_RW_SYNCIO	3
+#define BIO_RW_UNPLUG	4
+#define BIO_RW_META	5
+#define BIO_RW_DISCARD	6
+#define BIO_RW_FAILFAST_DEV	7
+#define BIO_RW_FAILFAST_TRANSPORT	8
+#define BIO_RW_FAILFAST_DRIVER	9
+
+#define BIO_RW_SYNC	(BIO_RW_SYNCIO | BIO_RW_UNPLUG)
+
+#define bio_rw_flagged(bio, flag)	((bio)->bi_rw & (1 << (flag)))
+
+/*
+ * Old defines, these should eventually be replaced by direct usage of
+ * bio_rw_flagged()
+ */
+#define bio_barrier(bio)	bio_rw_flagged(bio, BIO_RW_BARRIER)
+#define bio_sync(bio)		bio_rw_flagged(bio, BIO_RW_SYNCIO)
+#define bio_unplug(bio)		bio_rw_flagged(bio, BIO_RW_UNPLUG)
+#define bio_failfast_dev(bio)	bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV)
+#define bio_failfast_transport(bio)	\
+	bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT)
+#define bio_failfast_driver(bio)	\
+	bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER)
+#define bio_rw_ahead(bio)	bio_rw_flagged(bio, BIO_RW_AHEAD)
+#define bio_rw_meta(bio)	bio_rw_flagged(bio, BIO_RW_META)
+#define bio_discard(bio)	bio_rw_flagged(bio, BIO_RW_DISCARD)
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -193,15 +214,6 @@ struct bio {
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
 #define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
-#define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
-#define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
-#define bio_failfast_dev(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV))
-#define bio_failfast_transport(bio)	\
-	((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT))
-#define bio_failfast_driver(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER))
-#define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
-#define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
-#define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))
 #define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
 static inline unsigned int bio_cur_sectors(struct bio *bio)
@@ -312,7 +324,6 @@ struct bio_integrity_payload {
 	void *bip_buf;			/* generated integrity data */
 	bio_end_io_t *bip_end_io;	/* saved I/O completion fn */
 
-	int bip_error;			/* saved I/O error */
 	unsigned int bip_size;
 
 	unsigned short bip_pool;	/* pool the ivec came from */
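In-kernel callers test these bits through the new helper instead of open-coding shifts. A hedged sketch of the before/after idiom in a hypothetical driver function (not part of this patch):

    static void my_inspect_bio(struct bio *bio)
    {
            /* old, open-coded style */
            if (bio->bi_rw & (1 << BIO_RW_BARRIER))
                    pr_debug("barrier bio\n");

            /* same test through the helper added above */
            if (bio_rw_flagged(bio, BIO_RW_BARRIER))
                    pr_debug("barrier bio\n");

            /* sync and unplug are now separate bits; a submitter wanting the
             * old "sync" behaviour sets both explicitly */
            bio->bi_rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
    }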
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 044467ef7b11..d08c4b8219a6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -108,6 +108,7 @@ enum rq_flag_bits {
 	__REQ_RW_META,		/* metadata io request */
 	__REQ_COPY_USER,	/* contains copies of user pages */
 	__REQ_INTEGRITY,	/* integrity metadata has been remapped */
+	__REQ_UNPLUG,		/* unplug queue on submission */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -134,6 +135,7 @@ enum rq_flag_bits {
 #define REQ_RW_META	(1 << __REQ_RW_META)
 #define REQ_COPY_USER	(1 << __REQ_COPY_USER)
 #define REQ_INTEGRITY	(1 << __REQ_INTEGRITY)
+#define REQ_UNPLUG	(1 << __REQ_UNPLUG)
 
 #define BLK_MAX_CDB	16
 
@@ -449,6 +451,11 @@ struct request_queue
 #define QUEUE_FLAG_STACKABLE	13	/* supports request stacking */
 #define QUEUE_FLAG_NONROT	14	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
+#define QUEUE_FLAG_IO_STAT	15	/* do IO stats */
+
+#define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) | \
+				 (1 << QUEUE_FLAG_CLUSTER) | \
+				 (1 << QUEUE_FLAG_STACKABLE))
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -565,6 +572,7 @@ enum {
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
+#define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)