From e93b9fb7d85da4fd9d5171649e5ddcac1dd572bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 12:38:33 +0900 Subject: hd: fix locking hd dance around local irq and HD_IRQ enable without achieving much. It ends up transferring data from irq handler with both local irq and HD_IRQ disabled. The only place it actually does something is while transferring the first block of a request which it does with HD_IRQ disabled but local irq enabled. Unfortunately, the dancing is horribly broken from locking POV. IRQ and timeout handlers access block queue without grabbing the queue lock and running the driver in SMP configuration crashes the whole machine pretty quickly. Remove meaningless irq enable/disable dancing and add proper locking in issue, irq and timeout paths. Signed-off-by: Tejun Heo --- drivers/block/hd.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 3c11f062a18c..baaa9e486e50 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -509,7 +509,6 @@ ok_to_write: if (i > 0) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); - local_irq_enable(); } else { #if (HD_DELAY > 0) last_req = read_timer(); @@ -541,8 +540,7 @@ static void hd_times_out(unsigned long dummy) if (!CURRENT) return; - disable_irq(HD_IRQ); - local_irq_enable(); + spin_lock_irq(hd_queue->queue_lock); reset = 1; name = CURRENT->rq_disk->disk_name; printk("%s: timeout\n", name); @@ -552,9 +550,8 @@ static void hd_times_out(unsigned long dummy) #endif end_request(CURRENT, 0); } - local_irq_disable(); hd_request(); - enable_irq(HD_IRQ); + spin_unlock_irq(hd_queue->queue_lock); } static int do_special_op(struct hd_i_struct *disk, struct request *req) @@ -592,7 +589,6 @@ static void hd_request(void) return; repeat: del_timer(&device_timer); - local_irq_enable(); req = CURRENT; if (!req) { @@ -601,7 +597,6 @@ repeat: } if (reset) { - local_irq_disable(); reset_hd(); return; } @@ -660,9 +655,7 @@ repeat: static void do_hd_request(struct request_queue *q) { - disable_irq(HD_IRQ); hd_request(); - enable_irq(HD_IRQ); } static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -684,12 +677,16 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id) { void (*handler)(void) = do_hd; + spin_lock(hd_queue->queue_lock); + do_hd = NULL; del_timer(&device_timer); if (!handler) handler = unexpected_hd_interrupt; handler(); - local_irq_enable(); + + spin_unlock(hd_queue->queue_lock); + return IRQ_HANDLED; } -- cgit v1.2.2 From f06d9a2b52e246a66b606130cea3f0d7b7be17a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: replace end_request() with [__]blk_end_request_cur() end_request() has been kept around for backward compatibility; however, it's about time for it to go away. * There aren't too many users left. * Its use of @updtodate is pretty confusing. * In some cases, newer code ends up using mixture of end_request() and [__]blk_end_request[_all](), which is way too confusing. So, add [__]blk_end_request_cur() and replace end_request() with it. Most conversions are straightforward. Noteworthy ones are... * paride/pcd: next_request() updated to take 0/-errno instead of 1/0. * paride/pf: pf_end_request() and next_request() updated to take 0/-errno instead of 1/0. * xd: xd_readwrite() updated to return 0/-errno instead of 1/0. * mtd/mtd_blkdevs: blktrans_discard_request() updated to return 0/-errno instead of 1/0. Unnecessary local variable res initialization removed from mtd_blktrans_thread(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Joerg Dorchain Acked-by: Geert Uytterhoeven Acked-by: Grant Likely Acked-by: Laurent Vivier Cc: Tim Waugh Cc: Stephen Rothwell Cc: Paul Mackerras Cc: Jeremy Fitzhardinge Cc: Markus Lidel Cc: David Woodhouse Cc: Pete Zaitcev Cc: unsik Kim --- drivers/block/hd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index baaa9e486e50..5cb300b81c6a 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -410,7 +410,7 @@ static void bad_rw_intr(void) if (req != NULL) { struct hd_i_struct *disk = req->rq_disk->private_data; if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); disk->special_op = disk->recalibrate = 1; } else if (req->errors % RESET_FREQ == 0) reset = 1; @@ -466,7 +466,7 @@ ok_to_read: req->buffer+512); #endif if (req->current_nr_sectors <= 0) - end_request(req, 1); + __blk_end_request_cur(req, 0); if (i > 0) { SET_HANDLER(&read_intr); return; @@ -505,7 +505,7 @@ ok_to_write: --req->current_nr_sectors; req->buffer += 512; if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); + __blk_end_request_cur(req, 0); if (i > 0) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); @@ -548,7 +548,7 @@ static void hd_times_out(unsigned long dummy) #ifdef DEBUG printk("%s: too many errors\n", name); #endif - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); } hd_request(); spin_unlock_irq(hd_queue->queue_lock); @@ -563,7 +563,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req) } if (disk->head > 16) { printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } disk->special_op = 0; return 1; @@ -607,7 +607,7 @@ repeat: ((block+nsect) > get_capacity(req->rq_disk))) { printk("%s: bad access: block=%d, count=%d\n", req->rq_disk->disk_name, block, nsect); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); goto repeat; } @@ -647,7 +647,7 @@ repeat: break; default: printk("unknown hd-command\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } } -- cgit v1.2.2 From e091eb67af957bac4e4f7410c5d1aa263ee483a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:11 +0900 Subject: hd: clean up request completion paths hd read/write_intr() functions manually manipulate request to incrementally complete it, which block layer already supports. Simply use block layer completion routines instead of manual partial completion. While at it, clear unnecessary elv_next_request() check at the tail of read_intr(). This also makes read and write_intr() more consistent. [ Impact: cleanup ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/hd.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 5cb300b81c6a..75b9ca95c4eb 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -452,32 +452,25 @@ static void read_intr(void) bad_rw_intr(); hd_request(); return; + ok_to_read: req = CURRENT; insw(HD_DATA, req->buffer, 256); - req->sector++; - req->buffer += 512; - req->errors = 0; - i = --req->nr_sectors; - --req->current_nr_sectors; #ifdef DEBUG printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", - req->rq_disk->disk_name, req->sector, req->nr_sectors, + req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1, req->buffer+512); #endif - if (req->current_nr_sectors <= 0) - __blk_end_request_cur(req, 0); - if (i > 0) { + if (__blk_end_request(req, 0, 512)) { SET_HANDLER(&read_intr); return; } + (void) inb_p(HD_STATUS); #if (HD_DELAY > 0) last_req = read_timer(); #endif - if (elv_next_request(QUEUE)) - hd_request(); - return; + hd_request(); } static void write_intr(void) @@ -499,23 +492,18 @@ static void write_intr(void) bad_rw_intr(); hd_request(); return; + ok_to_write: - req->sector++; - i = --req->nr_sectors; - --req->current_nr_sectors; - req->buffer += 512; - if (!i || (req->bio && req->current_nr_sectors <= 0)) - __blk_end_request_cur(req, 0); - if (i > 0) { + if (__blk_end_request(req, 0, 512)) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); - } else { + return; + } + #if (HD_DELAY > 0) - last_req = read_timer(); + last_req = read_timer(); #endif - hd_request(); - } - return; + hd_request(); } static void recal_intr(void) -- cgit v1.2.2 From 83096ebf1263b2c1ee5e653ba37d993d02e3eb7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:39 +0900 Subject: block: convert to pos and nr_sectors accessors With recent cleanups, there is no place where low level driver directly manipulates request fields. This means that the 'hard' request fields always equal the !hard fields. Convert all rq->sectors, nr_sectors and current_nr_sectors references to accessors. While at it, drop superflous blk_rq_pos() < 0 test in swim.c. [ Impact: use pos and nr_sectors accessors ] Signed-off-by: Tejun Heo Acked-by: Geert Uytterhoeven Tested-by: Grant Likely Acked-by: Grant Likely Tested-by: Adrian McMenamin Acked-by: Adrian McMenamin Acked-by: Mike Miller Cc: James Bottomley Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Eric Moore Cc: Alan Stern Cc: FUJITA Tomonori Cc: Pete Zaitcev Cc: Stephen Rothwell Cc: Paul Clements Cc: Tim Waugh Cc: Jeff Garzik Cc: Jeremy Fitzhardinge Cc: Alex Dubov Cc: David Woodhouse Cc: Martin Schwidefsky Cc: Dario Ballabio Cc: David S. Miller Cc: Rusty Russell Cc: unsik Kim Cc: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/hd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 75b9ca95c4eb..a3b39940ce02 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -228,7 +228,7 @@ static void dump_status(const char *msg, unsigned int stat) printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); if (CURRENT) - printk(", sector=%ld", CURRENT->sector); + printk(", sector=%ld", blk_rq_pos(CURRENT)); } printk("\n"); } @@ -457,9 +457,9 @@ ok_to_read: req = CURRENT; insw(HD_DATA, req->buffer, 256); #ifdef DEBUG - printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", - req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1, - req->buffer+512); + printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", + req->rq_disk->disk_name, blk_rq_pos(req) + 1, + blk_rq_sectors(req) - 1, req->buffer+512); #endif if (__blk_end_request(req, 0, 512)) { SET_HANDLER(&read_intr); @@ -485,7 +485,7 @@ static void write_intr(void) continue; if (!OK_STATUS(i)) break; - if ((req->nr_sectors <= 1) || (i & DRQ_STAT)) + if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT)) goto ok_to_write; } while (--retries > 0); dump_status("write_intr", i); @@ -589,8 +589,8 @@ repeat: return; } disk = req->rq_disk->private_data; - block = req->sector; - nsect = req->nr_sectors; + block = blk_rq_pos(req); + nsect = blk_rq_sectors(req); if (block >= get_capacity(req->rq_disk) || ((block+nsect) > get_capacity(req->rq_disk))) { printk("%s: bad access: block=%d, count=%d\n", -- cgit v1.2.2 From 8a12c4a456c7ee261a85c2cbec835601e6aae05a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 May 2009 11:54:02 +0900 Subject: hd: dequeue and track in-flight request hd has at most single request in flight. Till now, whenever it needs to access the in-flight request it called elv_next_request(). This patch makes hd track the in-flight request directly and dequeue it when processing starts. The added complexity is minimal and this will help future block layer changes. [ Impact: dequeue in-flight request, one elv_next_request() per request ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/hd.c | 63 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 23 deletions(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index a3b39940ce02..288ab63c1029 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -98,10 +98,9 @@ static DEFINE_SPINLOCK(hd_lock); static struct request_queue *hd_queue; +static struct request *hd_req; #define MAJOR_NR HD_MAJOR -#define QUEUE (hd_queue) -#define CURRENT elv_next_request(hd_queue) #define TIMEOUT_VALUE (6*HZ) #define HD_DELAY 0 @@ -195,11 +194,24 @@ static void __init hd_setup(char *str, int *ints) NR_HD = hdind+1; } +static bool hd_end_request(int err, unsigned int bytes) +{ + if (__blk_end_request(hd_req, err, bytes)) + return true; + hd_req = NULL; + return false; +} + +static bool hd_end_request_cur(int err) +{ + return hd_end_request(err, blk_rq_cur_bytes(hd_req)); +} + static void dump_status(const char *msg, unsigned int stat) { char *name = "hd?"; - if (CURRENT) - name = CURRENT->rq_disk->disk_name; + if (hd_req) + name = hd_req->rq_disk->disk_name; #ifdef VERBOSE_ERRORS printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); @@ -227,8 +239,8 @@ static void dump_status(const char *msg, unsigned int stat) if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (CURRENT) - printk(", sector=%ld", blk_rq_pos(CURRENT)); + if (hd_req) + printk(", sector=%ld", blk_rq_pos(hd_req)); } printk("\n"); } @@ -406,11 +418,12 @@ static void unexpected_hd_interrupt(void) */ static void bad_rw_intr(void) { - struct request *req = CURRENT; + struct request *req = hd_req; + if (req != NULL) { struct hd_i_struct *disk = req->rq_disk->private_data; if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - __blk_end_request_cur(req, -EIO); + hd_end_request_cur(-EIO); disk->special_op = disk->recalibrate = 1; } else if (req->errors % RESET_FREQ == 0) reset = 1; @@ -454,14 +467,14 @@ static void read_intr(void) return; ok_to_read: - req = CURRENT; + req = hd_req; insw(HD_DATA, req->buffer, 256); #ifdef DEBUG printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", req->rq_disk->disk_name, blk_rq_pos(req) + 1, blk_rq_sectors(req) - 1, req->buffer+512); #endif - if (__blk_end_request(req, 0, 512)) { + if (hd_end_request(0, 512)) { SET_HANDLER(&read_intr); return; } @@ -475,7 +488,7 @@ ok_to_read: static void write_intr(void) { - struct request *req = CURRENT; + struct request *req = hd_req; int i; int retries = 100000; @@ -494,7 +507,7 @@ static void write_intr(void) return; ok_to_write: - if (__blk_end_request(req, 0, 512)) { + if (hd_end_request(0, 512)) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); return; @@ -525,18 +538,18 @@ static void hd_times_out(unsigned long dummy) do_hd = NULL; - if (!CURRENT) + if (!hd_req) return; spin_lock_irq(hd_queue->queue_lock); reset = 1; - name = CURRENT->rq_disk->disk_name; + name = hd_req->rq_disk->disk_name; printk("%s: timeout\n", name); - if (++CURRENT->errors >= MAX_ERRORS) { + if (++hd_req->errors >= MAX_ERRORS) { #ifdef DEBUG printk("%s: too many errors\n", name); #endif - __blk_end_request_cur(CURRENT, -EIO); + hd_end_request_cur(-EIO); } hd_request(); spin_unlock_irq(hd_queue->queue_lock); @@ -551,7 +564,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req) } if (disk->head > 16) { printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - __blk_end_request_cur(req, -EIO); + hd_end_request_cur(-EIO); } disk->special_op = 0; return 1; @@ -578,11 +591,15 @@ static void hd_request(void) repeat: del_timer(&device_timer); - req = CURRENT; - if (!req) { - do_hd = NULL; - return; + if (!hd_req) { + hd_req = elv_next_request(hd_queue); + if (!hd_req) { + do_hd = NULL; + return; + } + blkdev_dequeue_request(hd_req); } + req = hd_req; if (reset) { reset_hd(); @@ -595,7 +612,7 @@ repeat: ((block+nsect) > get_capacity(req->rq_disk))) { printk("%s: bad access: block=%d, count=%d\n", req->rq_disk->disk_name, block, nsect); - __blk_end_request_cur(req, -EIO); + hd_end_request_cur(-EIO); goto repeat; } @@ -635,7 +652,7 @@ repeat: break; default: printk("unknown hd-command\n"); - __blk_end_request_cur(req, -EIO); + hd_end_request_cur(-EIO); break; } } -- cgit v1.2.2 From 9934c8c04561413609d2bc38c6b9f268cba774a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 May 2009 11:54:16 +0900 Subject: block: implement and enforce request peek/start/fetch Till now block layer allowed two separate modes of request execution. A request is always acquired from the request queue via elv_next_request(). After that, drivers are free to either dequeue it or process it without dequeueing. Dequeue allows elv_next_request() to return the next request so that multiple requests can be in flight. Executing requests without dequeueing has its merits mostly in allowing drivers for simpler devices which can't do sg to deal with segments only without considering request boundary. However, the benefit this brings is dubious and declining while the cost of the API ambiguity is increasing. Segment based drivers are usually for very old or limited devices and as converting to dequeueing model isn't difficult, it doesn't justify the API overhead it puts on block layer and its more modern users. Previous patches converted all block low level drivers to dequeueing model. This patch completes the API transition by... * renaming elv_next_request() to blk_peek_request() * renaming blkdev_dequeue_request() to blk_start_request() * adding blk_fetch_request() which is combination of peek and start * disallowing completion of queued (not started) requests * applying new API to all LLDs Renamings are for consistency and to break out of tree code so that it's apparent that out of tree drivers need updating. [ Impact: block request issue API cleanup, no functional change ] Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: James Bottomley Cc: Mike Miller Cc: unsik Kim Cc: Paul Clements Cc: Tim Waugh Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Laurent Vivier Cc: Jeff Garzik Cc: Jeremy Fitzhardinge Cc: Grant Likely Cc: Adrian McMenamin Cc: Stephen Rothwell Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Alex Dubov Cc: Pierre Ossman Cc: David Woodhouse Cc: Markus Lidel Cc: Stefan Weinhuber Cc: Martin Schwidefsky Cc: Pete Zaitcev Cc: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/block/hd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 288ab63c1029..961de56d00a9 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -592,12 +592,11 @@ repeat: del_timer(&device_timer); if (!hd_req) { - hd_req = elv_next_request(hd_queue); + hd_req = blk_fetch_request(hd_queue); if (!hd_req) { do_hd = NULL; return; } - blkdev_dequeue_request(hd_req); } req = hd_req; -- cgit v1.2.2 From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:49 -0400 Subject: block: Do away with the notion of hardsect_size Until now we have had a 1:1 mapping between storage device physical block size and the logical block sized used when addressing the device. With SATA 4KB drives coming out that will no longer be the case. The sector size will be 4KB but the logical block size will remain 512-bytes. Hence we need to distinguish between the physical block size and the logical ditto. This patch renames hardsect_size to logical_block_size. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/block/hd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/hd.c') diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 961de56d00a9..f65b3f369eb0 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -724,7 +724,7 @@ static int __init hd_init(void) blk_queue_max_sectors(hd_queue, 255); init_timer(&device_timer); device_timer.function = hd_times_out; - blk_queue_hardsect_size(hd_queue, 512); + blk_queue_logical_block_size(hd_queue, 512); if (!NR_HD) { /* -- cgit v1.2.2