aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-02-06 16:38:33 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-06 16:38:33 -0500
commit2110cf029a67237db572299bb51e0de9e3e3d4dd (patch)
tree6837ff139a807037f06952c3eb72c3e350bc05c8
parent1589a3e7777631ff56dd58cd7dcdf275185e62b5 (diff)
parent1383923d1985cef2bceb8128094fbe5e05de7435 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "I've got a few bits pending for 3.8 final, that I better get sent out. It's all been sitting for a while, I consider it safe. It contains: - Two bug fixes for mtip32xx, fixing a driver hang and a crash. - A few-liner protocol error fix for drbd. - A few fixes for the xen block front/back driver, fixing a potential data corruption issue. - A race fix for disk_clear_events(), causing spurious warnings. Out of the Chrome OS base. - A deadlock fix for disk_clear_events(), moving it to the a unfreezable workqueue. Also from the Chrome OS base." * 'for-linus' of git://git.kernel.dk/linux-block: drbd: fix potential protocol error and resulting disconnect/reconnect mtip32xx: fix for crash when the device surprise removed during rebuild mtip32xx: fix for driver hang after a command timeout block: prevent race/cleanup block: remove deadlock in disk_clear_events xen-blkfront: handle bvecs with partial data llist/xen-blkfront: implement safe version of llist_for_each_entry xen-blkback: implement safe iterator for the list of persistent grants
-rw-r--r--block/genhd.c42
-rw-r--r--drivers/block/drbd/drbd_req.c2
-rw-r--r--drivers/block/drbd/drbd_req.h1
-rw-r--r--drivers/block/drbd/drbd_state.c7
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c24
-rw-r--r--drivers/block/xen-blkback/blkback.c18
-rw-r--r--drivers/block/xen-blkfront.c10
-rw-r--r--include/linux/llist.h25
8 files changed, 101 insertions, 28 deletions
diff --git a/block/genhd.c b/block/genhd.c
index 9a289d7c84bb..3993ebf4135f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -35,6 +35,8 @@ static DEFINE_IDR(ext_devt_idr);
35 35
36static struct device_type disk_type; 36static struct device_type disk_type;
37 37
38static void disk_check_events(struct disk_events *ev,
39 unsigned int *clearing_ptr);
38static void disk_alloc_events(struct gendisk *disk); 40static void disk_alloc_events(struct gendisk *disk);
39static void disk_add_events(struct gendisk *disk); 41static void disk_add_events(struct gendisk *disk);
40static void disk_del_events(struct gendisk *disk); 42static void disk_del_events(struct gendisk *disk);
@@ -1549,6 +1551,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1549 const struct block_device_operations *bdops = disk->fops; 1551 const struct block_device_operations *bdops = disk->fops;
1550 struct disk_events *ev = disk->ev; 1552 struct disk_events *ev = disk->ev;
1551 unsigned int pending; 1553 unsigned int pending;
1554 unsigned int clearing = mask;
1552 1555
1553 if (!ev) { 1556 if (!ev) {
1554 /* for drivers still using the old ->media_changed method */ 1557 /* for drivers still using the old ->media_changed method */
@@ -1558,34 +1561,53 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1558 return 0; 1561 return 0;
1559 } 1562 }
1560 1563
1561 /* tell the workfn about the events being cleared */ 1564 disk_block_events(disk);
1565
1566 /*
1567 * store the union of mask and ev->clearing on the stack so that the
1568 * race with disk_flush_events does not cause ambiguity (ev->clearing
1569 * can still be modified even if events are blocked).
1570 */
1562 spin_lock_irq(&ev->lock); 1571 spin_lock_irq(&ev->lock);
1563 ev->clearing |= mask; 1572 clearing |= ev->clearing;
1573 ev->clearing = 0;
1564 spin_unlock_irq(&ev->lock); 1574 spin_unlock_irq(&ev->lock);
1565 1575
1566 /* uncondtionally schedule event check and wait for it to finish */ 1576 disk_check_events(ev, &clearing);
1567 disk_block_events(disk); 1577 /*
1568 queue_delayed_work(system_freezable_wq, &ev->dwork, 0); 1578 * if ev->clearing is not 0, the disk_flush_events got called in the
1569 flush_delayed_work(&ev->dwork); 1579 * middle of this function, so we want to run the workfn without delay.
1570 __disk_unblock_events(disk, false); 1580 */
1581 __disk_unblock_events(disk, ev->clearing ? true : false);
1571 1582
1572 /* then, fetch and clear pending events */ 1583 /* then, fetch and clear pending events */
1573 spin_lock_irq(&ev->lock); 1584 spin_lock_irq(&ev->lock);
1574 WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
1575 pending = ev->pending & mask; 1585 pending = ev->pending & mask;
1576 ev->pending &= ~mask; 1586 ev->pending &= ~mask;
1577 spin_unlock_irq(&ev->lock); 1587 spin_unlock_irq(&ev->lock);
1588 WARN_ON_ONCE(clearing & mask);
1578 1589
1579 return pending; 1590 return pending;
1580} 1591}
1581 1592
1593/*
1594 * Separate this part out so that a different pointer for clearing_ptr can be
1595 * passed in for disk_clear_events.
1596 */
1582static void disk_events_workfn(struct work_struct *work) 1597static void disk_events_workfn(struct work_struct *work)
1583{ 1598{
1584 struct delayed_work *dwork = to_delayed_work(work); 1599 struct delayed_work *dwork = to_delayed_work(work);
1585 struct disk_events *ev = container_of(dwork, struct disk_events, dwork); 1600 struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1601
1602 disk_check_events(ev, &ev->clearing);
1603}
1604
1605static void disk_check_events(struct disk_events *ev,
1606 unsigned int *clearing_ptr)
1607{
1586 struct gendisk *disk = ev->disk; 1608 struct gendisk *disk = ev->disk;
1587 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; 1609 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1588 unsigned int clearing = ev->clearing; 1610 unsigned int clearing = *clearing_ptr;
1589 unsigned int events; 1611 unsigned int events;
1590 unsigned long intv; 1612 unsigned long intv;
1591 int nr_events = 0, i; 1613 int nr_events = 0, i;
@@ -1598,7 +1620,7 @@ static void disk_events_workfn(struct work_struct *work)
1598 1620
1599 events &= ~ev->pending; 1621 events &= ~ev->pending;
1600 ev->pending |= events; 1622 ev->pending |= events;
1601 ev->clearing &= ~clearing; 1623 *clearing_ptr &= ~clearing;
1602 1624
1603 intv = disk_events_poll_jiffies(disk); 1625 intv = disk_events_poll_jiffies(disk);
1604 if (!ev->block && intv) 1626 if (!ev->block && intv)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f58a4a4b4dfb..2b8303ad63c9 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -168,7 +168,7 @@ static void wake_all_senders(struct drbd_tconn *tconn) {
168} 168}
169 169
170/* must hold resource->req_lock */ 170/* must hold resource->req_lock */
171static void start_new_tl_epoch(struct drbd_tconn *tconn) 171void start_new_tl_epoch(struct drbd_tconn *tconn)
172{ 172{
173 /* no point closing an epoch, if it is empty, anyways. */ 173 /* no point closing an epoch, if it is empty, anyways. */
174 if (tconn->current_tle_writes == 0) 174 if (tconn->current_tle_writes == 0)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 016de6b8bb57..c08d22964d06 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -267,6 +267,7 @@ struct bio_and_error {
267 int error; 267 int error;
268}; 268};
269 269
270extern void start_new_tl_epoch(struct drbd_tconn *tconn);
270extern void drbd_req_destroy(struct kref *kref); 271extern void drbd_req_destroy(struct kref *kref);
271extern void _req_may_be_done(struct drbd_request *req, 272extern void _req_may_be_done(struct drbd_request *req,
272 struct bio_and_error *m); 273 struct bio_and_error *m);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 53bf6182bac4..0fe220cfb9e9 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -931,6 +931,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
931 enum drbd_state_rv rv = SS_SUCCESS; 931 enum drbd_state_rv rv = SS_SUCCESS;
932 enum sanitize_state_warnings ssw; 932 enum sanitize_state_warnings ssw;
933 struct after_state_chg_work *ascw; 933 struct after_state_chg_work *ascw;
934 bool did_remote, should_do_remote;
934 935
935 os = drbd_read_state(mdev); 936 os = drbd_read_state(mdev);
936 937
@@ -981,11 +982,17 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
981 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) 982 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
982 atomic_inc(&mdev->local_cnt); 983 atomic_inc(&mdev->local_cnt);
983 984
985 did_remote = drbd_should_do_remote(mdev->state);
984 mdev->state.i = ns.i; 986 mdev->state.i = ns.i;
987 should_do_remote = drbd_should_do_remote(mdev->state);
985 mdev->tconn->susp = ns.susp; 988 mdev->tconn->susp = ns.susp;
986 mdev->tconn->susp_nod = ns.susp_nod; 989 mdev->tconn->susp_nod = ns.susp_nod;
987 mdev->tconn->susp_fen = ns.susp_fen; 990 mdev->tconn->susp_fen = ns.susp_fen;
988 991
992 /* put replicated vs not-replicated requests in seperate epochs */
993 if (did_remote != should_do_remote)
994 start_new_tl_epoch(mdev->tconn);
995
989 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) 996 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
990 drbd_print_uuids(mdev, "attached to UUIDs"); 997 drbd_print_uuids(mdev, "attached to UUIDs");
991 998
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 9694dd99bbbc..3fd100990453 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -626,12 +626,13 @@ static void mtip_timeout_function(unsigned long int data)
626 } 626 }
627 } 627 }
628 628
629 if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { 629 if (cmdto_cnt) {
630 print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); 630 print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
631 631 if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
632 mtip_restart_port(port); 632 mtip_restart_port(port);
633 wake_up_interruptible(&port->svc_wait);
634 }
633 clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); 635 clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
634 wake_up_interruptible(&port->svc_wait);
635 } 636 }
636 637
637 if (port->ic_pause_timer) { 638 if (port->ic_pause_timer) {
@@ -3887,7 +3888,12 @@ static int mtip_block_remove(struct driver_data *dd)
3887 * Delete our gendisk structure. This also removes the device 3888 * Delete our gendisk structure. This also removes the device
3888 * from /dev 3889 * from /dev
3889 */ 3890 */
3890 del_gendisk(dd->disk); 3891 if (dd->disk) {
3892 if (dd->disk->queue)
3893 del_gendisk(dd->disk);
3894 else
3895 put_disk(dd->disk);
3896 }
3891 3897
3892 spin_lock(&rssd_index_lock); 3898 spin_lock(&rssd_index_lock);
3893 ida_remove(&rssd_index_ida, dd->index); 3899 ida_remove(&rssd_index_ida, dd->index);
@@ -3921,7 +3927,13 @@ static int mtip_block_shutdown(struct driver_data *dd)
3921 "Shutting down %s ...\n", dd->disk->disk_name); 3927 "Shutting down %s ...\n", dd->disk->disk_name);
3922 3928
3923 /* Delete our gendisk structure, and cleanup the blk queue. */ 3929 /* Delete our gendisk structure, and cleanup the blk queue. */
3924 del_gendisk(dd->disk); 3930 if (dd->disk) {
3931 if (dd->disk->queue)
3932 del_gendisk(dd->disk);
3933 else
3934 put_disk(dd->disk);
3935 }
3936
3925 3937
3926 spin_lock(&rssd_index_lock); 3938 spin_lock(&rssd_index_lock);
3927 ida_remove(&rssd_index_ida, dd->index); 3939 ida_remove(&rssd_index_ida, dd->index);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 74374fb762aa..5ac841ff6cc7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -161,10 +161,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
161static void make_response(struct xen_blkif *blkif, u64 id, 161static void make_response(struct xen_blkif *blkif, u64 id,
162 unsigned short op, int st); 162 unsigned short op, int st);
163 163
164#define foreach_grant(pos, rbtree, node) \ 164#define foreach_grant_safe(pos, n, rbtree, node) \
165 for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ 165 for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
166 (n) = rb_next(&(pos)->node); \
166 &(pos)->node != NULL; \ 167 &(pos)->node != NULL; \
167 (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) 168 (pos) = container_of(n, typeof(*(pos)), node), \
169 (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
168 170
169 171
170static void add_persistent_gnt(struct rb_root *root, 172static void add_persistent_gnt(struct rb_root *root,
@@ -217,10 +219,11 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
217 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 219 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
218 struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 220 struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
219 struct persistent_gnt *persistent_gnt; 221 struct persistent_gnt *persistent_gnt;
222 struct rb_node *n;
220 int ret = 0; 223 int ret = 0;
221 int segs_to_unmap = 0; 224 int segs_to_unmap = 0;
222 225
223 foreach_grant(persistent_gnt, root, node) { 226 foreach_grant_safe(persistent_gnt, n, root, node) {
224 BUG_ON(persistent_gnt->handle == 227 BUG_ON(persistent_gnt->handle ==
225 BLKBACK_INVALID_HANDLE); 228 BLKBACK_INVALID_HANDLE);
226 gnttab_set_unmap_op(&unmap[segs_to_unmap], 229 gnttab_set_unmap_op(&unmap[segs_to_unmap],
@@ -230,9 +233,6 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
230 persistent_gnt->handle); 233 persistent_gnt->handle);
231 234
232 pages[segs_to_unmap] = persistent_gnt->page; 235 pages[segs_to_unmap] = persistent_gnt->page;
233 rb_erase(&persistent_gnt->node, root);
234 kfree(persistent_gnt);
235 num--;
236 236
237 if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || 237 if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
238 !rb_next(&persistent_gnt->node)) { 238 !rb_next(&persistent_gnt->node)) {
@@ -241,6 +241,10 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
241 BUG_ON(ret); 241 BUG_ON(ret);
242 segs_to_unmap = 0; 242 segs_to_unmap = 0;
243 } 243 }
244
245 rb_erase(&persistent_gnt->node, root);
246 kfree(persistent_gnt);
247 num--;
244 } 248 }
245 BUG_ON(num != 0); 249 BUG_ON(num != 0);
246} 250}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 96e9b00db081..11043c18ac5a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
792{ 792{
793 struct llist_node *all_gnts; 793 struct llist_node *all_gnts;
794 struct grant *persistent_gnt; 794 struct grant *persistent_gnt;
795 struct llist_node *n;
795 796
796 /* Prevent new requests being issued until we fix things up. */ 797 /* Prevent new requests being issued until we fix things up. */
797 spin_lock_irq(&info->io_lock); 798 spin_lock_irq(&info->io_lock);
@@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
804 /* Remove all persistent grants */ 805 /* Remove all persistent grants */
805 if (info->persistent_gnts_c) { 806 if (info->persistent_gnts_c) {
806 all_gnts = llist_del_all(&info->persistent_gnts); 807 all_gnts = llist_del_all(&info->persistent_gnts);
807 llist_for_each_entry(persistent_gnt, all_gnts, node) { 808 llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
808 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 809 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
809 __free_page(pfn_to_page(persistent_gnt->pfn)); 810 __free_page(pfn_to_page(persistent_gnt->pfn));
810 kfree(persistent_gnt); 811 kfree(persistent_gnt);
@@ -835,7 +836,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
835static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, 836static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
836 struct blkif_response *bret) 837 struct blkif_response *bret)
837{ 838{
838 int i; 839 int i = 0;
839 struct bio_vec *bvec; 840 struct bio_vec *bvec;
840 struct req_iterator iter; 841 struct req_iterator iter;
841 unsigned long flags; 842 unsigned long flags;
@@ -852,7 +853,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
852 */ 853 */
853 rq_for_each_segment(bvec, s->request, iter) { 854 rq_for_each_segment(bvec, s->request, iter) {
854 BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); 855 BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
855 i = offset >> PAGE_SHIFT; 856 if (bvec->bv_offset < offset)
857 i++;
856 BUG_ON(i >= s->req.u.rw.nr_segments); 858 BUG_ON(i >= s->req.u.rw.nr_segments);
857 shared_data = kmap_atomic( 859 shared_data = kmap_atomic(
858 pfn_to_page(s->grants_used[i]->pfn)); 860 pfn_to_page(s->grants_used[i]->pfn));
@@ -861,7 +863,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
861 bvec->bv_len); 863 bvec->bv_len);
862 bvec_kunmap_irq(bvec_data, &flags); 864 bvec_kunmap_irq(bvec_data, &flags);
863 kunmap_atomic(shared_data); 865 kunmap_atomic(shared_data);
864 offset += bvec->bv_len; 866 offset = bvec->bv_offset + bvec->bv_len;
865 } 867 }
866 } 868 }
867 /* Add the persistent grant into the list of free grants */ 869 /* Add the persistent grant into the list of free grants */
diff --git a/include/linux/llist.h b/include/linux/llist.h
index a5199f6d0e82..d0ab98f73d38 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -125,6 +125,31 @@ static inline void init_llist_head(struct llist_head *list)
125 (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) 125 (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
126 126
127/** 127/**
128 * llist_for_each_entry_safe - iterate safely against remove over some entries
129 * of lock-less list of given type.
130 * @pos: the type * to use as a loop cursor.
131 * @n: another type * to use as a temporary storage.
132 * @node: the fist entry of deleted list entries.
133 * @member: the name of the llist_node with the struct.
134 *
135 * In general, some entries of the lock-less list can be traversed
136 * safely only after being removed from list, so start with an entry
137 * instead of list head. This variant allows removal of entries
138 * as we iterate.
139 *
140 * If being used on entries deleted from lock-less list directly, the
141 * traverse order is from the newest to the oldest added entry. If
142 * you want to traverse from the oldest to the newest, you must
143 * reverse the order by yourself before traversing.
144 */
145#define llist_for_each_entry_safe(pos, n, node, member) \
146 for ((pos) = llist_entry((node), typeof(*(pos)), member), \
147 (n) = (pos)->member.next; \
148 &(pos)->member != NULL; \
149 (pos) = llist_entry(n, typeof(*(pos)), member), \
150 (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
151
152/**
128 * llist_empty - tests whether a lock-less list is empty 153 * llist_empty - tests whether a lock-less list is empty
129 * @head: the list to test 154 * @head: the list to test
130 * 155 *