diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-22 20:47:08 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-22 20:47:08 -0400 |
| commit | 002b758b6dc4d840e662f25625f696d7b43d48f4 (patch) | |
| tree | 99c83d7622066cdf7bb5d467f0017b2360fb7ada | |
| parent | 369c4f542fd5e197ace5f9fdd33c558fb2358480 (diff) | |
| parent | 642c0dbde32f34baa7886e988a067089992adc8f (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil:
"There are a couple of fixes from Yan for bad pointer dereferences in
the messenger code and when fiddling with page->private after page
migration, a fix from Alex for a use-after-free in the osd client
code, and a couple of fixes for the message refcounting and shutdown
ordering."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
libceph: flush msgr queue during mon_client shutdown
rbd: Clear ceph_msg->bio_iter for retransmitted message
libceph: use con get/put ops from osd_client
libceph: osd_client: don't drop reply reference too early
ceph: check PG_Private flag before accessing page->private
| -rw-r--r-- | fs/ceph/addr.c | 21 | ||||
| -rw-r--r-- | net/ceph/ceph_common.c | 7 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 4 | ||||
| -rw-r--r-- | net/ceph/mon_client.c | 8 | ||||
| -rw-r--r-- | net/ceph/osd_client.c | 12 |
5 files changed, 30 insertions, 22 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 173b1d22e59b..8b67304e4b80 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -54,7 +54,12 @@ | |||
| 54 | (CONGESTION_ON_THRESH(congestion_kb) - \ | 54 | (CONGESTION_ON_THRESH(congestion_kb) - \ |
| 55 | (CONGESTION_ON_THRESH(congestion_kb) >> 2)) | 55 | (CONGESTION_ON_THRESH(congestion_kb) >> 2)) |
| 56 | 56 | ||
| 57 | 57 | static inline struct ceph_snap_context *page_snap_context(struct page *page) | |
| 58 | { | ||
| 59 | if (PagePrivate(page)) | ||
| 60 | return (void *)page->private; | ||
| 61 | return NULL; | ||
| 62 | } | ||
| 58 | 63 | ||
| 59 | /* | 64 | /* |
| 60 | * Dirty a page. Optimistically adjust accounting, on the assumption | 65 | * Dirty a page. Optimistically adjust accounting, on the assumption |
| @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) | |||
| 142 | { | 147 | { |
| 143 | struct inode *inode; | 148 | struct inode *inode; |
| 144 | struct ceph_inode_info *ci; | 149 | struct ceph_inode_info *ci; |
| 145 | struct ceph_snap_context *snapc = (void *)page->private; | 150 | struct ceph_snap_context *snapc = page_snap_context(page); |
| 146 | 151 | ||
| 147 | BUG_ON(!PageLocked(page)); | 152 | BUG_ON(!PageLocked(page)); |
| 148 | BUG_ON(!page->private); | ||
| 149 | BUG_ON(!PagePrivate(page)); | 153 | BUG_ON(!PagePrivate(page)); |
| 150 | BUG_ON(!page->mapping); | 154 | BUG_ON(!page->mapping); |
| 151 | 155 | ||
| @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g) | |||
| 182 | struct inode *inode = page->mapping ? page->mapping->host : NULL; | 186 | struct inode *inode = page->mapping ? page->mapping->host : NULL; |
| 183 | dout("%p releasepage %p idx %lu\n", inode, page, page->index); | 187 | dout("%p releasepage %p idx %lu\n", inode, page, page->index); |
| 184 | WARN_ON(PageDirty(page)); | 188 | WARN_ON(PageDirty(page)); |
| 185 | WARN_ON(page->private); | ||
| 186 | WARN_ON(PagePrivate(page)); | 189 | WARN_ON(PagePrivate(page)); |
| 187 | return 0; | 190 | return 0; |
| 188 | } | 191 | } |
| @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 443 | osdc = &fsc->client->osdc; | 446 | osdc = &fsc->client->osdc; |
| 444 | 447 | ||
| 445 | /* verify this is a writeable snap context */ | 448 | /* verify this is a writeable snap context */ |
| 446 | snapc = (void *)page->private; | 449 | snapc = page_snap_context(page); |
| 447 | if (snapc == NULL) { | 450 | if (snapc == NULL) { |
| 448 | dout("writepage %p page %p not dirty?\n", inode, page); | 451 | dout("writepage %p page %p not dirty?\n", inode, page); |
| 449 | goto out; | 452 | goto out; |
| @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 451 | oldest = get_oldest_context(inode, &snap_size); | 454 | oldest = get_oldest_context(inode, &snap_size); |
| 452 | if (snapc->seq > oldest->seq) { | 455 | if (snapc->seq > oldest->seq) { |
| 453 | dout("writepage %p page %p snapc %p not writeable - noop\n", | 456 | dout("writepage %p page %p snapc %p not writeable - noop\n", |
| 454 | inode, page, (void *)page->private); | 457 | inode, page, snapc); |
| 455 | /* we should only noop if called by kswapd */ | 458 | /* we should only noop if called by kswapd */ |
| 456 | WARN_ON((current->flags & PF_MEMALLOC) == 0); | 459 | WARN_ON((current->flags & PF_MEMALLOC) == 0); |
| 457 | ceph_put_snap_context(oldest); | 460 | ceph_put_snap_context(oldest); |
| @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 591 | clear_bdi_congested(&fsc->backing_dev_info, | 594 | clear_bdi_congested(&fsc->backing_dev_info, |
| 592 | BLK_RW_ASYNC); | 595 | BLK_RW_ASYNC); |
| 593 | 596 | ||
| 594 | ceph_put_snap_context((void *)page->private); | 597 | ceph_put_snap_context(page_snap_context(page)); |
| 595 | page->private = 0; | 598 | page->private = 0; |
| 596 | ClearPagePrivate(page); | 599 | ClearPagePrivate(page); |
| 597 | dout("unlocking %d %p\n", i, page); | 600 | dout("unlocking %d %p\n", i, page); |
| @@ -795,7 +798,7 @@ get_more_pages: | |||
| 795 | } | 798 | } |
| 796 | 799 | ||
| 797 | /* only if matching snap context */ | 800 | /* only if matching snap context */ |
| 798 | pgsnapc = (void *)page->private; | 801 | pgsnapc = page_snap_context(page); |
| 799 | if (pgsnapc->seq > snapc->seq) { | 802 | if (pgsnapc->seq > snapc->seq) { |
| 800 | dout("page snapc %p %lld > oldest %p %lld\n", | 803 | dout("page snapc %p %lld > oldest %p %lld\n", |
| 801 | pgsnapc, pgsnapc->seq, snapc, snapc->seq); | 804 | pgsnapc, pgsnapc->seq, snapc, snapc->seq); |
| @@ -984,7 +987,7 @@ retry_locked: | |||
| 984 | BUG_ON(!ci->i_snap_realm); | 987 | BUG_ON(!ci->i_snap_realm); |
| 985 | down_read(&mdsc->snap_rwsem); | 988 | down_read(&mdsc->snap_rwsem); |
| 986 | BUG_ON(!ci->i_snap_realm->cached_context); | 989 | BUG_ON(!ci->i_snap_realm->cached_context); |
| 987 | snapc = (void *)page->private; | 990 | snapc = page_snap_context(page); |
| 988 | if (snapc && snapc != ci->i_head_snapc) { | 991 | if (snapc && snapc != ci->i_head_snapc) { |
| 989 | /* | 992 | /* |
| 990 | * this page is already dirty in another (older) snap | 993 | * this page is already dirty in another (older) snap |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a776f751edbf..ba4323bce0e9 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
| @@ -504,13 +504,6 @@ void ceph_destroy_client(struct ceph_client *client) | |||
| 504 | /* unmount */ | 504 | /* unmount */ |
| 505 | ceph_osdc_stop(&client->osdc); | 505 | ceph_osdc_stop(&client->osdc); |
| 506 | 506 | ||
| 507 | /* | ||
| 508 | * make sure osd connections close out before destroying the | ||
| 509 | * auth module, which is needed to free those connections' | ||
| 510 | * ceph_authorizers. | ||
| 511 | */ | ||
| 512 | ceph_msgr_flush(); | ||
| 513 | |||
| 514 | ceph_monc_stop(&client->monc); | 507 | ceph_monc_stop(&client->monc); |
| 515 | 508 | ||
| 516 | ceph_debugfs_client_cleanup(client); | 509 | ceph_debugfs_client_cleanup(client); |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 524f4e4f598b..b332c3d76059 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -563,6 +563,10 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 563 | m->hdr.seq = cpu_to_le64(++con->out_seq); | 563 | m->hdr.seq = cpu_to_le64(++con->out_seq); |
| 564 | m->needs_out_seq = false; | 564 | m->needs_out_seq = false; |
| 565 | } | 565 | } |
| 566 | #ifdef CONFIG_BLOCK | ||
| 567 | else | ||
| 568 | m->bio_iter = NULL; | ||
| 569 | #endif | ||
| 566 | 570 | ||
| 567 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", | 571 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", |
| 568 | m, con->out_seq, le16_to_cpu(m->hdr.type), | 572 | m, con->out_seq, le16_to_cpu(m->hdr.type), |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 10d6008d31f2..d0649a9655be 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -847,6 +847,14 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
| 847 | 847 | ||
| 848 | mutex_unlock(&monc->mutex); | 848 | mutex_unlock(&monc->mutex); |
| 849 | 849 | ||
| 850 | /* | ||
| 851 | * flush msgr queue before we destroy ourselves to ensure that: | ||
| 852 | * - any work that references our embedded con is finished. | ||
| 853 | * - any osd_client or other work that may reference an authorizer | ||
| 854 | * finishes before we shut down the auth subsystem. | ||
| 855 | */ | ||
| 856 | ceph_msgr_flush(); | ||
| 857 | |||
| 850 | ceph_auth_destroy(monc->auth); | 858 | ceph_auth_destroy(monc->auth); |
| 851 | 859 | ||
| 852 | ceph_msg_put(monc->m_auth); | 860 | ceph_msg_put(monc->m_auth); |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1ffebed5ce0f..ca59e66c9787 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -139,15 +139,15 @@ void ceph_osdc_release_request(struct kref *kref) | |||
| 139 | 139 | ||
| 140 | if (req->r_request) | 140 | if (req->r_request) |
| 141 | ceph_msg_put(req->r_request); | 141 | ceph_msg_put(req->r_request); |
| 142 | if (req->r_reply) | ||
| 143 | ceph_msg_put(req->r_reply); | ||
| 144 | if (req->r_con_filling_msg) { | 142 | if (req->r_con_filling_msg) { |
| 145 | dout("release_request revoking pages %p from con %p\n", | 143 | dout("release_request revoking pages %p from con %p\n", |
| 146 | req->r_pages, req->r_con_filling_msg); | 144 | req->r_pages, req->r_con_filling_msg); |
| 147 | ceph_con_revoke_message(req->r_con_filling_msg, | 145 | ceph_con_revoke_message(req->r_con_filling_msg, |
| 148 | req->r_reply); | 146 | req->r_reply); |
| 149 | ceph_con_put(req->r_con_filling_msg); | 147 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); |
| 150 | } | 148 | } |
| 149 | if (req->r_reply) | ||
| 150 | ceph_msg_put(req->r_reply); | ||
| 151 | if (req->r_own_pages) | 151 | if (req->r_own_pages) |
| 152 | ceph_release_page_vector(req->r_pages, | 152 | ceph_release_page_vector(req->r_pages, |
| 153 | req->r_num_pages); | 153 | req->r_num_pages); |
| @@ -1216,7 +1216,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
| 1216 | if (req->r_con_filling_msg == con && req->r_reply == msg) { | 1216 | if (req->r_con_filling_msg == con && req->r_reply == msg) { |
| 1217 | dout(" dropping con_filling_msg ref %p\n", con); | 1217 | dout(" dropping con_filling_msg ref %p\n", con); |
| 1218 | req->r_con_filling_msg = NULL; | 1218 | req->r_con_filling_msg = NULL; |
| 1219 | ceph_con_put(con); | 1219 | con->ops->put(con); |
| 1220 | } | 1220 | } |
| 1221 | 1221 | ||
| 1222 | if (!req->r_got_reply) { | 1222 | if (!req->r_got_reply) { |
| @@ -2028,7 +2028,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 2028 | dout("get_reply revoking msg %p from old con %p\n", | 2028 | dout("get_reply revoking msg %p from old con %p\n", |
| 2029 | req->r_reply, req->r_con_filling_msg); | 2029 | req->r_reply, req->r_con_filling_msg); |
| 2030 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | 2030 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); |
| 2031 | ceph_con_put(req->r_con_filling_msg); | 2031 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); |
| 2032 | req->r_con_filling_msg = NULL; | 2032 | req->r_con_filling_msg = NULL; |
| 2033 | } | 2033 | } |
| 2034 | 2034 | ||
| @@ -2063,7 +2063,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 2063 | #endif | 2063 | #endif |
| 2064 | } | 2064 | } |
| 2065 | *skip = 0; | 2065 | *skip = 0; |
| 2066 | req->r_con_filling_msg = ceph_con_get(con); | 2066 | req->r_con_filling_msg = con->ops->get(con); |
| 2067 | dout("get_reply tid %lld %p\n", tid, m); | 2067 | dout("get_reply tid %lld %p\n", tid, m); |
| 2068 | 2068 | ||
| 2069 | out: | 2069 | out: |
