aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
author Josh Durgin <josh.durgin@inktank.com> 2013-12-02 22:11:48 -0500
committer Sage Weil <sage@inktank.com> 2013-12-13 12:13:29 -0500
commit d29adb34a94715174c88ca93e8aba955850c9bde (patch)
tree 8927b921055484dd0f187761d11a7121394d935e /net/ceph
parent aa8b60e077fa2a4383e79b092b65cd5455ea5ab2 (diff)
libceph: block I/O when PAUSE or FULL osd map flags are set
The PAUSEWR and PAUSERD flags are meant to stop the cluster from processing writes and reads, respectively. The FULL flag is set when the cluster determines that it is out of space, and will no longer process writes. PAUSEWR and PAUSERD are purely client-side settings already implemented in userspace clients. The osd does nothing special with these flags.

When the FULL flag is set, however, the osd responds to all writes with -ENOSPC. For cephfs, this makes sense, but for rbd the block layer translates this into EIO. If a cluster goes from full to non-full quickly, a filesystem on top of rbd will not behave well, since some writes succeed while others get EIO.

Fix this by blocking any writes when the FULL flag is set in the osd client. This is the same strategy used by userspace, so apply it by default. A follow-on patch makes this configurable.

__map_request() is called to re-target osd requests in case the available osds changed. Add a paused field to a ceph_osd_request, and set it whenever an appropriate osd map flag is set. Avoid queueing paused requests in __map_request(), but force them to be resent if they become unpaused.

Also subscribe to the next osd map from the monitor if any of these flags are set, so paused requests can be unblocked as soon as possible.

Fixes: http://tracker.ceph.com/issues/6079

Reviewed-by: Sage Weil <sage@inktank.com>
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/osd_client.c 29
1 files changed, 27 insertions, 2 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a17eaae820f8..1ad9866dc707 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1232,6 +1232,22 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
1232EXPORT_SYMBOL(ceph_osdc_set_request_linger); 1232EXPORT_SYMBOL(ceph_osdc_set_request_linger);
1233 1233
1234/* 1234/*
1235 * Returns whether a request should be blocked from being sent
1236 * based on the current osdmap and osd_client settings.
1237 *
1238 * Caller should hold map_sem for read.
1239 */
1240static bool __req_should_be_paused(struct ceph_osd_client *osdc,
1241 struct ceph_osd_request *req)
1242{
1243 bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD);
1244 bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) ||
1245 ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
1246 return (req->r_flags & CEPH_OSD_FLAG_READ && pauserd) ||
1247 (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr);
1248}
1249
1250/*
1235 * Pick an osd (the first 'up' osd in the pg), allocate the osd struct 1251 * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
1236 * (as needed), and set the request r_osd appropriately. If there is 1252 * (as needed), and set the request r_osd appropriately. If there is
1237 * no up osd, set r_osd to NULL. Move the request to the appropriate list 1253 * no up osd, set r_osd to NULL. Move the request to the appropriate list
@@ -1248,6 +1264,7 @@ static int __map_request(struct ceph_osd_client *osdc,
1248 int acting[CEPH_PG_MAX_SIZE]; 1264 int acting[CEPH_PG_MAX_SIZE];
1249 int o = -1, num = 0; 1265 int o = -1, num = 0;
1250 int err; 1266 int err;
1267 bool was_paused;
1251 1268
1252 dout("map_request %p tid %lld\n", req, req->r_tid); 1269 dout("map_request %p tid %lld\n", req, req->r_tid);
1253 err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap, 1270 err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
@@ -1264,12 +1281,18 @@ static int __map_request(struct ceph_osd_client *osdc,
1264 num = err; 1281 num = err;
1265 } 1282 }
1266 1283
1284 was_paused = req->r_paused;
1285 req->r_paused = __req_should_be_paused(osdc, req);
1286 if (was_paused && !req->r_paused)
1287 force_resend = 1;
1288
1267 if ((!force_resend && 1289 if ((!force_resend &&
1268 req->r_osd && req->r_osd->o_osd == o && 1290 req->r_osd && req->r_osd->o_osd == o &&
1269 req->r_sent >= req->r_osd->o_incarnation && 1291 req->r_sent >= req->r_osd->o_incarnation &&
1270 req->r_num_pg_osds == num && 1292 req->r_num_pg_osds == num &&
1271 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || 1293 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
1272 (req->r_osd == NULL && o == -1)) 1294 (req->r_osd == NULL && o == -1) ||
1295 req->r_paused)
1273 return 0; /* no change */ 1296 return 0; /* no change */
1274 1297
1275 dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", 1298 dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
@@ -1811,7 +1834,9 @@ done:
1811 * we find out when we are no longer full and stop returning 1834 * we find out when we are no longer full and stop returning
1812 * ENOSPC. 1835 * ENOSPC.
1813 */ 1836 */
1814 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) 1837 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) ||
1838 ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) ||
1839 ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR))
1815 ceph_monc_request_next_osdmap(&osdc->client->monc); 1840 ceph_monc_request_next_osdmap(&osdc->client->monc);
1816 1841
1817 mutex_lock(&osdc->request_mutex); 1842 mutex_lock(&osdc->request_mutex);