path: root/drivers/block/drbd/drbd_main.c
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r--	drivers/block/drbd/drbd_main.c	158
1 file changed, 139 insertions(+), 19 deletions(-)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 93d1f9b469d4..be2d2da9cdba 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -684,6 +684,9 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
 		rv = SS_NO_REMOTE_DISK;
 
+	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
 	else if ((ns.conn == C_CONNECTED ||
 		  ns.conn == C_WF_BITMAP_S ||
 		  ns.conn == C_SYNC_SOURCE ||
@@ -840,7 +843,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		break;
 	case C_WF_BITMAP_S:
 	case C_PAUSED_SYNC_S:
-		ns.pdsk = D_OUTDATED;
+		/* remap any consistent state to D_OUTDATED,
+		 * but disallow "upgrade" of not even consistent states.
+		 */
+		ns.pdsk =
+			(D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
+			? os.pdsk : D_OUTDATED;
 		break;
 	case C_SYNC_SOURCE:
 		ns.pdsk = D_INCONSISTENT;
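In DRBD's disk-state ordering, the states strictly between D_DISKLESS and
D_OUTDATED (D_ATTACHING, D_FAILED, D_NEGOTIATING, D_INCONSISTENT) are exactly
the not-even-consistent ones, so the new conditional keeps those untouched and
collapses everything else to D_OUTDATED. Spelled out as a plain if/else, as an
illustrative sketch only:

	if (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
		ns.pdsk = os.pdsk;	/* keep a not-even-consistent peer disk state */
	else
		ns.pdsk = D_OUTDATED;	/* remap any consistent state down to D_OUTDATED */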
@@ -1205,21 +1213,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	    && (ns.pdsk < D_INCONSISTENT ||
 		ns.pdsk == D_UNKNOWN ||
 		ns.pdsk == D_OUTDATED)) {
-		kfree(mdev->p_uuid);
-		mdev->p_uuid = NULL;
 		if (get_ldev(mdev)) {
 			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
-				drbd_uuid_new_current(mdev);
-				drbd_send_uuids(mdev);
-			}
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE &&
+			    !atomic_read(&mdev->new_c_uuid))
+				atomic_set(&mdev->new_c_uuid, 2);
 			put_ldev(mdev);
 		}
 	}
 
 	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
-			drbd_uuid_new_current(mdev);
+		/* Diskless peer becomes primary, or we got connected to a diskless, primary peer. */
+		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 &&
+		    !atomic_read(&mdev->new_c_uuid))
+			atomic_set(&mdev->new_c_uuid, 2);
 
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1232,7 +1239,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
 		kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
 		mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
-		drbd_send_sizes(mdev, 0);  /* to start sync... */
+		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
 		drbd_send_uuids(mdev);
 		drbd_send_state(mdev);
 	}
@@ -1343,6 +1350,24 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	drbd_md_sync(mdev);
 }
 
+static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	if (get_ldev(mdev)) {
+		if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
+			drbd_uuid_new_current(mdev);
+			if (get_net_conf(mdev)) {
+				drbd_send_uuids(mdev);
+				put_net_conf(mdev);
+			}
+			drbd_md_sync(mdev);
+		}
+		put_ldev(mdev);
+	}
+	atomic_dec(&mdev->new_c_uuid);
+	wake_up(&mdev->misc_wait);
+
+	return 1;
+}
 
 static int drbd_thread_setup(void *arg)
 {
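The UUID bump that used to run inline in after_state_ch() is thus deferred to
the worker: the state-change path only sets new_c_uuid to 2, and
w_new_current_uuid() performs the actual drbd_uuid_new_current() later, then
decrements the counter and wakes misc_wait (the second decrement is presumably
issued elsewhere in this series). A path that must not proceed while a new
current UUID is still pending could wait with the usual idiom (sketch only;
the actual consumers of new_c_uuid are outside this file):

	wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));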
@@ -1755,7 +1780,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
 		     (struct p_header *)&p, sizeof(p));
 }
 
-int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
+int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
 {
 	struct p_sizes p;
 	sector_t d_size, u_size;
@@ -1767,7 +1792,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
 		d_size = drbd_get_max_capacity(mdev->ldev);
 		u_size = mdev->ldev->dc.disk_size;
 		q_order_type = drbd_queue_order_type(mdev);
-		p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
 		put_ldev(mdev);
 	} else {
 		d_size = 0;
@@ -1779,7 +1803,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
 	p.u_size = cpu_to_be64(u_size);
 	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
 	p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
-	p.queue_order_type = cpu_to_be32(q_order_type);
+	p.queue_order_type = cpu_to_be16(q_order_type);
+	p.dds_flags = cpu_to_be16(flags);
 
 	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
 			   (struct p_header *)&p, sizeof(p));
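Narrowing queue_order_type from 32 to 16 bits frees exactly the room the new
dds_flags field occupies, so struct p_sizes keeps its on-the-wire size. A
sketch of the resulting packet layout, with field widths inferred from the
cpu_to_be*() conversions above (the struct itself is defined elsewhere in this
series):

	struct p_sizes {
		struct p_header head;
		u64 d_size;		/* size of backing disk */
		u64 u_size;		/* user-requested size */
		u64 c_size;		/* currently exported size */
		u32 max_segment_size;	/* maximal size of a BIO */
		u16 queue_order_type;	/* was u32 before this change */
		u16 dds_flags;		/* enum dds_flags, new */
	} __packed;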
@@ -2180,6 +2205,43 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
 	return ok;
 }
 
+static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
+{
+	struct p_delay_probe dp;
+	int offset, ok = 0;
+	struct timeval now;
+
+	mutex_lock(&ds->mutex);
+	if (likely(ds->socket)) {
+		do_gettimeofday(&now);
+		offset = now.tv_usec - mdev->dps_time.tv_usec +
+			 (now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
+		dp.seq_num = cpu_to_be32(mdev->delay_seq);
+		dp.offset = cpu_to_be32(offset);
+
+		ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
+				    (struct p_header *)&dp, sizeof(dp), 0);
+	}
+	mutex_unlock(&ds->mutex);
+
+	return ok;
+}
+
+static int drbd_send_delay_probes(struct drbd_conf *mdev)
+{
+	int ok;
+
+	mdev->delay_seq++;
+	do_gettimeofday(&mdev->dps_time);
+	ok = drbd_send_delay_probe(mdev, &mdev->meta);
+	ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
+
+	mdev->dp_volume_last = mdev->send_cnt;
+	mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
+
+	return ok;
+}
+
 /* called on sndtimeo
  * returns FALSE if we should retry,
  * TRUE if we think connection is dead
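A probe pair shares one seq_num but travels over both the meta and the data
socket, each stamped with its own offset: the microseconds elapsed since
dps_time was taken in drbd_send_delay_probes(). Because the meta socket
carries no bulk data, the receiver can compare the two arrival times to
estimate how far the data socket is backed up behind resync traffic. The
offset arithmetic is a plain timeval difference (illustrative helper only):

	static inline int timeval_sub_us(const struct timeval *a, const struct timeval *b)
	{
		return (a->tv_sec - b->tv_sec) * 1000000 + (a->tv_usec - b->tv_usec);
	}

The re-arm interval is in tenths of a second: with sync_conf.dp_interval == 5,
jiffies + 5 * HZ / 10 schedules the next time-based probe half a second later.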
@@ -2309,6 +2371,44 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
 	return 1;
 }
 
+static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+	struct page *page = e->pages;
+	unsigned len = e->size;
+	page_chain_for_each(page) {
+		unsigned l = min_t(unsigned, len, PAGE_SIZE);
+		if (!_drbd_send_page(mdev, page, 0, l))
+			return 0;
+		len -= l;
+	}
+	return 1;
+}
+
+static void consider_delay_probes(struct drbd_conf *mdev)
+{
+	if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93)
+		return;
+
+	if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt)
+		drbd_send_delay_probes(mdev);
+}
+
+static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
+		drbd_send_delay_probes(mdev);
+
+	return 1;
+}
+
+static void delay_probe_timer_fn(unsigned long data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+	if (list_empty(&mdev->delay_probe_work.list))
+		drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
+}
+
 /* Used to send write requests
  * R_PRIMARY -> Peer (P_DATA)
  */
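_drbd_send_zc_ee() iterates the epoch entry's page chain directly instead of
going through a private bio; the pages are linked through their page->private
field. Presumed shape of the iterator, defined elsewhere in this series
(sketch only):

	#define page_chain_next(page)	((struct page *)page_private(page))
	#define page_chain_for_each(page) \
		for (; page && ({ prefetch(page_chain_next(page)); 1; }); \
		     page = page_chain_next(page))

Probes are triggered two ways: by volume in consider_delay_probes(), once
send_cnt has advanced more than 2 * sync_conf.dp_volume past dp_volume_last,
and by time via delay_probe_timer_fn(), which queues w_delay_probes on the
data work queue (the list_empty() check prevents queuing the work item twice).
Both paths only send while this node is C_SYNC_SOURCE, and the volume path
additionally requires agreed protocol version 93 or newer.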
@@ -2360,7 +2460,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 			drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
-		drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
+		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
 		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
 	}
 	if (ok) {
@@ -2371,6 +2471,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	}
 
 	drbd_put_data_sock(mdev);
+
+	if (ok)
+		consider_delay_probes(mdev);
+
 	return ok;
 }
 
@@ -2409,13 +2513,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 		     sizeof(p), MSG_MORE);
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
-		drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb);
+		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
 		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
 	}
 	if (ok)
-		ok = _drbd_send_zc_bio(mdev, e->private_bio);
+		ok = _drbd_send_zc_ee(mdev, e);
 
 	drbd_put_data_sock(mdev);
+
+	if (ok)
+		consider_delay_probes(mdev);
+
 	return ok;
 }
 
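Both send paths now end in the same hook: after a successfully transmitted
data or reply block, consider_delay_probes() gets a chance to interleave a
probe pair with the real traffic, so delay probing needs no dedicated pacing
thread. drbd_send_block() also switches fully to the epoch-entry
representation (drbd_csum_ee and _drbd_send_zc_ee instead of the former
e->private_bio based variants), matching the page-chain iteration above.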
@@ -2600,6 +2708,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->net_cnt, 0);
 	atomic_set(&mdev->packet_seq, 0);
 	atomic_set(&mdev->pp_in_use, 0);
+	atomic_set(&mdev->new_c_uuid, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2628,16 +2737,26 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
 	INIT_LIST_HEAD(&mdev->md_sync_work.list);
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
+	INIT_LIST_HEAD(&mdev->delay_probes);
+	INIT_LIST_HEAD(&mdev->delay_probe_work.list);
+	INIT_LIST_HEAD(&mdev->uuid_work.list);
+
 	mdev->resync_work.cb  = w_resync_inactive;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
+	mdev->delay_probe_work.cb = w_delay_probes;
+	mdev->uuid_work.cb = w_new_current_uuid;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
+	init_timer(&mdev->delay_probe_timer);
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
 	mdev->md_sync_timer.function = md_sync_timer_fn;
 	mdev->md_sync_timer.data = (unsigned long) mdev;
+	mdev->delay_probe_timer.function = delay_probe_timer_fn;
+	mdev->delay_probe_timer.data = (unsigned long) mdev;
+
 
 	init_waitqueue_head(&mdev->misc_wait);
 	init_waitqueue_head(&mdev->state_wait);
@@ -2680,7 +2799,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
 	drbd_set_my_capacity(mdev, 0);
 	if (mdev->bitmap) {
 		/* maybe never allocated. */
-		drbd_bm_resize(mdev, 0);
+		drbd_bm_resize(mdev, 0, 1);
 		drbd_bm_cleanup(mdev);
 	}
 
@@ -3129,7 +3248,7 @@ int __init drbd_init(void)
 	if (err)
 		goto Enomem;
 
-	drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops);
+	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc) {
 		printk(KERN_ERR "drbd: unable to register proc file\n");
 		goto Enomem;
@@ -3660,7 +3779,8 @@ _drbd_fault_str(unsigned int type) {
 		[DRBD_FAULT_DT_RD] = "Data read",
 		[DRBD_FAULT_DT_RA] = "Data read ahead",
 		[DRBD_FAULT_BM_ALLOC] = "BM allocation",
-		[DRBD_FAULT_AL_EE] = "EE allocation"
+		[DRBD_FAULT_AL_EE] = "EE allocation",
+		[DRBD_FAULT_RECEIVE] = "receive data corruption",
 	};
 
 	return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";