aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2013-03-19 13:16:47 -0400
committer: Jens Axboe <axboe@kernel.dk> 2013-03-22 20:13:59 -0400
commit: c04ccaa669e147ffb66e4e74d82c7dbfc100ec5e (patch)
tree: b5f0b09749ef3615c59fa2ad3cf3e721f6b123c3
parent: cccac9857d624dab74b23bafe0482fcdd91df7d8 (diff)
drbd: read meta data early, base on-disk offsets on super block
We used to calculate all on-disk meta data offsets, and then compare them against the stored offsets, basically treating them as magic numbers.

Now with the activity log striping, the activity log size is no longer fixed. We need to first read the super block, then base the activity log and bitmap offsets on the stored offsets/al stripe settings.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- drivers/block/drbd/drbd_actlog.c 11
-rw-r--r-- drivers/block/drbd/drbd_main.c 131
-rw-r--r-- drivers/block/drbd/drbd_nl.c 15
-rw-r--r-- drivers/block/drbd/drbd_worker.c 3
4 files changed, 123 insertions, 37 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 7e7680e8da6c..c79625aa8cf2 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -168,7 +168,11 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
168 bio->bi_end_io = drbd_md_io_complete; 168 bio->bi_end_io = drbd_md_io_complete;
169 bio->bi_rw = rw; 169 bio->bi_rw = rw;
170 170
171 if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ 171 if (!(rw & WRITE) && mdev->state.disk == D_DISKLESS && mdev->ldev == NULL)
172 /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
173 ;
174 else if (!get_ldev_if_state(mdev, D_ATTACHING)) {
175 /* Corresponding put_ldev in drbd_md_io_complete() */
172 dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); 176 dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
173 err = -ENODEV; 177 err = -ENODEV;
174 goto out; 178 goto out;
@@ -199,9 +203,10 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
199 203
200 BUG_ON(!bdev->md_bdev); 204 BUG_ON(!bdev->md_bdev);
201 205
202 dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", 206 dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
203 current->comm, current->pid, __func__, 207 current->comm, current->pid, __func__,
204 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); 208 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
209 (void*)_RET_IP_ );
205 210
206 if (sector < drbd_md_first_sector(bdev) || 211 if (sector < drbd_md_first_sector(bdev) ||
207 sector + 7 > drbd_md_last_sector(bdev)) 212 sector + 7 > drbd_md_last_sector(bdev))
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6b956fc04dc8..e55271d6e7f6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2968,6 +2968,86 @@ err:
2968 return -EINVAL; 2968 return -EINVAL;
2969} 2969}
2970 2970
2971static int check_offsets_and_sizes(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2972{
2973 sector_t capacity = drbd_get_capacity(bdev->md_bdev);
2974 struct drbd_md *in_core = &bdev->md;
2975 s32 on_disk_al_sect;
2976 s32 on_disk_bm_sect;
2977
2978 /* The on-disk size of the activity log, calculated from offsets, and
2979 * the size of the activity log calculated from the stripe settings,
2980 * should match.
2981 * Though we could relax this a bit: it is ok, if the striped activity log
2982 * fits in the available on-disk activity log size.
2983 * Right now, that would break how resize is implemented.
2984 * TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware
2985 * of possible unused padding space in the on disk layout. */
2986 if (in_core->al_offset < 0) {
2987 if (in_core->bm_offset > in_core->al_offset)
2988 goto err;
2989 on_disk_al_sect = -in_core->al_offset;
2990 on_disk_bm_sect = in_core->al_offset - in_core->bm_offset;
2991 } else {
2992 if (in_core->al_offset != MD_4kB_SECT)
2993 goto err;
2994 if (in_core->bm_offset < in_core->al_offset + in_core->al_size_4k * MD_4kB_SECT)
2995 goto err;
2996
2997 on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT;
2998 on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset;
2999 }
3000
3001 /* old fixed size meta data is exactly that: fixed. */
3002 if (in_core->meta_dev_idx >= 0) {
3003 if (in_core->md_size_sect != MD_128MB_SECT
3004 || in_core->al_offset != MD_4kB_SECT
3005 || in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT
3006 || in_core->al_stripes != 1
3007 || in_core->al_stripe_size_4k != MD_32kB_SECT/8)
3008 goto err;
3009 }
3010
3011 if (capacity < in_core->md_size_sect)
3012 goto err;
3013 if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev))
3014 goto err;
3015
3016 /* should be aligned, and at least 32k */
3017 if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT))
3018 goto err;
3019
3020 /* should fit (for now: exactly) into the available on-disk space;
3021 * overflow prevention is in check_activity_log_stripe_size() above. */
3022 if (on_disk_al_sect != in_core->al_size_4k * MD_4kB_SECT)
3023 goto err;
3024
3025 /* again, should be aligned */
3026 if (in_core->bm_offset & 7)
3027 goto err;
3028
3029 /* FIXME check for device grow with flex external meta data? */
3030
3031 /* can the available bitmap space cover the last agreed device size? */
3032 if (on_disk_bm_sect < (in_core->la_size_sect+7)/MD_4kB_SECT/8/512)
3033 goto err;
3034
3035 return 0;
3036
3037err:
3038 dev_err(DEV, "meta data offsets don't make sense: idx=%d "
3039 "al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, "
3040 "md_size_sect=%u, la_size=%llu, md_capacity=%llu\n",
3041 in_core->meta_dev_idx,
3042 in_core->al_stripes, in_core->al_stripe_size_4k,
3043 in_core->al_offset, in_core->bm_offset, in_core->md_size_sect,
3044 (unsigned long long)in_core->la_size_sect,
3045 (unsigned long long)capacity);
3046
3047 return -EINVAL;
3048}
3049
3050
2971/** 3051/**
2972 * drbd_md_read() - Reads in the meta data super block 3052 * drbd_md_read() - Reads in the meta data super block
2973 * @mdev: DRBD device. 3053 * @mdev: DRBD device.
@@ -2976,7 +3056,8 @@ err:
2976 * Return NO_ERROR on success, and an enum drbd_ret_code in case 3056 * Return NO_ERROR on success, and an enum drbd_ret_code in case
2977 * something goes wrong. 3057 * something goes wrong.
2978 * 3058 *
2979 * Called exactly once during drbd_adm_attach() 3059 * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS,
3060 * even before @bdev is assigned to @mdev->ldev.
2980 */ 3061 */
2981int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) 3062int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2982{ 3063{
@@ -2984,14 +3065,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2984 u32 magic, flags; 3065 u32 magic, flags;
2985 int i, rv = NO_ERROR; 3066 int i, rv = NO_ERROR;
2986 3067
2987 if (!get_ldev_if_state(mdev, D_ATTACHING)) 3068 if (mdev->state.disk != D_DISKLESS)
2988 return ERR_IO_MD_DISK; 3069 return ERR_DISK_CONFIGURED;
2989 3070
2990 buffer = drbd_md_get_buffer(mdev); 3071 buffer = drbd_md_get_buffer(mdev);
2991 if (!buffer) 3072 if (!buffer)
2992 goto out; 3073 return ERR_NOMEM;
2993 3074
2994 /* First, figure out where our meta data superblock is located. */ 3075 /* First, figure out where our meta data superblock is located,
3076 * and read it. */
2995 bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; 3077 bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
2996 bdev->md.md_offset = drbd_md_ss(bdev); 3078 bdev->md.md_offset = drbd_md_ss(bdev);
2997 3079
@@ -3022,14 +3104,29 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3022 goto err; 3104 goto err;
3023 } 3105 }
3024 3106
3025 if (check_activity_log_stripe_size(mdev, buffer, &bdev->md)) 3107 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
3108 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
3109 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
3026 goto err; 3110 goto err;
3111 }
3027 3112
3028 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { 3113
3029 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", 3114 /* convert to in_core endian */
3030 be32_to_cpu(buffer->al_offset), bdev->md.al_offset); 3115 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
3116 for (i = UI_CURRENT; i < UI_SIZE; i++)
3117 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
3118 bdev->md.flags = be32_to_cpu(buffer->flags);
3119 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
3120
3121 bdev->md.md_size_sect = be32_to_cpu(buffer->md_size_sect);
3122 bdev->md.al_offset = be32_to_cpu(buffer->al_offset);
3123 bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset);
3124
3125 if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
3031 goto err; 3126 goto err;
3032 } 3127 if (check_offsets_and_sizes(mdev, bdev))
3128 goto err;
3129
3033 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { 3130 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
3034 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", 3131 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
3035 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); 3132 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
@@ -3041,20 +3138,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3041 goto err; 3138 goto err;
3042 } 3139 }
3043 3140
3044 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
3045 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
3046 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
3047 goto err;
3048 }
3049
3050 rv = NO_ERROR; 3141 rv = NO_ERROR;
3051 3142
3052 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
3053 for (i = UI_CURRENT; i < UI_SIZE; i++)
3054 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
3055 bdev->md.flags = be32_to_cpu(buffer->flags);
3056 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
3057
3058 spin_lock_irq(&mdev->tconn->req_lock); 3143 spin_lock_irq(&mdev->tconn->req_lock);
3059 if (mdev->state.conn < C_CONNECTED) { 3144 if (mdev->state.conn < C_CONNECTED) {
3060 unsigned int peer; 3145 unsigned int peer;
@@ -3066,8 +3151,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3066 3151
3067 err: 3152 err:
3068 drbd_md_put_buffer(mdev); 3153 drbd_md_put_buffer(mdev);
3069 out:
3070 put_ldev(mdev);
3071 3154
3072 return rv; 3155 return rv;
3073} 3156}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d5211b06df45..974ea47a656a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -721,7 +721,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
721 struct drbd_backing_dev *bdev) 721 struct drbd_backing_dev *bdev)
722{ 722{
723 sector_t md_size_sect = 0; 723 sector_t md_size_sect = 0;
724 unsigned int al_size_sect = MD_32kB_SECT; 724 unsigned int al_size_sect = bdev->md.al_size_4k * 8;
725 725
726 bdev->md.md_offset = drbd_md_ss(bdev); 726 bdev->md.md_offset = drbd_md_ss(bdev);
727 727
@@ -1413,8 +1413,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1413 goto fail; 1413 goto fail;
1414 } 1414 }
1415 1415
1416 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ 1416 /* Read our meta data super block early.
1417 drbd_md_set_sector_offsets(mdev, nbc); 1417 * This also sets other on-disk offsets. */
1418 retcode = drbd_md_read(mdev, nbc);
1419 if (retcode != NO_ERROR)
1420 goto fail;
1418 1421
1419 if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { 1422 if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1420 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", 1423 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
@@ -1481,8 +1484,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1481 if (!get_ldev_if_state(mdev, D_ATTACHING)) 1484 if (!get_ldev_if_state(mdev, D_ATTACHING))
1482 goto force_diskless; 1485 goto force_diskless;
1483 1486
1484 drbd_md_set_sector_offsets(mdev, nbc);
1485
1486 if (!mdev->bitmap) { 1487 if (!mdev->bitmap) {
1487 if (drbd_bm_init(mdev)) { 1488 if (drbd_bm_init(mdev)) {
1488 retcode = ERR_NOMEM; 1489 retcode = ERR_NOMEM;
@@ -1490,10 +1491,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1490 } 1491 }
1491 } 1492 }
1492 1493
1493 retcode = drbd_md_read(mdev, nbc);
1494 if (retcode != NO_ERROR)
1495 goto force_diskless_dec;
1496
1497 if (mdev->state.conn < C_CONNECTED && 1494 if (mdev->state.conn < C_CONNECTED &&
1498 mdev->state.role == R_PRIMARY && 1495 mdev->state.role == R_PRIMARY &&
1499 (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { 1496 (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 424dc7bdf9b7..34b5d5d23ac4 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -89,7 +89,8 @@ void drbd_md_io_complete(struct bio *bio, int error)
89 md_io->done = 1; 89 md_io->done = 1;
90 wake_up(&mdev->misc_wait); 90 wake_up(&mdev->misc_wait);
91 bio_put(bio); 91 bio_put(bio);
92 put_ldev(mdev); 92 if (mdev->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
93 put_ldev(mdev);
93} 94}
94 95
95/* reads on behalf of the partner, 96/* reads on behalf of the partner,