aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2013-03-19 13:16:44 -0400
committer: Jens Axboe <axboe@kernel.dk> 2013-03-22 20:13:59 -0400
commit: 3a4d4eb3cb03fbc66696fc8cd472701d56f3aee7 (patch)
tree: 06a5d9a18bb35f23e68c3cca745f28b3f409e41c
parent: ae8bf312e97d554b6aa32e7b2ceb993812ad0835 (diff)
drbd: prepare for new striped layout of activity log
Introduce two new on-disk meta data fields: al_stripes and al_stripe_size_4k.

The intended use case is activity log on RAID 0 or similar. Logically consecutive transactions will advance their on-disk position by al_stripe_size_4k 4kB (transaction sized) blocks.

Right now, these are still asserted to be the backward compatible values al_stripes = 1, al_stripe_size_4k = 8 (which amounts to 32kB).

Also introduce a caching member for meta_dev_idx in the in-core structure: even though it is initially passed in in the rcu-protected disk_conf structure, it cannot change without a detach/attach cycle.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- drivers/block/drbd/drbd_actlog.c 6
-rw-r--r-- drivers/block/drbd/drbd_int.h 46
-rw-r--r-- drivers/block/drbd/drbd_main.c 77
-rw-r--r-- drivers/block/drbd/drbd_nl.c 5
4 files changed, 94 insertions, 40 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index b230d91ec430..7e7680e8da6c 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -353,11 +353,11 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
353 353
354static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) 354static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
355{ 355{
356 const unsigned int stripes = 1; 356 const unsigned int stripes = mdev->ldev->md.al_stripes;
357 const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT; 357 const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k;
358 358
359 /* transaction number, modulo on-disk ring buffer wrap around */ 359 /* transaction number, modulo on-disk ring buffer wrap around */
360 unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes); 360 unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k);
361 361
362 /* ... to aligned 4k on disk block */ 362 /* ... to aligned 4k on disk block */
363 t = ((t % stripes) * stripe_size_4kB) + t/stripes; 363 t = ((t % stripes) * stripe_size_4kB) + t/stripes;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 60c89e5b298c..ee19ba28b59a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -755,6 +755,14 @@ struct drbd_md {
755 755
756 s32 al_offset; /* signed relative sector offset to activity log */ 756 s32 al_offset; /* signed relative sector offset to activity log */
757 s32 bm_offset; /* signed relative sector offset to bitmap */ 757 s32 bm_offset; /* signed relative sector offset to bitmap */
758
759 /* cached value of bdev->disk_conf->meta_dev_idx (see below) */
760 s32 meta_dev_idx;
761
762 /* see al_tr_number_to_on_disk_sector() */
763 u32 al_stripes;
764 u32 al_stripe_size_4k;
765 u32 al_size_4k; /* cached product of the above */
758}; 766};
759 767
760struct drbd_backing_dev { 768struct drbd_backing_dev {
@@ -1862,38 +1870,24 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
1862} 1870}
1863 1871
1864/** 1872/**
1865 * drbd_md_ss__() - Return the sector number of our meta data super block 1873 * drbd_md_ss() - Return the sector number of our meta data super block
1866 * @mdev: DRBD device.
1867 * @bdev: Meta data block device. 1874 * @bdev: Meta data block device.
1868 */ 1875 */
1869static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, 1876static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
1870 struct drbd_backing_dev *bdev)
1871{ 1877{
1872 int meta_dev_idx; 1878 const int meta_dev_idx = bdev->md.meta_dev_idx;
1873 1879
1874 rcu_read_lock(); 1880 if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT)
1875 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; 1881 return 0;
1876 rcu_read_unlock();
1877 1882
1878 switch (meta_dev_idx) { 1883 /* Since drbd08, internal meta data is always "flexible".
1879 default: /* external, some index; this is the old fixed size layout */ 1884 * position: last 4k aligned block of 4k size */
1880 return MD_128MB_SECT * meta_dev_idx; 1885 if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1881 case DRBD_MD_INDEX_INTERNAL: 1886 meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)
1882 /* with drbd08, internal meta data is always "flexible" */
1883 case DRBD_MD_INDEX_FLEX_INT:
1884 if (!bdev->backing_bdev) {
1885 if (__ratelimit(&drbd_ratelimit_state)) {
1886 dev_err(DEV, "bdev->backing_bdev==NULL\n");
1887 dump_stack();
1888 }
1889 return 0;
1890 }
1891 /* sizeof(struct md_on_disk_07) == 4k
1892 * position: last 4k aligned block of 4k size */
1893 return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8; 1887 return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
1894 case DRBD_MD_INDEX_FLEX_EXT: 1888
1895 return 0; 1889 /* external, some index; this is the old fixed size layout */
1896 } 1890 return MD_128MB_SECT * bdev->md.meta_dev_idx;
1897} 1891}
1898 1892
1899static inline void 1893static inline void
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 76faeab40c8f..7a2e07b45ecf 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2850,7 +2850,11 @@ struct meta_data_on_disk {
2850 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ 2850 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
2851 u32 la_peer_max_bio_size; /* last peer max_bio_size */ 2851 u32 la_peer_max_bio_size; /* last peer max_bio_size */
2852 2852
2853 u8 reserved_u8[4096 - (7*8 + 8*4)]; 2853 /* see al_tr_number_to_on_disk_sector() */
2854 u32 al_stripes;
2855 u32 al_stripe_size_4k;
2856
2857 u8 reserved_u8[4096 - (7*8 + 10*4)];
2854} __packed; 2858} __packed;
2855 2859
2856/** 2860/**
@@ -2898,7 +2902,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
2898 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); 2902 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
2899 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); 2903 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
2900 2904
2901 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); 2905 buffer->al_stripes = cpu_to_be32(mdev->ldev->md.al_stripes);
2906 buffer->al_stripe_size_4k = cpu_to_be32(mdev->ldev->md.al_stripe_size_4k);
2907
2908 D_ASSERT(drbd_md_ss(mdev->ldev) == mdev->ldev->md.md_offset);
2902 sector = mdev->ldev->md.md_offset; 2909 sector = mdev->ldev->md.md_offset;
2903 2910
2904 if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { 2911 if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
@@ -2916,13 +2923,60 @@ out:
2916 put_ldev(mdev); 2923 put_ldev(mdev);
2917} 2924}
2918 2925
2926static int check_activity_log_stripe_size(struct drbd_conf *mdev,
2927 struct meta_data_on_disk *on_disk,
2928 struct drbd_md *in_core)
2929{
2930 u32 al_stripes = be32_to_cpu(on_disk->al_stripes);
2931 u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k);
2932 u64 al_size_4k;
2933
2934 /* both not set: default to old fixed size activity log */
2935 if (al_stripes == 0 && al_stripe_size_4k == 0) {
2936 al_stripes = 1;
2937 al_stripe_size_4k = MD_32kB_SECT/8;
2938 }
2939
2940 /* some paranoia plausibility checks */
2941
2942 /* we need both values to be set */
2943 if (al_stripes == 0 || al_stripe_size_4k == 0)
2944 goto err;
2945
2946 al_size_4k = (u64)al_stripes * al_stripe_size_4k;
2947
2948 /* Upper limit of activity log area, to avoid potential overflow
2949 * problems in al_tr_number_to_on_disk_sector(). As right now, more
2950 * than 72 * 4k blocks total only increases the amount of history,
2951 * limiting this arbitrarily to 16 GB is not a real limitation ;-) */
2952 if (al_size_4k > (16 * 1024 * 1024/4))
2953 goto err;
2954
2955 /* Lower limit: we need at least 8 transaction slots (32kB)
2956 * to not break existing setups */
2957 if (al_size_4k < MD_32kB_SECT/8)
2958 goto err;
2959
2960 in_core->al_stripe_size_4k = al_stripe_size_4k;
2961 in_core->al_stripes = al_stripes;
2962 in_core->al_size_4k = al_size_4k;
2963
2964 return 0;
2965err:
2966 dev_err(DEV, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n",
2967 al_stripes, al_stripe_size_4k);
2968 return -EINVAL;
2969}
2970
2919/** 2971/**
2920 * drbd_md_read() - Reads in the meta data super block 2972 * drbd_md_read() - Reads in the meta data super block
2921 * @mdev: DRBD device. 2973 * @mdev: DRBD device.
2922 * @bdev: Device from which the meta data should be read in. 2974 * @bdev: Device from which the meta data should be read in.
2923 * 2975 *
2924 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case 2976 * Return NO_ERROR on success, and an enum drbd_ret_code in case
2925 * something goes wrong. 2977 * something goes wrong.
2978 *
2979 * Called exactly once during drbd_adm_attach()
2926 */ 2980 */
2927int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) 2981int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2928{ 2982{
@@ -2937,6 +2991,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2937 if (!buffer) 2991 if (!buffer)
2938 goto out; 2992 goto out;
2939 2993
2994 /* First, figure out where our meta data superblock is located. */
2995 bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
2996 bdev->md.md_offset = drbd_md_ss(bdev);
2997
2940 if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { 2998 if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
2941 /* NOTE: can't do normal error processing here as this is 2999 /* NOTE: can't do normal error processing here as this is
2942 called BEFORE disk is attached */ 3000 called BEFORE disk is attached */
@@ -2954,40 +3012,43 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2954 rv = ERR_MD_UNCLEAN; 3012 rv = ERR_MD_UNCLEAN;
2955 goto err; 3013 goto err;
2956 } 3014 }
3015
3016 rv = ERR_MD_INVALID;
2957 if (magic != DRBD_MD_MAGIC_08) { 3017 if (magic != DRBD_MD_MAGIC_08) {
2958 if (magic == DRBD_MD_MAGIC_07) 3018 if (magic == DRBD_MD_MAGIC_07)
2959 dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); 3019 dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
2960 else 3020 else
2961 dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); 3021 dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
2962 rv = ERR_MD_INVALID;
2963 goto err; 3022 goto err;
2964 } 3023 }
3024
3025 if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
3026 goto err;
3027
2965 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { 3028 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
2966 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", 3029 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
2967 be32_to_cpu(buffer->al_offset), bdev->md.al_offset); 3030 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
2968 rv = ERR_MD_INVALID;
2969 goto err; 3031 goto err;
2970 } 3032 }
2971 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { 3033 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
2972 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", 3034 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
2973 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); 3035 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
2974 rv = ERR_MD_INVALID;
2975 goto err; 3036 goto err;
2976 } 3037 }
2977 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { 3038 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
2978 dev_err(DEV, "unexpected md_size: %u (expected %u)\n", 3039 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
2979 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); 3040 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
2980 rv = ERR_MD_INVALID;
2981 goto err; 3041 goto err;
2982 } 3042 }
2983 3043
2984 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { 3044 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
2985 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", 3045 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
2986 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); 3046 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
2987 rv = ERR_MD_INVALID;
2988 goto err; 3047 goto err;
2989 } 3048 }
2990 3049
3050 rv = NO_ERROR;
3051
2991 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); 3052 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
2992 for (i = UI_CURRENT; i < UI_SIZE; i++) 3053 for (i = UI_CURRENT; i < UI_SIZE; i++)
2993 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); 3054 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 581f6800cc30..104b7cea691e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -727,24 +727,23 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
727 rcu_read_lock(); 727 rcu_read_lock();
728 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; 728 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
729 729
730 bdev->md.md_offset = drbd_md_ss(bdev);
731
730 switch (meta_dev_idx) { 732 switch (meta_dev_idx) {
731 default: 733 default:
732 /* v07 style fixed size indexed meta data */ 734 /* v07 style fixed size indexed meta data */
733 bdev->md.md_size_sect = MD_128MB_SECT; 735 bdev->md.md_size_sect = MD_128MB_SECT;
734 bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
735 bdev->md.al_offset = MD_4kB_SECT; 736 bdev->md.al_offset = MD_4kB_SECT;
736 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; 737 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
737 break; 738 break;
738 case DRBD_MD_INDEX_FLEX_EXT: 739 case DRBD_MD_INDEX_FLEX_EXT:
739 /* just occupy the full device; unit: sectors */ 740 /* just occupy the full device; unit: sectors */
740 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); 741 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
741 bdev->md.md_offset = 0;
742 bdev->md.al_offset = MD_4kB_SECT; 742 bdev->md.al_offset = MD_4kB_SECT;
743 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; 743 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
744 break; 744 break;
745 case DRBD_MD_INDEX_INTERNAL: 745 case DRBD_MD_INDEX_INTERNAL:
746 case DRBD_MD_INDEX_FLEX_INT: 746 case DRBD_MD_INDEX_FLEX_INT:
747 bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
748 /* al size is still fixed */ 747 /* al size is still fixed */
749 bdev->md.al_offset = -al_size_sect; 748 bdev->md.al_offset = -al_size_sect;
750 /* we need (slightly less than) ~ this much bitmap sectors: */ 749 /* we need (slightly less than) ~ this much bitmap sectors: */