diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2013-03-19 13:16:44 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2013-03-22 20:13:59 -0400 |
commit | 3a4d4eb3cb03fbc66696fc8cd472701d56f3aee7 (patch) | |
tree | 06a5d9a18bb35f23e68c3cca745f28b3f409e41c | |
parent | ae8bf312e97d554b6aa32e7b2ceb993812ad0835 (diff) |
drbd: prepare for new striped layout of activity log
Introduce two new on-disk meta data fields: al_stripes and al_stripe_size_4k.
The intended use case is activity log on RAID 0 or similar.
Logically consecutive transactions will advance their on-disk position
by al_stripe_size_4k blocks of 4kB each (one block per transaction).
Right now, these are still asserted to be the backward compatible
values al_stripes = 1, al_stripe_size_4k = 8 (which amounts to 32kB).
Also introduce a caching member for meta_dev_idx in the in-core
structure: even though it is initially passed in via the RCU-protected
disk_conf structure, it cannot change without a detach/attach cycle.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 6 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 46 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 77 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 5 |
4 files changed, 94 insertions, 40 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index b230d91ec430..7e7680e8da6c 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -353,11 +353,11 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) | |||
353 | 353 | ||
354 | static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) | 354 | static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) |
355 | { | 355 | { |
356 | const unsigned int stripes = 1; | 356 | const unsigned int stripes = mdev->ldev->md.al_stripes; |
357 | const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT; | 357 | const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k; |
358 | 358 | ||
359 | /* transaction number, modulo on-disk ring buffer wrap around */ | 359 | /* transaction number, modulo on-disk ring buffer wrap around */ |
360 | unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes); | 360 | unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k); |
361 | 361 | ||
362 | /* ... to aligned 4k on disk block */ | 362 | /* ... to aligned 4k on disk block */ |
363 | t = ((t % stripes) * stripe_size_4kB) + t/stripes; | 363 | t = ((t % stripes) * stripe_size_4kB) + t/stripes; |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 60c89e5b298c..ee19ba28b59a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -755,6 +755,14 @@ struct drbd_md { | |||
755 | 755 | ||
756 | s32 al_offset; /* signed relative sector offset to activity log */ | 756 | s32 al_offset; /* signed relative sector offset to activity log */ |
757 | s32 bm_offset; /* signed relative sector offset to bitmap */ | 757 | s32 bm_offset; /* signed relative sector offset to bitmap */ |
758 | |||
759 | /* cached value of bdev->disk_conf->meta_dev_idx (see below) */ | ||
760 | s32 meta_dev_idx; | ||
761 | |||
762 | /* see al_tr_number_to_on_disk_sector() */ | ||
763 | u32 al_stripes; | ||
764 | u32 al_stripe_size_4k; | ||
765 | u32 al_size_4k; /* cached product of the above */ | ||
758 | }; | 766 | }; |
759 | 767 | ||
760 | struct drbd_backing_dev { | 768 | struct drbd_backing_dev { |
@@ -1862,38 +1870,24 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) | |||
1862 | } | 1870 | } |
1863 | 1871 | ||
1864 | /** | 1872 | /** |
1865 | * drbd_md_ss__() - Return the sector number of our meta data super block | 1873 | * drbd_md_ss() - Return the sector number of our meta data super block |
1866 | * @mdev: DRBD device. | ||
1867 | * @bdev: Meta data block device. | 1874 | * @bdev: Meta data block device. |
1868 | */ | 1875 | */ |
1869 | static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, | 1876 | static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev) |
1870 | struct drbd_backing_dev *bdev) | ||
1871 | { | 1877 | { |
1872 | int meta_dev_idx; | 1878 | const int meta_dev_idx = bdev->md.meta_dev_idx; |
1873 | 1879 | ||
1874 | rcu_read_lock(); | 1880 | if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT) |
1875 | meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; | 1881 | return 0; |
1876 | rcu_read_unlock(); | ||
1877 | 1882 | ||
1878 | switch (meta_dev_idx) { | 1883 | /* Since drbd08, internal meta data is always "flexible". |
1879 | default: /* external, some index; this is the old fixed size layout */ | 1884 | * position: last 4k aligned block of 4k size */ |
1880 | return MD_128MB_SECT * meta_dev_idx; | 1885 | if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL || |
1881 | case DRBD_MD_INDEX_INTERNAL: | 1886 | meta_dev_idx == DRBD_MD_INDEX_FLEX_INT) |
1882 | /* with drbd08, internal meta data is always "flexible" */ | ||
1883 | case DRBD_MD_INDEX_FLEX_INT: | ||
1884 | if (!bdev->backing_bdev) { | ||
1885 | if (__ratelimit(&drbd_ratelimit_state)) { | ||
1886 | dev_err(DEV, "bdev->backing_bdev==NULL\n"); | ||
1887 | dump_stack(); | ||
1888 | } | ||
1889 | return 0; | ||
1890 | } | ||
1891 | /* sizeof(struct md_on_disk_07) == 4k | ||
1892 | * position: last 4k aligned block of 4k size */ | ||
1893 | return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8; | 1887 | return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8; |
1894 | case DRBD_MD_INDEX_FLEX_EXT: | 1888 | |
1895 | return 0; | 1889 | /* external, some index; this is the old fixed size layout */ |
1896 | } | 1890 | return MD_128MB_SECT * bdev->md.meta_dev_idx; |
1897 | } | 1891 | } |
1898 | 1892 | ||
1899 | static inline void | 1893 | static inline void |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 76faeab40c8f..7a2e07b45ecf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -2850,7 +2850,11 @@ struct meta_data_on_disk { | |||
2850 | u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ | 2850 | u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ |
2851 | u32 la_peer_max_bio_size; /* last peer max_bio_size */ | 2851 | u32 la_peer_max_bio_size; /* last peer max_bio_size */ |
2852 | 2852 | ||
2853 | u8 reserved_u8[4096 - (7*8 + 8*4)]; | 2853 | /* see al_tr_number_to_on_disk_sector() */ |
2854 | u32 al_stripes; | ||
2855 | u32 al_stripe_size_4k; | ||
2856 | |||
2857 | u8 reserved_u8[4096 - (7*8 + 10*4)]; | ||
2854 | } __packed; | 2858 | } __packed; |
2855 | 2859 | ||
2856 | /** | 2860 | /** |
@@ -2898,7 +2902,10 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
2898 | buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); | 2902 | buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); |
2899 | buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); | 2903 | buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); |
2900 | 2904 | ||
2901 | D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); | 2905 | buffer->al_stripes = cpu_to_be32(mdev->ldev->md.al_stripes); |
2906 | buffer->al_stripe_size_4k = cpu_to_be32(mdev->ldev->md.al_stripe_size_4k); | ||
2907 | |||
2908 | D_ASSERT(drbd_md_ss(mdev->ldev) == mdev->ldev->md.md_offset); | ||
2902 | sector = mdev->ldev->md.md_offset; | 2909 | sector = mdev->ldev->md.md_offset; |
2903 | 2910 | ||
2904 | if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { | 2911 | if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { |
@@ -2916,13 +2923,60 @@ out: | |||
2916 | put_ldev(mdev); | 2923 | put_ldev(mdev); |
2917 | } | 2924 | } |
2918 | 2925 | ||
2926 | static int check_activity_log_stripe_size(struct drbd_conf *mdev, | ||
2927 | struct meta_data_on_disk *on_disk, | ||
2928 | struct drbd_md *in_core) | ||
2929 | { | ||
2930 | u32 al_stripes = be32_to_cpu(on_disk->al_stripes); | ||
2931 | u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k); | ||
2932 | u64 al_size_4k; | ||
2933 | |||
2934 | /* both not set: default to old fixed size activity log */ | ||
2935 | if (al_stripes == 0 && al_stripe_size_4k == 0) { | ||
2936 | al_stripes = 1; | ||
2937 | al_stripe_size_4k = MD_32kB_SECT/8; | ||
2938 | } | ||
2939 | |||
2940 | /* some paranoia plausibility checks */ | ||
2941 | |||
2942 | /* we need both values to be set */ | ||
2943 | if (al_stripes == 0 || al_stripe_size_4k == 0) | ||
2944 | goto err; | ||
2945 | |||
2946 | al_size_4k = (u64)al_stripes * al_stripe_size_4k; | ||
2947 | |||
2948 | /* Upper limit of activity log area, to avoid potential overflow | ||
2949 | * problems in al_tr_number_to_on_disk_sector(). As right now, more | ||
2950 | * than 72 * 4k blocks total only increases the amount of history, | ||
2951 | * limiting this arbitrarily to 16 GB is not a real limitation ;-) */ | ||
2952 | if (al_size_4k > (16 * 1024 * 1024/4)) | ||
2953 | goto err; | ||
2954 | |||
2955 | /* Lower limit: we need at least 8 transaction slots (32kB) | ||
2956 | * to not break existing setups */ | ||
2957 | if (al_size_4k < MD_32kB_SECT/8) | ||
2958 | goto err; | ||
2959 | |||
2960 | in_core->al_stripe_size_4k = al_stripe_size_4k; | ||
2961 | in_core->al_stripes = al_stripes; | ||
2962 | in_core->al_size_4k = al_size_4k; | ||
2963 | |||
2964 | return 0; | ||
2965 | err: | ||
2966 | dev_err(DEV, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n", | ||
2967 | al_stripes, al_stripe_size_4k); | ||
2968 | return -EINVAL; | ||
2969 | } | ||
2970 | |||
2919 | /** | 2971 | /** |
2920 | * drbd_md_read() - Reads in the meta data super block | 2972 | * drbd_md_read() - Reads in the meta data super block |
2921 | * @mdev: DRBD device. | 2973 | * @mdev: DRBD device. |
2922 | * @bdev: Device from which the meta data should be read in. | 2974 | * @bdev: Device from which the meta data should be read in. |
2923 | * | 2975 | * |
2924 | * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case | 2976 | * Return NO_ERROR on success, and an enum drbd_ret_code in case |
2925 | * something goes wrong. | 2977 | * something goes wrong. |
2978 | * | ||
2979 | * Called exactly once during drbd_adm_attach() | ||
2926 | */ | 2980 | */ |
2927 | int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | 2981 | int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) |
2928 | { | 2982 | { |
@@ -2937,6 +2991,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
2937 | if (!buffer) | 2991 | if (!buffer) |
2938 | goto out; | 2992 | goto out; |
2939 | 2993 | ||
2994 | /* First, figure out where our meta data superblock is located. */ | ||
2995 | bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; | ||
2996 | bdev->md.md_offset = drbd_md_ss(bdev); | ||
2997 | |||
2940 | if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { | 2998 | if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { |
2941 | /* NOTE: can't do normal error processing here as this is | 2999 | /* NOTE: can't do normal error processing here as this is |
2942 | called BEFORE disk is attached */ | 3000 | called BEFORE disk is attached */ |
@@ -2954,40 +3012,43 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
2954 | rv = ERR_MD_UNCLEAN; | 3012 | rv = ERR_MD_UNCLEAN; |
2955 | goto err; | 3013 | goto err; |
2956 | } | 3014 | } |
3015 | |||
3016 | rv = ERR_MD_INVALID; | ||
2957 | if (magic != DRBD_MD_MAGIC_08) { | 3017 | if (magic != DRBD_MD_MAGIC_08) { |
2958 | if (magic == DRBD_MD_MAGIC_07) | 3018 | if (magic == DRBD_MD_MAGIC_07) |
2959 | dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); | 3019 | dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); |
2960 | else | 3020 | else |
2961 | dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); | 3021 | dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); |
2962 | rv = ERR_MD_INVALID; | ||
2963 | goto err; | 3022 | goto err; |
2964 | } | 3023 | } |
3024 | |||
3025 | if (check_activity_log_stripe_size(mdev, buffer, &bdev->md)) | ||
3026 | goto err; | ||
3027 | |||
2965 | if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { | 3028 | if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { |
2966 | dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", | 3029 | dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", |
2967 | be32_to_cpu(buffer->al_offset), bdev->md.al_offset); | 3030 | be32_to_cpu(buffer->al_offset), bdev->md.al_offset); |
2968 | rv = ERR_MD_INVALID; | ||
2969 | goto err; | 3031 | goto err; |
2970 | } | 3032 | } |
2971 | if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { | 3033 | if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { |
2972 | dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", | 3034 | dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", |
2973 | be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); | 3035 | be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); |
2974 | rv = ERR_MD_INVALID; | ||
2975 | goto err; | 3036 | goto err; |
2976 | } | 3037 | } |
2977 | if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { | 3038 | if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { |
2978 | dev_err(DEV, "unexpected md_size: %u (expected %u)\n", | 3039 | dev_err(DEV, "unexpected md_size: %u (expected %u)\n", |
2979 | be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); | 3040 | be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); |
2980 | rv = ERR_MD_INVALID; | ||
2981 | goto err; | 3041 | goto err; |
2982 | } | 3042 | } |
2983 | 3043 | ||
2984 | if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { | 3044 | if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { |
2985 | dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", | 3045 | dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", |
2986 | be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); | 3046 | be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); |
2987 | rv = ERR_MD_INVALID; | ||
2988 | goto err; | 3047 | goto err; |
2989 | } | 3048 | } |
2990 | 3049 | ||
3050 | rv = NO_ERROR; | ||
3051 | |||
2991 | bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); | 3052 | bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); |
2992 | for (i = UI_CURRENT; i < UI_SIZE; i++) | 3053 | for (i = UI_CURRENT; i < UI_SIZE; i++) |
2993 | bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); | 3054 | bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 581f6800cc30..104b7cea691e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -727,24 +727,23 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, | |||
727 | rcu_read_lock(); | 727 | rcu_read_lock(); |
728 | meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; | 728 | meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; |
729 | 729 | ||
730 | bdev->md.md_offset = drbd_md_ss(bdev); | ||
731 | |||
730 | switch (meta_dev_idx) { | 732 | switch (meta_dev_idx) { |
731 | default: | 733 | default: |
732 | /* v07 style fixed size indexed meta data */ | 734 | /* v07 style fixed size indexed meta data */ |
733 | bdev->md.md_size_sect = MD_128MB_SECT; | 735 | bdev->md.md_size_sect = MD_128MB_SECT; |
734 | bdev->md.md_offset = drbd_md_ss__(mdev, bdev); | ||
735 | bdev->md.al_offset = MD_4kB_SECT; | 736 | bdev->md.al_offset = MD_4kB_SECT; |
736 | bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; | 737 | bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; |
737 | break; | 738 | break; |
738 | case DRBD_MD_INDEX_FLEX_EXT: | 739 | case DRBD_MD_INDEX_FLEX_EXT: |
739 | /* just occupy the full device; unit: sectors */ | 740 | /* just occupy the full device; unit: sectors */ |
740 | bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); | 741 | bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); |
741 | bdev->md.md_offset = 0; | ||
742 | bdev->md.al_offset = MD_4kB_SECT; | 742 | bdev->md.al_offset = MD_4kB_SECT; |
743 | bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; | 743 | bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; |
744 | break; | 744 | break; |
745 | case DRBD_MD_INDEX_INTERNAL: | 745 | case DRBD_MD_INDEX_INTERNAL: |
746 | case DRBD_MD_INDEX_FLEX_INT: | 746 | case DRBD_MD_INDEX_FLEX_INT: |
747 | bdev->md.md_offset = drbd_md_ss__(mdev, bdev); | ||
748 | /* al size is still fixed */ | 747 | /* al size is still fixed */ |
749 | bdev->md.al_offset = -al_size_sect; | 748 | bdev->md.al_offset = -al_size_sect; |
750 | /* we need (slightly less than) ~ this much bitmap sectors: */ | 749 | /* we need (slightly less than) ~ this much bitmap sectors: */ |