diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2010-01-28 04:58:08 -0500 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2010-02-28 06:35:28 -0500 |
commit | d9c740d2253e75db8cef8f87a3125c450f3ebd82 (patch) | |
tree | 7217cf62b8d102e00257be6e0675d25852045bc6 /fs | |
parent | 46f4d973f6874c06b7a41a3bf8f4c1717d90f97a (diff) |
exofs: Define on-disk per-inode optional layout attribute
* Layouts describe the way a file is spread on multiple devices.
The layout information is stored in the objects attribute introduced
in this patch.
* There can be multiple generating functions for the layout.
Currently defined:
- No attribute present - use below moving-window on global
device table, all devices.
(This is the only one currently used in exofs)
- an obj_id generated moving window - the obj_id is a randomizing
factor in the otherwise global map layout.
- An explicit layout stored, including a data_map and a device
index list.
- More might be defined in future ...
* There are two attributes defined of the same structure:
A-data-files-layout - This layout is used by data-files. If present
at a directory, all files of that directory will
be created with this layout.
A-meta-data-layout - This layout is used by a directory and other
meta-data information. Also inherited at creation
of subdirectories.
* At creation time inodes are created with the layout specified above.
A usermode utility may change the creation layout on a given directory
or file. In the case of directories, this will also apply to newly
created files/subdirectories, children of that directory.
In the simple unaltered case of a newly created exofs, no layout
attributes are present, and all layouts adhere to the layout specified
at the device-table.
* In case a future file system is loaded in an old exofs-driver:
At iget(), the generating_function is inspected and, if not supported,
an IO error is returned to the application and the inode will not
be loaded, so as not to damage any data.
Note: After this patch we do not yet support any type of layout;
only the RAID0 patch that enables striping at the super-block
level will add support for RAID0 layouts above. This way we
are past and future compatible and fully bisectable.
* Access to the device table is done by an accessor since
it will change according to above information.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/exofs/common.h | 39 | ||||
-rw-r--r-- | fs/exofs/exofs.h | 6 | ||||
-rw-r--r-- | fs/exofs/inode.c | 56 | ||||
-rw-r--r-- | fs/exofs/ios.c | 23 |
4 files changed, 114 insertions, 10 deletions
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index b1b178e61718..f0d520312d8b 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -55,6 +55,8 @@ | |||
55 | /* exofs Application specific page/attribute */ | 55 | /* exofs Application specific page/attribute */ |
56 | # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) | 56 | # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) |
57 | # define EXOFS_ATTR_INODE_DATA 1 | 57 | # define EXOFS_ATTR_INODE_DATA 1 |
58 | # define EXOFS_ATTR_INODE_FILE_LAYOUT 2 | ||
59 | # define EXOFS_ATTR_INODE_DIR_LAYOUT 3 | ||
58 | 60 | ||
59 | /* | 61 | /* |
60 | * The maximum number of files we can have is limited by the size of the | 62 | * The maximum number of files we can have is limited by the size of the |
@@ -206,4 +208,41 @@ enum { | |||
206 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ | 208 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ |
207 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) | 209 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) |
208 | 210 | ||
211 | /* | ||
212 | * The on-disk (optional) layout structure. | ||
213 | * sits in an EXOFS_ATTR_INODE_FILE_LAYOUT or EXOFS_ATTR_INODE_DIR_LAYOUT | ||
214 | * attribute, attached to any inode, usually to a directory. | ||
215 | */ | ||
216 | |||
217 | enum exofs_inode_layout_gen_functions { | ||
218 | LAYOUT_MOVING_WINDOW = 0, | ||
219 | LAYOUT_IMPLICT = 1, | ||
220 | }; | ||
221 | |||
222 | struct exofs_on_disk_inode_layout { | ||
223 | __le16 gen_func; /* One of enum exofs_inode_layout_gen_functions */ | ||
224 | __le16 pad; | ||
225 | union { | ||
226 | /* gen_func == LAYOUT_MOVING_WINDOW (default) */ | ||
227 | struct exofs_layout_sliding_window { | ||
228 | __le32 num_devices; /* first n devices in global-table*/ | ||
229 | } sliding_window __packed; | ||
230 | |||
231 | /* gen_func == LAYOUT_IMPLICT */ | ||
232 | struct exofs_layout_implict_list { | ||
233 | struct exofs_dt_data_map data_map; | ||
234 | /* Variable array of size data_map.cb_num_comps. These | ||
235 | * are device indexes of the devices in the global table | ||
236 | */ | ||
237 | __le32 dev_indexes[]; | ||
238 | } implict __packed; | ||
239 | }; | ||
240 | } __packed; | ||
241 | |||
242 | static inline size_t exofs_on_disk_inode_layout_size(unsigned max_devs) | ||
243 | { | ||
244 | return sizeof(struct exofs_on_disk_inode_layout) + | ||
245 | max_devs * sizeof(__le32); | ||
246 | } | ||
247 | |||
209 | #endif /*ifndef __EXOFS_COM_H__*/ | 248 | #endif /*ifndef __EXOFS_COM_H__*/ |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 33c68568b338..09e331935514 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -186,6 +186,12 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
186 | } | 186 | } |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * Given a layout, object_number and stripe_index return the associated global | ||
190 | * dev_index | ||
191 | */ | ||
192 | unsigned exofs_layout_od_id(struct exofs_layout *layout, | ||
193 | osd_id obj_no, unsigned layout_index); | ||
194 | /* | ||
189 | * Maximum count of links to a file | 195 | * Maximum count of links to a file |
190 | */ | 196 | */ |
191 | #define EXOFS_LINK_MAX 32000 | 197 | #define EXOFS_LINK_MAX 32000 |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 03189a958b33..0163546ba05a 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -859,6 +859,15 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
859 | return error; | 859 | return error; |
860 | } | 860 | } |
861 | 861 | ||
862 | static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( | ||
863 | EXOFS_APAGE_FS_DATA, | ||
864 | EXOFS_ATTR_INODE_FILE_LAYOUT, | ||
865 | 0); | ||
866 | static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF( | ||
867 | EXOFS_APAGE_FS_DATA, | ||
868 | EXOFS_ATTR_INODE_DIR_LAYOUT, | ||
869 | 0); | ||
870 | |||
862 | /* | 871 | /* |
863 | * Read an inode from the OSD, and return it as is. We also return the size | 872 | * Read an inode from the OSD, and return it as is. We also return the size |
864 | * attribute in the 'obj_size' argument. | 873 | * attribute in the 'obj_size' argument. |
@@ -867,11 +876,16 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | |||
867 | struct exofs_fcb *inode, uint64_t *obj_size) | 876 | struct exofs_fcb *inode, uint64_t *obj_size) |
868 | { | 877 | { |
869 | struct exofs_sb_info *sbi = sb->s_fs_info; | 878 | struct exofs_sb_info *sbi = sb->s_fs_info; |
870 | struct osd_attr attrs[2]; | 879 | struct osd_attr attrs[] = { |
880 | [0] = g_attr_inode_data, | ||
881 | [1] = g_attr_inode_file_layout, | ||
882 | [2] = g_attr_inode_dir_layout, | ||
883 | [3] = g_attr_logical_length, | ||
884 | }; | ||
871 | struct exofs_io_state *ios; | 885 | struct exofs_io_state *ios; |
886 | struct exofs_on_disk_inode_layout *layout; | ||
872 | int ret; | 887 | int ret; |
873 | 888 | ||
874 | *obj_size = ~0; | ||
875 | ret = exofs_get_io_state(&sbi->layout, &ios); | 889 | ret = exofs_get_io_state(&sbi->layout, &ios); |
876 | if (unlikely(ret)) { | 890 | if (unlikely(ret)) { |
877 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | 891 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
@@ -882,8 +896,9 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | |||
882 | exofs_make_credential(oi->i_cred, &ios->obj); | 896 | exofs_make_credential(oi->i_cred, &ios->obj); |
883 | ios->cred = oi->i_cred; | 897 | ios->cred = oi->i_cred; |
884 | 898 | ||
885 | attrs[0] = g_attr_inode_data; | 899 | attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); |
886 | attrs[1] = g_attr_logical_length; | 900 | attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); |
901 | |||
887 | ios->in_attr = attrs; | 902 | ios->in_attr = attrs; |
888 | ios->in_attr_len = ARRAY_SIZE(attrs); | 903 | ios->in_attr_len = ARRAY_SIZE(attrs); |
889 | 904 | ||
@@ -901,11 +916,42 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | |||
901 | 916 | ||
902 | ret = extract_attr_from_ios(ios, &attrs[1]); | 917 | ret = extract_attr_from_ios(ios, &attrs[1]); |
903 | if (ret) { | 918 | if (ret) { |
919 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); | ||
920 | goto out; | ||
921 | } | ||
922 | if (attrs[1].len) { | ||
923 | layout = attrs[1].val_ptr; | ||
924 | if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { | ||
925 | EXOFS_ERR("%s: unsupported files layout %d\n", | ||
926 | __func__, layout->gen_func); | ||
927 | ret = -ENOTSUPP; | ||
928 | goto out; | ||
929 | } | ||
930 | } | ||
931 | |||
932 | ret = extract_attr_from_ios(ios, &attrs[2]); | ||
933 | if (ret) { | ||
934 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); | ||
935 | goto out; | ||
936 | } | ||
937 | if (attrs[2].len) { | ||
938 | layout = attrs[2].val_ptr; | ||
939 | if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { | ||
940 | EXOFS_ERR("%s: unsupported meta-data layout %d\n", | ||
941 | __func__, layout->gen_func); | ||
942 | ret = -ENOTSUPP; | ||
943 | goto out; | ||
944 | } | ||
945 | } | ||
946 | |||
947 | *obj_size = ~0; | ||
948 | ret = extract_attr_from_ios(ios, &attrs[3]); | ||
949 | if (ret) { | ||
904 | EXOFS_ERR("%s: extract_attr of logical_length failed\n", | 950 | EXOFS_ERR("%s: extract_attr of logical_length failed\n", |
905 | __func__); | 951 | __func__); |
906 | goto out; | 952 | goto out; |
907 | } | 953 | } |
908 | *obj_size = get_unaligned_be64(attrs[1].val_ptr); | 954 | *obj_size = get_unaligned_be64(attrs[3].val_ptr); |
909 | 955 | ||
910 | out: | 956 | out: |
911 | exofs_put_io_state(ios); | 957 | exofs_put_io_state(ios); |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 4f679317ca54..2b81f99fd62c 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -107,6 +107,19 @@ void exofs_put_io_state(struct exofs_io_state *ios) | |||
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
110 | unsigned exofs_layout_od_id(struct exofs_layout *layout, | ||
111 | osd_id obj_no, unsigned layout_index) | ||
112 | { | ||
113 | return layout_index; | ||
114 | } | ||
115 | |||
116 | static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, | ||
117 | unsigned layout_index) | ||
118 | { | ||
119 | return ios->layout->s_ods[ | ||
120 | exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; | ||
121 | } | ||
122 | |||
110 | static void _sync_done(struct exofs_io_state *ios, void *p) | 123 | static void _sync_done(struct exofs_io_state *ios, void *p) |
111 | { | 124 | { |
112 | struct completion *waiting = p; | 125 | struct completion *waiting = p; |
@@ -242,7 +255,7 @@ int exofs_sbi_create(struct exofs_io_state *ios) | |||
242 | for (i = 0; i < ios->layout->s_numdevs; i++) { | 255 | for (i = 0; i < ios->layout->s_numdevs; i++) { |
243 | struct osd_request *or; | 256 | struct osd_request *or; |
244 | 257 | ||
245 | or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); | 258 | or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); |
246 | if (unlikely(!or)) { | 259 | if (unlikely(!or)) { |
247 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 260 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); |
248 | ret = -ENOMEM; | 261 | ret = -ENOMEM; |
@@ -266,7 +279,7 @@ int exofs_sbi_remove(struct exofs_io_state *ios) | |||
266 | for (i = 0; i < ios->layout->s_numdevs; i++) { | 279 | for (i = 0; i < ios->layout->s_numdevs; i++) { |
267 | struct osd_request *or; | 280 | struct osd_request *or; |
268 | 281 | ||
269 | or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); | 282 | or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); |
270 | if (unlikely(!or)) { | 283 | if (unlikely(!or)) { |
271 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 284 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); |
272 | ret = -ENOMEM; | 285 | ret = -ENOMEM; |
@@ -290,7 +303,7 @@ int exofs_sbi_write(struct exofs_io_state *ios) | |||
290 | for (i = 0; i < ios->layout->s_numdevs; i++) { | 303 | for (i = 0; i < ios->layout->s_numdevs; i++) { |
291 | struct osd_request *or; | 304 | struct osd_request *or; |
292 | 305 | ||
293 | or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); | 306 | or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); |
294 | if (unlikely(!or)) { | 307 | if (unlikely(!or)) { |
295 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 308 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); |
296 | ret = -ENOMEM; | 309 | ret = -ENOMEM; |
@@ -361,7 +374,7 @@ int exofs_sbi_read(struct exofs_io_state *ios) | |||
361 | unsigned first_dev = (unsigned)ios->obj.id; | 374 | unsigned first_dev = (unsigned)ios->obj.id; |
362 | 375 | ||
363 | first_dev %= ios->layout->s_numdevs; | 376 | first_dev %= ios->layout->s_numdevs; |
364 | or = osd_start_request(ios->layout->s_ods[first_dev], GFP_KERNEL); | 377 | or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); |
365 | if (unlikely(!or)) { | 378 | if (unlikely(!or)) { |
366 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 379 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); |
367 | return -ENOMEM; | 380 | return -ENOMEM; |
@@ -442,7 +455,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | |||
442 | for (i = 0; i < sbi->layout.s_numdevs; i++) { | 455 | for (i = 0; i < sbi->layout.s_numdevs; i++) { |
443 | struct osd_request *or; | 456 | struct osd_request *or; |
444 | 457 | ||
445 | or = osd_start_request(sbi->layout.s_ods[i], GFP_KERNEL); | 458 | or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); |
446 | if (unlikely(!or)) { | 459 | if (unlikely(!or)) { |
447 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 460 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); |
448 | ret = -ENOMEM; | 461 | ret = -ENOMEM; |