diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-10 12:32:24 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-10 12:32:24 -0500 |
commit | a5eba3f66f812cbc076a1170b3f888ad63f850b2 (patch) | |
tree | 32bcbbc77e2bc1c04c5ed577ff8f24612148631c | |
parent | fc1495bf99de6f65066b3234813180301ff8b693 (diff) | |
parent | 04dc1e88ad9c9f9639019e9646a89ce0ebf706bb (diff) |
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
exofs: Multi-device mirror support
exofs: Move all operations to an io_engine
exofs: move osd.c to ios.c
exofs: statfs blocks is sectors not FS blocks
exofs: Prints on mount and unmout
exofs: refactor exofs_i_info initialization into common helper
exofs: dbg-print less
exofs: More sane debug print
trivial: some small fixes in exofs documentation
-rw-r--r-- | Documentation/filesystems/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/exofs.txt | 23 | ||||
-rw-r--r-- | fs/exofs/Kbuild | 2 | ||||
-rw-r--r-- | fs/exofs/common.h | 81 | ||||
-rw-r--r-- | fs/exofs/exofs.h | 97 | ||||
-rw-r--r-- | fs/exofs/inode.c | 409 | ||||
-rw-r--r-- | fs/exofs/ios.c | 421 | ||||
-rw-r--r-- | fs/exofs/osd.c | 125 | ||||
-rw-r--r-- | fs/exofs/pnfs.h | 51 | ||||
-rw-r--r-- | fs/exofs/super.c | 353 |
10 files changed, 1107 insertions, 457 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index f15621ee5599..7001782ab932 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -36,6 +36,8 @@ dnotify.txt | |||
36 | - info about directory notification in Linux. | 36 | - info about directory notification in Linux. |
37 | ecryptfs.txt | 37 | ecryptfs.txt |
38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. | 38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. |
39 | exofs.txt | ||
40 | - info, usage, mount options, design about EXOFS. | ||
39 | ext2.txt | 41 | ext2.txt |
40 | - info, mount options and specifications for the Ext2 filesystem. | 42 | - info, mount options and specifications for the Ext2 filesystem. |
41 | ext3.txt | 43 | ext3.txt |
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt index 0ced74c2f73c..abd2a9b5b787 100644 --- a/Documentation/filesystems/exofs.txt +++ b/Documentation/filesystems/exofs.txt | |||
@@ -60,13 +60,13 @@ USAGE | |||
60 | 60 | ||
61 | mkfs.exofs --pid=65536 --format /dev/osd0 | 61 | mkfs.exofs --pid=65536 --format /dev/osd0 |
62 | 62 | ||
63 | The --format is optional if not specified no OSD_FORMAT will be | 63 | The --format is optional. If not specified, no OSD_FORMAT will be |
64 | preformed and a clean file system will be created in the specified pid, | 64 | performed and a clean file system will be created in the specified pid, |
65 | in the available space of the target. (Use --format=size_in_meg to limit | 65 | in the available space of the target. (Use --format=size_in_meg to limit |
66 | the total LUN space available) | 66 | the total LUN space available) |
67 | 67 | ||
68 | If pid already exist it will be deleted and a new one will be created in it's | 68 | If pid already exists, it will be deleted and a new one will be created in |
69 | place. Be careful. | 69 | its place. Be careful. |
70 | 70 | ||
71 | An exofs lives inside a single OSD partition. You can create multiple exofs | 71 | An exofs lives inside a single OSD partition. You can create multiple exofs |
72 | filesystems on the same device using multiple pids. | 72 | filesystems on the same device using multiple pids. |
@@ -81,7 +81,7 @@ USAGE | |||
81 | 81 | ||
82 | 7. For reference (See do-exofs example script): | 82 | 7. For reference (See do-exofs example script): |
83 | do-exofs start - an example of how to perform the above steps. | 83 | do-exofs start - an example of how to perform the above steps. |
84 | do-exofs stop - an example of how to unmount the file system. | 84 | do-exofs stop - an example of how to unmount the file system. |
85 | do-exofs format - an example of how to format and mkfs a new exofs. | 85 | do-exofs format - an example of how to format and mkfs a new exofs. |
86 | 86 | ||
87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): | 87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): |
@@ -104,8 +104,8 @@ Where: | |||
104 | exofs specific options: Options are separated by commas (,) | 104 | exofs specific options: Options are separated by commas (,) |
105 | pid=<integer> - The partition number to mount/create as | 105 | pid=<integer> - The partition number to mount/create as |
106 | container of the filesystem. | 106 | container of the filesystem. |
107 | This option is mandatory | 107 | This option is mandatory. |
108 | to=<integer> - Timeout in ticks for a single command | 108 | to=<integer> - Timeout in ticks for a single command. |
109 | default is (60 * HZ) [for debugging only] | 109 | default is (60 * HZ) [for debugging only] |
110 | 110 | ||
111 | =============================================================================== | 111 | =============================================================================== |
@@ -116,7 +116,7 @@ DESIGN | |||
116 | with a special ID (defined in common.h). | 116 | with a special ID (defined in common.h). |
117 | Information included in the file system control block is used to fill the | 117 | Information included in the file system control block is used to fill the |
118 | in-memory superblock structure at mount time. This object is created before | 118 | in-memory superblock structure at mount time. This object is created before |
119 | the file system is used by mkexofs.c It contains information such as: | 119 | the file system is used by mkexofs.c. It contains information such as: |
120 | - The file system's magic number | 120 | - The file system's magic number |
121 | - The next inode number to be allocated | 121 | - The next inode number to be allocated |
122 | 122 | ||
@@ -134,8 +134,8 @@ DESIGN | |||
134 | attributes. This applies to both regular files and other types (directories, | 134 | attributes. This applies to both regular files and other types (directories, |
135 | device files, symlinks, etc.). | 135 | device files, symlinks, etc.). |
136 | 136 | ||
137 | * Credentials are generated per object (inode and superblock) when they is | 137 | * Credentials are generated per object (inode and superblock) when they are |
138 | created in memory (read off disk or created). The credential works for all | 138 | created in memory (read from disk or created). The credential works for all |
139 | operations and is used as long as the object remains in memory. | 139 | operations and is used as long as the object remains in memory. |
140 | 140 | ||
141 | * Async OSD operations are used whenever possible, but the target may execute | 141 | * Async OSD operations are used whenever possible, but the target may execute |
@@ -145,7 +145,8 @@ DESIGN | |||
145 | from executing in reverse order: | 145 | from executing in reverse order: |
146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED | 146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED |
147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - | 147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - |
148 | in create's callback function, and when we successfully do a read_inode. | 148 | in create's callback function, and when we successfully do a |
149 | read_inode. | ||
149 | OBJ_2BCREATED is set in the beginning of the create function, so we | 150 | OBJ_2BCREATED is set in the beginning of the create function, so we |
150 | know that we should wait. | 151 | know that we should wait. |
151 | - create/delete: delete should wait until the object is created | 152 | - create/delete: delete should wait until the object is created |
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index cc2d22db119c..2d0f757fda3e 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild | |||
@@ -12,5 +12,5 @@ | |||
12 | # Kbuild - Gets included from the Kernels Makefile and build system | 12 | # Kbuild - Gets included from the Kernels Makefile and build system |
13 | # | 13 | # |
14 | 14 | ||
15 | exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o | 15 | exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o |
16 | obj-$(CONFIG_EXOFS_FS) += exofs.o | 16 | obj-$(CONFIG_EXOFS_FS) += exofs.o |
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index c6718e4817fe..b1b178e61718 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -49,6 +49,7 @@ | |||
49 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ | 49 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ |
50 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ | 50 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ |
51 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ | 51 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ |
52 | #define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */ | ||
52 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ | 53 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ |
53 | 54 | ||
54 | /* exofs Application specific page/attribute */ | 55 | /* exofs Application specific page/attribute */ |
@@ -78,17 +79,67 @@ enum { | |||
78 | #define EXOFS_SUPER_MAGIC 0x5DF5 | 79 | #define EXOFS_SUPER_MAGIC 0x5DF5 |
79 | 80 | ||
80 | /* | 81 | /* |
81 | * The file system control block - stored in an object's data (mainly, the one | 82 | * The file system control block - stored in object EXOFS_SUPER_ID's data. |
82 | * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored | 83 | * This is where the in-memory superblock is stored on disk. |
83 | * on disk. Right now it just has a magic value, which is basically a sanity | ||
84 | * check on our ability to communicate with the object store. | ||
85 | */ | 84 | */ |
85 | enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; | ||
86 | struct exofs_fscb { | 86 | struct exofs_fscb { |
87 | __le64 s_nextid; /* Highest object ID used */ | 87 | __le64 s_nextid; /* Highest object ID used */ |
88 | __le32 s_numfiles; /* Number of files on fs */ | 88 | __le64 s_numfiles; /* Number of files on fs */ |
89 | __le32 s_version; /* == EXOFS_FSCB_VER */ | ||
89 | __le16 s_magic; /* Magic signature */ | 90 | __le16 s_magic; /* Magic signature */ |
90 | __le16 s_newfs; /* Non-zero if this is a new fs */ | 91 | __le16 s_newfs; /* Non-zero if this is a new fs */ |
91 | }; | 92 | |
93 | /* From here on it's a static part, only written by mkexofs */ | ||
94 | __le64 s_dev_table_oid; /* Reserved, not used */ | ||
95 | __le64 s_dev_table_count; /* == 0 means no dev_table */ | ||
96 | } __packed; | ||
97 | |||
98 | /* | ||
99 | * Describes the raid used in the FS. It is part of the device table. | ||
100 | * This here is taken from the pNFS-objects definition. In exofs we | ||
101 | * use one raid policy throughout the filesystem. (NOTE: the funny | ||
102 | * alignment at the beginning. We take care of it at exofs_device_table.) | ||
103 | */ | ||
104 | struct exofs_dt_data_map { | ||
105 | __le32 cb_num_comps; | ||
106 | __le64 cb_stripe_unit; | ||
107 | __le32 cb_group_width; | ||
108 | __le32 cb_group_depth; | ||
109 | __le32 cb_mirror_cnt; | ||
110 | __le32 cb_raid_algorithm; | ||
111 | } __packed; | ||
112 | |||
113 | /* | ||
114 | * This is an osd device information descriptor. It is a single entry in | ||
115 | * the exofs device table. It describes an osd target lun which | ||
116 | * contains data belonging to this FS. (Same partition_id on all devices) | ||
117 | */ | ||
118 | struct exofs_dt_device_info { | ||
119 | __le32 systemid_len; | ||
120 | u8 systemid[OSD_SYSTEMID_LEN]; | ||
121 | __le64 long_name_offset; /* If !0 then offset-in-file */ | ||
122 | __le32 osdname_len; /* */ | ||
123 | u8 osdname[44]; /* Embedded, usually an ASCII uuid */ | ||
124 | } __packed; | ||
125 | |||
126 | /* | ||
127 | * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data. | ||
128 | * It contains the raid used for this multi-device FS and an array of | ||
129 | * participating devices. | ||
130 | */ | ||
131 | struct exofs_device_table { | ||
132 | __le32 dt_version; /* == EXOFS_DT_VER */ | ||
133 | struct exofs_dt_data_map dt_data_map; /* Raid policy to use */ | ||
134 | |||
135 | /* Reserved space for future use. Total including this: | ||
136 | * (8 * sizeof(le64)) | ||
137 | */ | ||
138 | __le64 __Resurved[4]; | ||
139 | |||
140 | __le64 dt_num_devices; /* Array size */ | ||
141 | struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */ | ||
142 | } __packed; | ||
92 | 143 | ||
93 | /**************************************************************************** | 144 | /**************************************************************************** |
94 | * inode-related things | 145 | * inode-related things |
@@ -155,22 +206,4 @@ enum { | |||
155 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ | 206 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ |
156 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) | 207 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) |
157 | 208 | ||
158 | /************************* | ||
159 | * function declarations * | ||
160 | *************************/ | ||
161 | /* osd.c */ | ||
162 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
163 | const struct osd_obj_id *obj); | ||
164 | |||
165 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); | ||
166 | static inline int exofs_check_ok(struct osd_request *or) | ||
167 | { | ||
168 | return exofs_check_ok_resid(or, NULL, NULL); | ||
169 | } | ||
170 | int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); | ||
171 | int exofs_async_op(struct osd_request *or, | ||
172 | osd_req_done_fn *async_done, void *caller_context, u8 *cred); | ||
173 | |||
174 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | ||
175 | |||
176 | #endif /*ifndef __EXOFS_COM_H__*/ | 209 | #endif /*ifndef __EXOFS_COM_H__*/ |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 5ec72e020b22..c35fd4623986 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -30,13 +30,17 @@ | |||
30 | * along with exofs; if not, write to the Free Software | 30 | * along with exofs; if not, write to the Free Software |
31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
32 | */ | 32 | */ |
33 | #ifndef __EXOFS_H__ | ||
34 | #define __EXOFS_H__ | ||
33 | 35 | ||
34 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
35 | #include <linux/time.h> | 37 | #include <linux/time.h> |
36 | #include "common.h" | 38 | #include "common.h" |
37 | 39 | ||
38 | #ifndef __EXOFS_H__ | 40 | /* FIXME: Remove once pnfs hits mainline |
39 | #define __EXOFS_H__ | 41 | * #include <linux/exportfs/pnfs_osd_xdr.h> |
42 | */ | ||
43 | #include "pnfs.h" | ||
40 | 44 | ||
41 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) | 45 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) |
42 | 46 | ||
@@ -55,7 +59,7 @@ | |||
55 | * our extension to the in-memory superblock | 59 | * our extension to the in-memory superblock |
56 | */ | 60 | */ |
57 | struct exofs_sb_info { | 61 | struct exofs_sb_info { |
58 | struct osd_dev *s_dev; /* returned by get_osd_dev */ | 62 | struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ |
59 | osd_id s_pid; /* partition ID of file system*/ | 63 | osd_id s_pid; /* partition ID of file system*/ |
60 | int s_timeout; /* timeout for OSD operations */ | 64 | int s_timeout; /* timeout for OSD operations */ |
61 | uint64_t s_nextid; /* highest object ID used */ | 65 | uint64_t s_nextid; /* highest object ID used */ |
@@ -63,7 +67,11 @@ struct exofs_sb_info { | |||
63 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ | 67 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ |
64 | u32 s_next_generation; /* next gen # to use */ | 68 | u32 s_next_generation; /* next gen # to use */ |
65 | atomic_t s_curr_pending; /* number of pending commands */ | 69 | atomic_t s_curr_pending; /* number of pending commands */ |
66 | uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ | 70 | uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ |
71 | |||
72 | struct pnfs_osd_data_map data_map; /* Default raid to use */ | ||
73 | unsigned s_numdevs; /* Num of devices in array */ | ||
74 | struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */ | ||
67 | }; | 75 | }; |
68 | 76 | ||
69 | /* | 77 | /* |
@@ -79,6 +87,50 @@ struct exofs_i_info { | |||
79 | struct inode vfs_inode; /* normal in-memory inode */ | 87 | struct inode vfs_inode; /* normal in-memory inode */ |
80 | }; | 88 | }; |
81 | 89 | ||
90 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | ||
91 | { | ||
92 | return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; | ||
93 | } | ||
94 | |||
95 | struct exofs_io_state; | ||
96 | typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); | ||
97 | |||
98 | struct exofs_io_state { | ||
99 | struct kref kref; | ||
100 | |||
101 | void *private; | ||
102 | exofs_io_done_fn done; | ||
103 | |||
104 | struct exofs_sb_info *sbi; | ||
105 | struct osd_obj_id obj; | ||
106 | u8 *cred; | ||
107 | |||
108 | /* Global read/write IO*/ | ||
109 | loff_t offset; | ||
110 | unsigned long length; | ||
111 | void *kern_buff; | ||
112 | struct bio *bio; | ||
113 | |||
114 | /* Attributes */ | ||
115 | unsigned in_attr_len; | ||
116 | struct osd_attr *in_attr; | ||
117 | unsigned out_attr_len; | ||
118 | struct osd_attr *out_attr; | ||
119 | |||
120 | /* Variable array of size numdevs */ | ||
121 | unsigned numdevs; | ||
122 | struct exofs_per_dev_state { | ||
123 | struct osd_request *or; | ||
124 | struct bio *bio; | ||
125 | } per_dev[]; | ||
126 | }; | ||
127 | |||
128 | static inline unsigned exofs_io_state_size(unsigned numdevs) | ||
129 | { | ||
130 | return sizeof(struct exofs_io_state) + | ||
131 | sizeof(struct exofs_per_dev_state) * numdevs; | ||
132 | } | ||
133 | |||
82 | /* | 134 | /* |
83 | * our inode flags | 135 | * our inode flags |
84 | */ | 136 | */ |
@@ -130,6 +182,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
130 | /************************* | 182 | /************************* |
131 | * function declarations * | 183 | * function declarations * |
132 | *************************/ | 184 | *************************/ |
185 | |||
186 | /* ios.c */ | ||
187 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
188 | const struct osd_obj_id *obj); | ||
189 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
190 | u64 offset, void *p, unsigned length); | ||
191 | |||
192 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); | ||
193 | void exofs_put_io_state(struct exofs_io_state *ios); | ||
194 | |||
195 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid); | ||
196 | |||
197 | int exofs_sbi_create(struct exofs_io_state *ios); | ||
198 | int exofs_sbi_remove(struct exofs_io_state *ios); | ||
199 | int exofs_sbi_write(struct exofs_io_state *ios); | ||
200 | int exofs_sbi_read(struct exofs_io_state *ios); | ||
201 | |||
202 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); | ||
203 | |||
204 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); | ||
205 | static inline int exofs_oi_write(struct exofs_i_info *oi, | ||
206 | struct exofs_io_state *ios) | ||
207 | { | ||
208 | ios->obj.id = exofs_oi_objno(oi); | ||
209 | ios->cred = oi->i_cred; | ||
210 | return exofs_sbi_write(ios); | ||
211 | } | ||
212 | |||
213 | static inline int exofs_oi_read(struct exofs_i_info *oi, | ||
214 | struct exofs_io_state *ios) | ||
215 | { | ||
216 | ios->obj.id = exofs_oi_objno(oi); | ||
217 | ios->cred = oi->i_cred; | ||
218 | return exofs_sbi_read(ios); | ||
219 | } | ||
220 | |||
133 | /* inode.c */ | 221 | /* inode.c */ |
134 | void exofs_truncate(struct inode *inode); | 222 | void exofs_truncate(struct inode *inode); |
135 | int exofs_setattr(struct dentry *, struct iattr *); | 223 | int exofs_setattr(struct dentry *, struct iattr *); |
@@ -169,6 +257,7 @@ extern const struct file_operations exofs_file_operations; | |||
169 | 257 | ||
170 | /* inode.c */ | 258 | /* inode.c */ |
171 | extern const struct address_space_operations exofs_aops; | 259 | extern const struct address_space_operations exofs_aops; |
260 | extern const struct osd_attr g_attr_logical_length; | ||
172 | 261 | ||
173 | /* namei.c */ | 262 | /* namei.c */ |
174 | extern const struct inode_operations exofs_dir_inode_operations; | 263 | extern const struct inode_operations exofs_dir_inode_operations; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 6c10f7476699..698a8636d39c 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -37,15 +37,18 @@ | |||
37 | 37 | ||
38 | #include "exofs.h" | 38 | #include "exofs.h" |
39 | 39 | ||
40 | #ifdef CONFIG_EXOFS_DEBUG | 40 | #define EXOFS_DBGMSG2(M...) do {} while (0) |
41 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | 41 | |
42 | #endif | 42 | enum { BIO_MAX_PAGES_KMALLOC = |
43 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
44 | }; | ||
43 | 45 | ||
44 | struct page_collect { | 46 | struct page_collect { |
45 | struct exofs_sb_info *sbi; | 47 | struct exofs_sb_info *sbi; |
46 | struct request_queue *req_q; | 48 | struct request_queue *req_q; |
47 | struct inode *inode; | 49 | struct inode *inode; |
48 | unsigned expected_pages; | 50 | unsigned expected_pages; |
51 | struct exofs_io_state *ios; | ||
49 | 52 | ||
50 | struct bio *bio; | 53 | struct bio *bio; |
51 | unsigned nr_pages; | 54 | unsigned nr_pages; |
@@ -54,22 +57,23 @@ struct page_collect { | |||
54 | }; | 57 | }; |
55 | 58 | ||
56 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 59 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
57 | struct inode *inode) | 60 | struct inode *inode) |
58 | { | 61 | { |
59 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 62 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
60 | 63 | ||
61 | pcol->sbi = sbi; | 64 | pcol->sbi = sbi; |
62 | pcol->req_q = osd_request_queue(sbi->s_dev); | 65 | /* Create master bios on first Q, later on cloning, each clone will be |
66 | * allocated on its destination Q | ||
67 | */ | ||
68 | pcol->req_q = osd_request_queue(sbi->s_ods[0]); | ||
63 | pcol->inode = inode; | 69 | pcol->inode = inode; |
64 | pcol->expected_pages = expected_pages; | 70 | pcol->expected_pages = expected_pages; |
65 | 71 | ||
72 | pcol->ios = NULL; | ||
66 | pcol->bio = NULL; | 73 | pcol->bio = NULL; |
67 | pcol->nr_pages = 0; | 74 | pcol->nr_pages = 0; |
68 | pcol->length = 0; | 75 | pcol->length = 0; |
69 | pcol->pg_first = -1; | 76 | pcol->pg_first = -1; |
70 | |||
71 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
72 | expected_pages); | ||
73 | } | 77 | } |
74 | 78 | ||
75 | static void _pcol_reset(struct page_collect *pcol) | 79 | static void _pcol_reset(struct page_collect *pcol) |
@@ -80,35 +84,49 @@ static void _pcol_reset(struct page_collect *pcol) | |||
80 | pcol->nr_pages = 0; | 84 | pcol->nr_pages = 0; |
81 | pcol->length = 0; | 85 | pcol->length = 0; |
82 | pcol->pg_first = -1; | 86 | pcol->pg_first = -1; |
83 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | 87 | pcol->ios = NULL; |
84 | pcol->inode->i_ino, pcol->expected_pages); | ||
85 | 88 | ||
86 | /* this is probably the end of the loop but in writes | 89 | /* this is probably the end of the loop but in writes |
87 | * it might not end here. don't be left with nothing | 90 | * it might not end here. don't be left with nothing |
88 | */ | 91 | */ |
89 | if (!pcol->expected_pages) | 92 | if (!pcol->expected_pages) |
90 | pcol->expected_pages = 128; | 93 | pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; |
91 | } | 94 | } |
92 | 95 | ||
93 | static int pcol_try_alloc(struct page_collect *pcol) | 96 | static int pcol_try_alloc(struct page_collect *pcol) |
94 | { | 97 | { |
95 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | 98 | int pages = min_t(unsigned, pcol->expected_pages, |
99 | BIO_MAX_PAGES_KMALLOC); | ||
100 | |||
101 | if (!pcol->ios) { /* First time allocate io_state */ | ||
102 | int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); | ||
103 | |||
104 | if (ret) | ||
105 | return ret; | ||
106 | } | ||
96 | 107 | ||
97 | for (; pages; pages >>= 1) { | 108 | for (; pages; pages >>= 1) { |
98 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | 109 | pcol->bio = bio_kmalloc(GFP_KERNEL, pages); |
99 | if (likely(pcol->bio)) | 110 | if (likely(pcol->bio)) |
100 | return 0; | 111 | return 0; |
101 | } | 112 | } |
102 | 113 | ||
103 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | 114 | EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", |
104 | pcol->expected_pages); | 115 | pcol->expected_pages); |
105 | return -ENOMEM; | 116 | return -ENOMEM; |
106 | } | 117 | } |
107 | 118 | ||
108 | static void pcol_free(struct page_collect *pcol) | 119 | static void pcol_free(struct page_collect *pcol) |
109 | { | 120 | { |
110 | bio_put(pcol->bio); | 121 | if (pcol->bio) { |
111 | pcol->bio = NULL; | 122 | bio_put(pcol->bio); |
123 | pcol->bio = NULL; | ||
124 | } | ||
125 | |||
126 | if (pcol->ios) { | ||
127 | exofs_put_io_state(pcol->ios); | ||
128 | pcol->ios = NULL; | ||
129 | } | ||
112 | } | 130 | } |
113 | 131 | ||
114 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | 132 | static int pcol_add_page(struct page_collect *pcol, struct page *page, |
@@ -161,22 +179,17 @@ static void update_write_page(struct page *page, int ret) | |||
161 | /* Called at the end of reads, to optionally unlock pages and update their | 179 | /* Called at the end of reads, to optionally unlock pages and update their |
162 | * status. | 180 | * status. |
163 | */ | 181 | */ |
164 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | 182 | static int __readpages_done(struct page_collect *pcol, bool do_unlock) |
165 | bool do_unlock) | ||
166 | { | 183 | { |
167 | struct bio_vec *bvec; | 184 | struct bio_vec *bvec; |
168 | int i; | 185 | int i; |
169 | u64 resid; | 186 | u64 resid; |
170 | u64 good_bytes; | 187 | u64 good_bytes; |
171 | u64 length = 0; | 188 | u64 length = 0; |
172 | int ret = exofs_check_ok_resid(or, &resid, NULL); | 189 | int ret = exofs_check_io(pcol->ios, &resid); |
173 | |||
174 | osd_end_request(or); | ||
175 | 190 | ||
176 | if (likely(!ret)) | 191 | if (likely(!ret)) |
177 | good_bytes = pcol->length; | 192 | good_bytes = pcol->length; |
178 | else if (!resid) | ||
179 | good_bytes = 0; | ||
180 | else | 193 | else |
181 | good_bytes = pcol->length - resid; | 194 | good_bytes = pcol->length - resid; |
182 | 195 | ||
@@ -198,7 +211,7 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |||
198 | else | 211 | else |
199 | page_stat = ret; | 212 | page_stat = ret; |
200 | 213 | ||
201 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | 214 | EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", |
202 | inode->i_ino, page->index, | 215 | inode->i_ino, page->index, |
203 | page_stat ? "bad_bytes" : "good_bytes"); | 216 | page_stat ? "bad_bytes" : "good_bytes"); |
204 | 217 | ||
@@ -214,13 +227,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |||
214 | } | 227 | } |
215 | 228 | ||
216 | /* callback of async reads */ | 229 | /* callback of async reads */ |
217 | static void readpages_done(struct osd_request *or, void *p) | 230 | static void readpages_done(struct exofs_io_state *ios, void *p) |
218 | { | 231 | { |
219 | struct page_collect *pcol = p; | 232 | struct page_collect *pcol = p; |
220 | 233 | ||
221 | __readpages_done(or, pcol, true); | 234 | __readpages_done(pcol, true); |
222 | atomic_dec(&pcol->sbi->s_curr_pending); | 235 | atomic_dec(&pcol->sbi->s_curr_pending); |
223 | kfree(p); | 236 | kfree(pcol); |
224 | } | 237 | } |
225 | 238 | ||
226 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | 239 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) |
@@ -238,17 +251,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | |||
238 | 251 | ||
239 | unlock_page(page); | 252 | unlock_page(page); |
240 | } | 253 | } |
241 | pcol_free(pcol); | ||
242 | } | 254 | } |
243 | 255 | ||
244 | static int read_exec(struct page_collect *pcol, bool is_sync) | 256 | static int read_exec(struct page_collect *pcol, bool is_sync) |
245 | { | 257 | { |
246 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 258 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
247 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 259 | struct exofs_io_state *ios = pcol->ios; |
248 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
249 | struct osd_request *or = NULL; | ||
250 | struct page_collect *pcol_copy = NULL; | 260 | struct page_collect *pcol_copy = NULL; |
251 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
252 | int ret; | 261 | int ret; |
253 | 262 | ||
254 | if (!pcol->bio) | 263 | if (!pcol->bio) |
@@ -257,17 +266,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
257 | /* see comment in _readpage() about sync reads */ | 266 | /* see comment in _readpage() about sync reads */ |
258 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | 267 | WARN_ON(is_sync && (pcol->nr_pages != 1)); |
259 | 268 | ||
260 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | 269 | ios->bio = pcol->bio; |
261 | if (unlikely(!or)) { | 270 | ios->length = pcol->length; |
262 | ret = -ENOMEM; | 271 | ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; |
263 | goto err; | ||
264 | } | ||
265 | |||
266 | osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); | ||
267 | 272 | ||
268 | if (is_sync) { | 273 | if (is_sync) { |
269 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | 274 | exofs_oi_read(oi, pcol->ios); |
270 | return __readpages_done(or, pcol, false); | 275 | return __readpages_done(pcol, false); |
271 | } | 276 | } |
272 | 277 | ||
273 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 278 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
@@ -277,14 +282,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
277 | } | 282 | } |
278 | 283 | ||
279 | *pcol_copy = *pcol; | 284 | *pcol_copy = *pcol; |
280 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | 285 | ios->done = readpages_done; |
286 | ios->private = pcol_copy; | ||
287 | ret = exofs_oi_read(oi, ios); | ||
281 | if (unlikely(ret)) | 288 | if (unlikely(ret)) |
282 | goto err; | 289 | goto err; |
283 | 290 | ||
284 | atomic_inc(&pcol->sbi->s_curr_pending); | 291 | atomic_inc(&pcol->sbi->s_curr_pending); |
285 | 292 | ||
286 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | 293 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", |
287 | obj.id, _LLU(i_start), pcol->length); | 294 | ios->obj.id, _LLU(ios->offset), pcol->length); |
288 | 295 | ||
289 | /* pages ownership was passed to pcol_copy */ | 296 | /* pages ownership was passed to pcol_copy */ |
290 | _pcol_reset(pcol); | 297 | _pcol_reset(pcol); |
@@ -293,12 +300,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
293 | err: | 300 | err: |
294 | if (!is_sync) | 301 | if (!is_sync) |
295 | _unlock_pcol_pages(pcol, ret, READ); | 302 | _unlock_pcol_pages(pcol, ret, READ); |
296 | else /* Pages unlocked by caller in sync mode only free bio */ | 303 | |
297 | pcol_free(pcol); | 304 | pcol_free(pcol); |
298 | 305 | ||
299 | kfree(pcol_copy); | 306 | kfree(pcol_copy); |
300 | if (or) | ||
301 | osd_end_request(or); | ||
302 | return ret; | 307 | return ret; |
303 | } | 308 | } |
304 | 309 | ||
@@ -370,12 +375,12 @@ try_again: | |||
370 | if (len != PAGE_CACHE_SIZE) | 375 | if (len != PAGE_CACHE_SIZE) |
371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | 376 | zero_user(page, len, PAGE_CACHE_SIZE - len); |
372 | 377 | ||
373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | 378 | EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", |
374 | inode->i_ino, page->index, len); | 379 | inode->i_ino, page->index, len); |
375 | 380 | ||
376 | ret = pcol_add_page(pcol, page, len); | 381 | ret = pcol_add_page(pcol, page, len); |
377 | if (ret) { | 382 | if (ret) { |
378 | EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | 383 | EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " |
379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | 384 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", |
380 | page, len, pcol->nr_pages, pcol->length); | 385 | page, len, pcol->nr_pages, pcol->length); |
381 | 386 | ||
@@ -419,9 +424,8 @@ static int _readpage(struct page *page, bool is_sync) | |||
419 | 424 | ||
420 | _pcol_init(&pcol, 1, page->mapping->host); | 425 | _pcol_init(&pcol, 1, page->mapping->host); |
421 | 426 | ||
422 | /* readpage_strip might call read_exec(,async) inside at several places | 427 | /* readpage_strip might call read_exec(,is_sync==false) at several |
423 | * but this is safe for is_async=0 since read_exec will not do anything | 428 | * places but not if we have a single page. |
424 | * when we have a single page. | ||
425 | */ | 429 | */ |
426 | ret = readpage_strip(&pcol, page); | 430 | ret = readpage_strip(&pcol, page); |
427 | if (ret) { | 431 | if (ret) { |
@@ -440,8 +444,8 @@ static int exofs_readpage(struct file *file, struct page *page) | |||
440 | return _readpage(page, false); | 444 | return _readpage(page, false); |
441 | } | 445 | } |
442 | 446 | ||
443 | /* Callback for osd_write. All writes are asynchronouse */ | 447 | /* Callback for osd_write. All writes are asynchronous */ |
444 | static void writepages_done(struct osd_request *or, void *p) | 448 | static void writepages_done(struct exofs_io_state *ios, void *p) |
445 | { | 449 | { |
446 | struct page_collect *pcol = p; | 450 | struct page_collect *pcol = p; |
447 | struct bio_vec *bvec; | 451 | struct bio_vec *bvec; |
@@ -449,16 +453,12 @@ static void writepages_done(struct osd_request *or, void *p) | |||
449 | u64 resid; | 453 | u64 resid; |
450 | u64 good_bytes; | 454 | u64 good_bytes; |
451 | u64 length = 0; | 455 | u64 length = 0; |
456 | int ret = exofs_check_io(ios, &resid); | ||
452 | 457 | ||
453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
454 | |||
455 | osd_end_request(or); | ||
456 | atomic_dec(&pcol->sbi->s_curr_pending); | 458 | atomic_dec(&pcol->sbi->s_curr_pending); |
457 | 459 | ||
458 | if (likely(!ret)) | 460 | if (likely(!ret)) |
459 | good_bytes = pcol->length; | 461 | good_bytes = pcol->length; |
460 | else if (!resid) | ||
461 | good_bytes = 0; | ||
462 | else | 462 | else |
463 | good_bytes = pcol->length - resid; | 463 | good_bytes = pcol->length - resid; |
464 | 464 | ||
@@ -482,7 +482,7 @@ static void writepages_done(struct osd_request *or, void *p) | |||
482 | 482 | ||
483 | update_write_page(page, page_stat); | 483 | update_write_page(page, page_stat); |
484 | unlock_page(page); | 484 | unlock_page(page); |
485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | 485 | EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", |
486 | inode->i_ino, page->index, page_stat); | 486 | inode->i_ino, page->index, page_stat); |
487 | 487 | ||
488 | length += bvec->bv_len; | 488 | length += bvec->bv_len; |
@@ -496,23 +496,13 @@ static void writepages_done(struct osd_request *or, void *p) | |||
496 | static int write_exec(struct page_collect *pcol) | 496 | static int write_exec(struct page_collect *pcol) |
497 | { | 497 | { |
498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 498 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 499 | struct exofs_io_state *ios = pcol->ios; |
500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
501 | struct osd_request *or = NULL; | ||
502 | struct page_collect *pcol_copy = NULL; | 500 | struct page_collect *pcol_copy = NULL; |
503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
504 | int ret; | 501 | int ret; |
505 | 502 | ||
506 | if (!pcol->bio) | 503 | if (!pcol->bio) |
507 | return 0; | 504 | return 0; |
508 | 505 | ||
509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
510 | if (unlikely(!or)) { | ||
511 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
512 | ret = -ENOMEM; | ||
513 | goto err; | ||
514 | } | ||
515 | |||
516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 506 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
517 | if (!pcol_copy) { | 507 | if (!pcol_copy) { |
518 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | 508 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); |
@@ -523,16 +513,22 @@ static int write_exec(struct page_collect *pcol) | |||
523 | *pcol_copy = *pcol; | 513 | *pcol_copy = *pcol; |
524 | 514 | ||
525 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ | 515 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ |
526 | osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); | 516 | |
527 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | 517 | ios->bio = pcol_copy->bio; |
518 | ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; | ||
519 | ios->length = pcol_copy->length; | ||
520 | ios->done = writepages_done; | ||
521 | ios->private = pcol_copy; | ||
522 | |||
523 | ret = exofs_oi_write(oi, ios); | ||
528 | if (unlikely(ret)) { | 524 | if (unlikely(ret)) { |
529 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | 525 | EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); |
530 | goto err; | 526 | goto err; |
531 | } | 527 | } |
532 | 528 | ||
533 | atomic_inc(&pcol->sbi->s_curr_pending); | 529 | atomic_inc(&pcol->sbi->s_curr_pending); |
534 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | 530 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", |
535 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | 531 | pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), |
536 | pcol->length); | 532 | pcol->length); |
537 | /* pages ownership was passed to pcol_copy */ | 533 | /* pages ownership was passed to pcol_copy */ |
538 | _pcol_reset(pcol); | 534 | _pcol_reset(pcol); |
@@ -540,9 +536,9 @@ static int write_exec(struct page_collect *pcol) | |||
540 | 536 | ||
541 | err: | 537 | err: |
542 | _unlock_pcol_pages(pcol, ret, WRITE); | 538 | _unlock_pcol_pages(pcol, ret, WRITE); |
539 | pcol_free(pcol); | ||
543 | kfree(pcol_copy); | 540 | kfree(pcol_copy); |
544 | if (or) | 541 | |
545 | osd_end_request(or); | ||
546 | return ret; | 542 | return ret; |
547 | } | 543 | } |
548 | 544 | ||
@@ -586,6 +582,9 @@ static int writepage_strip(struct page *page, | |||
586 | if (PageError(page)) | 582 | if (PageError(page)) |
587 | ClearPageError(page); | 583 | ClearPageError(page); |
588 | unlock_page(page); | 584 | unlock_page(page); |
585 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " | ||
586 | "outside the limits\n", | ||
587 | inode->i_ino, page->index); | ||
589 | return 0; | 588 | return 0; |
590 | } | 589 | } |
591 | } | 590 | } |
@@ -600,6 +599,9 @@ try_again: | |||
600 | ret = write_exec(pcol); | 599 | ret = write_exec(pcol); |
601 | if (unlikely(ret)) | 600 | if (unlikely(ret)) |
602 | goto fail; | 601 | goto fail; |
602 | |||
603 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", | ||
604 | inode->i_ino, page->index); | ||
603 | goto try_again; | 605 | goto try_again; |
604 | } | 606 | } |
605 | 607 | ||
@@ -609,7 +611,7 @@ try_again: | |||
609 | goto fail; | 611 | goto fail; |
610 | } | 612 | } |
611 | 613 | ||
612 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | 614 | EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", |
613 | inode->i_ino, page->index, len); | 615 | inode->i_ino, page->index, len); |
614 | 616 | ||
615 | ret = pcol_add_page(pcol, page, len); | 617 | ret = pcol_add_page(pcol, page, len); |
@@ -634,6 +636,8 @@ try_again: | |||
634 | return 0; | 636 | return 0; |
635 | 637 | ||
636 | fail: | 638 | fail: |
639 | EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", | ||
640 | inode->i_ino, page->index, ret); | ||
637 | set_bit(AS_EIO, &page->mapping->flags); | 641 | set_bit(AS_EIO, &page->mapping->flags); |
638 | unlock_page(page); | 642 | unlock_page(page); |
639 | return ret; | 643 | return ret; |
@@ -652,14 +656,17 @@ static int exofs_writepages(struct address_space *mapping, | |||
652 | wbc->range_end >> PAGE_CACHE_SHIFT; | 656 | wbc->range_end >> PAGE_CACHE_SHIFT; |
653 | 657 | ||
654 | if (start || end) | 658 | if (start || end) |
655 | expected_pages = min(end - start + 1, 32L); | 659 | expected_pages = end - start + 1; |
656 | else | 660 | else |
657 | expected_pages = mapping->nrpages; | 661 | expected_pages = mapping->nrpages; |
658 | 662 | ||
659 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | 663 | if (expected_pages < 32L) |
660 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | 664 | expected_pages = 32L; |
665 | |||
666 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " | ||
667 | "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", | ||
661 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | 668 | mapping->host->i_ino, wbc->range_start, wbc->range_end, |
662 | mapping->nrpages, start, end); | 669 | mapping->nrpages, start, end, expected_pages); |
663 | 670 | ||
664 | _pcol_init(&pcol, expected_pages, mapping->host); | 671 | _pcol_init(&pcol, expected_pages, mapping->host); |
665 | 672 | ||
@@ -771,19 +778,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock, | |||
771 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | 778 | const struct osd_attr g_attr_logical_length = ATTR_DEF( |
772 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | 779 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
773 | 780 | ||
781 | static int _do_truncate(struct inode *inode) | ||
782 | { | ||
783 | struct exofs_i_info *oi = exofs_i(inode); | ||
784 | loff_t isize = i_size_read(inode); | ||
785 | int ret; | ||
786 | |||
787 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
788 | |||
789 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
790 | |||
791 | ret = exofs_oi_truncate(oi, (u64)isize); | ||
792 | EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); | ||
793 | return ret; | ||
794 | } | ||
795 | |||
774 | /* | 796 | /* |
775 | * Truncate a file to the specified size - all we have to do is set the size | 797 | * Truncate a file to the specified size - all we have to do is set the size |
776 | * attribute. We make sure the object exists first. | 798 | * attribute. We make sure the object exists first. |
777 | */ | 799 | */ |
778 | void exofs_truncate(struct inode *inode) | 800 | void exofs_truncate(struct inode *inode) |
779 | { | 801 | { |
780 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
781 | struct exofs_i_info *oi = exofs_i(inode); | 802 | struct exofs_i_info *oi = exofs_i(inode); |
782 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
783 | struct osd_request *or; | ||
784 | struct osd_attr attr; | ||
785 | loff_t isize = i_size_read(inode); | ||
786 | __be64 newsize; | ||
787 | int ret; | 803 | int ret; |
788 | 804 | ||
789 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 805 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
@@ -793,22 +809,6 @@ void exofs_truncate(struct inode *inode) | |||
793 | return; | 809 | return; |
794 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 810 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
795 | return; | 811 | return; |
796 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
797 | |||
798 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
799 | |||
800 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
801 | if (unlikely(!or)) { | ||
802 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
803 | goto fail; | ||
804 | } | ||
805 | |||
806 | osd_req_set_attributes(or, &obj); | ||
807 | |||
808 | newsize = cpu_to_be64((u64)isize); | ||
809 | attr = g_attr_logical_length; | ||
810 | attr.val_ptr = &newsize; | ||
811 | osd_req_add_set_attr_list(or, &attr, 1); | ||
812 | 812 | ||
813 | /* if we are about to truncate an object, and it hasn't been | 813 | /* if we are about to truncate an object, and it hasn't been |
814 | * created yet, wait | 814 | * created yet, wait |
@@ -816,8 +816,7 @@ void exofs_truncate(struct inode *inode) | |||
816 | if (unlikely(wait_obj_created(oi))) | 816 | if (unlikely(wait_obj_created(oi))) |
817 | goto fail; | 817 | goto fail; |
818 | 818 | ||
819 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 819 | ret = _do_truncate(inode); |
820 | osd_end_request(or); | ||
821 | if (ret) | 820 | if (ret) |
822 | goto fail; | 821 | goto fail; |
823 | 822 | ||
@@ -847,65 +846,62 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
847 | 846 | ||
848 | /* | 847 | /* |
849 | * Read an inode from the OSD, and return it as is. We also return the size | 848 | * Read an inode from the OSD, and return it as is. We also return the size |
850 | * attribute in the 'sanity' argument if we got compiled with debugging turned | 849 | * attribute in the 'obj_size' argument. |
851 | * on. | ||
852 | */ | 850 | */ |
853 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | 851 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, |
854 | struct exofs_fcb *inode, uint64_t *sanity) | 852 | struct exofs_fcb *inode, uint64_t *obj_size) |
855 | { | 853 | { |
856 | struct exofs_sb_info *sbi = sb->s_fs_info; | 854 | struct exofs_sb_info *sbi = sb->s_fs_info; |
857 | struct osd_request *or; | 855 | struct osd_attr attrs[2]; |
858 | struct osd_attr attr; | 856 | struct exofs_io_state *ios; |
859 | struct osd_obj_id obj = {sbi->s_pid, | ||
860 | oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | ||
861 | int ret; | 857 | int ret; |
862 | 858 | ||
863 | exofs_make_credential(oi->i_cred, &obj); | 859 | *obj_size = ~0; |
864 | 860 | ret = exofs_get_io_state(sbi, &ios); | |
865 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 861 | if (unlikely(ret)) { |
866 | if (unlikely(!or)) { | 862 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
867 | EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | 863 | return ret; |
868 | return -ENOMEM; | ||
869 | } | 864 | } |
870 | osd_req_get_attributes(or, &obj); | ||
871 | 865 | ||
872 | /* we need the inode attribute */ | 866 | ios->obj.id = exofs_oi_objno(oi); |
873 | osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | 867 | exofs_make_credential(oi->i_cred, &ios->obj); |
868 | ios->cred = oi->i_cred; | ||
874 | 869 | ||
875 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 870 | attrs[0] = g_attr_inode_data; |
876 | /* we get the size attributes to do a sanity check */ | 871 | attrs[1] = g_attr_logical_length; |
877 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | 872 | ios->in_attr = attrs; |
878 | #endif | 873 | ios->in_attr_len = ARRAY_SIZE(attrs); |
879 | 874 | ||
880 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 875 | ret = exofs_sbi_read(ios); |
881 | if (ret) | 876 | if (ret) |
882 | goto out; | 877 | goto out; |
883 | 878 | ||
884 | attr = g_attr_inode_data; | 879 | ret = extract_attr_from_ios(ios, &attrs[0]); |
885 | ret = extract_attr_from_req(or, &attr); | ||
886 | if (ret) { | 880 | if (ret) { |
887 | EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | 881 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); |
888 | goto out; | 882 | goto out; |
889 | } | 883 | } |
884 | WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); | ||
885 | memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); | ||
890 | 886 | ||
891 | WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | 887 | ret = extract_attr_from_ios(ios, &attrs[1]); |
892 | memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | ||
893 | |||
894 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
895 | attr = g_attr_logical_length; | ||
896 | ret = extract_attr_from_req(or, &attr); | ||
897 | if (ret) { | 888 | if (ret) { |
898 | EXOFS_ERR("ERROR: extract attr from or failed\n"); | 889 | EXOFS_ERR("%s: extract_attr of logical_length failed\n", |
890 | __func__); | ||
899 | goto out; | 891 | goto out; |
900 | } | 892 | } |
901 | *sanity = get_unaligned_be64(attr.val_ptr); | 893 | *obj_size = get_unaligned_be64(attrs[1].val_ptr); |
902 | #endif | ||
903 | 894 | ||
904 | out: | 895 | out: |
905 | osd_end_request(or); | 896 | exofs_put_io_state(ios); |
906 | return ret; | 897 | return ret; |
907 | } | 898 | } |
908 | 899 | ||
900 | static void __oi_init(struct exofs_i_info *oi) | ||
901 | { | ||
902 | init_waitqueue_head(&oi->i_wq); | ||
903 | oi->i_flags = 0; | ||
904 | } | ||
909 | /* | 905 | /* |
910 | * Fill in an inode read from the OSD and set it up for use | 906 | * Fill in an inode read from the OSD and set it up for use |
911 | */ | 907 | */ |
@@ -914,7 +910,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
914 | struct exofs_i_info *oi; | 910 | struct exofs_i_info *oi; |
915 | struct exofs_fcb fcb; | 911 | struct exofs_fcb fcb; |
916 | struct inode *inode; | 912 | struct inode *inode; |
917 | uint64_t uninitialized_var(sanity); | 913 | uint64_t obj_size; |
918 | int ret; | 914 | int ret; |
919 | 915 | ||
920 | inode = iget_locked(sb, ino); | 916 | inode = iget_locked(sb, ino); |
@@ -923,13 +919,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
923 | if (!(inode->i_state & I_NEW)) | 919 | if (!(inode->i_state & I_NEW)) |
924 | return inode; | 920 | return inode; |
925 | oi = exofs_i(inode); | 921 | oi = exofs_i(inode); |
922 | __oi_init(oi); | ||
926 | 923 | ||
927 | /* read the inode from the osd */ | 924 | /* read the inode from the osd */ |
928 | ret = exofs_get_inode(sb, oi, &fcb, &sanity); | 925 | ret = exofs_get_inode(sb, oi, &fcb, &obj_size); |
929 | if (ret) | 926 | if (ret) |
930 | goto bad_inode; | 927 | goto bad_inode; |
931 | 928 | ||
932 | init_waitqueue_head(&oi->i_wq); | ||
933 | set_obj_created(oi); | 929 | set_obj_created(oi); |
934 | 930 | ||
935 | /* copy stuff from on-disk struct to in-memory struct */ | 931 | /* copy stuff from on-disk struct to in-memory struct */ |
@@ -947,14 +943,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
947 | inode->i_blkbits = EXOFS_BLKSHIFT; | 943 | inode->i_blkbits = EXOFS_BLKSHIFT; |
948 | inode->i_generation = le32_to_cpu(fcb.i_generation); | 944 | inode->i_generation = le32_to_cpu(fcb.i_generation); |
949 | 945 | ||
950 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 946 | if ((inode->i_size != obj_size) && |
951 | if ((inode->i_size != sanity) && | ||
952 | (!exofs_inode_is_fast_symlink(inode))) { | 947 | (!exofs_inode_is_fast_symlink(inode))) { |
953 | EXOFS_ERR("WARNING: Size of object from inode and " | 948 | EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", |
954 | "attributes differ (%lld != %llu)\n", | 949 | inode->i_size, _LLU(obj_size)); |
955 | inode->i_size, _LLU(sanity)); | 950 | /* FIXME: call exofs_inode_recovery() */ |
956 | } | 951 | } |
957 | #endif | ||
958 | 952 | ||
959 | oi->i_dir_start_lookup = 0; | 953 | oi->i_dir_start_lookup = 0; |
960 | 954 | ||
@@ -1020,23 +1014,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) | |||
1020 | * set the obj_created flag so that other methods know that the object exists on | 1014 | * set the obj_created flag so that other methods know that the object exists on |
1021 | * the OSD. | 1015 | * the OSD. |
1022 | */ | 1016 | */ |
1023 | static void create_done(struct osd_request *or, void *p) | 1017 | static void create_done(struct exofs_io_state *ios, void *p) |
1024 | { | 1018 | { |
1025 | struct inode *inode = p; | 1019 | struct inode *inode = p; |
1026 | struct exofs_i_info *oi = exofs_i(inode); | 1020 | struct exofs_i_info *oi = exofs_i(inode); |
1027 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 1021 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
1028 | int ret; | 1022 | int ret; |
1029 | 1023 | ||
1030 | ret = exofs_check_ok(or); | 1024 | ret = exofs_check_io(ios, NULL); |
1031 | osd_end_request(or); | 1025 | exofs_put_io_state(ios); |
1026 | |||
1032 | atomic_dec(&sbi->s_curr_pending); | 1027 | atomic_dec(&sbi->s_curr_pending); |
1033 | 1028 | ||
1034 | if (unlikely(ret)) { | 1029 | if (unlikely(ret)) { |
1035 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | 1030 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", |
1036 | _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | 1031 | _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); |
1037 | make_bad_inode(inode); | 1032 | /*TODO: When FS is corrupted creation can fail, object already |
1038 | } else | 1033 | * exist. Get rid of this asynchronous creation, if exist |
1039 | set_obj_created(oi); | 1034 | * increment the obj counter and try the next object. Until we |
1035 | * succeed. All these dangling objects will be made into lost | ||
1036 | * files by chkfs.exofs | ||
1037 | */ | ||
1038 | } | ||
1039 | |||
1040 | set_obj_created(oi); | ||
1040 | 1041 | ||
1041 | atomic_dec(&inode->i_count); | 1042 | atomic_dec(&inode->i_count); |
1042 | wake_up(&oi->i_wq); | 1043 | wake_up(&oi->i_wq); |
@@ -1051,8 +1052,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1051 | struct inode *inode; | 1052 | struct inode *inode; |
1052 | struct exofs_i_info *oi; | 1053 | struct exofs_i_info *oi; |
1053 | struct exofs_sb_info *sbi; | 1054 | struct exofs_sb_info *sbi; |
1054 | struct osd_request *or; | 1055 | struct exofs_io_state *ios; |
1055 | struct osd_obj_id obj; | ||
1056 | int ret; | 1056 | int ret; |
1057 | 1057 | ||
1058 | sb = dir->i_sb; | 1058 | sb = dir->i_sb; |
@@ -1061,8 +1061,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1061 | return ERR_PTR(-ENOMEM); | 1061 | return ERR_PTR(-ENOMEM); |
1062 | 1062 | ||
1063 | oi = exofs_i(inode); | 1063 | oi = exofs_i(inode); |
1064 | __oi_init(oi); | ||
1064 | 1065 | ||
1065 | init_waitqueue_head(&oi->i_wq); | ||
1066 | set_obj_2bcreated(oi); | 1066 | set_obj_2bcreated(oi); |
1067 | 1067 | ||
1068 | sbi = sb->s_fs_info; | 1068 | sbi = sb->s_fs_info; |
@@ -1089,28 +1089,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1089 | 1089 | ||
1090 | mark_inode_dirty(inode); | 1090 | mark_inode_dirty(inode); |
1091 | 1091 | ||
1092 | obj.partition = sbi->s_pid; | 1092 | ret = exofs_get_io_state(sbi, &ios); |
1093 | obj.id = inode->i_ino + EXOFS_OBJ_OFF; | 1093 | if (unlikely(ret)) { |
1094 | exofs_make_credential(oi->i_cred, &obj); | 1094 | EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); |
1095 | 1095 | return ERR_PTR(ret); | |
1096 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1097 | if (unlikely(!or)) { | ||
1098 | EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | ||
1099 | return ERR_PTR(-ENOMEM); | ||
1100 | } | 1096 | } |
1101 | 1097 | ||
1102 | osd_req_create_object(or, &obj); | 1098 | ios->obj.id = exofs_oi_objno(oi); |
1099 | exofs_make_credential(oi->i_cred, &ios->obj); | ||
1103 | 1100 | ||
1104 | /* increment the refcount so that the inode will still be around when we | 1101 | /* increment the refcount so that the inode will still be around when we |
1105 | * reach the callback | 1102 | * reach the callback |
1106 | */ | 1103 | */ |
1107 | atomic_inc(&inode->i_count); | 1104 | atomic_inc(&inode->i_count); |
1108 | 1105 | ||
1109 | ret = exofs_async_op(or, create_done, inode, oi->i_cred); | 1106 | ios->done = create_done; |
1107 | ios->private = inode; | ||
1108 | ios->cred = oi->i_cred; | ||
1109 | ret = exofs_sbi_create(ios); | ||
1110 | if (ret) { | 1110 | if (ret) { |
1111 | atomic_dec(&inode->i_count); | 1111 | atomic_dec(&inode->i_count); |
1112 | osd_end_request(or); | 1112 | exofs_put_io_state(ios); |
1113 | return ERR_PTR(-EIO); | 1113 | return ERR_PTR(ret); |
1114 | } | 1114 | } |
1115 | atomic_inc(&sbi->s_curr_pending); | 1115 | atomic_inc(&sbi->s_curr_pending); |
1116 | 1116 | ||
@@ -1128,11 +1128,11 @@ struct updatei_args { | |||
1128 | /* | 1128 | /* |
1129 | * Callback function from exofs_update_inode(). | 1129 | * Callback function from exofs_update_inode(). |
1130 | */ | 1130 | */ |
1131 | static void updatei_done(struct osd_request *or, void *p) | 1131 | static void updatei_done(struct exofs_io_state *ios, void *p) |
1132 | { | 1132 | { |
1133 | struct updatei_args *args = p; | 1133 | struct updatei_args *args = p; |
1134 | 1134 | ||
1135 | osd_end_request(or); | 1135 | exofs_put_io_state(ios); |
1136 | 1136 | ||
1137 | atomic_dec(&args->sbi->s_curr_pending); | 1137 | atomic_dec(&args->sbi->s_curr_pending); |
1138 | 1138 | ||
@@ -1148,8 +1148,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1148 | struct exofs_i_info *oi = exofs_i(inode); | 1148 | struct exofs_i_info *oi = exofs_i(inode); |
1149 | struct super_block *sb = inode->i_sb; | 1149 | struct super_block *sb = inode->i_sb; |
1150 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1150 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1151 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1151 | struct exofs_io_state *ios; |
1152 | struct osd_request *or; | ||
1153 | struct osd_attr attr; | 1152 | struct osd_attr attr; |
1154 | struct exofs_fcb *fcb; | 1153 | struct exofs_fcb *fcb; |
1155 | struct updatei_args *args; | 1154 | struct updatei_args *args; |
@@ -1186,18 +1185,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1186 | } else | 1185 | } else |
1187 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | 1186 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); |
1188 | 1187 | ||
1189 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1188 | ret = exofs_get_io_state(sbi, &ios); |
1190 | if (unlikely(!or)) { | 1189 | if (unlikely(ret)) { |
1191 | EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | 1190 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
1192 | ret = -ENOMEM; | ||
1193 | goto free_args; | 1191 | goto free_args; |
1194 | } | 1192 | } |
1195 | 1193 | ||
1196 | osd_req_set_attributes(or, &obj); | ||
1197 | |||
1198 | attr = g_attr_inode_data; | 1194 | attr = g_attr_inode_data; |
1199 | attr.val_ptr = fcb; | 1195 | attr.val_ptr = fcb; |
1200 | osd_req_add_set_attr_list(or, &attr, 1); | 1196 | ios->out_attr_len = 1; |
1197 | ios->out_attr = &attr; | ||
1201 | 1198 | ||
1202 | if (!obj_created(oi)) { | 1199 | if (!obj_created(oi)) { |
1203 | EXOFS_DBGMSG("!obj_created\n"); | 1200 | EXOFS_DBGMSG("!obj_created\n"); |
@@ -1206,22 +1203,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1206 | EXOFS_DBGMSG("wait_event done\n"); | 1203 | EXOFS_DBGMSG("wait_event done\n"); |
1207 | } | 1204 | } |
1208 | 1205 | ||
1209 | if (do_sync) { | 1206 | if (!do_sync) { |
1210 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
1211 | osd_end_request(or); | ||
1212 | goto free_args; | ||
1213 | } else { | ||
1214 | args->sbi = sbi; | 1207 | args->sbi = sbi; |
1208 | ios->done = updatei_done; | ||
1209 | ios->private = args; | ||
1210 | } | ||
1215 | 1211 | ||
1216 | ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | 1212 | ret = exofs_oi_write(oi, ios); |
1217 | if (ret) { | 1213 | if (!do_sync && !ret) { |
1218 | osd_end_request(or); | ||
1219 | goto free_args; | ||
1220 | } | ||
1221 | atomic_inc(&sbi->s_curr_pending); | 1214 | atomic_inc(&sbi->s_curr_pending); |
1222 | goto out; /* deallocation in updatei_done */ | 1215 | goto out; /* deallocation in updatei_done */ |
1223 | } | 1216 | } |
1224 | 1217 | ||
1218 | exofs_put_io_state(ios); | ||
1225 | free_args: | 1219 | free_args: |
1226 | kfree(args); | 1220 | kfree(args); |
1227 | out: | 1221 | out: |
@@ -1238,11 +1232,12 @@ int exofs_write_inode(struct inode *inode, int wait) | |||
1238 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | 1232 | * Callback function from exofs_delete_inode() - don't have much cleaning up to |
1239 | * do. | 1233 | * do. |
1240 | */ | 1234 | */ |
1241 | static void delete_done(struct osd_request *or, void *p) | 1235 | static void delete_done(struct exofs_io_state *ios, void *p) |
1242 | { | 1236 | { |
1243 | struct exofs_sb_info *sbi; | 1237 | struct exofs_sb_info *sbi = p; |
1244 | osd_end_request(or); | 1238 | |
1245 | sbi = p; | 1239 | exofs_put_io_state(ios); |
1240 | |||
1246 | atomic_dec(&sbi->s_curr_pending); | 1241 | atomic_dec(&sbi->s_curr_pending); |
1247 | } | 1242 | } |
1248 | 1243 | ||
@@ -1256,8 +1251,7 @@ void exofs_delete_inode(struct inode *inode) | |||
1256 | struct exofs_i_info *oi = exofs_i(inode); | 1251 | struct exofs_i_info *oi = exofs_i(inode); |
1257 | struct super_block *sb = inode->i_sb; | 1252 | struct super_block *sb = inode->i_sb; |
1258 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1253 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1259 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1254 | struct exofs_io_state *ios; |
1260 | struct osd_request *or; | ||
1261 | int ret; | 1255 | int ret; |
1262 | 1256 | ||
1263 | truncate_inode_pages(&inode->i_data, 0); | 1257 | truncate_inode_pages(&inode->i_data, 0); |
@@ -1274,25 +1268,26 @@ void exofs_delete_inode(struct inode *inode) | |||
1274 | 1268 | ||
1275 | clear_inode(inode); | 1269 | clear_inode(inode); |
1276 | 1270 | ||
1277 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1271 | ret = exofs_get_io_state(sbi, &ios); |
1278 | if (unlikely(!or)) { | 1272 | if (unlikely(ret)) { |
1279 | EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | 1273 | EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); |
1280 | return; | 1274 | return; |
1281 | } | 1275 | } |
1282 | 1276 | ||
1283 | osd_req_remove_object(or, &obj); | ||
1284 | |||
1285 | /* if we are deleting an obj that hasn't been created yet, wait */ | 1277 | /* if we are deleting an obj that hasn't been created yet, wait */ |
1286 | if (!obj_created(oi)) { | 1278 | if (!obj_created(oi)) { |
1287 | BUG_ON(!obj_2bcreated(oi)); | 1279 | BUG_ON(!obj_2bcreated(oi)); |
1288 | wait_event(oi->i_wq, obj_created(oi)); | 1280 | wait_event(oi->i_wq, obj_created(oi)); |
1289 | } | 1281 | } |
1290 | 1282 | ||
1291 | ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | 1283 | ios->obj.id = exofs_oi_objno(oi); |
1284 | ios->done = delete_done; | ||
1285 | ios->private = sbi; | ||
1286 | ios->cred = oi->i_cred; | ||
1287 | ret = exofs_sbi_remove(ios); | ||
1292 | if (ret) { | 1288 | if (ret) { |
1293 | EXOFS_ERR( | 1289 | EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); |
1294 | "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | 1290 | exofs_put_io_state(ios); |
1295 | osd_end_request(or); | ||
1296 | return; | 1291 | return; |
1297 | } | 1292 | } |
1298 | atomic_inc(&sbi->s_curr_pending); | 1293 | atomic_inc(&sbi->s_curr_pending); |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c new file mode 100644 index 000000000000..5bad01fa1f9f --- /dev/null +++ b/fs/exofs/ios.c | |||
@@ -0,0 +1,421 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) | ||
4 | * Copyright (C) 2008, 2009 | ||
5 | * Boaz Harrosh <bharrosh@panasas.com> | ||
6 | * | ||
7 | * This file is part of exofs. | ||
8 | * | ||
9 | * exofs is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
12 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
13 | * version of GPL for exofs is version 2. | ||
14 | * | ||
15 | * exofs is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with exofs; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | #include <scsi/scsi_device.h> | ||
26 | |||
27 | #include "exofs.h" | ||
28 | |||
29 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | ||
30 | { | ||
31 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
32 | } | ||
33 | |||
34 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
35 | u64 offset, void *p, unsigned length) | ||
36 | { | ||
37 | struct osd_request *or = osd_start_request(od, GFP_KERNEL); | ||
38 | /* struct osd_sense_info osi = {.key = 0};*/ | ||
39 | int ret; | ||
40 | |||
41 | if (unlikely(!or)) { | ||
42 | EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); | ||
43 | return -ENOMEM; | ||
44 | } | ||
45 | ret = osd_req_read_kern(or, obj, offset, p, length); | ||
46 | if (unlikely(ret)) { | ||
47 | EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); | ||
48 | goto out; | ||
49 | } | ||
50 | |||
51 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
52 | if (unlikely(ret)) { | ||
53 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
54 | goto out; | ||
55 | } | ||
56 | |||
57 | ret = osd_execute_request(or); | ||
58 | if (unlikely(ret)) | ||
59 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
60 | /* osd_req_decode_sense(or, ret); */ | ||
61 | |||
62 | out: | ||
63 | osd_end_request(or); | ||
64 | return ret; | ||
65 | } | ||
66 | |||
67 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) | ||
68 | { | ||
69 | struct exofs_io_state *ios; | ||
70 | |||
71 | /*TODO: Maybe use kmem_cach per sbi of size | ||
72 | * exofs_io_state_size(sbi->s_numdevs) | ||
73 | */ | ||
74 | ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); | ||
75 | if (unlikely(!ios)) { | ||
76 | *pios = NULL; | ||
77 | return -ENOMEM; | ||
78 | } | ||
79 | |||
80 | ios->sbi = sbi; | ||
81 | ios->obj.partition = sbi->s_pid; | ||
82 | *pios = ios; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | void exofs_put_io_state(struct exofs_io_state *ios) | ||
87 | { | ||
88 | if (ios) { | ||
89 | unsigned i; | ||
90 | |||
91 | for (i = 0; i < ios->numdevs; i++) { | ||
92 | struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; | ||
93 | |||
94 | if (per_dev->or) | ||
95 | osd_end_request(per_dev->or); | ||
96 | if (per_dev->bio) | ||
97 | bio_put(per_dev->bio); | ||
98 | } | ||
99 | |||
100 | kfree(ios); | ||
101 | } | ||
102 | } | ||
103 | |||
/*
 * Done-callback installed by exofs_io_execute() for synchronous calls:
 * @p is the completion the submitter is waiting on.
 */
static void _sync_done(struct exofs_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
110 | |||
111 | static void _last_io(struct kref *kref) | ||
112 | { | ||
113 | struct exofs_io_state *ios = container_of( | ||
114 | kref, struct exofs_io_state, kref); | ||
115 | |||
116 | ios->done(ios, ios->private); | ||
117 | } | ||
118 | |||
119 | static void _done_io(struct osd_request *or, void *p) | ||
120 | { | ||
121 | struct exofs_io_state *ios = p; | ||
122 | |||
123 | kref_put(&ios->kref, _last_io); | ||
124 | } | ||
125 | |||
126 | static int exofs_io_execute(struct exofs_io_state *ios) | ||
127 | { | ||
128 | DECLARE_COMPLETION_ONSTACK(wait); | ||
129 | bool sync = (ios->done == NULL); | ||
130 | int i, ret; | ||
131 | |||
132 | if (sync) { | ||
133 | ios->done = _sync_done; | ||
134 | ios->private = &wait; | ||
135 | } | ||
136 | |||
137 | for (i = 0; i < ios->numdevs; i++) { | ||
138 | struct osd_request *or = ios->per_dev[i].or; | ||
139 | if (unlikely(!or)) | ||
140 | continue; | ||
141 | |||
142 | ret = osd_finalize_request(or, 0, ios->cred, NULL); | ||
143 | if (unlikely(ret)) { | ||
144 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", | ||
145 | ret); | ||
146 | return ret; | ||
147 | } | ||
148 | } | ||
149 | |||
150 | kref_init(&ios->kref); | ||
151 | |||
152 | for (i = 0; i < ios->numdevs; i++) { | ||
153 | struct osd_request *or = ios->per_dev[i].or; | ||
154 | if (unlikely(!or)) | ||
155 | continue; | ||
156 | |||
157 | kref_get(&ios->kref); | ||
158 | osd_execute_request_async(or, _done_io, ios); | ||
159 | } | ||
160 | |||
161 | kref_put(&ios->kref, _last_io); | ||
162 | ret = 0; | ||
163 | |||
164 | if (sync) { | ||
165 | wait_for_completion(&wait); | ||
166 | ret = exofs_check_io(ios, NULL); | ||
167 | } | ||
168 | return ret; | ||
169 | } | ||
170 | |||
171 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid) | ||
172 | { | ||
173 | enum osd_err_priority acumulated_osd_err = 0; | ||
174 | int acumulated_lin_err = 0; | ||
175 | int i; | ||
176 | |||
177 | for (i = 0; i < ios->numdevs; i++) { | ||
178 | struct osd_sense_info osi; | ||
179 | int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); | ||
180 | |||
181 | if (likely(!ret)) | ||
182 | continue; | ||
183 | |||
184 | if (unlikely(ret == -EFAULT)) { | ||
185 | EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); | ||
186 | /*FIXME: All the pages in this device range should: | ||
187 | * clear_highpage(page); | ||
188 | */ | ||
189 | } | ||
190 | |||
191 | if (osi.osd_err_pri >= acumulated_osd_err) { | ||
192 | acumulated_osd_err = osi.osd_err_pri; | ||
193 | acumulated_lin_err = ret; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | /* TODO: raid specific residual calculations */ | ||
198 | if (resid) { | ||
199 | if (likely(!acumulated_lin_err)) | ||
200 | *resid = 0; | ||
201 | else | ||
202 | *resid = ios->length; | ||
203 | } | ||
204 | |||
205 | return acumulated_lin_err; | ||
206 | } | ||
207 | |||
208 | int exofs_sbi_create(struct exofs_io_state *ios) | ||
209 | { | ||
210 | int i, ret; | ||
211 | |||
212 | for (i = 0; i < ios->sbi->s_numdevs; i++) { | ||
213 | struct osd_request *or; | ||
214 | |||
215 | or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | ||
216 | if (unlikely(!or)) { | ||
217 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
218 | ret = -ENOMEM; | ||
219 | goto out; | ||
220 | } | ||
221 | ios->per_dev[i].or = or; | ||
222 | ios->numdevs++; | ||
223 | |||
224 | osd_req_create_object(or, &ios->obj); | ||
225 | } | ||
226 | ret = exofs_io_execute(ios); | ||
227 | |||
228 | out: | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | int exofs_sbi_remove(struct exofs_io_state *ios) | ||
233 | { | ||
234 | int i, ret; | ||
235 | |||
236 | for (i = 0; i < ios->sbi->s_numdevs; i++) { | ||
237 | struct osd_request *or; | ||
238 | |||
239 | or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | ||
240 | if (unlikely(!or)) { | ||
241 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
242 | ret = -ENOMEM; | ||
243 | goto out; | ||
244 | } | ||
245 | ios->per_dev[i].or = or; | ||
246 | ios->numdevs++; | ||
247 | |||
248 | osd_req_remove_object(or, &ios->obj); | ||
249 | } | ||
250 | ret = exofs_io_execute(ios); | ||
251 | |||
252 | out: | ||
253 | return ret; | ||
254 | } | ||
255 | |||
256 | int exofs_sbi_write(struct exofs_io_state *ios) | ||
257 | { | ||
258 | int i, ret; | ||
259 | |||
260 | for (i = 0; i < ios->sbi->s_numdevs; i++) { | ||
261 | struct osd_request *or; | ||
262 | |||
263 | or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | ||
264 | if (unlikely(!or)) { | ||
265 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
266 | ret = -ENOMEM; | ||
267 | goto out; | ||
268 | } | ||
269 | ios->per_dev[i].or = or; | ||
270 | ios->numdevs++; | ||
271 | |||
272 | if (ios->bio) { | ||
273 | struct bio *bio; | ||
274 | |||
275 | if (i != 0) { | ||
276 | bio = bio_kmalloc(GFP_KERNEL, | ||
277 | ios->bio->bi_max_vecs); | ||
278 | if (unlikely(!bio)) { | ||
279 | ret = -ENOMEM; | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | __bio_clone(bio, ios->bio); | ||
284 | bio->bi_bdev = NULL; | ||
285 | bio->bi_next = NULL; | ||
286 | ios->per_dev[i].bio = bio; | ||
287 | } else { | ||
288 | bio = ios->bio; | ||
289 | } | ||
290 | |||
291 | osd_req_write(or, &ios->obj, ios->offset, bio, | ||
292 | ios->length); | ||
293 | /* EXOFS_DBGMSG("write sync=%d\n", sync);*/ | ||
294 | } else if (ios->kern_buff) { | ||
295 | osd_req_write_kern(or, &ios->obj, ios->offset, | ||
296 | ios->kern_buff, ios->length); | ||
297 | /* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ | ||
298 | } else { | ||
299 | osd_req_set_attributes(or, &ios->obj); | ||
300 | /* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ | ||
301 | } | ||
302 | |||
303 | if (ios->out_attr) | ||
304 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
305 | ios->out_attr_len); | ||
306 | |||
307 | if (ios->in_attr) | ||
308 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
309 | ios->in_attr_len); | ||
310 | } | ||
311 | ret = exofs_io_execute(ios); | ||
312 | |||
313 | out: | ||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | int exofs_sbi_read(struct exofs_io_state *ios) | ||
318 | { | ||
319 | int i, ret; | ||
320 | |||
321 | for (i = 0; i < 1; i++) { | ||
322 | struct osd_request *or; | ||
323 | unsigned first_dev = (unsigned)ios->obj.id; | ||
324 | |||
325 | first_dev %= ios->sbi->s_numdevs; | ||
326 | or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL); | ||
327 | if (unlikely(!or)) { | ||
328 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
329 | ret = -ENOMEM; | ||
330 | goto out; | ||
331 | } | ||
332 | ios->per_dev[i].or = or; | ||
333 | ios->numdevs++; | ||
334 | |||
335 | if (ios->bio) { | ||
336 | osd_req_read(or, &ios->obj, ios->offset, ios->bio, | ||
337 | ios->length); | ||
338 | /* EXOFS_DBGMSG("read sync=%d\n", sync);*/ | ||
339 | } else if (ios->kern_buff) { | ||
340 | osd_req_read_kern(or, &ios->obj, ios->offset, | ||
341 | ios->kern_buff, ios->length); | ||
342 | /* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ | ||
343 | } else { | ||
344 | osd_req_get_attributes(or, &ios->obj); | ||
345 | /* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ | ||
346 | } | ||
347 | |||
348 | if (ios->out_attr) | ||
349 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
350 | ios->out_attr_len); | ||
351 | |||
352 | if (ios->in_attr) | ||
353 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
354 | ios->in_attr_len); | ||
355 | } | ||
356 | ret = exofs_io_execute(ios); | ||
357 | |||
358 | out: | ||
359 | return ret; | ||
360 | } | ||
361 | |||
362 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) | ||
363 | { | ||
364 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
365 | void *iter = NULL; | ||
366 | int nelem; | ||
367 | |||
368 | do { | ||
369 | nelem = 1; | ||
370 | osd_req_decode_get_attr_list(ios->per_dev[0].or, | ||
371 | &cur_attr, &nelem, &iter); | ||
372 | if ((cur_attr.attr_page == attr->attr_page) && | ||
373 | (cur_attr.attr_id == attr->attr_id)) { | ||
374 | attr->len = cur_attr.len; | ||
375 | attr->val_ptr = cur_attr.val_ptr; | ||
376 | return 0; | ||
377 | } | ||
378 | } while (iter); | ||
379 | |||
380 | return -EIO; | ||
381 | } | ||
382 | |||
383 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | ||
384 | { | ||
385 | struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; | ||
386 | struct exofs_io_state *ios; | ||
387 | struct osd_attr attr; | ||
388 | __be64 newsize; | ||
389 | int i, ret; | ||
390 | |||
391 | if (exofs_get_io_state(sbi, &ios)) | ||
392 | return -ENOMEM; | ||
393 | |||
394 | ios->obj.id = exofs_oi_objno(oi); | ||
395 | ios->cred = oi->i_cred; | ||
396 | |||
397 | newsize = cpu_to_be64(size); | ||
398 | attr = g_attr_logical_length; | ||
399 | attr.val_ptr = &newsize; | ||
400 | |||
401 | for (i = 0; i < sbi->s_numdevs; i++) { | ||
402 | struct osd_request *or; | ||
403 | |||
404 | or = osd_start_request(sbi->s_ods[i], GFP_KERNEL); | ||
405 | if (unlikely(!or)) { | ||
406 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
407 | ret = -ENOMEM; | ||
408 | goto out; | ||
409 | } | ||
410 | ios->per_dev[i].or = or; | ||
411 | ios->numdevs++; | ||
412 | |||
413 | osd_req_set_attributes(or, &ios->obj); | ||
414 | osd_req_add_set_attr_list(or, &attr, 1); | ||
415 | } | ||
416 | ret = exofs_io_execute(ios); | ||
417 | |||
418 | out: | ||
419 | exofs_put_io_state(ios); | ||
420 | return ret; | ||
421 | } | ||
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c deleted file mode 100644 index 4372542df284..000000000000 --- a/fs/exofs/osd.c +++ /dev/null | |||
@@ -1,125 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) | ||
4 | * Copyright (C) 2008, 2009 | ||
5 | * Boaz Harrosh <bharrosh@panasas.com> | ||
6 | * | ||
7 | * This file is part of exofs. | ||
8 | * | ||
9 | * exofs is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
12 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
13 | * version of GPL for exofs is version 2. | ||
14 | * | ||
15 | * exofs is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with exofs; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | #include <scsi/scsi_device.h> | ||
26 | #include <scsi/osd_sense.h> | ||
27 | |||
28 | #include "exofs.h" | ||
29 | |||
30 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | ||
31 | { | ||
32 | struct osd_sense_info osi; | ||
33 | int ret = osd_req_decode_sense(or, &osi); | ||
34 | |||
35 | if (ret) { /* translate to Linux codes */ | ||
36 | if (osi.additional_code == scsi_invalid_field_in_cdb) { | ||
37 | if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | ||
38 | ret = -EFAULT; | ||
39 | if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | ||
40 | ret = -ENOENT; | ||
41 | else | ||
42 | ret = -EINVAL; | ||
43 | } else if (osi.additional_code == osd_quota_error) | ||
44 | ret = -ENOSPC; | ||
45 | else | ||
46 | ret = -EIO; | ||
47 | } | ||
48 | |||
49 | /* FIXME: should be include in osd_sense_info */ | ||
50 | if (in_resid) | ||
51 | *in_resid = or->in.req ? or->in.req->resid_len : 0; | ||
52 | |||
53 | if (out_resid) | ||
54 | *out_resid = or->out.req ? or->out.req->resid_len : 0; | ||
55 | |||
56 | return ret; | ||
57 | } | ||
58 | |||
59 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | ||
60 | { | ||
61 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Perform a synchronous OSD operation. | ||
66 | */ | ||
67 | int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
68 | { | ||
69 | int ret; | ||
70 | |||
71 | or->timeout = timeout; | ||
72 | ret = osd_finalize_request(or, 0, credential, NULL); | ||
73 | if (ret) { | ||
74 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | ret = osd_execute_request(or); | ||
79 | |||
80 | if (ret) | ||
81 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
82 | /* osd_req_decode_sense(or, ret); */ | ||
83 | return ret; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Perform an asynchronous OSD operation. | ||
88 | */ | ||
89 | int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
90 | void *caller_context, u8 *cred) | ||
91 | { | ||
92 | int ret; | ||
93 | |||
94 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
95 | if (ret) { | ||
96 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
97 | return ret; | ||
98 | } | ||
99 | |||
100 | ret = osd_execute_request_async(or, async_done, caller_context); | ||
101 | |||
102 | if (ret) | ||
103 | EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | ||
104 | return ret; | ||
105 | } | ||
106 | |||
107 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | ||
108 | { | ||
109 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
110 | void *iter = NULL; | ||
111 | int nelem; | ||
112 | |||
113 | do { | ||
114 | nelem = 1; | ||
115 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | ||
116 | if ((cur_attr.attr_page == attr->attr_page) && | ||
117 | (cur_attr.attr_id == attr->attr_id)) { | ||
118 | attr->len = cur_attr.len; | ||
119 | attr->val_ptr = cur_attr.val_ptr; | ||
120 | return 0; | ||
121 | } | ||
122 | } while (iter); | ||
123 | |||
124 | return -EIO; | ||
125 | } | ||
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h new file mode 100644 index 000000000000..423033addd1f --- /dev/null +++ b/fs/exofs/pnfs.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, 2009 | ||
3 | * Boaz Harrosh <bharrosh@panasas.com> | ||
4 | * | ||
5 | * This file is part of exofs. | ||
6 | * | ||
7 | * exofs is free software; you can redistribute it and/or modify it under the | ||
8 | * terms of the GNU General Public License version 2 as published by the Free | ||
9 | * Software Foundation. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* FIXME: Remove this file once pnfs hits mainline */ | ||
14 | |||
15 | #ifndef __EXOFS_PNFS_H__ | ||
16 | #define __EXOFS_PNFS_H__ | ||
17 | |||
18 | #if defined(CONFIG_PNFS) | ||
19 | |||
20 | |||
21 | /* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */ | ||
22 | #include "../nfs/objlayout/pnfs_osd_xdr.h" | ||
23 | |||
24 | #else /* defined(CONFIG_PNFS) */ | ||
25 | |||
/* Local fallback definition, used only when CONFIG_PNFS is not defined. */
enum pnfs_iomode {
	IOMODE_READ	= 1,
	IOMODE_RW	= 2,
	IOMODE_ANY	= 3
};
31 | |||
/* Layout Structure: values stored in pnfs_osd_data_map.odm_raid_algorithm */
enum pnfs_osd_raid_algorithm4 {
	PNFS_OSD_RAID_0		= 1,
	PNFS_OSD_RAID_4		= 2,
	PNFS_OSD_RAID_5		= 3,
	PNFS_OSD_RAID_PQ	= 4	/* Reed-Solomon P+Q */
};
39 | |||
40 | struct pnfs_osd_data_map { | ||
41 | u32 odm_num_comps; | ||
42 | u64 odm_stripe_unit; | ||
43 | u32 odm_group_width; | ||
44 | u32 odm_group_depth; | ||
45 | u32 odm_mirror_cnt; | ||
46 | u32 odm_raid_algorithm; | ||
47 | }; | ||
48 | |||
49 | #endif /* else defined(CONFIG_PNFS) */ | ||
50 | |||
51 | #endif /* __EXOFS_PNFS_H__ */ | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 9f500dec3b59..a1d1e77b12eb 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -203,49 +203,45 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
203 | { | 203 | { |
204 | struct exofs_sb_info *sbi; | 204 | struct exofs_sb_info *sbi; |
205 | struct exofs_fscb *fscb; | 205 | struct exofs_fscb *fscb; |
206 | struct osd_request *or; | 206 | struct exofs_io_state *ios; |
207 | struct osd_obj_id obj; | ||
208 | int ret = -ENOMEM; | 207 | int ret = -ENOMEM; |
209 | 208 | ||
210 | fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); | ||
211 | if (!fscb) { | ||
212 | EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); | ||
213 | return -ENOMEM; | ||
214 | } | ||
215 | |||
216 | lock_super(sb); | 209 | lock_super(sb); |
217 | sbi = sb->s_fs_info; | 210 | sbi = sb->s_fs_info; |
211 | fscb = &sbi->s_fscb; | ||
212 | |||
213 | ret = exofs_get_io_state(sbi, &ios); | ||
214 | if (ret) | ||
215 | goto out; | ||
216 | |||
217 | /* Note: We only write the changing part of the fscb, i.e. up to the | ||
218 | * fscb->s_dev_table_oid member. There is no read-modify-write | ||
219 | * here. | ||
220 | */ | ||
221 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); | ||
222 | memset(fscb, 0, ios->length); | ||
218 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 223 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
219 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); | 224 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); |
220 | fscb->s_magic = cpu_to_le16(sb->s_magic); | 225 | fscb->s_magic = cpu_to_le16(sb->s_magic); |
221 | fscb->s_newfs = 0; | 226 | fscb->s_newfs = 0; |
227 | fscb->s_version = EXOFS_FSCB_VER; | ||
222 | 228 | ||
223 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 229 | ios->obj.id = EXOFS_SUPER_ID; |
224 | if (unlikely(!or)) { | 230 | ios->offset = 0; |
225 | EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); | 231 | ios->kern_buff = fscb; |
226 | goto out; | 232 | ios->cred = sbi->s_cred; |
227 | } | ||
228 | 233 | ||
229 | obj.partition = sbi->s_pid; | 234 | ret = exofs_sbi_write(ios); |
230 | obj.id = EXOFS_SUPER_ID; | ||
231 | ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb)); | ||
232 | if (unlikely(ret)) { | 235 | if (unlikely(ret)) { |
233 | EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); | 236 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); |
234 | goto out; | ||
235 | } | ||
236 | |||
237 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
238 | if (unlikely(ret)) { | ||
239 | EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); | ||
240 | goto out; | 237 | goto out; |
241 | } | 238 | } |
242 | sb->s_dirt = 0; | 239 | sb->s_dirt = 0; |
243 | 240 | ||
244 | out: | 241 | out: |
245 | if (or) | 242 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
246 | osd_end_request(or); | 243 | exofs_put_io_state(ios); |
247 | unlock_super(sb); | 244 | unlock_super(sb); |
248 | kfree(fscb); | ||
249 | return ret; | 245 | return ret; |
250 | } | 246 | } |
251 | 247 | ||
@@ -257,6 +253,29 @@ static void exofs_write_super(struct super_block *sb) | |||
257 | sb->s_dirt = 0; | 253 | sb->s_dirt = 0; |
258 | } | 254 | } |
259 | 255 | ||
256 | static void _exofs_print_device(const char *msg, const char *dev_path, | ||
257 | struct osd_dev *od, u64 pid) | ||
258 | { | ||
259 | const struct osd_dev_info *odi = osduld_device_info(od); | ||
260 | |||
261 | printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n", | ||
262 | msg, dev_path ?: "", odi->osdname, _LLU(pid)); | ||
263 | } | ||
264 | |||
265 | void exofs_free_sbi(struct exofs_sb_info *sbi) | ||
266 | { | ||
267 | while (sbi->s_numdevs) { | ||
268 | int i = --sbi->s_numdevs; | ||
269 | struct osd_dev *od = sbi->s_ods[i]; | ||
270 | |||
271 | if (od) { | ||
272 | sbi->s_ods[i] = NULL; | ||
273 | osduld_put_device(od); | ||
274 | } | ||
275 | } | ||
276 | kfree(sbi); | ||
277 | } | ||
278 | |||
260 | /* | 279 | /* |
261 | * This function is called when the vfs is freeing the superblock. We just | 280 | * This function is called when the vfs is freeing the superblock. We just |
262 | * need to free our own part. | 281 | * need to free our own part. |
@@ -279,11 +298,182 @@ static void exofs_put_super(struct super_block *sb) | |||
279 | msecs_to_jiffies(100)); | 298 | msecs_to_jiffies(100)); |
280 | } | 299 | } |
281 | 300 | ||
282 | osduld_put_device(sbi->s_dev); | 301 | _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid); |
283 | kfree(sb->s_fs_info); | 302 | |
303 | exofs_free_sbi(sbi); | ||
284 | sb->s_fs_info = NULL; | 304 | sb->s_fs_info = NULL; |
285 | } | 305 | } |
286 | 306 | ||
307 | static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | ||
308 | struct exofs_device_table *dt) | ||
309 | { | ||
310 | sbi->data_map.odm_num_comps = | ||
311 | le32_to_cpu(dt->dt_data_map.cb_num_comps); | ||
312 | sbi->data_map.odm_stripe_unit = | ||
313 | le64_to_cpu(dt->dt_data_map.cb_stripe_unit); | ||
314 | sbi->data_map.odm_group_width = | ||
315 | le32_to_cpu(dt->dt_data_map.cb_group_width); | ||
316 | sbi->data_map.odm_group_depth = | ||
317 | le32_to_cpu(dt->dt_data_map.cb_group_depth); | ||
318 | sbi->data_map.odm_mirror_cnt = | ||
319 | le32_to_cpu(dt->dt_data_map.cb_mirror_cnt); | ||
320 | sbi->data_map.odm_raid_algorithm = | ||
321 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); | ||
322 | |||
323 | /* FIXME: Hard coded mirror only for now. if not so do not mount */ | ||
324 | if ((sbi->data_map.odm_num_comps != numdevs) || | ||
325 | (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) || | ||
326 | (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) || | ||
327 | (sbi->data_map.odm_mirror_cnt != (numdevs - 1))) | ||
328 | return -EINVAL; | ||
329 | else | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | /* @odi is valid only as long as @fscb_dev is valid */ | ||
334 | static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | ||
335 | struct osd_dev_info *odi) | ||
336 | { | ||
337 | odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); | ||
338 | memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); | ||
339 | |||
340 | odi->osdname_len = le32_to_cpu(dt_dev->osdname_len); | ||
341 | odi->osdname = dt_dev->osdname; | ||
342 | |||
343 | /* FIXME support long names. Will need a _put function */ | ||
344 | if (dt_dev->long_name_offset) | ||
345 | return -EINVAL; | ||
346 | |||
347 | /* Make sure osdname is printable! | ||
348 | * mkexofs should give us space for a null-terminator else the | ||
349 | * device-table is invalid. | ||
350 | */ | ||
351 | if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname))) | ||
352 | odi->osdname_len = sizeof(dt_dev->osdname) - 1; | ||
353 | dt_dev->osdname[odi->osdname_len] = 0; | ||
354 | |||
355 | /* If it's all zeros something is bad we read past end-of-obj */ | ||
356 | return !(odi->systemid_len || odi->osdname_len); | ||
357 | } | ||
358 | |||
359 | static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | ||
360 | unsigned table_count) | ||
361 | { | ||
362 | struct exofs_sb_info *sbi = *psbi; | ||
363 | struct osd_dev *fscb_od; | ||
364 | struct osd_obj_id obj = {.partition = sbi->s_pid, | ||
365 | .id = EXOFS_DEVTABLE_ID}; | ||
366 | struct exofs_device_table *dt; | ||
367 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + | ||
368 | sizeof(*dt); | ||
369 | unsigned numdevs, i; | ||
370 | int ret; | ||
371 | |||
372 | dt = kmalloc(table_bytes, GFP_KERNEL); | ||
373 | if (unlikely(!dt)) { | ||
374 | EXOFS_ERR("ERROR: allocating %x bytes for device table\n", | ||
375 | table_bytes); | ||
376 | return -ENOMEM; | ||
377 | } | ||
378 | |||
379 | fscb_od = sbi->s_ods[0]; | ||
380 | sbi->s_ods[0] = NULL; | ||
381 | sbi->s_numdevs = 0; | ||
382 | ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); | ||
383 | if (unlikely(ret)) { | ||
384 | EXOFS_ERR("ERROR: reading device table\n"); | ||
385 | goto out; | ||
386 | } | ||
387 | |||
388 | numdevs = le64_to_cpu(dt->dt_num_devices); | ||
389 | if (unlikely(!numdevs)) { | ||
390 | ret = -EINVAL; | ||
391 | goto out; | ||
392 | } | ||
393 | WARN_ON(table_count != numdevs); | ||
394 | |||
395 | ret = _read_and_match_data_map(sbi, numdevs, dt); | ||
396 | if (unlikely(ret)) | ||
397 | goto out; | ||
398 | |||
399 | if (likely(numdevs > 1)) { | ||
400 | unsigned size = numdevs * sizeof(sbi->s_ods[0]); | ||
401 | |||
402 | sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); | ||
403 | if (unlikely(!sbi)) { | ||
404 | ret = -ENOMEM; | ||
405 | goto out; | ||
406 | } | ||
407 | memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0])); | ||
408 | *psbi = sbi; | ||
409 | } | ||
410 | |||
411 | for (i = 0; i < numdevs; i++) { | ||
412 | struct exofs_fscb fscb; | ||
413 | struct osd_dev_info odi; | ||
414 | struct osd_dev *od; | ||
415 | |||
416 | if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) { | ||
417 | EXOFS_ERR("ERROR: Read all-zeros device entry\n"); | ||
418 | ret = -EINVAL; | ||
419 | goto out; | ||
420 | } | ||
421 | |||
422 | printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", | ||
423 | i, odi.osdname); | ||
424 | |||
425 | /* On all devices the device table is identical. The user can | ||
426 | * specify any one of the participating devices on the command | ||
427 | * line. We always keep them in device-table order. | ||
428 | */ | ||
429 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { | ||
430 | sbi->s_ods[i] = fscb_od; | ||
431 | ++sbi->s_numdevs; | ||
432 | fscb_od = NULL; | ||
433 | continue; | ||
434 | } | ||
435 | |||
436 | od = osduld_info_lookup(&odi); | ||
437 | if (unlikely(IS_ERR(od))) { | ||
438 | ret = PTR_ERR(od); | ||
439 | EXOFS_ERR("ERROR: device requested is not found " | ||
440 | "osd_name-%s =>%d\n", odi.osdname, ret); | ||
441 | goto out; | ||
442 | } | ||
443 | |||
444 | sbi->s_ods[i] = od; | ||
445 | ++sbi->s_numdevs; | ||
446 | |||
447 | /* Read the fscb of the other devices to make sure the FS | ||
448 | * partition is there. | ||
449 | */ | ||
450 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, | ||
451 | sizeof(fscb)); | ||
452 | if (unlikely(ret)) { | ||
453 | EXOFS_ERR("ERROR: Malformed participating device " | ||
454 | "error reading fscb osd_name-%s\n", | ||
455 | odi.osdname); | ||
456 | goto out; | ||
457 | } | ||
458 | |||
459 | /* TODO: verify other information is correct and FS-uuid | ||
460 | * matches. Benny what did you say about device table | ||
461 | * generation and old devices? | ||
462 | */ | ||
463 | } | ||
464 | |||
465 | out: | ||
466 | kfree(dt); | ||
467 | if (unlikely(!ret && fscb_od)) { | ||
468 | EXOFS_ERR( | ||
469 | "ERROR: Bad device-table container device not present\n"); | ||
470 | osduld_put_device(fscb_od); | ||
471 | ret = -EINVAL; | ||
472 | } | ||
473 | |||
474 | return ret; | ||
475 | } | ||
476 | |||
287 | /* | 477 | /* |
288 | * Read the superblock from the OSD and fill in the fields | 478 | * Read the superblock from the OSD and fill in the fields |
289 | */ | 479 | */ |
@@ -292,24 +482,25 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
292 | struct inode *root; | 482 | struct inode *root; |
293 | struct exofs_mountopt *opts = data; | 483 | struct exofs_mountopt *opts = data; |
294 | struct exofs_sb_info *sbi; /*extended info */ | 484 | struct exofs_sb_info *sbi; /*extended info */ |
485 | struct osd_dev *od; /* Master device */ | ||
295 | struct exofs_fscb fscb; /*on-disk superblock info */ | 486 | struct exofs_fscb fscb; /*on-disk superblock info */ |
296 | struct osd_request *or = NULL; | ||
297 | struct osd_obj_id obj; | 487 | struct osd_obj_id obj; |
488 | unsigned table_count; | ||
298 | int ret; | 489 | int ret; |
299 | 490 | ||
300 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 491 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
301 | if (!sbi) | 492 | if (!sbi) |
302 | return -ENOMEM; | 493 | return -ENOMEM; |
303 | sb->s_fs_info = sbi; | ||
304 | 494 | ||
305 | /* use mount options to fill superblock */ | 495 | /* use mount options to fill superblock */ |
306 | sbi->s_dev = osduld_path_lookup(opts->dev_name); | 496 | od = osduld_path_lookup(opts->dev_name); |
307 | if (IS_ERR(sbi->s_dev)) { | 497 | if (IS_ERR(od)) { |
308 | ret = PTR_ERR(sbi->s_dev); | 498 | ret = PTR_ERR(od); |
309 | sbi->s_dev = NULL; | ||
310 | goto free_sbi; | 499 | goto free_sbi; |
311 | } | 500 | } |
312 | 501 | ||
502 | sbi->s_ods[0] = od; | ||
503 | sbi->s_numdevs = 1; | ||
313 | sbi->s_pid = opts->pid; | 504 | sbi->s_pid = opts->pid; |
314 | sbi->s_timeout = opts->timeout; | 505 | sbi->s_timeout = opts->timeout; |
315 | 506 | ||
@@ -323,35 +514,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
323 | sb->s_bdev = NULL; | 514 | sb->s_bdev = NULL; |
324 | sb->s_dev = 0; | 515 | sb->s_dev = 0; |
325 | 516 | ||
326 | /* read data from on-disk superblock object */ | ||
327 | obj.partition = sbi->s_pid; | 517 | obj.partition = sbi->s_pid; |
328 | obj.id = EXOFS_SUPER_ID; | 518 | obj.id = EXOFS_SUPER_ID; |
329 | exofs_make_credential(sbi->s_cred, &obj); | 519 | exofs_make_credential(sbi->s_cred, &obj); |
330 | 520 | ||
331 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 521 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); |
332 | if (unlikely(!or)) { | 522 | if (unlikely(ret)) |
333 | if (!silent) | ||
334 | EXOFS_ERR( | ||
335 | "exofs_fill_super: osd_start_request failed.\n"); | ||
336 | ret = -ENOMEM; | ||
337 | goto free_sbi; | ||
338 | } | ||
339 | ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb)); | ||
340 | if (unlikely(ret)) { | ||
341 | if (!silent) | ||
342 | EXOFS_ERR( | ||
343 | "exofs_fill_super: osd_req_read_kern failed.\n"); | ||
344 | ret = -ENOMEM; | ||
345 | goto free_sbi; | ||
346 | } | ||
347 | |||
348 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
349 | if (unlikely(ret)) { | ||
350 | if (!silent) | ||
351 | EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n"); | ||
352 | ret = -EIO; | ||
353 | goto free_sbi; | 523 | goto free_sbi; |
354 | } | ||
355 | 524 | ||
356 | sb->s_magic = le16_to_cpu(fscb.s_magic); | 525 | sb->s_magic = le16_to_cpu(fscb.s_magic); |
357 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); | 526 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); |
@@ -364,12 +533,26 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
364 | ret = -EINVAL; | 533 | ret = -EINVAL; |
365 | goto free_sbi; | 534 | goto free_sbi; |
366 | } | 535 | } |
536 | if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { | ||
537 | EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", | ||
538 | EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); | ||
539 | ret = -EINVAL; | ||
540 | goto free_sbi; | ||
541 | } | ||
367 | 542 | ||
368 | /* start generation numbers from a random point */ | 543 | /* start generation numbers from a random point */ |
369 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 544 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
370 | spin_lock_init(&sbi->s_next_gen_lock); | 545 | spin_lock_init(&sbi->s_next_gen_lock); |
371 | 546 | ||
547 | table_count = le64_to_cpu(fscb.s_dev_table_count); | ||
548 | if (table_count) { | ||
549 | ret = exofs_read_lookup_dev_table(&sbi, table_count); | ||
550 | if (unlikely(ret)) | ||
551 | goto free_sbi; | ||
552 | } | ||
553 | |||
372 | /* set up operation vectors */ | 554 | /* set up operation vectors */ |
555 | sb->s_fs_info = sbi; | ||
373 | sb->s_op = &exofs_sops; | 556 | sb->s_op = &exofs_sops; |
374 | sb->s_export_op = &exofs_export_ops; | 557 | sb->s_export_op = &exofs_export_ops; |
375 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); | 558 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); |
@@ -395,16 +578,15 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
395 | goto free_sbi; | 578 | goto free_sbi; |
396 | } | 579 | } |
397 | 580 | ||
398 | ret = 0; | 581 | _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0], |
399 | out: | 582 | sbi->s_pid); |
400 | if (or) | 583 | return 0; |
401 | osd_end_request(or); | ||
402 | return ret; | ||
403 | 584 | ||
404 | free_sbi: | 585 | free_sbi: |
405 | osduld_put_device(sbi->s_dev); /* NULL safe */ | 586 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", |
406 | kfree(sbi); | 587 | opts->dev_name, sbi->s_pid, ret); |
407 | goto out; | 588 | exofs_free_sbi(sbi); |
589 | return ret; | ||
408 | } | 590 | } |
409 | 591 | ||
410 | /* | 592 | /* |
@@ -433,7 +615,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
433 | { | 615 | { |
434 | struct super_block *sb = dentry->d_sb; | 616 | struct super_block *sb = dentry->d_sb; |
435 | struct exofs_sb_info *sbi = sb->s_fs_info; | 617 | struct exofs_sb_info *sbi = sb->s_fs_info; |
436 | struct osd_obj_id obj = {sbi->s_pid, 0}; | 618 | struct exofs_io_state *ios; |
437 | struct osd_attr attrs[] = { | 619 | struct osd_attr attrs[] = { |
438 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, | 620 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, |
439 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), | 621 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), |
@@ -442,32 +624,33 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
442 | }; | 624 | }; |
443 | uint64_t capacity = ULLONG_MAX; | 625 | uint64_t capacity = ULLONG_MAX; |
444 | uint64_t used = ULLONG_MAX; | 626 | uint64_t used = ULLONG_MAX; |
445 | struct osd_request *or; | ||
446 | uint8_t cred_a[OSD_CAP_LEN]; | 627 | uint8_t cred_a[OSD_CAP_LEN]; |
447 | int ret; | 628 | int ret; |
448 | 629 | ||
449 | /* get used/capacity attributes */ | 630 | ret = exofs_get_io_state(sbi, &ios); |
450 | exofs_make_credential(cred_a, &obj); | 631 | if (ret) { |
451 | 632 | EXOFS_DBGMSG("exofs_get_io_state failed.\n"); | |
452 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 633 | return ret; |
453 | if (unlikely(!or)) { | ||
454 | EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n"); | ||
455 | return -ENOMEM; | ||
456 | } | 634 | } |
457 | 635 | ||
458 | osd_req_get_attributes(or, &obj); | 636 | exofs_make_credential(cred_a, &ios->obj); |
459 | osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); | 637 | ios->cred = sbi->s_cred; |
460 | ret = exofs_sync_op(or, sbi->s_timeout, cred_a); | 638 | ios->in_attr = attrs; |
639 | ios->in_attr_len = ARRAY_SIZE(attrs); | ||
640 | |||
641 | ret = exofs_sbi_read(ios); | ||
461 | if (unlikely(ret)) | 642 | if (unlikely(ret)) |
462 | goto out; | 643 | goto out; |
463 | 644 | ||
464 | ret = extract_attr_from_req(or, &attrs[0]); | 645 | ret = extract_attr_from_ios(ios, &attrs[0]); |
465 | if (likely(!ret)) | 646 | if (likely(!ret)) { |
466 | capacity = get_unaligned_be64(attrs[0].val_ptr); | 647 | capacity = get_unaligned_be64(attrs[0].val_ptr); |
467 | else | 648 | if (unlikely(!capacity)) |
649 | capacity = ULLONG_MAX; | ||
650 | } else | ||
468 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); | 651 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); |
469 | 652 | ||
470 | ret = extract_attr_from_req(or, &attrs[1]); | 653 | ret = extract_attr_from_ios(ios, &attrs[1]); |
471 | if (likely(!ret)) | 654 | if (likely(!ret)) |
472 | used = get_unaligned_be64(attrs[1].val_ptr); | 655 | used = get_unaligned_be64(attrs[1].val_ptr); |
473 | else | 656 | else |
@@ -476,15 +659,15 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
476 | /* fill in the stats buffer */ | 659 | /* fill in the stats buffer */ |
477 | buf->f_type = EXOFS_SUPER_MAGIC; | 660 | buf->f_type = EXOFS_SUPER_MAGIC; |
478 | buf->f_bsize = EXOFS_BLKSIZE; | 661 | buf->f_bsize = EXOFS_BLKSIZE; |
479 | buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); | 662 | buf->f_blocks = capacity >> 9; |
480 | buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); | 663 | buf->f_bfree = (capacity - used) >> 9; |
481 | buf->f_bavail = buf->f_bfree; | 664 | buf->f_bavail = buf->f_bfree; |
482 | buf->f_files = sbi->s_numfiles; | 665 | buf->f_files = sbi->s_numfiles; |
483 | buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; | 666 | buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; |
484 | buf->f_namelen = EXOFS_NAME_LEN; | 667 | buf->f_namelen = EXOFS_NAME_LEN; |
485 | 668 | ||
486 | out: | 669 | out: |
487 | osd_end_request(or); | 670 | exofs_put_io_state(ios); |
488 | return ret; | 671 | return ret; |
489 | } | 672 | } |
490 | 673 | ||