diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2009-11-08 07:54:08 -0500 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2009-12-10 02:59:22 -0500 |
commit | 06886a5a3dc5a5abe0a4d257c26317bde7047be8 (patch) | |
tree | 858ac56e120c0473d764fc64a2660e6d79729c8c | |
parent | 8ce9bdd1fbe962933736d7977e972972cd5d754c (diff) |
exofs: Move all operations to an io_engine
In anticipation of multi-device operations, we separate osd operations
into an abstract I/O API. Currently only one device is used but later
when adding more devices, we will drive all devices in parallel according
to a "data_map" that describes how data is arranged on multiple devices.
The file system level operates, like before, as if there is one object
(inode-number) and an i_size. The io engine will split this to the same
object-number but on multiple devices.
At first we introduce a Mirror (raid 1) layout. But as the final outcome
we intend to fully implement the pNFS-Objects data-map, including
raid 0,4,5,6 over mirrored devices, over multiple device-groups. And
more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12
* Define an io_state based API for accessing osd storage devices
in an abstract way.
Usage:
First a caller allocates an io state with:
exofs_get_io_state(struct exofs_sb_info *sbi,
struct exofs_io_state** ios);
Then calls one of:
exofs_sbi_create(struct exofs_io_state *ios);
exofs_sbi_remove(struct exofs_io_state *ios);
exofs_sbi_write(struct exofs_io_state *ios);
exofs_sbi_read(struct exofs_io_state *ios);
exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
And when done
exofs_put_io_state(struct exofs_io_state *ios);
* Convert all source files to use this new API
* Convert from bio_alloc to bio_kmalloc
* In the io engine we make use of the now-fixed osd_req_decode_sense
There are no functional changes or on-disk additions after this patch.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
-rw-r--r-- | fs/exofs/common.h | 18 | ||||
-rw-r--r-- | fs/exofs/exofs.h | 87 | ||||
-rw-r--r-- | fs/exofs/inode.c | 383 | ||||
-rw-r--r-- | fs/exofs/ios.c | 386 | ||||
-rw-r--r-- | fs/exofs/super.c | 120 |
5 files changed, 644 insertions, 350 deletions
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index c6718e4817fe..ce1c71692599 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -155,22 +155,4 @@ enum { | |||
155 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ | 155 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ |
156 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) | 156 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) |
157 | 157 | ||
158 | /************************* | ||
159 | * function declarations * | ||
160 | *************************/ | ||
161 | /* osd.c */ | ||
162 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
163 | const struct osd_obj_id *obj); | ||
164 | |||
165 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); | ||
166 | static inline int exofs_check_ok(struct osd_request *or) | ||
167 | { | ||
168 | return exofs_check_ok_resid(or, NULL, NULL); | ||
169 | } | ||
170 | int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); | ||
171 | int exofs_async_op(struct osd_request *or, | ||
172 | osd_req_done_fn *async_done, void *caller_context, u8 *cred); | ||
173 | |||
174 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | ||
175 | |||
176 | #endif /*ifndef __EXOFS_COM_H__*/ | 158 | #endif /*ifndef __EXOFS_COM_H__*/ |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 5ec72e020b22..2e08859a89e8 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -30,14 +30,13 @@ | |||
30 | * along with exofs; if not, write to the Free Software | 30 | * along with exofs; if not, write to the Free Software |
31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
32 | */ | 32 | */ |
33 | #ifndef __EXOFS_H__ | ||
34 | #define __EXOFS_H__ | ||
33 | 35 | ||
34 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
35 | #include <linux/time.h> | 37 | #include <linux/time.h> |
36 | #include "common.h" | 38 | #include "common.h" |
37 | 39 | ||
38 | #ifndef __EXOFS_H__ | ||
39 | #define __EXOFS_H__ | ||
40 | |||
41 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) | 40 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) |
42 | 41 | ||
43 | #ifdef CONFIG_EXOFS_DEBUG | 42 | #ifdef CONFIG_EXOFS_DEBUG |
@@ -56,6 +55,7 @@ | |||
56 | */ | 55 | */ |
57 | struct exofs_sb_info { | 56 | struct exofs_sb_info { |
58 | struct osd_dev *s_dev; /* returned by get_osd_dev */ | 57 | struct osd_dev *s_dev; /* returned by get_osd_dev */ |
58 | struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ | ||
59 | osd_id s_pid; /* partition ID of file system*/ | 59 | osd_id s_pid; /* partition ID of file system*/ |
60 | int s_timeout; /* timeout for OSD operations */ | 60 | int s_timeout; /* timeout for OSD operations */ |
61 | uint64_t s_nextid; /* highest object ID used */ | 61 | uint64_t s_nextid; /* highest object ID used */ |
@@ -79,6 +79,50 @@ struct exofs_i_info { | |||
79 | struct inode vfs_inode; /* normal in-memory inode */ | 79 | struct inode vfs_inode; /* normal in-memory inode */ |
80 | }; | 80 | }; |
81 | 81 | ||
82 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | ||
83 | { | ||
84 | return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; | ||
85 | } | ||
86 | |||
87 | struct exofs_io_state; | ||
88 | typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); | ||
89 | |||
90 | struct exofs_io_state { | ||
91 | struct kref kref; | ||
92 | |||
93 | void *private; | ||
94 | exofs_io_done_fn done; | ||
95 | |||
96 | struct exofs_sb_info *sbi; | ||
97 | struct osd_obj_id obj; | ||
98 | u8 *cred; | ||
99 | |||
100 | /* Global read/write IO*/ | ||
101 | loff_t offset; | ||
102 | unsigned long length; | ||
103 | void *kern_buff; | ||
104 | struct bio *bio; | ||
105 | |||
106 | /* Attributes */ | ||
107 | unsigned in_attr_len; | ||
108 | struct osd_attr *in_attr; | ||
109 | unsigned out_attr_len; | ||
110 | struct osd_attr *out_attr; | ||
111 | |||
112 | /* Variable array of size numdevs */ | ||
113 | unsigned numdevs; | ||
114 | struct exofs_per_dev_state { | ||
115 | struct osd_request *or; | ||
116 | struct bio *bio; | ||
117 | } per_dev[]; | ||
118 | }; | ||
119 | |||
120 | static inline unsigned exofs_io_state_size(unsigned numdevs) | ||
121 | { | ||
122 | return sizeof(struct exofs_io_state) + | ||
123 | sizeof(struct exofs_per_dev_state) * numdevs; | ||
124 | } | ||
125 | |||
82 | /* | 126 | /* |
83 | * our inode flags | 127 | * our inode flags |
84 | */ | 128 | */ |
@@ -130,6 +174,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
130 | /************************* | 174 | /************************* |
131 | * function declarations * | 175 | * function declarations * |
132 | *************************/ | 176 | *************************/ |
177 | |||
178 | /* ios.c */ | ||
179 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
180 | const struct osd_obj_id *obj); | ||
181 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
182 | u64 offset, void *p, unsigned length); | ||
183 | |||
184 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); | ||
185 | void exofs_put_io_state(struct exofs_io_state *ios); | ||
186 | |||
187 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid); | ||
188 | |||
189 | int exofs_sbi_create(struct exofs_io_state *ios); | ||
190 | int exofs_sbi_remove(struct exofs_io_state *ios); | ||
191 | int exofs_sbi_write(struct exofs_io_state *ios); | ||
192 | int exofs_sbi_read(struct exofs_io_state *ios); | ||
193 | |||
194 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); | ||
195 | |||
196 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); | ||
197 | static inline int exofs_oi_write(struct exofs_i_info *oi, | ||
198 | struct exofs_io_state *ios) | ||
199 | { | ||
200 | ios->obj.id = exofs_oi_objno(oi); | ||
201 | ios->cred = oi->i_cred; | ||
202 | return exofs_sbi_write(ios); | ||
203 | } | ||
204 | |||
205 | static inline int exofs_oi_read(struct exofs_i_info *oi, | ||
206 | struct exofs_io_state *ios) | ||
207 | { | ||
208 | ios->obj.id = exofs_oi_objno(oi); | ||
209 | ios->cred = oi->i_cred; | ||
210 | return exofs_sbi_read(ios); | ||
211 | } | ||
212 | |||
133 | /* inode.c */ | 213 | /* inode.c */ |
134 | void exofs_truncate(struct inode *inode); | 214 | void exofs_truncate(struct inode *inode); |
135 | int exofs_setattr(struct dentry *, struct iattr *); | 215 | int exofs_setattr(struct dentry *, struct iattr *); |
@@ -169,6 +249,7 @@ extern const struct file_operations exofs_file_operations; | |||
169 | 249 | ||
170 | /* inode.c */ | 250 | /* inode.c */ |
171 | extern const struct address_space_operations exofs_aops; | 251 | extern const struct address_space_operations exofs_aops; |
252 | extern const struct osd_attr g_attr_logical_length; | ||
172 | 253 | ||
173 | /* namei.c */ | 254 | /* namei.c */ |
174 | extern const struct inode_operations exofs_dir_inode_operations; | 255 | extern const struct inode_operations exofs_dir_inode_operations; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 7bc71a7d30a8..7578950fd135 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -37,17 +37,18 @@ | |||
37 | 37 | ||
38 | #include "exofs.h" | 38 | #include "exofs.h" |
39 | 39 | ||
40 | #ifdef CONFIG_EXOFS_DEBUG | ||
41 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | ||
42 | #endif | ||
43 | |||
44 | #define EXOFS_DBGMSG2(M...) do {} while (0) | 40 | #define EXOFS_DBGMSG2(M...) do {} while (0) |
45 | 41 | ||
42 | enum { BIO_MAX_PAGES_KMALLOC = | ||
43 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
44 | }; | ||
45 | |||
46 | struct page_collect { | 46 | struct page_collect { |
47 | struct exofs_sb_info *sbi; | 47 | struct exofs_sb_info *sbi; |
48 | struct request_queue *req_q; | 48 | struct request_queue *req_q; |
49 | struct inode *inode; | 49 | struct inode *inode; |
50 | unsigned expected_pages; | 50 | unsigned expected_pages; |
51 | struct exofs_io_state *ios; | ||
51 | 52 | ||
52 | struct bio *bio; | 53 | struct bio *bio; |
53 | unsigned nr_pages; | 54 | unsigned nr_pages; |
@@ -56,7 +57,7 @@ struct page_collect { | |||
56 | }; | 57 | }; |
57 | 58 | ||
58 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 59 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
59 | struct inode *inode) | 60 | struct inode *inode) |
60 | { | 61 | { |
61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 62 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
62 | 63 | ||
@@ -65,13 +66,11 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | |||
65 | pcol->inode = inode; | 66 | pcol->inode = inode; |
66 | pcol->expected_pages = expected_pages; | 67 | pcol->expected_pages = expected_pages; |
67 | 68 | ||
69 | pcol->ios = NULL; | ||
68 | pcol->bio = NULL; | 70 | pcol->bio = NULL; |
69 | pcol->nr_pages = 0; | 71 | pcol->nr_pages = 0; |
70 | pcol->length = 0; | 72 | pcol->length = 0; |
71 | pcol->pg_first = -1; | 73 | pcol->pg_first = -1; |
72 | |||
73 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
74 | expected_pages); | ||
75 | } | 74 | } |
76 | 75 | ||
77 | static void _pcol_reset(struct page_collect *pcol) | 76 | static void _pcol_reset(struct page_collect *pcol) |
@@ -82,35 +81,49 @@ static void _pcol_reset(struct page_collect *pcol) | |||
82 | pcol->nr_pages = 0; | 81 | pcol->nr_pages = 0; |
83 | pcol->length = 0; | 82 | pcol->length = 0; |
84 | pcol->pg_first = -1; | 83 | pcol->pg_first = -1; |
85 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | 84 | pcol->ios = NULL; |
86 | pcol->inode->i_ino, pcol->expected_pages); | ||
87 | 85 | ||
88 | /* this is probably the end of the loop but in writes | 86 | /* this is probably the end of the loop but in writes |
89 | * it might not end here. don't be left with nothing | 87 | * it might not end here. don't be left with nothing |
90 | */ | 88 | */ |
91 | if (!pcol->expected_pages) | 89 | if (!pcol->expected_pages) |
92 | pcol->expected_pages = 128; | 90 | pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; |
93 | } | 91 | } |
94 | 92 | ||
95 | static int pcol_try_alloc(struct page_collect *pcol) | 93 | static int pcol_try_alloc(struct page_collect *pcol) |
96 | { | 94 | { |
97 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | 95 | int pages = min_t(unsigned, pcol->expected_pages, |
96 | BIO_MAX_PAGES_KMALLOC); | ||
97 | |||
98 | if (!pcol->ios) { /* First time allocate io_state */ | ||
99 | int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); | ||
100 | |||
101 | if (ret) | ||
102 | return ret; | ||
103 | } | ||
98 | 104 | ||
99 | for (; pages; pages >>= 1) { | 105 | for (; pages; pages >>= 1) { |
100 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | 106 | pcol->bio = bio_kmalloc(GFP_KERNEL, pages); |
101 | if (likely(pcol->bio)) | 107 | if (likely(pcol->bio)) |
102 | return 0; | 108 | return 0; |
103 | } | 109 | } |
104 | 110 | ||
105 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | 111 | EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", |
106 | pcol->expected_pages); | 112 | pcol->expected_pages); |
107 | return -ENOMEM; | 113 | return -ENOMEM; |
108 | } | 114 | } |
109 | 115 | ||
110 | static void pcol_free(struct page_collect *pcol) | 116 | static void pcol_free(struct page_collect *pcol) |
111 | { | 117 | { |
112 | bio_put(pcol->bio); | 118 | if (pcol->bio) { |
113 | pcol->bio = NULL; | 119 | bio_put(pcol->bio); |
120 | pcol->bio = NULL; | ||
121 | } | ||
122 | |||
123 | if (pcol->ios) { | ||
124 | exofs_put_io_state(pcol->ios); | ||
125 | pcol->ios = NULL; | ||
126 | } | ||
114 | } | 127 | } |
115 | 128 | ||
116 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | 129 | static int pcol_add_page(struct page_collect *pcol, struct page *page, |
@@ -163,22 +176,17 @@ static void update_write_page(struct page *page, int ret) | |||
163 | /* Called at the end of reads, to optionally unlock pages and update their | 176 | /* Called at the end of reads, to optionally unlock pages and update their |
164 | * status. | 177 | * status. |
165 | */ | 178 | */ |
166 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | 179 | static int __readpages_done(struct page_collect *pcol, bool do_unlock) |
167 | bool do_unlock) | ||
168 | { | 180 | { |
169 | struct bio_vec *bvec; | 181 | struct bio_vec *bvec; |
170 | int i; | 182 | int i; |
171 | u64 resid; | 183 | u64 resid; |
172 | u64 good_bytes; | 184 | u64 good_bytes; |
173 | u64 length = 0; | 185 | u64 length = 0; |
174 | int ret = exofs_check_ok_resid(or, &resid, NULL); | 186 | int ret = exofs_check_io(pcol->ios, &resid); |
175 | |||
176 | osd_end_request(or); | ||
177 | 187 | ||
178 | if (likely(!ret)) | 188 | if (likely(!ret)) |
179 | good_bytes = pcol->length; | 189 | good_bytes = pcol->length; |
180 | else if (!resid) | ||
181 | good_bytes = 0; | ||
182 | else | 190 | else |
183 | good_bytes = pcol->length - resid; | 191 | good_bytes = pcol->length - resid; |
184 | 192 | ||
@@ -216,13 +224,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |||
216 | } | 224 | } |
217 | 225 | ||
218 | /* callback of async reads */ | 226 | /* callback of async reads */ |
219 | static void readpages_done(struct osd_request *or, void *p) | 227 | static void readpages_done(struct exofs_io_state *ios, void *p) |
220 | { | 228 | { |
221 | struct page_collect *pcol = p; | 229 | struct page_collect *pcol = p; |
222 | 230 | ||
223 | __readpages_done(or, pcol, true); | 231 | __readpages_done(pcol, true); |
224 | atomic_dec(&pcol->sbi->s_curr_pending); | 232 | atomic_dec(&pcol->sbi->s_curr_pending); |
225 | kfree(p); | 233 | kfree(pcol); |
226 | } | 234 | } |
227 | 235 | ||
228 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | 236 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) |
@@ -240,17 +248,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | |||
240 | 248 | ||
241 | unlock_page(page); | 249 | unlock_page(page); |
242 | } | 250 | } |
243 | pcol_free(pcol); | ||
244 | } | 251 | } |
245 | 252 | ||
246 | static int read_exec(struct page_collect *pcol, bool is_sync) | 253 | static int read_exec(struct page_collect *pcol, bool is_sync) |
247 | { | 254 | { |
248 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 255 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
249 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 256 | struct exofs_io_state *ios = pcol->ios; |
250 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
251 | struct osd_request *or = NULL; | ||
252 | struct page_collect *pcol_copy = NULL; | 257 | struct page_collect *pcol_copy = NULL; |
253 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
254 | int ret; | 258 | int ret; |
255 | 259 | ||
256 | if (!pcol->bio) | 260 | if (!pcol->bio) |
@@ -259,17 +263,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
259 | /* see comment in _readpage() about sync reads */ | 263 | /* see comment in _readpage() about sync reads */ |
260 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | 264 | WARN_ON(is_sync && (pcol->nr_pages != 1)); |
261 | 265 | ||
262 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | 266 | ios->bio = pcol->bio; |
263 | if (unlikely(!or)) { | 267 | ios->length = pcol->length; |
264 | ret = -ENOMEM; | 268 | ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; |
265 | goto err; | ||
266 | } | ||
267 | |||
268 | osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); | ||
269 | 269 | ||
270 | if (is_sync) { | 270 | if (is_sync) { |
271 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | 271 | exofs_oi_read(oi, pcol->ios); |
272 | return __readpages_done(or, pcol, false); | 272 | return __readpages_done(pcol, false); |
273 | } | 273 | } |
274 | 274 | ||
275 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 275 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
@@ -279,14 +279,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
279 | } | 279 | } |
280 | 280 | ||
281 | *pcol_copy = *pcol; | 281 | *pcol_copy = *pcol; |
282 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | 282 | ios->done = readpages_done; |
283 | ios->private = pcol_copy; | ||
284 | ret = exofs_oi_read(oi, ios); | ||
283 | if (unlikely(ret)) | 285 | if (unlikely(ret)) |
284 | goto err; | 286 | goto err; |
285 | 287 | ||
286 | atomic_inc(&pcol->sbi->s_curr_pending); | 288 | atomic_inc(&pcol->sbi->s_curr_pending); |
287 | 289 | ||
288 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | 290 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", |
289 | obj.id, _LLU(i_start), pcol->length); | 291 | ios->obj.id, _LLU(ios->offset), pcol->length); |
290 | 292 | ||
291 | /* pages ownership was passed to pcol_copy */ | 293 | /* pages ownership was passed to pcol_copy */ |
292 | _pcol_reset(pcol); | 294 | _pcol_reset(pcol); |
@@ -295,12 +297,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
295 | err: | 297 | err: |
296 | if (!is_sync) | 298 | if (!is_sync) |
297 | _unlock_pcol_pages(pcol, ret, READ); | 299 | _unlock_pcol_pages(pcol, ret, READ); |
298 | else /* Pages unlocked by caller in sync mode only free bio */ | 300 | |
299 | pcol_free(pcol); | 301 | pcol_free(pcol); |
300 | 302 | ||
301 | kfree(pcol_copy); | 303 | kfree(pcol_copy); |
302 | if (or) | ||
303 | osd_end_request(or); | ||
304 | return ret; | 304 | return ret; |
305 | } | 305 | } |
306 | 306 | ||
@@ -421,9 +421,8 @@ static int _readpage(struct page *page, bool is_sync) | |||
421 | 421 | ||
422 | _pcol_init(&pcol, 1, page->mapping->host); | 422 | _pcol_init(&pcol, 1, page->mapping->host); |
423 | 423 | ||
424 | /* readpage_strip might call read_exec(,async) inside at several places | 424 | /* readpage_strip might call read_exec(,is_sync==false) at several |
425 | * but this is safe for is_async=0 since read_exec will not do anything | 425 | * places but not if we have a single page. |
426 | * when we have a single page. | ||
427 | */ | 426 | */ |
428 | ret = readpage_strip(&pcol, page); | 427 | ret = readpage_strip(&pcol, page); |
429 | if (ret) { | 428 | if (ret) { |
@@ -442,8 +441,8 @@ static int exofs_readpage(struct file *file, struct page *page) | |||
442 | return _readpage(page, false); | 441 | return _readpage(page, false); |
443 | } | 442 | } |
444 | 443 | ||
445 | /* Callback for osd_write. All writes are asynchronouse */ | 444 | /* Callback for osd_write. All writes are asynchronous */ |
446 | static void writepages_done(struct osd_request *or, void *p) | 445 | static void writepages_done(struct exofs_io_state *ios, void *p) |
447 | { | 446 | { |
448 | struct page_collect *pcol = p; | 447 | struct page_collect *pcol = p; |
449 | struct bio_vec *bvec; | 448 | struct bio_vec *bvec; |
@@ -451,16 +450,12 @@ static void writepages_done(struct osd_request *or, void *p) | |||
451 | u64 resid; | 450 | u64 resid; |
452 | u64 good_bytes; | 451 | u64 good_bytes; |
453 | u64 length = 0; | 452 | u64 length = 0; |
453 | int ret = exofs_check_io(ios, &resid); | ||
454 | 454 | ||
455 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
456 | |||
457 | osd_end_request(or); | ||
458 | atomic_dec(&pcol->sbi->s_curr_pending); | 455 | atomic_dec(&pcol->sbi->s_curr_pending); |
459 | 456 | ||
460 | if (likely(!ret)) | 457 | if (likely(!ret)) |
461 | good_bytes = pcol->length; | 458 | good_bytes = pcol->length; |
462 | else if (!resid) | ||
463 | good_bytes = 0; | ||
464 | else | 459 | else |
465 | good_bytes = pcol->length - resid; | 460 | good_bytes = pcol->length - resid; |
466 | 461 | ||
@@ -498,23 +493,13 @@ static void writepages_done(struct osd_request *or, void *p) | |||
498 | static int write_exec(struct page_collect *pcol) | 493 | static int write_exec(struct page_collect *pcol) |
499 | { | 494 | { |
500 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 495 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
501 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 496 | struct exofs_io_state *ios = pcol->ios; |
502 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
503 | struct osd_request *or = NULL; | ||
504 | struct page_collect *pcol_copy = NULL; | 497 | struct page_collect *pcol_copy = NULL; |
505 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
506 | int ret; | 498 | int ret; |
507 | 499 | ||
508 | if (!pcol->bio) | 500 | if (!pcol->bio) |
509 | return 0; | 501 | return 0; |
510 | 502 | ||
511 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
512 | if (unlikely(!or)) { | ||
513 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
514 | ret = -ENOMEM; | ||
515 | goto err; | ||
516 | } | ||
517 | |||
518 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 503 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
519 | if (!pcol_copy) { | 504 | if (!pcol_copy) { |
520 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | 505 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); |
@@ -525,16 +510,22 @@ static int write_exec(struct page_collect *pcol) | |||
525 | *pcol_copy = *pcol; | 510 | *pcol_copy = *pcol; |
526 | 511 | ||
527 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ | 512 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ |
528 | osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); | 513 | |
529 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | 514 | ios->bio = pcol_copy->bio; |
515 | ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; | ||
516 | ios->length = pcol_copy->length; | ||
517 | ios->done = writepages_done; | ||
518 | ios->private = pcol_copy; | ||
519 | |||
520 | ret = exofs_oi_write(oi, ios); | ||
530 | if (unlikely(ret)) { | 521 | if (unlikely(ret)) { |
531 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | 522 | EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); |
532 | goto err; | 523 | goto err; |
533 | } | 524 | } |
534 | 525 | ||
535 | atomic_inc(&pcol->sbi->s_curr_pending); | 526 | atomic_inc(&pcol->sbi->s_curr_pending); |
536 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | 527 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", |
537 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | 528 | pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), |
538 | pcol->length); | 529 | pcol->length); |
539 | /* pages ownership was passed to pcol_copy */ | 530 | /* pages ownership was passed to pcol_copy */ |
540 | _pcol_reset(pcol); | 531 | _pcol_reset(pcol); |
@@ -542,9 +533,9 @@ static int write_exec(struct page_collect *pcol) | |||
542 | 533 | ||
543 | err: | 534 | err: |
544 | _unlock_pcol_pages(pcol, ret, WRITE); | 535 | _unlock_pcol_pages(pcol, ret, WRITE); |
536 | pcol_free(pcol); | ||
545 | kfree(pcol_copy); | 537 | kfree(pcol_copy); |
546 | if (or) | 538 | |
547 | osd_end_request(or); | ||
548 | return ret; | 539 | return ret; |
549 | } | 540 | } |
550 | 541 | ||
@@ -588,6 +579,9 @@ static int writepage_strip(struct page *page, | |||
588 | if (PageError(page)) | 579 | if (PageError(page)) |
589 | ClearPageError(page); | 580 | ClearPageError(page); |
590 | unlock_page(page); | 581 | unlock_page(page); |
582 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " | ||
583 | "outside the limits\n", | ||
584 | inode->i_ino, page->index); | ||
591 | return 0; | 585 | return 0; |
592 | } | 586 | } |
593 | } | 587 | } |
@@ -602,6 +596,9 @@ try_again: | |||
602 | ret = write_exec(pcol); | 596 | ret = write_exec(pcol); |
603 | if (unlikely(ret)) | 597 | if (unlikely(ret)) |
604 | goto fail; | 598 | goto fail; |
599 | |||
600 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", | ||
601 | inode->i_ino, page->index); | ||
605 | goto try_again; | 602 | goto try_again; |
606 | } | 603 | } |
607 | 604 | ||
@@ -636,6 +633,8 @@ try_again: | |||
636 | return 0; | 633 | return 0; |
637 | 634 | ||
638 | fail: | 635 | fail: |
636 | EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", | ||
637 | inode->i_ino, page->index, ret); | ||
639 | set_bit(AS_EIO, &page->mapping->flags); | 638 | set_bit(AS_EIO, &page->mapping->flags); |
640 | unlock_page(page); | 639 | unlock_page(page); |
641 | return ret; | 640 | return ret; |
@@ -654,14 +653,17 @@ static int exofs_writepages(struct address_space *mapping, | |||
654 | wbc->range_end >> PAGE_CACHE_SHIFT; | 653 | wbc->range_end >> PAGE_CACHE_SHIFT; |
655 | 654 | ||
656 | if (start || end) | 655 | if (start || end) |
657 | expected_pages = min(end - start + 1, 32L); | 656 | expected_pages = end - start + 1; |
658 | else | 657 | else |
659 | expected_pages = mapping->nrpages; | 658 | expected_pages = mapping->nrpages; |
660 | 659 | ||
661 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | 660 | if (expected_pages < 32L) |
662 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | 661 | expected_pages = 32L; |
662 | |||
663 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " | ||
664 | "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", | ||
663 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | 665 | mapping->host->i_ino, wbc->range_start, wbc->range_end, |
664 | mapping->nrpages, start, end); | 666 | mapping->nrpages, start, end, expected_pages); |
665 | 667 | ||
666 | _pcol_init(&pcol, expected_pages, mapping->host); | 668 | _pcol_init(&pcol, expected_pages, mapping->host); |
667 | 669 | ||
@@ -773,19 +775,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock, | |||
773 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | 775 | const struct osd_attr g_attr_logical_length = ATTR_DEF( |
774 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | 776 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
775 | 777 | ||
778 | static int _do_truncate(struct inode *inode) | ||
779 | { | ||
780 | struct exofs_i_info *oi = exofs_i(inode); | ||
781 | loff_t isize = i_size_read(inode); | ||
782 | int ret; | ||
783 | |||
784 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
785 | |||
786 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
787 | |||
788 | ret = exofs_oi_truncate(oi, (u64)isize); | ||
789 | EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); | ||
790 | return ret; | ||
791 | } | ||
792 | |||
776 | /* | 793 | /* |
777 | * Truncate a file to the specified size - all we have to do is set the size | 794 | * Truncate a file to the specified size - all we have to do is set the size |
778 | * attribute. We make sure the object exists first. | 795 | * attribute. We make sure the object exists first. |
779 | */ | 796 | */ |
780 | void exofs_truncate(struct inode *inode) | 797 | void exofs_truncate(struct inode *inode) |
781 | { | 798 | { |
782 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
783 | struct exofs_i_info *oi = exofs_i(inode); | 799 | struct exofs_i_info *oi = exofs_i(inode); |
784 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
785 | struct osd_request *or; | ||
786 | struct osd_attr attr; | ||
787 | loff_t isize = i_size_read(inode); | ||
788 | __be64 newsize; | ||
789 | int ret; | 800 | int ret; |
790 | 801 | ||
791 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 802 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
@@ -795,22 +806,6 @@ void exofs_truncate(struct inode *inode) | |||
795 | return; | 806 | return; |
796 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 807 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
797 | return; | 808 | return; |
798 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
799 | |||
800 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
801 | |||
802 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
803 | if (unlikely(!or)) { | ||
804 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
805 | goto fail; | ||
806 | } | ||
807 | |||
808 | osd_req_set_attributes(or, &obj); | ||
809 | |||
810 | newsize = cpu_to_be64((u64)isize); | ||
811 | attr = g_attr_logical_length; | ||
812 | attr.val_ptr = &newsize; | ||
813 | osd_req_add_set_attr_list(or, &attr, 1); | ||
814 | 809 | ||
815 | /* if we are about to truncate an object, and it hasn't been | 810 | /* if we are about to truncate an object, and it hasn't been |
816 | * created yet, wait | 811 | * created yet, wait |
@@ -818,8 +813,7 @@ void exofs_truncate(struct inode *inode) | |||
818 | if (unlikely(wait_obj_created(oi))) | 813 | if (unlikely(wait_obj_created(oi))) |
819 | goto fail; | 814 | goto fail; |
820 | 815 | ||
821 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 816 | ret = _do_truncate(inode); |
822 | osd_end_request(or); | ||
823 | if (ret) | 817 | if (ret) |
824 | goto fail; | 818 | goto fail; |
825 | 819 | ||
@@ -849,66 +843,57 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
849 | 843 | ||
850 | /* | 844 | /* |
851 | * Read an inode from the OSD, and return it as is. We also return the size | 845 | * Read an inode from the OSD, and return it as is. We also return the size |
852 | * attribute in the 'sanity' argument if we got compiled with debugging turned | 846 | * attribute in the 'obj_size' argument. |
853 | * on. | ||
854 | */ | 847 | */ |
855 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | 848 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, |
856 | struct exofs_fcb *inode, uint64_t *sanity) | 849 | struct exofs_fcb *inode, uint64_t *obj_size) |
857 | { | 850 | { |
858 | struct exofs_sb_info *sbi = sb->s_fs_info; | 851 | struct exofs_sb_info *sbi = sb->s_fs_info; |
859 | struct osd_request *or; | 852 | struct osd_attr attrs[2]; |
860 | struct osd_attr attr; | 853 | struct exofs_io_state *ios; |
861 | struct osd_obj_id obj = {sbi->s_pid, | ||
862 | oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | ||
863 | int ret; | 854 | int ret; |
864 | 855 | ||
865 | exofs_make_credential(oi->i_cred, &obj); | 856 | *obj_size = ~0; |
866 | 857 | ret = exofs_get_io_state(sbi, &ios); | |
867 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 858 | if (unlikely(ret)) { |
868 | if (unlikely(!or)) { | 859 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
869 | EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | 860 | return ret; |
870 | return -ENOMEM; | ||
871 | } | 861 | } |
872 | osd_req_get_attributes(or, &obj); | ||
873 | 862 | ||
874 | /* we need the inode attribute */ | 863 | ios->obj.id = exofs_oi_objno(oi); |
875 | osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | 864 | exofs_make_credential(oi->i_cred, &ios->obj); |
865 | ios->cred = oi->i_cred; | ||
876 | 866 | ||
877 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 867 | attrs[0] = g_attr_inode_data; |
878 | /* we get the size attributes to do a sanity check */ | 868 | attrs[1] = g_attr_logical_length; |
879 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | 869 | ios->in_attr = attrs; |
880 | #endif | 870 | ios->in_attr_len = ARRAY_SIZE(attrs); |
881 | 871 | ||
882 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 872 | ret = exofs_sbi_read(ios); |
883 | if (ret) | 873 | if (ret) |
884 | goto out; | 874 | goto out; |
885 | 875 | ||
886 | attr = g_attr_inode_data; | 876 | ret = extract_attr_from_ios(ios, &attrs[0]); |
887 | ret = extract_attr_from_req(or, &attr); | ||
888 | if (ret) { | 877 | if (ret) { |
889 | EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | 878 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); |
890 | goto out; | 879 | goto out; |
891 | } | 880 | } |
881 | WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); | ||
882 | memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); | ||
892 | 883 | ||
893 | WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | 884 | ret = extract_attr_from_ios(ios, &attrs[1]); |
894 | memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | ||
895 | |||
896 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
897 | attr = g_attr_logical_length; | ||
898 | ret = extract_attr_from_req(or, &attr); | ||
899 | if (ret) { | 885 | if (ret) { |
900 | EXOFS_ERR("ERROR: extract attr from or failed\n"); | 886 | EXOFS_ERR("%s: extract_attr of logical_length failed\n", |
887 | __func__); | ||
901 | goto out; | 888 | goto out; |
902 | } | 889 | } |
903 | *sanity = get_unaligned_be64(attr.val_ptr); | 890 | *obj_size = get_unaligned_be64(attrs[1].val_ptr); |
904 | #endif | ||
905 | 891 | ||
906 | out: | 892 | out: |
907 | osd_end_request(or); | 893 | exofs_put_io_state(ios); |
908 | return ret; | 894 | return ret; |
909 | } | 895 | } |
910 | 896 | ||
911 | |||
912 | static void __oi_init(struct exofs_i_info *oi) | 897 | static void __oi_init(struct exofs_i_info *oi) |
913 | { | 898 | { |
914 | init_waitqueue_head(&oi->i_wq); | 899 | init_waitqueue_head(&oi->i_wq); |
@@ -922,7 +907,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
922 | struct exofs_i_info *oi; | 907 | struct exofs_i_info *oi; |
923 | struct exofs_fcb fcb; | 908 | struct exofs_fcb fcb; |
924 | struct inode *inode; | 909 | struct inode *inode; |
925 | uint64_t uninitialized_var(sanity); | 910 | uint64_t obj_size; |
926 | int ret; | 911 | int ret; |
927 | 912 | ||
928 | inode = iget_locked(sb, ino); | 913 | inode = iget_locked(sb, ino); |
@@ -934,7 +919,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
934 | __oi_init(oi); | 919 | __oi_init(oi); |
935 | 920 | ||
936 | /* read the inode from the osd */ | 921 | /* read the inode from the osd */ |
937 | ret = exofs_get_inode(sb, oi, &fcb, &sanity); | 922 | ret = exofs_get_inode(sb, oi, &fcb, &obj_size); |
938 | if (ret) | 923 | if (ret) |
939 | goto bad_inode; | 924 | goto bad_inode; |
940 | 925 | ||
@@ -955,13 +940,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
955 | inode->i_blkbits = EXOFS_BLKSHIFT; | 940 | inode->i_blkbits = EXOFS_BLKSHIFT; |
956 | inode->i_generation = le32_to_cpu(fcb.i_generation); | 941 | inode->i_generation = le32_to_cpu(fcb.i_generation); |
957 | 942 | ||
958 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 943 | if ((inode->i_size != obj_size) && |
959 | if ((inode->i_size != sanity) && | ||
960 | (!exofs_inode_is_fast_symlink(inode))) { | 944 | (!exofs_inode_is_fast_symlink(inode))) { |
961 | EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", | 945 | EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", |
962 | inode->i_size, _LLU(sanity)); | 946 | inode->i_size, _LLU(obj_size)); |
947 | /* FIXME: call exofs_inode_recovery() */ | ||
963 | } | 948 | } |
964 | #endif | ||
965 | 949 | ||
966 | oi->i_dir_start_lookup = 0; | 950 | oi->i_dir_start_lookup = 0; |
967 | 951 | ||
@@ -1027,23 +1011,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) | |||
1027 | * set the obj_created flag so that other methods know that the object exists on | 1011 | * set the obj_created flag so that other methods know that the object exists on |
1028 | * the OSD. | 1012 | * the OSD. |
1029 | */ | 1013 | */ |
1030 | static void create_done(struct osd_request *or, void *p) | 1014 | static void create_done(struct exofs_io_state *ios, void *p) |
1031 | { | 1015 | { |
1032 | struct inode *inode = p; | 1016 | struct inode *inode = p; |
1033 | struct exofs_i_info *oi = exofs_i(inode); | 1017 | struct exofs_i_info *oi = exofs_i(inode); |
1034 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 1018 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
1035 | int ret; | 1019 | int ret; |
1036 | 1020 | ||
1037 | ret = exofs_check_ok(or); | 1021 | ret = exofs_check_io(ios, NULL); |
1038 | osd_end_request(or); | 1022 | exofs_put_io_state(ios); |
1023 | |||
1039 | atomic_dec(&sbi->s_curr_pending); | 1024 | atomic_dec(&sbi->s_curr_pending); |
1040 | 1025 | ||
1041 | if (unlikely(ret)) { | 1026 | if (unlikely(ret)) { |
1042 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | 1027 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", |
1043 | _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | 1028 | _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); |
1044 | make_bad_inode(inode); | 1029 | /* TODO: When the FS is corrupted, creation can fail because the |
1045 | } else | 1030 | * object already exists. Get rid of this asynchronous creation; if |
1046 | set_obj_created(oi); | 1031 | * the object exists, increment the obj counter and try the next |
1032 | * object until we succeed. All these dangling objects will be made | ||
1033 | * into lost files by chkfs.exofs. | ||
1034 | */ | ||
1035 | } | ||
1036 | |||
1037 | set_obj_created(oi); | ||
1047 | 1038 | ||
1048 | atomic_dec(&inode->i_count); | 1039 | atomic_dec(&inode->i_count); |
1049 | wake_up(&oi->i_wq); | 1040 | wake_up(&oi->i_wq); |
@@ -1058,8 +1049,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1058 | struct inode *inode; | 1049 | struct inode *inode; |
1059 | struct exofs_i_info *oi; | 1050 | struct exofs_i_info *oi; |
1060 | struct exofs_sb_info *sbi; | 1051 | struct exofs_sb_info *sbi; |
1061 | struct osd_request *or; | 1052 | struct exofs_io_state *ios; |
1062 | struct osd_obj_id obj; | ||
1063 | int ret; | 1053 | int ret; |
1064 | 1054 | ||
1065 | sb = dir->i_sb; | 1055 | sb = dir->i_sb; |
@@ -1096,28 +1086,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1096 | 1086 | ||
1097 | mark_inode_dirty(inode); | 1087 | mark_inode_dirty(inode); |
1098 | 1088 | ||
1099 | obj.partition = sbi->s_pid; | 1089 | ret = exofs_get_io_state(sbi, &ios); |
1100 | obj.id = inode->i_ino + EXOFS_OBJ_OFF; | 1090 | if (unlikely(ret)) { |
1101 | exofs_make_credential(oi->i_cred, &obj); | 1091 | EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); |
1102 | 1092 | return ERR_PTR(ret); | |
1103 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1104 | if (unlikely(!or)) { | ||
1105 | EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | ||
1106 | return ERR_PTR(-ENOMEM); | ||
1107 | } | 1093 | } |
1108 | 1094 | ||
1109 | osd_req_create_object(or, &obj); | 1095 | ios->obj.id = exofs_oi_objno(oi); |
1096 | exofs_make_credential(oi->i_cred, &ios->obj); | ||
1110 | 1097 | ||
1111 | /* increment the refcount so that the inode will still be around when we | 1098 | /* increment the refcount so that the inode will still be around when we |
1112 | * reach the callback | 1099 | * reach the callback |
1113 | */ | 1100 | */ |
1114 | atomic_inc(&inode->i_count); | 1101 | atomic_inc(&inode->i_count); |
1115 | 1102 | ||
1116 | ret = exofs_async_op(or, create_done, inode, oi->i_cred); | 1103 | ios->done = create_done; |
1104 | ios->private = inode; | ||
1105 | ios->cred = oi->i_cred; | ||
1106 | ret = exofs_sbi_create(ios); | ||
1117 | if (ret) { | 1107 | if (ret) { |
1118 | atomic_dec(&inode->i_count); | 1108 | atomic_dec(&inode->i_count); |
1119 | osd_end_request(or); | 1109 | exofs_put_io_state(ios); |
1120 | return ERR_PTR(-EIO); | 1110 | return ERR_PTR(ret); |
1121 | } | 1111 | } |
1122 | atomic_inc(&sbi->s_curr_pending); | 1112 | atomic_inc(&sbi->s_curr_pending); |
1123 | 1113 | ||
@@ -1135,11 +1125,11 @@ struct updatei_args { | |||
1135 | /* | 1125 | /* |
1136 | * Callback function from exofs_update_inode(). | 1126 | * Callback function from exofs_update_inode(). |
1137 | */ | 1127 | */ |
1138 | static void updatei_done(struct osd_request *or, void *p) | 1128 | static void updatei_done(struct exofs_io_state *ios, void *p) |
1139 | { | 1129 | { |
1140 | struct updatei_args *args = p; | 1130 | struct updatei_args *args = p; |
1141 | 1131 | ||
1142 | osd_end_request(or); | 1132 | exofs_put_io_state(ios); |
1143 | 1133 | ||
1144 | atomic_dec(&args->sbi->s_curr_pending); | 1134 | atomic_dec(&args->sbi->s_curr_pending); |
1145 | 1135 | ||
@@ -1155,8 +1145,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1155 | struct exofs_i_info *oi = exofs_i(inode); | 1145 | struct exofs_i_info *oi = exofs_i(inode); |
1156 | struct super_block *sb = inode->i_sb; | 1146 | struct super_block *sb = inode->i_sb; |
1157 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1147 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1158 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1148 | struct exofs_io_state *ios; |
1159 | struct osd_request *or; | ||
1160 | struct osd_attr attr; | 1149 | struct osd_attr attr; |
1161 | struct exofs_fcb *fcb; | 1150 | struct exofs_fcb *fcb; |
1162 | struct updatei_args *args; | 1151 | struct updatei_args *args; |
@@ -1193,18 +1182,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1193 | } else | 1182 | } else |
1194 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | 1183 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); |
1195 | 1184 | ||
1196 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1185 | ret = exofs_get_io_state(sbi, &ios); |
1197 | if (unlikely(!or)) { | 1186 | if (unlikely(ret)) { |
1198 | EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | 1187 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
1199 | ret = -ENOMEM; | ||
1200 | goto free_args; | 1188 | goto free_args; |
1201 | } | 1189 | } |
1202 | 1190 | ||
1203 | osd_req_set_attributes(or, &obj); | ||
1204 | |||
1205 | attr = g_attr_inode_data; | 1191 | attr = g_attr_inode_data; |
1206 | attr.val_ptr = fcb; | 1192 | attr.val_ptr = fcb; |
1207 | osd_req_add_set_attr_list(or, &attr, 1); | 1193 | ios->out_attr_len = 1; |
1194 | ios->out_attr = &attr; | ||
1208 | 1195 | ||
1209 | if (!obj_created(oi)) { | 1196 | if (!obj_created(oi)) { |
1210 | EXOFS_DBGMSG("!obj_created\n"); | 1197 | EXOFS_DBGMSG("!obj_created\n"); |
@@ -1213,22 +1200,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1213 | EXOFS_DBGMSG("wait_event done\n"); | 1200 | EXOFS_DBGMSG("wait_event done\n"); |
1214 | } | 1201 | } |
1215 | 1202 | ||
1216 | if (do_sync) { | 1203 | if (!do_sync) { |
1217 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
1218 | osd_end_request(or); | ||
1219 | goto free_args; | ||
1220 | } else { | ||
1221 | args->sbi = sbi; | 1204 | args->sbi = sbi; |
1205 | ios->done = updatei_done; | ||
1206 | ios->private = args; | ||
1207 | } | ||
1222 | 1208 | ||
1223 | ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | 1209 | ret = exofs_oi_write(oi, ios); |
1224 | if (ret) { | 1210 | if (!do_sync && !ret) { |
1225 | osd_end_request(or); | ||
1226 | goto free_args; | ||
1227 | } | ||
1228 | atomic_inc(&sbi->s_curr_pending); | 1211 | atomic_inc(&sbi->s_curr_pending); |
1229 | goto out; /* deallocation in updatei_done */ | 1212 | goto out; /* deallocation in updatei_done */ |
1230 | } | 1213 | } |
1231 | 1214 | ||
1215 | exofs_put_io_state(ios); | ||
1232 | free_args: | 1216 | free_args: |
1233 | kfree(args); | 1217 | kfree(args); |
1234 | out: | 1218 | out: |
@@ -1245,11 +1229,12 @@ int exofs_write_inode(struct inode *inode, int wait) | |||
1245 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | 1229 | * Callback function from exofs_delete_inode() - don't have much cleaning up to |
1246 | * do. | 1230 | * do. |
1247 | */ | 1231 | */ |
1248 | static void delete_done(struct osd_request *or, void *p) | 1232 | static void delete_done(struct exofs_io_state *ios, void *p) |
1249 | { | 1233 | { |
1250 | struct exofs_sb_info *sbi; | 1234 | struct exofs_sb_info *sbi = p; |
1251 | osd_end_request(or); | 1235 | |
1252 | sbi = p; | 1236 | exofs_put_io_state(ios); |
1237 | |||
1253 | atomic_dec(&sbi->s_curr_pending); | 1238 | atomic_dec(&sbi->s_curr_pending); |
1254 | } | 1239 | } |
1255 | 1240 | ||
@@ -1263,8 +1248,7 @@ void exofs_delete_inode(struct inode *inode) | |||
1263 | struct exofs_i_info *oi = exofs_i(inode); | 1248 | struct exofs_i_info *oi = exofs_i(inode); |
1264 | struct super_block *sb = inode->i_sb; | 1249 | struct super_block *sb = inode->i_sb; |
1265 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1250 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1266 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1251 | struct exofs_io_state *ios; |
1267 | struct osd_request *or; | ||
1268 | int ret; | 1252 | int ret; |
1269 | 1253 | ||
1270 | truncate_inode_pages(&inode->i_data, 0); | 1254 | truncate_inode_pages(&inode->i_data, 0); |
@@ -1281,25 +1265,26 @@ void exofs_delete_inode(struct inode *inode) | |||
1281 | 1265 | ||
1282 | clear_inode(inode); | 1266 | clear_inode(inode); |
1283 | 1267 | ||
1284 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1268 | ret = exofs_get_io_state(sbi, &ios); |
1285 | if (unlikely(!or)) { | 1269 | if (unlikely(ret)) { |
1286 | EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | 1270 | EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); |
1287 | return; | 1271 | return; |
1288 | } | 1272 | } |
1289 | 1273 | ||
1290 | osd_req_remove_object(or, &obj); | ||
1291 | |||
1292 | /* if we are deleting an obj that hasn't been created yet, wait */ | 1274 | /* if we are deleting an obj that hasn't been created yet, wait */ |
1293 | if (!obj_created(oi)) { | 1275 | if (!obj_created(oi)) { |
1294 | BUG_ON(!obj_2bcreated(oi)); | 1276 | BUG_ON(!obj_2bcreated(oi)); |
1295 | wait_event(oi->i_wq, obj_created(oi)); | 1277 | wait_event(oi->i_wq, obj_created(oi)); |
1296 | } | 1278 | } |
1297 | 1279 | ||
1298 | ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | 1280 | ios->obj.id = exofs_oi_objno(oi); |
1281 | ios->done = delete_done; | ||
1282 | ios->private = sbi; | ||
1283 | ios->cred = oi->i_cred; | ||
1284 | ret = exofs_sbi_remove(ios); | ||
1299 | if (ret) { | 1285 | if (ret) { |
1300 | EXOFS_ERR( | 1286 | EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); |
1301 | "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | 1287 | exofs_put_io_state(ios); |
1302 | osd_end_request(or); | ||
1303 | return; | 1288 | return; |
1304 | } | 1289 | } |
1305 | atomic_inc(&sbi->s_curr_pending); | 1290 | atomic_inc(&sbi->s_curr_pending); |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 4372542df284..bb2f9d341fdf 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -23,88 +23,327 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <scsi/scsi_device.h> | 25 | #include <scsi/scsi_device.h> |
26 | #include <scsi/osd_sense.h> | ||
27 | 26 | ||
28 | #include "exofs.h" | 27 | #include "exofs.h" |
29 | 28 | ||
30 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | 29 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) |
31 | { | 30 | { |
32 | struct osd_sense_info osi; | 31 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); |
33 | int ret = osd_req_decode_sense(or, &osi); | 32 | } |
34 | 33 | ||
35 | if (ret) { /* translate to Linux codes */ | 34 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, |
36 | if (osi.additional_code == scsi_invalid_field_in_cdb) { | 35 | u64 offset, void *p, unsigned length) |
37 | if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | 36 | { |
38 | ret = -EFAULT; | 37 | struct osd_request *or = osd_start_request(od, GFP_KERNEL); |
39 | if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | 38 | /* struct osd_sense_info osi = {.key = 0};*/ |
40 | ret = -ENOENT; | 39 | int ret; |
41 | else | 40 | |
42 | ret = -EINVAL; | 41 | if (unlikely(!or)) { |
43 | } else if (osi.additional_code == osd_quota_error) | 42 | EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); |
44 | ret = -ENOSPC; | 43 | return -ENOMEM; |
45 | else | 44 | } |
46 | ret = -EIO; | 45 | ret = osd_req_read_kern(or, obj, offset, p, length); |
46 | if (unlikely(ret)) { | ||
47 | EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); | ||
48 | goto out; | ||
47 | } | 49 | } |
48 | 50 | ||
49 | /* FIXME: should be include in osd_sense_info */ | 51 | ret = osd_finalize_request(or, 0, cred, NULL); |
50 | if (in_resid) | 52 | if (unlikely(ret)) { |
51 | *in_resid = or->in.req ? or->in.req->resid_len : 0; | 53 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); |
54 | goto out; | ||
55 | } | ||
52 | 56 | ||
53 | if (out_resid) | 57 | ret = osd_execute_request(or); |
54 | *out_resid = or->out.req ? or->out.req->resid_len : 0; | 58 | if (unlikely(ret)) |
59 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
60 | /* osd_req_decode_sense(or, ret); */ | ||
55 | 61 | ||
62 | out: | ||
63 | osd_end_request(or); | ||
56 | return ret; | 64 | return ret; |
57 | } | 65 | } |
58 | 66 | ||
59 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | 67 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) |
60 | { | 68 | { |
61 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | 69 | struct exofs_io_state *ios; |
70 | |||
71 | /* TODO: Maybe use a kmem_cache per sbi, of size | ||
72 | * exofs_io_state_size(sbi->s_numdevs) | ||
73 | */ | ||
74 | ios = kzalloc(exofs_io_state_size(1), GFP_KERNEL); | ||
75 | if (unlikely(!ios)) { | ||
76 | *pios = NULL; | ||
77 | return -ENOMEM; | ||
78 | } | ||
79 | |||
80 | ios->sbi = sbi; | ||
81 | ios->obj.partition = sbi->s_pid; | ||
82 | *pios = ios; | ||
83 | return 0; | ||
62 | } | 84 | } |
63 | 85 | ||
64 | /* | 86 | void exofs_put_io_state(struct exofs_io_state *ios) |
65 | * Perform a synchronous OSD operation. | ||
66 | */ | ||
67 | int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
68 | { | 87 | { |
69 | int ret; | 88 | if (ios) { |
89 | unsigned i; | ||
70 | 90 | ||
71 | or->timeout = timeout; | 91 | for (i = 0; i < ios->numdevs; i++) { |
72 | ret = osd_finalize_request(or, 0, credential, NULL); | 92 | struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; |
73 | if (ret) { | 93 | |
74 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | 94 | if (per_dev->or) |
75 | return ret; | 95 | osd_end_request(per_dev->or); |
96 | if (per_dev->bio) | ||
97 | bio_put(per_dev->bio); | ||
98 | } | ||
99 | |||
100 | kfree(ios); | ||
76 | } | 101 | } |
102 | } | ||
77 | 103 | ||
78 | ret = osd_execute_request(or); | 104 | static void _sync_done(struct exofs_io_state *ios, void *p) |
105 | { | ||
106 | struct completion *waiting = p; | ||
79 | 107 | ||
80 | if (ret) | 108 | complete(waiting); |
81 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | 109 | } |
82 | /* osd_req_decode_sense(or, ret); */ | 110 | |
111 | static void _last_io(struct kref *kref) | ||
112 | { | ||
113 | struct exofs_io_state *ios = container_of( | ||
114 | kref, struct exofs_io_state, kref); | ||
115 | |||
116 | ios->done(ios, ios->private); | ||
117 | } | ||
118 | |||
119 | static void _done_io(struct osd_request *or, void *p) | ||
120 | { | ||
121 | struct exofs_io_state *ios = p; | ||
122 | |||
123 | kref_put(&ios->kref, _last_io); | ||
124 | } | ||
125 | |||
126 | static int exofs_io_execute(struct exofs_io_state *ios) | ||
127 | { | ||
128 | DECLARE_COMPLETION_ONSTACK(wait); | ||
129 | bool sync = (ios->done == NULL); | ||
130 | int i, ret; | ||
131 | |||
132 | if (sync) { | ||
133 | ios->done = _sync_done; | ||
134 | ios->private = &wait; | ||
135 | } | ||
136 | |||
137 | for (i = 0; i < ios->numdevs; i++) { | ||
138 | struct osd_request *or = ios->per_dev[i].or; | ||
139 | if (unlikely(!or)) | ||
140 | continue; | ||
141 | |||
142 | ret = osd_finalize_request(or, 0, ios->cred, NULL); | ||
143 | if (unlikely(ret)) { | ||
144 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", | ||
145 | ret); | ||
146 | return ret; | ||
147 | } | ||
148 | } | ||
149 | |||
150 | kref_init(&ios->kref); | ||
151 | |||
152 | for (i = 0; i < ios->numdevs; i++) { | ||
153 | struct osd_request *or = ios->per_dev[i].or; | ||
154 | if (unlikely(!or)) | ||
155 | continue; | ||
156 | |||
157 | kref_get(&ios->kref); | ||
158 | osd_execute_request_async(or, _done_io, ios); | ||
159 | } | ||
160 | |||
161 | kref_put(&ios->kref, _last_io); | ||
162 | ret = 0; | ||
163 | |||
164 | if (sync) { | ||
165 | wait_for_completion(&wait); | ||
166 | ret = exofs_check_io(ios, NULL); | ||
167 | } | ||
83 | return ret; | 168 | return ret; |
84 | } | 169 | } |
85 | 170 | ||
86 | /* | 171 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid) |
87 | * Perform an asynchronous OSD operation. | ||
88 | */ | ||
89 | int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
90 | void *caller_context, u8 *cred) | ||
91 | { | 172 | { |
92 | int ret; | 173 | enum osd_err_priority acumulated_osd_err = 0; |
174 | int acumulated_lin_err = 0; | ||
175 | int i; | ||
93 | 176 | ||
94 | ret = osd_finalize_request(or, 0, cred, NULL); | 177 | for (i = 0; i < ios->numdevs; i++) { |
95 | if (ret) { | 178 | struct osd_sense_info osi; |
96 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | 179 | int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); |
97 | return ret; | 180 | |
181 | if (likely(!ret)) | ||
182 | continue; | ||
183 | |||
184 | if (unlikely(ret == -EFAULT)) { | ||
185 | EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); | ||
186 | /*FIXME: All the pages in this device range should: | ||
187 | * clear_highpage(page); | ||
188 | */ | ||
189 | } | ||
190 | |||
191 | if (osi.osd_err_pri >= acumulated_osd_err) { | ||
192 | acumulated_osd_err = osi.osd_err_pri; | ||
193 | acumulated_lin_err = ret; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | /* TODO: raid specific residual calculations */ | ||
198 | if (resid) { | ||
199 | if (likely(!acumulated_lin_err)) | ||
200 | *resid = 0; | ||
201 | else | ||
202 | *resid = ios->length; | ||
203 | } | ||
204 | |||
205 | return acumulated_lin_err; | ||
206 | } | ||
207 | |||
208 | int exofs_sbi_create(struct exofs_io_state *ios) | ||
209 | { | ||
210 | int i, ret; | ||
211 | |||
212 | for (i = 0; i < 1; i++) { | ||
213 | struct osd_request *or; | ||
214 | |||
215 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
216 | if (unlikely(!or)) { | ||
217 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
218 | ret = -ENOMEM; | ||
219 | goto out; | ||
220 | } | ||
221 | ios->per_dev[i].or = or; | ||
222 | ios->numdevs++; | ||
223 | |||
224 | osd_req_create_object(or, &ios->obj); | ||
225 | } | ||
226 | ret = exofs_io_execute(ios); | ||
227 | |||
228 | out: | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | int exofs_sbi_remove(struct exofs_io_state *ios) | ||
233 | { | ||
234 | int i, ret; | ||
235 | |||
236 | for (i = 0; i < 1; i++) { | ||
237 | struct osd_request *or; | ||
238 | |||
239 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
240 | if (unlikely(!or)) { | ||
241 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
242 | ret = -ENOMEM; | ||
243 | goto out; | ||
244 | } | ||
245 | ios->per_dev[i].or = or; | ||
246 | ios->numdevs++; | ||
247 | |||
248 | osd_req_remove_object(or, &ios->obj); | ||
249 | } | ||
250 | ret = exofs_io_execute(ios); | ||
251 | |||
252 | out: | ||
253 | return ret; | ||
254 | } | ||
255 | |||
256 | int exofs_sbi_write(struct exofs_io_state *ios) | ||
257 | { | ||
258 | int i, ret; | ||
259 | |||
260 | for (i = 0; i < 1; i++) { | ||
261 | struct osd_request *or; | ||
262 | |||
263 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
264 | if (unlikely(!or)) { | ||
265 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
266 | ret = -ENOMEM; | ||
267 | goto out; | ||
268 | } | ||
269 | ios->per_dev[i].or = or; | ||
270 | ios->numdevs++; | ||
271 | |||
272 | if (ios->bio) { | ||
273 | struct bio *bio; | ||
274 | |||
275 | bio = ios->bio; | ||
276 | |||
277 | osd_req_write(or, &ios->obj, ios->offset, bio, | ||
278 | ios->length); | ||
279 | /* EXOFS_DBGMSG("write sync=%d\n", sync);*/ | ||
280 | } else if (ios->kern_buff) { | ||
281 | osd_req_write_kern(or, &ios->obj, ios->offset, | ||
282 | ios->kern_buff, ios->length); | ||
283 | /* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ | ||
284 | } else { | ||
285 | osd_req_set_attributes(or, &ios->obj); | ||
286 | /* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ | ||
287 | } | ||
288 | |||
289 | if (ios->out_attr) | ||
290 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
291 | ios->out_attr_len); | ||
292 | |||
293 | if (ios->in_attr) | ||
294 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
295 | ios->in_attr_len); | ||
98 | } | 296 | } |
297 | ret = exofs_io_execute(ios); | ||
298 | |||
299 | out: | ||
300 | return ret; | ||
301 | } | ||
302 | |||
303 | int exofs_sbi_read(struct exofs_io_state *ios) | ||
304 | { | ||
305 | int i, ret; | ||
306 | |||
307 | for (i = 0; i < 1; i++) { | ||
308 | struct osd_request *or; | ||
309 | |||
310 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
311 | if (unlikely(!or)) { | ||
312 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
313 | ret = -ENOMEM; | ||
314 | goto out; | ||
315 | } | ||
316 | ios->per_dev[i].or = or; | ||
317 | ios->numdevs++; | ||
318 | |||
319 | if (ios->bio) { | ||
320 | osd_req_read(or, &ios->obj, ios->offset, ios->bio, | ||
321 | ios->length); | ||
322 | /* EXOFS_DBGMSG("read sync=%d\n", sync);*/ | ||
323 | } else if (ios->kern_buff) { | ||
324 | osd_req_read_kern(or, &ios->obj, ios->offset, | ||
325 | ios->kern_buff, ios->length); | ||
326 | /* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ | ||
327 | } else { | ||
328 | osd_req_get_attributes(or, &ios->obj); | ||
329 | /* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ | ||
330 | } | ||
331 | |||
332 | if (ios->out_attr) | ||
333 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
334 | ios->out_attr_len); | ||
99 | 335 | ||
100 | ret = osd_execute_request_async(or, async_done, caller_context); | 336 | if (ios->in_attr) |
337 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
338 | ios->in_attr_len); | ||
339 | } | ||
340 | ret = exofs_io_execute(ios); | ||
101 | 341 | ||
102 | if (ret) | 342 | out: |
103 | EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | ||
104 | return ret; | 343 | return ret; |
105 | } | 344 | } |
106 | 345 | ||
107 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | 346 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) |
108 | { | 347 | { |
109 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | 348 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ |
110 | void *iter = NULL; | 349 | void *iter = NULL; |
@@ -112,7 +351,8 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | |||
112 | 351 | ||
113 | do { | 352 | do { |
114 | nelem = 1; | 353 | nelem = 1; |
115 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | 354 | osd_req_decode_get_attr_list(ios->per_dev[0].or, |
355 | &cur_attr, &nelem, &iter); | ||
116 | if ((cur_attr.attr_page == attr->attr_page) && | 356 | if ((cur_attr.attr_page == attr->attr_page) && |
117 | (cur_attr.attr_id == attr->attr_id)) { | 357 | (cur_attr.attr_id == attr->attr_id)) { |
118 | attr->len = cur_attr.len; | 358 | attr->len = cur_attr.len; |
@@ -123,3 +363,43 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | |||
123 | 363 | ||
124 | return -EIO; | 364 | return -EIO; |
125 | } | 365 | } |
366 | |||
367 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | ||
368 | { | ||
369 | struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; | ||
370 | struct exofs_io_state *ios; | ||
371 | struct osd_attr attr; | ||
372 | __be64 newsize; | ||
373 | int i, ret; | ||
374 | |||
375 | if (exofs_get_io_state(sbi, &ios)) | ||
376 | return -ENOMEM; | ||
377 | |||
378 | ios->obj.id = exofs_oi_objno(oi); | ||
379 | ios->cred = oi->i_cred; | ||
380 | |||
381 | newsize = cpu_to_be64(size); | ||
382 | attr = g_attr_logical_length; | ||
383 | attr.val_ptr = &newsize; | ||
384 | |||
385 | for (i = 0; i < 1; i++) { | ||
386 | struct osd_request *or; | ||
387 | |||
388 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
389 | if (unlikely(!or)) { | ||
390 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
391 | ret = -ENOMEM; | ||
392 | goto out; | ||
393 | } | ||
394 | ios->per_dev[i].or = or; | ||
395 | ios->numdevs++; | ||
396 | |||
397 | osd_req_set_attributes(or, &ios->obj); | ||
398 | osd_req_add_set_attr_list(or, &attr, 1); | ||
399 | } | ||
400 | ret = exofs_io_execute(ios); | ||
401 | |||
402 | out: | ||
403 | exofs_put_io_state(ios); | ||
404 | return ret; | ||
405 | } | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 28add3eac0a4..4cd97f526d49 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -203,49 +203,40 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
203 | { | 203 | { |
204 | struct exofs_sb_info *sbi; | 204 | struct exofs_sb_info *sbi; |
205 | struct exofs_fscb *fscb; | 205 | struct exofs_fscb *fscb; |
206 | struct osd_request *or; | 206 | struct exofs_io_state *ios; |
207 | struct osd_obj_id obj; | ||
208 | int ret = -ENOMEM; | 207 | int ret = -ENOMEM; |
209 | 208 | ||
210 | fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); | ||
211 | if (!fscb) { | ||
212 | EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); | ||
213 | return -ENOMEM; | ||
214 | } | ||
215 | |||
216 | lock_super(sb); | 209 | lock_super(sb); |
217 | sbi = sb->s_fs_info; | 210 | sbi = sb->s_fs_info; |
211 | fscb = &sbi->s_fscb; | ||
212 | |||
213 | ret = exofs_get_io_state(sbi, &ios); | ||
214 | if (ret) | ||
215 | goto out; | ||
216 | |||
217 | ios->length = sizeof(*fscb); | ||
218 | memset(fscb, 0, ios->length); | ||
218 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 219 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
219 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); | 220 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); |
220 | fscb->s_magic = cpu_to_le16(sb->s_magic); | 221 | fscb->s_magic = cpu_to_le16(sb->s_magic); |
221 | fscb->s_newfs = 0; | 222 | fscb->s_newfs = 0; |
222 | 223 | ||
223 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 224 | ios->obj.id = EXOFS_SUPER_ID; |
224 | if (unlikely(!or)) { | 225 | ios->offset = 0; |
225 | EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); | 226 | ios->kern_buff = fscb; |
226 | goto out; | 227 | ios->cred = sbi->s_cred; |
227 | } | ||
228 | |||
229 | obj.partition = sbi->s_pid; | ||
230 | obj.id = EXOFS_SUPER_ID; | ||
231 | ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb)); | ||
232 | if (unlikely(ret)) { | ||
233 | EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); | ||
234 | goto out; | ||
235 | } | ||
236 | 228 | ||
237 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | 229 | ret = exofs_sbi_write(ios); |
238 | if (unlikely(ret)) { | 230 | if (unlikely(ret)) { |
239 | EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); | 231 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); |
240 | goto out; | 232 | goto out; |
241 | } | 233 | } |
242 | sb->s_dirt = 0; | 234 | sb->s_dirt = 0; |
243 | 235 | ||
244 | out: | 236 | out: |
245 | if (or) | 237 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
246 | osd_end_request(or); | 238 | exofs_put_io_state(ios); |
247 | unlock_super(sb); | 239 | unlock_super(sb); |
248 | kfree(fscb); | ||
249 | return ret; | 240 | return ret; |
250 | } | 241 | } |
251 | 242 | ||
@@ -302,24 +293,23 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
302 | struct inode *root; | 293 | struct inode *root; |
303 | struct exofs_mountopt *opts = data; | 294 | struct exofs_mountopt *opts = data; |
304 | struct exofs_sb_info *sbi; /*extended info */ | 295 | struct exofs_sb_info *sbi; /*extended info */ |
296 | struct osd_dev *od; /* Master device */ | ||
305 | struct exofs_fscb fscb; /*on-disk superblock info */ | 297 | struct exofs_fscb fscb; /*on-disk superblock info */ |
306 | struct osd_request *or = NULL; | ||
307 | struct osd_obj_id obj; | 298 | struct osd_obj_id obj; |
308 | int ret; | 299 | int ret; |
309 | 300 | ||
310 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 301 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
311 | if (!sbi) | 302 | if (!sbi) |
312 | return -ENOMEM; | 303 | return -ENOMEM; |
313 | sb->s_fs_info = sbi; | ||
314 | 304 | ||
315 | /* use mount options to fill superblock */ | 305 | /* use mount options to fill superblock */ |
316 | sbi->s_dev = osduld_path_lookup(opts->dev_name); | 306 | od = osduld_path_lookup(opts->dev_name); |
317 | if (IS_ERR(sbi->s_dev)) { | 307 | if (IS_ERR(od)) { |
318 | ret = PTR_ERR(sbi->s_dev); | 308 | ret = PTR_ERR(od); |
319 | sbi->s_dev = NULL; | ||
320 | goto free_sbi; | 309 | goto free_sbi; |
321 | } | 310 | } |
322 | 311 | ||
312 | sbi->s_dev = od; | ||
323 | sbi->s_pid = opts->pid; | 313 | sbi->s_pid = opts->pid; |
324 | sbi->s_timeout = opts->timeout; | 314 | sbi->s_timeout = opts->timeout; |
325 | 315 | ||
@@ -333,35 +323,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
333 | sb->s_bdev = NULL; | 323 | sb->s_bdev = NULL; |
334 | sb->s_dev = 0; | 324 | sb->s_dev = 0; |
335 | 325 | ||
336 | /* read data from on-disk superblock object */ | ||
337 | obj.partition = sbi->s_pid; | 326 | obj.partition = sbi->s_pid; |
338 | obj.id = EXOFS_SUPER_ID; | 327 | obj.id = EXOFS_SUPER_ID; |
339 | exofs_make_credential(sbi->s_cred, &obj); | 328 | exofs_make_credential(sbi->s_cred, &obj); |
340 | 329 | ||
341 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 330 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); |
342 | if (unlikely(!or)) { | 331 | if (unlikely(ret)) |
343 | if (!silent) | ||
344 | EXOFS_ERR( | ||
345 | "exofs_fill_super: osd_start_request failed.\n"); | ||
346 | ret = -ENOMEM; | ||
347 | goto free_sbi; | ||
348 | } | ||
349 | ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb)); | ||
350 | if (unlikely(ret)) { | ||
351 | if (!silent) | ||
352 | EXOFS_ERR( | ||
353 | "exofs_fill_super: osd_req_read_kern failed.\n"); | ||
354 | ret = -ENOMEM; | ||
355 | goto free_sbi; | ||
356 | } | ||
357 | |||
358 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
359 | if (unlikely(ret)) { | ||
360 | if (!silent) | ||
361 | EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n"); | ||
362 | ret = -EIO; | ||
363 | goto free_sbi; | 332 | goto free_sbi; |
364 | } | ||
365 | 333 | ||
366 | sb->s_magic = le16_to_cpu(fscb.s_magic); | 334 | sb->s_magic = le16_to_cpu(fscb.s_magic); |
367 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); | 335 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); |
@@ -380,6 +348,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
380 | spin_lock_init(&sbi->s_next_gen_lock); | 348 | spin_lock_init(&sbi->s_next_gen_lock); |
381 | 349 | ||
382 | /* set up operation vectors */ | 350 | /* set up operation vectors */ |
351 | sb->s_fs_info = sbi; | ||
383 | sb->s_op = &exofs_sops; | 352 | sb->s_op = &exofs_sops; |
384 | sb->s_export_op = &exofs_export_ops; | 353 | sb->s_export_op = &exofs_export_ops; |
385 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); | 354 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); |
@@ -406,16 +375,14 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
406 | } | 375 | } |
407 | 376 | ||
408 | _exofs_print_device("Mounting", opts->dev_name, sbi->s_dev, sbi->s_pid); | 377 | _exofs_print_device("Mounting", opts->dev_name, sbi->s_dev, sbi->s_pid); |
409 | ret = 0; | 378 | return 0; |
410 | out: | ||
411 | if (or) | ||
412 | osd_end_request(or); | ||
413 | return ret; | ||
414 | 379 | ||
415 | free_sbi: | 380 | free_sbi: |
381 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", | ||
382 | opts->dev_name, sbi->s_pid, ret); | ||
416 | osduld_put_device(sbi->s_dev); /* NULL safe */ | 383 | osduld_put_device(sbi->s_dev); /* NULL safe */ |
417 | kfree(sbi); | 384 | kfree(sbi); |
418 | goto out; | 385 | return ret; |
419 | } | 386 | } |
420 | 387 | ||
421 | /* | 388 | /* |
@@ -444,7 +411,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
444 | { | 411 | { |
445 | struct super_block *sb = dentry->d_sb; | 412 | struct super_block *sb = dentry->d_sb; |
446 | struct exofs_sb_info *sbi = sb->s_fs_info; | 413 | struct exofs_sb_info *sbi = sb->s_fs_info; |
447 | struct osd_obj_id obj = {sbi->s_pid, 0}; | 414 | struct exofs_io_state *ios; |
448 | struct osd_attr attrs[] = { | 415 | struct osd_attr attrs[] = { |
449 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, | 416 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, |
450 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), | 417 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), |
@@ -453,26 +420,25 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
453 | }; | 420 | }; |
454 | uint64_t capacity = ULLONG_MAX; | 421 | uint64_t capacity = ULLONG_MAX; |
455 | uint64_t used = ULLONG_MAX; | 422 | uint64_t used = ULLONG_MAX; |
456 | struct osd_request *or; | ||
457 | uint8_t cred_a[OSD_CAP_LEN]; | 423 | uint8_t cred_a[OSD_CAP_LEN]; |
458 | int ret; | 424 | int ret; |
459 | 425 | ||
460 | /* get used/capacity attributes */ | 426 | ret = exofs_get_io_state(sbi, &ios); |
461 | exofs_make_credential(cred_a, &obj); | 427 | if (ret) { |
462 | 428 | EXOFS_DBGMSG("exofs_get_io_state failed.\n"); | |
463 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 429 | return ret; |
464 | if (unlikely(!or)) { | ||
465 | EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n"); | ||
466 | return -ENOMEM; | ||
467 | } | 430 | } |
468 | 431 | ||
469 | osd_req_get_attributes(or, &obj); | 432 | exofs_make_credential(cred_a, &ios->obj); |
470 | osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); | 433 | ios->cred = sbi->s_cred; |
471 | ret = exofs_sync_op(or, sbi->s_timeout, cred_a); | 434 | ios->in_attr = attrs; |
435 | ios->in_attr_len = ARRAY_SIZE(attrs); | ||
436 | |||
437 | ret = exofs_sbi_read(ios); | ||
472 | if (unlikely(ret)) | 438 | if (unlikely(ret)) |
473 | goto out; | 439 | goto out; |
474 | 440 | ||
475 | ret = extract_attr_from_req(or, &attrs[0]); | 441 | ret = extract_attr_from_ios(ios, &attrs[0]); |
476 | if (likely(!ret)) { | 442 | if (likely(!ret)) { |
477 | capacity = get_unaligned_be64(attrs[0].val_ptr); | 443 | capacity = get_unaligned_be64(attrs[0].val_ptr); |
478 | if (unlikely(!capacity)) | 444 | if (unlikely(!capacity)) |
@@ -480,7 +446,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
480 | } else | 446 | } else |
481 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); | 447 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); |
482 | 448 | ||
483 | ret = extract_attr_from_req(or, &attrs[1]); | 449 | ret = extract_attr_from_ios(ios, &attrs[1]); |
484 | if (likely(!ret)) | 450 | if (likely(!ret)) |
485 | used = get_unaligned_be64(attrs[1].val_ptr); | 451 | used = get_unaligned_be64(attrs[1].val_ptr); |
486 | else | 452 | else |
@@ -497,7 +463,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
497 | buf->f_namelen = EXOFS_NAME_LEN; | 463 | buf->f_namelen = EXOFS_NAME_LEN; |
498 | 464 | ||
499 | out: | 465 | out: |
500 | osd_end_request(or); | 466 | exofs_put_io_state(ios); |
501 | return ret; | 467 | return ret; |
502 | } | 468 | } |
503 | 469 | ||