aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-10 12:32:24 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-10 12:32:24 -0500
commita5eba3f66f812cbc076a1170b3f888ad63f850b2 (patch)
tree32bcbbc77e2bc1c04c5ed577ff8f24612148631c /fs
parentfc1495bf99de6f65066b3234813180301ff8b693 (diff)
parent04dc1e88ad9c9f9639019e9646a89ce0ebf706bb (diff)
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd: exofs: Multi-device mirror support exofs: Move all operations to an io_engine exofs: move osd.c to ios.c exofs: statfs blocks is sectors not FS blocks exofs: Prints on mount and unmout exofs: refactor exofs_i_info initialization into common helper exofs: dbg-print less exofs: More sane debug print trivial: some small fixes in exofs documentation
Diffstat (limited to 'fs')
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/common.h81
-rw-r--r--fs/exofs/exofs.h97
-rw-r--r--fs/exofs/inode.c409
-rw-r--r--fs/exofs/ios.c421
-rw-r--r--fs/exofs/osd.c125
-rw-r--r--fs/exofs/pnfs.h51
-rw-r--r--fs/exofs/super.c353
8 files changed, 1093 insertions, 446 deletions
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index cc2d22db119c..2d0f757fda3e 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,5 @@
12# Kbuild - Gets included from the Kernels Makefile and build system 12# Kbuild - Gets included from the Kernels Makefile and build system
13# 13#
14 14
15exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o 15exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
16obj-$(CONFIG_EXOFS_FS) += exofs.o 16obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index c6718e4817fe..b1b178e61718 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -49,6 +49,7 @@
49#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ 49#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */
50#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ 50#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */
51#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ 51#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */
52#define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */
52#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
53 54
54/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
@@ -78,17 +79,67 @@ enum {
78#define EXOFS_SUPER_MAGIC 0x5DF5 79#define EXOFS_SUPER_MAGIC 0x5DF5
79 80
80/* 81/*
81 * The file system control block - stored in an object's data (mainly, the one 82 * The file system control block - stored in object EXOFS_SUPER_ID's data.
82 * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored 83 * This is where the in-memory superblock is stored on disk.
83 * on disk. Right now it just has a magic value, which is basically a sanity
84 * check on our ability to communicate with the object store.
85 */ 84 */
85enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
86struct exofs_fscb { 86struct exofs_fscb {
87 __le64 s_nextid; /* Highest object ID used */ 87 __le64 s_nextid; /* Highest object ID used */
88 __le32 s_numfiles; /* Number of files on fs */ 88 __le64 s_numfiles; /* Number of files on fs */
89 __le32 s_version; /* == EXOFS_FSCB_VER */
89 __le16 s_magic; /* Magic signature */ 90 __le16 s_magic; /* Magic signature */
90 __le16 s_newfs; /* Non-zero if this is a new fs */ 91 __le16 s_newfs; /* Non-zero if this is a new fs */
91}; 92
93 /* From here on it's a static part, only written by mkexofs */
94 __le64 s_dev_table_oid; /* Reserved, not used */
95 __le64 s_dev_table_count; /* == 0 means no dev_table */
96} __packed;
97
98/*
99 * Describes the raid used in the FS. It is part of the device table.
100 * This here is taken from the pNFS-objects definition. In exofs we
101 * use one raid policy throughout the filesystem. (NOTE: the funny
102 * alignment at the beginning. We take care of it in exofs_device_table.)
103 */
104struct exofs_dt_data_map {
105 __le32 cb_num_comps;
106 __le64 cb_stripe_unit;
107 __le32 cb_group_width;
108 __le32 cb_group_depth;
109 __le32 cb_mirror_cnt;
110 __le32 cb_raid_algorithm;
111} __packed;
112
113/*
114 * This is an osd device information descriptor. It is a single entry in
115 * the exofs device table. It describes an osd target lun which
116 * contains data belonging to this FS. (Same partition_id on all devices)
117 */
118struct exofs_dt_device_info {
119 __le32 systemid_len;
120 u8 systemid[OSD_SYSTEMID_LEN];
121 __le64 long_name_offset; /* If !0 then offset-in-file */
122 __le32 osdname_len; /* */
123 u8 osdname[44]; /* Embedded, usually an ASCII uuid */
124} __packed;
125
126/*
127 * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data.
128 * It contains the raid used for this multi-device FS and an array of
129 * participating devices.
130 */
131struct exofs_device_table {
132 __le32 dt_version; /* == EXOFS_DT_VER */
133 struct exofs_dt_data_map dt_data_map; /* Raid policy to use */
134
135 /* Reserved space for future use. Total including this:
136 * (8 * sizeof(le64))
137 */
138 __le64 __Resurved[4];
139
140 __le64 dt_num_devices; /* Array size */
141 struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */
142} __packed;
92 143
93/**************************************************************************** 144/****************************************************************************
94 * inode-related things 145 * inode-related things
@@ -155,22 +206,4 @@ enum {
155 (((name_len) + offsetof(struct exofs_dir_entry, name) + \ 206 (((name_len) + offsetof(struct exofs_dir_entry, name) + \
156 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) 207 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
157 208
158/*************************
159 * function declarations *
160 *************************/
161/* osd.c */
162void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
163 const struct osd_obj_id *obj);
164
165int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
166static inline int exofs_check_ok(struct osd_request *or)
167{
168 return exofs_check_ok_resid(or, NULL, NULL);
169}
170int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
171int exofs_async_op(struct osd_request *or,
172 osd_req_done_fn *async_done, void *caller_context, u8 *cred);
173
174int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
175
176#endif /*ifndef __EXOFS_COM_H__*/ 209#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 5ec72e020b22..c35fd4623986 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -30,13 +30,17 @@
30 * along with exofs; if not, write to the Free Software 30 * along with exofs; if not, write to the Free Software
31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32 */ 32 */
33#ifndef __EXOFS_H__
34#define __EXOFS_H__
33 35
34#include <linux/fs.h> 36#include <linux/fs.h>
35#include <linux/time.h> 37#include <linux/time.h>
36#include "common.h" 38#include "common.h"
37 39
38#ifndef __EXOFS_H__ 40/* FIXME: Remove once pnfs hits mainline
39#define __EXOFS_H__ 41 * #include <linux/exportfs/pnfs_osd_xdr.h>
42 */
43#include "pnfs.h"
40 44
41#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 45#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
42 46
@@ -55,7 +59,7 @@
55 * our extension to the in-memory superblock 59 * our extension to the in-memory superblock
56 */ 60 */
57struct exofs_sb_info { 61struct exofs_sb_info {
58 struct osd_dev *s_dev; /* returned by get_osd_dev */ 62 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
59 osd_id s_pid; /* partition ID of file system*/ 63 osd_id s_pid; /* partition ID of file system*/
60 int s_timeout; /* timeout for OSD operations */ 64 int s_timeout; /* timeout for OSD operations */
61 uint64_t s_nextid; /* highest object ID used */ 65 uint64_t s_nextid; /* highest object ID used */
@@ -63,7 +67,11 @@ struct exofs_sb_info {
63 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 67 spinlock_t s_next_gen_lock; /* spinlock for gen # update */
64 u32 s_next_generation; /* next gen # to use */ 68 u32 s_next_generation; /* next gen # to use */
65 atomic_t s_curr_pending; /* number of pending commands */ 69 atomic_t s_curr_pending; /* number of pending commands */
66 uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ 70 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
71
72 struct pnfs_osd_data_map data_map; /* Default raid to use */
73 unsigned s_numdevs; /* Num of devices in array */
74 struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */
67}; 75};
68 76
69/* 77/*
@@ -79,6 +87,50 @@ struct exofs_i_info {
79 struct inode vfs_inode; /* normal in-memory inode */ 87 struct inode vfs_inode; /* normal in-memory inode */
80}; 88};
81 89
90static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
91{
92 return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
93}
94
95struct exofs_io_state;
96typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
97
98struct exofs_io_state {
99 struct kref kref;
100
101 void *private;
102 exofs_io_done_fn done;
103
104 struct exofs_sb_info *sbi;
105 struct osd_obj_id obj;
106 u8 *cred;
107
108 /* Global read/write IO*/
109 loff_t offset;
110 unsigned long length;
111 void *kern_buff;
112 struct bio *bio;
113
114 /* Attributes */
115 unsigned in_attr_len;
116 struct osd_attr *in_attr;
117 unsigned out_attr_len;
118 struct osd_attr *out_attr;
119
120 /* Variable array of size numdevs */
121 unsigned numdevs;
122 struct exofs_per_dev_state {
123 struct osd_request *or;
124 struct bio *bio;
125 } per_dev[];
126};
127
128static inline unsigned exofs_io_state_size(unsigned numdevs)
129{
130 return sizeof(struct exofs_io_state) +
131 sizeof(struct exofs_per_dev_state) * numdevs;
132}
133
82/* 134/*
83 * our inode flags 135 * our inode flags
84 */ 136 */
@@ -130,6 +182,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
130/************************* 182/*************************
131 * function declarations * 183 * function declarations *
132 *************************/ 184 *************************/
185
186/* ios.c */
187void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
188 const struct osd_obj_id *obj);
189int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
190 u64 offset, void *p, unsigned length);
191
192int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
193void exofs_put_io_state(struct exofs_io_state *ios);
194
195int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
196
197int exofs_sbi_create(struct exofs_io_state *ios);
198int exofs_sbi_remove(struct exofs_io_state *ios);
199int exofs_sbi_write(struct exofs_io_state *ios);
200int exofs_sbi_read(struct exofs_io_state *ios);
201
202int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
203
204int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
205static inline int exofs_oi_write(struct exofs_i_info *oi,
206 struct exofs_io_state *ios)
207{
208 ios->obj.id = exofs_oi_objno(oi);
209 ios->cred = oi->i_cred;
210 return exofs_sbi_write(ios);
211}
212
213static inline int exofs_oi_read(struct exofs_i_info *oi,
214 struct exofs_io_state *ios)
215{
216 ios->obj.id = exofs_oi_objno(oi);
217 ios->cred = oi->i_cred;
218 return exofs_sbi_read(ios);
219}
220
133/* inode.c */ 221/* inode.c */
134void exofs_truncate(struct inode *inode); 222void exofs_truncate(struct inode *inode);
135int exofs_setattr(struct dentry *, struct iattr *); 223int exofs_setattr(struct dentry *, struct iattr *);
@@ -169,6 +257,7 @@ extern const struct file_operations exofs_file_operations;
169 257
170/* inode.c */ 258/* inode.c */
171extern const struct address_space_operations exofs_aops; 259extern const struct address_space_operations exofs_aops;
260extern const struct osd_attr g_attr_logical_length;
172 261
173/* namei.c */ 262/* namei.c */
174extern const struct inode_operations exofs_dir_inode_operations; 263extern const struct inode_operations exofs_dir_inode_operations;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 6c10f7476699..698a8636d39c 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,15 +37,18 @@
37 37
38#include "exofs.h" 38#include "exofs.h"
39 39
40#ifdef CONFIG_EXOFS_DEBUG 40#define EXOFS_DBGMSG2(M...) do {} while (0)
41# define EXOFS_DEBUG_OBJ_ISIZE 1 41
42#endif 42enum { BIO_MAX_PAGES_KMALLOC =
43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
44};
43 45
44struct page_collect { 46struct page_collect {
45 struct exofs_sb_info *sbi; 47 struct exofs_sb_info *sbi;
46 struct request_queue *req_q; 48 struct request_queue *req_q;
47 struct inode *inode; 49 struct inode *inode;
48 unsigned expected_pages; 50 unsigned expected_pages;
51 struct exofs_io_state *ios;
49 52
50 struct bio *bio; 53 struct bio *bio;
51 unsigned nr_pages; 54 unsigned nr_pages;
@@ -54,22 +57,23 @@ struct page_collect {
54}; 57};
55 58
56static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 59static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
57 struct inode *inode) 60 struct inode *inode)
58{ 61{
59 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
60 63
61 pcol->sbi = sbi; 64 pcol->sbi = sbi;
62 pcol->req_q = osd_request_queue(sbi->s_dev); 65 /* Create master bios on first Q, later on cloning, each clone will be
66 * allocated on its destination Q
67 */
68 pcol->req_q = osd_request_queue(sbi->s_ods[0]);
63 pcol->inode = inode; 69 pcol->inode = inode;
64 pcol->expected_pages = expected_pages; 70 pcol->expected_pages = expected_pages;
65 71
72 pcol->ios = NULL;
66 pcol->bio = NULL; 73 pcol->bio = NULL;
67 pcol->nr_pages = 0; 74 pcol->nr_pages = 0;
68 pcol->length = 0; 75 pcol->length = 0;
69 pcol->pg_first = -1; 76 pcol->pg_first = -1;
70
71 EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
72 expected_pages);
73} 77}
74 78
75static void _pcol_reset(struct page_collect *pcol) 79static void _pcol_reset(struct page_collect *pcol)
@@ -80,35 +84,49 @@ static void _pcol_reset(struct page_collect *pcol)
80 pcol->nr_pages = 0; 84 pcol->nr_pages = 0;
81 pcol->length = 0; 85 pcol->length = 0;
82 pcol->pg_first = -1; 86 pcol->pg_first = -1;
83 EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", 87 pcol->ios = NULL;
84 pcol->inode->i_ino, pcol->expected_pages);
85 88
86 /* this is probably the end of the loop but in writes 89 /* this is probably the end of the loop but in writes
87 * it might not end here. don't be left with nothing 90 * it might not end here. don't be left with nothing
88 */ 91 */
89 if (!pcol->expected_pages) 92 if (!pcol->expected_pages)
90 pcol->expected_pages = 128; 93 pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
91} 94}
92 95
93static int pcol_try_alloc(struct page_collect *pcol) 96static int pcol_try_alloc(struct page_collect *pcol)
94{ 97{
95 int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); 98 int pages = min_t(unsigned, pcol->expected_pages,
99 BIO_MAX_PAGES_KMALLOC);
100
101 if (!pcol->ios) { /* First time allocate io_state */
102 int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
103
104 if (ret)
105 return ret;
106 }
96 107
97 for (; pages; pages >>= 1) { 108 for (; pages; pages >>= 1) {
98 pcol->bio = bio_alloc(GFP_KERNEL, pages); 109 pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
99 if (likely(pcol->bio)) 110 if (likely(pcol->bio))
100 return 0; 111 return 0;
101 } 112 }
102 113
103 EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", 114 EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
104 pcol->expected_pages); 115 pcol->expected_pages);
105 return -ENOMEM; 116 return -ENOMEM;
106} 117}
107 118
108static void pcol_free(struct page_collect *pcol) 119static void pcol_free(struct page_collect *pcol)
109{ 120{
110 bio_put(pcol->bio); 121 if (pcol->bio) {
111 pcol->bio = NULL; 122 bio_put(pcol->bio);
123 pcol->bio = NULL;
124 }
125
126 if (pcol->ios) {
127 exofs_put_io_state(pcol->ios);
128 pcol->ios = NULL;
129 }
112} 130}
113 131
114static int pcol_add_page(struct page_collect *pcol, struct page *page, 132static int pcol_add_page(struct page_collect *pcol, struct page *page,
@@ -161,22 +179,17 @@ static void update_write_page(struct page *page, int ret)
161/* Called at the end of reads, to optionally unlock pages and update their 179/* Called at the end of reads, to optionally unlock pages and update their
162 * status. 180 * status.
163 */ 181 */
164static int __readpages_done(struct osd_request *or, struct page_collect *pcol, 182static int __readpages_done(struct page_collect *pcol, bool do_unlock)
165 bool do_unlock)
166{ 183{
167 struct bio_vec *bvec; 184 struct bio_vec *bvec;
168 int i; 185 int i;
169 u64 resid; 186 u64 resid;
170 u64 good_bytes; 187 u64 good_bytes;
171 u64 length = 0; 188 u64 length = 0;
172 int ret = exofs_check_ok_resid(or, &resid, NULL); 189 int ret = exofs_check_io(pcol->ios, &resid);
173
174 osd_end_request(or);
175 190
176 if (likely(!ret)) 191 if (likely(!ret))
177 good_bytes = pcol->length; 192 good_bytes = pcol->length;
178 else if (!resid)
179 good_bytes = 0;
180 else 193 else
181 good_bytes = pcol->length - resid; 194 good_bytes = pcol->length - resid;
182 195
@@ -198,7 +211,7 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
198 else 211 else
199 page_stat = ret; 212 page_stat = ret;
200 213
201 EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", 214 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n",
202 inode->i_ino, page->index, 215 inode->i_ino, page->index,
203 page_stat ? "bad_bytes" : "good_bytes"); 216 page_stat ? "bad_bytes" : "good_bytes");
204 217
@@ -214,13 +227,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
214} 227}
215 228
216/* callback of async reads */ 229/* callback of async reads */
217static void readpages_done(struct osd_request *or, void *p) 230static void readpages_done(struct exofs_io_state *ios, void *p)
218{ 231{
219 struct page_collect *pcol = p; 232 struct page_collect *pcol = p;
220 233
221 __readpages_done(or, pcol, true); 234 __readpages_done(pcol, true);
222 atomic_dec(&pcol->sbi->s_curr_pending); 235 atomic_dec(&pcol->sbi->s_curr_pending);
223 kfree(p); 236 kfree(pcol);
224} 237}
225 238
226static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
@@ -238,17 +251,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
238 251
239 unlock_page(page); 252 unlock_page(page);
240 } 253 }
241 pcol_free(pcol);
242} 254}
243 255
244static int read_exec(struct page_collect *pcol, bool is_sync) 256static int read_exec(struct page_collect *pcol, bool is_sync)
245{ 257{
246 struct exofs_i_info *oi = exofs_i(pcol->inode); 258 struct exofs_i_info *oi = exofs_i(pcol->inode);
247 struct osd_obj_id obj = {pcol->sbi->s_pid, 259 struct exofs_io_state *ios = pcol->ios;
248 pcol->inode->i_ino + EXOFS_OBJ_OFF};
249 struct osd_request *or = NULL;
250 struct page_collect *pcol_copy = NULL; 260 struct page_collect *pcol_copy = NULL;
251 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
252 int ret; 261 int ret;
253 262
254 if (!pcol->bio) 263 if (!pcol->bio)
@@ -257,17 +266,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
257 /* see comment in _readpage() about sync reads */ 266 /* see comment in _readpage() about sync reads */
258 WARN_ON(is_sync && (pcol->nr_pages != 1)); 267 WARN_ON(is_sync && (pcol->nr_pages != 1));
259 268
260 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); 269 ios->bio = pcol->bio;
261 if (unlikely(!or)) { 270 ios->length = pcol->length;
262 ret = -ENOMEM; 271 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
263 goto err;
264 }
265
266 osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
267 272
268 if (is_sync) { 273 if (is_sync) {
269 exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); 274 exofs_oi_read(oi, pcol->ios);
270 return __readpages_done(or, pcol, false); 275 return __readpages_done(pcol, false);
271 } 276 }
272 277
273 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 278 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -277,14 +282,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
277 } 282 }
278 283
279 *pcol_copy = *pcol; 284 *pcol_copy = *pcol;
280 ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); 285 ios->done = readpages_done;
286 ios->private = pcol_copy;
287 ret = exofs_oi_read(oi, ios);
281 if (unlikely(ret)) 288 if (unlikely(ret))
282 goto err; 289 goto err;
283 290
284 atomic_inc(&pcol->sbi->s_curr_pending); 291 atomic_inc(&pcol->sbi->s_curr_pending);
285 292
286 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 293 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
287 obj.id, _LLU(i_start), pcol->length); 294 ios->obj.id, _LLU(ios->offset), pcol->length);
288 295
289 /* pages ownership was passed to pcol_copy */ 296 /* pages ownership was passed to pcol_copy */
290 _pcol_reset(pcol); 297 _pcol_reset(pcol);
@@ -293,12 +300,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
293err: 300err:
294 if (!is_sync) 301 if (!is_sync)
295 _unlock_pcol_pages(pcol, ret, READ); 302 _unlock_pcol_pages(pcol, ret, READ);
296 else /* Pages unlocked by caller in sync mode only free bio */ 303
297 pcol_free(pcol); 304 pcol_free(pcol);
298 305
299 kfree(pcol_copy); 306 kfree(pcol_copy);
300 if (or)
301 osd_end_request(or);
302 return ret; 307 return ret;
303} 308}
304 309
@@ -370,12 +375,12 @@ try_again:
370 if (len != PAGE_CACHE_SIZE) 375 if (len != PAGE_CACHE_SIZE)
371 zero_user(page, len, PAGE_CACHE_SIZE - len); 376 zero_user(page, len, PAGE_CACHE_SIZE - len);
372 377
373 EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 378 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
374 inode->i_ino, page->index, len); 379 inode->i_ino, page->index, len);
375 380
376 ret = pcol_add_page(pcol, page, len); 381 ret = pcol_add_page(pcol, page, len);
377 if (ret) { 382 if (ret) {
378 EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " 383 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
379 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 384 "this_len=0x%zx nr_pages=%u length=0x%lx\n",
380 page, len, pcol->nr_pages, pcol->length); 385 page, len, pcol->nr_pages, pcol->length);
381 386
@@ -419,9 +424,8 @@ static int _readpage(struct page *page, bool is_sync)
419 424
420 _pcol_init(&pcol, 1, page->mapping->host); 425 _pcol_init(&pcol, 1, page->mapping->host);
421 426
422 /* readpage_strip might call read_exec(,async) inside at several places 427 /* readpage_strip might call read_exec(,is_sync==false) at several
423 * but this is safe for is_async=0 since read_exec will not do anything 428 * places but not if we have a single page.
424 * when we have a single page.
425 */ 429 */
426 ret = readpage_strip(&pcol, page); 430 ret = readpage_strip(&pcol, page);
427 if (ret) { 431 if (ret) {
@@ -440,8 +444,8 @@ static int exofs_readpage(struct file *file, struct page *page)
440 return _readpage(page, false); 444 return _readpage(page, false);
441} 445}
442 446
443/* Callback for osd_write. All writes are asynchronouse */ 447/* Callback for osd_write. All writes are asynchronous */
444static void writepages_done(struct osd_request *or, void *p) 448static void writepages_done(struct exofs_io_state *ios, void *p)
445{ 449{
446 struct page_collect *pcol = p; 450 struct page_collect *pcol = p;
447 struct bio_vec *bvec; 451 struct bio_vec *bvec;
@@ -449,16 +453,12 @@ static void writepages_done(struct osd_request *or, void *p)
449 u64 resid; 453 u64 resid;
450 u64 good_bytes; 454 u64 good_bytes;
451 u64 length = 0; 455 u64 length = 0;
456 int ret = exofs_check_io(ios, &resid);
452 457
453 int ret = exofs_check_ok_resid(or, NULL, &resid);
454
455 osd_end_request(or);
456 atomic_dec(&pcol->sbi->s_curr_pending); 458 atomic_dec(&pcol->sbi->s_curr_pending);
457 459
458 if (likely(!ret)) 460 if (likely(!ret))
459 good_bytes = pcol->length; 461 good_bytes = pcol->length;
460 else if (!resid)
461 good_bytes = 0;
462 else 462 else
463 good_bytes = pcol->length - resid; 463 good_bytes = pcol->length - resid;
464 464
@@ -482,7 +482,7 @@ static void writepages_done(struct osd_request *or, void *p)
482 482
483 update_write_page(page, page_stat); 483 update_write_page(page, page_stat);
484 unlock_page(page); 484 unlock_page(page);
485 EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", 485 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
486 inode->i_ino, page->index, page_stat); 486 inode->i_ino, page->index, page_stat);
487 487
488 length += bvec->bv_len; 488 length += bvec->bv_len;
@@ -496,23 +496,13 @@ static void writepages_done(struct osd_request *or, void *p)
496static int write_exec(struct page_collect *pcol) 496static int write_exec(struct page_collect *pcol)
497{ 497{
498 struct exofs_i_info *oi = exofs_i(pcol->inode); 498 struct exofs_i_info *oi = exofs_i(pcol->inode);
499 struct osd_obj_id obj = {pcol->sbi->s_pid, 499 struct exofs_io_state *ios = pcol->ios;
500 pcol->inode->i_ino + EXOFS_OBJ_OFF};
501 struct osd_request *or = NULL;
502 struct page_collect *pcol_copy = NULL; 500 struct page_collect *pcol_copy = NULL;
503 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
504 int ret; 501 int ret;
505 502
506 if (!pcol->bio) 503 if (!pcol->bio)
507 return 0; 504 return 0;
508 505
509 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
510 if (unlikely(!or)) {
511 EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
512 ret = -ENOMEM;
513 goto err;
514 }
515
516 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
517 if (!pcol_copy) { 507 if (!pcol_copy) {
518 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); 508 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
@@ -523,16 +513,22 @@ static int write_exec(struct page_collect *pcol)
523 *pcol_copy = *pcol; 513 *pcol_copy = *pcol;
524 514
525 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
526 osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); 516
527 ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); 517 ios->bio = pcol_copy->bio;
518 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
519 ios->length = pcol_copy->length;
520 ios->done = writepages_done;
521 ios->private = pcol_copy;
522
523 ret = exofs_oi_write(oi, ios);
528 if (unlikely(ret)) { 524 if (unlikely(ret)) {
529 EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); 525 EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
530 goto err; 526 goto err;
531 } 527 }
532 528
533 atomic_inc(&pcol->sbi->s_curr_pending); 529 atomic_inc(&pcol->sbi->s_curr_pending);
534 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 530 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
535 pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), 531 pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
536 pcol->length); 532 pcol->length);
537 /* pages ownership was passed to pcol_copy */ 533 /* pages ownership was passed to pcol_copy */
538 _pcol_reset(pcol); 534 _pcol_reset(pcol);
@@ -540,9 +536,9 @@ static int write_exec(struct page_collect *pcol)
540 536
541err: 537err:
542 _unlock_pcol_pages(pcol, ret, WRITE); 538 _unlock_pcol_pages(pcol, ret, WRITE);
539 pcol_free(pcol);
543 kfree(pcol_copy); 540 kfree(pcol_copy);
544 if (or) 541
545 osd_end_request(or);
546 return ret; 542 return ret;
547} 543}
548 544
@@ -586,6 +582,9 @@ static int writepage_strip(struct page *page,
586 if (PageError(page)) 582 if (PageError(page))
587 ClearPageError(page); 583 ClearPageError(page);
588 unlock_page(page); 584 unlock_page(page);
585 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
586 "outside the limits\n",
587 inode->i_ino, page->index);
589 return 0; 588 return 0;
590 } 589 }
591 } 590 }
@@ -600,6 +599,9 @@ try_again:
600 ret = write_exec(pcol); 599 ret = write_exec(pcol);
601 if (unlikely(ret)) 600 if (unlikely(ret))
602 goto fail; 601 goto fail;
602
603 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
604 inode->i_ino, page->index);
603 goto try_again; 605 goto try_again;
604 } 606 }
605 607
@@ -609,7 +611,7 @@ try_again:
609 goto fail; 611 goto fail;
610 } 612 }
611 613
612 EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 614 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
613 inode->i_ino, page->index, len); 615 inode->i_ino, page->index, len);
614 616
615 ret = pcol_add_page(pcol, page, len); 617 ret = pcol_add_page(pcol, page, len);
@@ -634,6 +636,8 @@ try_again:
634 return 0; 636 return 0;
635 637
636fail: 638fail:
639 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
640 inode->i_ino, page->index, ret);
637 set_bit(AS_EIO, &page->mapping->flags); 641 set_bit(AS_EIO, &page->mapping->flags);
638 unlock_page(page); 642 unlock_page(page);
639 return ret; 643 return ret;
@@ -652,14 +656,17 @@ static int exofs_writepages(struct address_space *mapping,
652 wbc->range_end >> PAGE_CACHE_SHIFT; 656 wbc->range_end >> PAGE_CACHE_SHIFT;
653 657
654 if (start || end) 658 if (start || end)
655 expected_pages = min(end - start + 1, 32L); 659 expected_pages = end - start + 1;
656 else 660 else
657 expected_pages = mapping->nrpages; 661 expected_pages = mapping->nrpages;
658 662
659 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" 663 if (expected_pages < 32L)
660 " m->nrpages=%lu start=0x%lx end=0x%lx\n", 664 expected_pages = 32L;
665
666 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
667 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
661 mapping->host->i_ino, wbc->range_start, wbc->range_end, 668 mapping->host->i_ino, wbc->range_start, wbc->range_end,
662 mapping->nrpages, start, end); 669 mapping->nrpages, start, end, expected_pages);
663 670
664 _pcol_init(&pcol, expected_pages, mapping->host); 671 _pcol_init(&pcol, expected_pages, mapping->host);
665 672
@@ -771,19 +778,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock,
771const struct osd_attr g_attr_logical_length = ATTR_DEF( 778const struct osd_attr g_attr_logical_length = ATTR_DEF(
772 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 779 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
773 780
781static int _do_truncate(struct inode *inode)
782{
783 struct exofs_i_info *oi = exofs_i(inode);
784 loff_t isize = i_size_read(inode);
785 int ret;
786
787 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
788
789 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
790
791 ret = exofs_oi_truncate(oi, (u64)isize);
792 EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
793 return ret;
794}
795
774/* 796/*
775 * Truncate a file to the specified size - all we have to do is set the size 797 * Truncate a file to the specified size - all we have to do is set the size
776 * attribute. We make sure the object exists first. 798 * attribute. We make sure the object exists first.
777 */ 799 */
778void exofs_truncate(struct inode *inode) 800void exofs_truncate(struct inode *inode)
779{ 801{
780 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
781 struct exofs_i_info *oi = exofs_i(inode); 802 struct exofs_i_info *oi = exofs_i(inode);
782 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
783 struct osd_request *or;
784 struct osd_attr attr;
785 loff_t isize = i_size_read(inode);
786 __be64 newsize;
787 int ret; 803 int ret;
788 804
789 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 805 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
@@ -793,22 +809,6 @@ void exofs_truncate(struct inode *inode)
793 return; 809 return;
794 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 810 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
795 return; 811 return;
796 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
797
798 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
799
800 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
801 if (unlikely(!or)) {
802 EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
803 goto fail;
804 }
805
806 osd_req_set_attributes(or, &obj);
807
808 newsize = cpu_to_be64((u64)isize);
809 attr = g_attr_logical_length;
810 attr.val_ptr = &newsize;
811 osd_req_add_set_attr_list(or, &attr, 1);
812 812
813 /* if we are about to truncate an object, and it hasn't been 813 /* if we are about to truncate an object, and it hasn't been
814 * created yet, wait 814 * created yet, wait
@@ -816,8 +816,7 @@ void exofs_truncate(struct inode *inode)
816 if (unlikely(wait_obj_created(oi))) 816 if (unlikely(wait_obj_created(oi)))
817 goto fail; 817 goto fail;
818 818
819 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 819 ret = _do_truncate(inode);
820 osd_end_request(or);
821 if (ret) 820 if (ret)
822 goto fail; 821 goto fail;
823 822
@@ -847,65 +846,62 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
847 846
848/* 847/*
849 * Read an inode from the OSD, and return it as is. We also return the size 848 * Read an inode from the OSD, and return it as is. We also return the size
850 * attribute in the 'sanity' argument if we got compiled with debugging turned 849 * attribute in the 'obj_size' argument.
851 * on.
852 */ 850 */
853static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 851static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
854 struct exofs_fcb *inode, uint64_t *sanity) 852 struct exofs_fcb *inode, uint64_t *obj_size)
855{ 853{
856 struct exofs_sb_info *sbi = sb->s_fs_info; 854 struct exofs_sb_info *sbi = sb->s_fs_info;
857 struct osd_request *or; 855 struct osd_attr attrs[2];
858 struct osd_attr attr; 856 struct exofs_io_state *ios;
859 struct osd_obj_id obj = {sbi->s_pid,
860 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
861 int ret; 857 int ret;
862 858
863 exofs_make_credential(oi->i_cred, &obj); 859 *obj_size = ~0;
864 860 ret = exofs_get_io_state(sbi, &ios);
865 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 861 if (unlikely(ret)) {
866 if (unlikely(!or)) { 862 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
867 EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); 863 return ret;
868 return -ENOMEM;
869 } 864 }
870 osd_req_get_attributes(or, &obj);
871 865
872 /* we need the inode attribute */ 866 ios->obj.id = exofs_oi_objno(oi);
873 osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); 867 exofs_make_credential(oi->i_cred, &ios->obj);
868 ios->cred = oi->i_cred;
874 869
875#ifdef EXOFS_DEBUG_OBJ_ISIZE 870 attrs[0] = g_attr_inode_data;
876 /* we get the size attributes to do a sanity check */ 871 attrs[1] = g_attr_logical_length;
877 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); 872 ios->in_attr = attrs;
878#endif 873 ios->in_attr_len = ARRAY_SIZE(attrs);
879 874
880 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 875 ret = exofs_sbi_read(ios);
881 if (ret) 876 if (ret)
882 goto out; 877 goto out;
883 878
884 attr = g_attr_inode_data; 879 ret = extract_attr_from_ios(ios, &attrs[0]);
885 ret = extract_attr_from_req(or, &attr);
886 if (ret) { 880 if (ret) {
887 EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); 881 EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
888 goto out; 882 goto out;
889 } 883 }
884 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
885 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
890 886
891 WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); 887 ret = extract_attr_from_ios(ios, &attrs[1]);
892 memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
893
894#ifdef EXOFS_DEBUG_OBJ_ISIZE
895 attr = g_attr_logical_length;
896 ret = extract_attr_from_req(or, &attr);
897 if (ret) { 888 if (ret) {
898 EXOFS_ERR("ERROR: extract attr from or failed\n"); 889 EXOFS_ERR("%s: extract_attr of logical_length failed\n",
890 __func__);
899 goto out; 891 goto out;
900 } 892 }
901 *sanity = get_unaligned_be64(attr.val_ptr); 893 *obj_size = get_unaligned_be64(attrs[1].val_ptr);
902#endif
903 894
904out: 895out:
905 osd_end_request(or); 896 exofs_put_io_state(ios);
906 return ret; 897 return ret;
907} 898}
908 899
900static void __oi_init(struct exofs_i_info *oi)
901{
902 init_waitqueue_head(&oi->i_wq);
903 oi->i_flags = 0;
904}
909/* 905/*
910 * Fill in an inode read from the OSD and set it up for use 906 * Fill in an inode read from the OSD and set it up for use
911 */ 907 */
@@ -914,7 +910,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
914 struct exofs_i_info *oi; 910 struct exofs_i_info *oi;
915 struct exofs_fcb fcb; 911 struct exofs_fcb fcb;
916 struct inode *inode; 912 struct inode *inode;
917 uint64_t uninitialized_var(sanity); 913 uint64_t obj_size;
918 int ret; 914 int ret;
919 915
920 inode = iget_locked(sb, ino); 916 inode = iget_locked(sb, ino);
@@ -923,13 +919,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
923 if (!(inode->i_state & I_NEW)) 919 if (!(inode->i_state & I_NEW))
924 return inode; 920 return inode;
925 oi = exofs_i(inode); 921 oi = exofs_i(inode);
922 __oi_init(oi);
926 923
927 /* read the inode from the osd */ 924 /* read the inode from the osd */
928 ret = exofs_get_inode(sb, oi, &fcb, &sanity); 925 ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
929 if (ret) 926 if (ret)
930 goto bad_inode; 927 goto bad_inode;
931 928
932 init_waitqueue_head(&oi->i_wq);
933 set_obj_created(oi); 929 set_obj_created(oi);
934 930
935 /* copy stuff from on-disk struct to in-memory struct */ 931 /* copy stuff from on-disk struct to in-memory struct */
@@ -947,14 +943,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
947 inode->i_blkbits = EXOFS_BLKSHIFT; 943 inode->i_blkbits = EXOFS_BLKSHIFT;
948 inode->i_generation = le32_to_cpu(fcb.i_generation); 944 inode->i_generation = le32_to_cpu(fcb.i_generation);
949 945
950#ifdef EXOFS_DEBUG_OBJ_ISIZE 946 if ((inode->i_size != obj_size) &&
951 if ((inode->i_size != sanity) &&
952 (!exofs_inode_is_fast_symlink(inode))) { 947 (!exofs_inode_is_fast_symlink(inode))) {
953 EXOFS_ERR("WARNING: Size of object from inode and " 948 EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
954 "attributes differ (%lld != %llu)\n", 949 inode->i_size, _LLU(obj_size));
955 inode->i_size, _LLU(sanity)); 950 /* FIXME: call exofs_inode_recovery() */
956 } 951 }
957#endif
958 952
959 oi->i_dir_start_lookup = 0; 953 oi->i_dir_start_lookup = 0;
960 954
@@ -1020,23 +1014,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1020 * set the obj_created flag so that other methods know that the object exists on 1014 * set the obj_created flag so that other methods know that the object exists on
1021 * the OSD. 1015 * the OSD.
1022 */ 1016 */
1023static void create_done(struct osd_request *or, void *p) 1017static void create_done(struct exofs_io_state *ios, void *p)
1024{ 1018{
1025 struct inode *inode = p; 1019 struct inode *inode = p;
1026 struct exofs_i_info *oi = exofs_i(inode); 1020 struct exofs_i_info *oi = exofs_i(inode);
1027 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1021 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1028 int ret; 1022 int ret;
1029 1023
1030 ret = exofs_check_ok(or); 1024 ret = exofs_check_io(ios, NULL);
1031 osd_end_request(or); 1025 exofs_put_io_state(ios);
1026
1032 atomic_dec(&sbi->s_curr_pending); 1027 atomic_dec(&sbi->s_curr_pending);
1033 1028
1034 if (unlikely(ret)) { 1029 if (unlikely(ret)) {
1035 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1030 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1036 _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); 1031 _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
1037 make_bad_inode(inode); 1032 /*TODO: When the FS is corrupted, creation can fail because the object
1038 } else 1033 * already exists. Get rid of this asynchronous creation; if it exists,
1039 set_obj_created(oi); 1034 * increment the obj counter and try the next object, until we
1035 * succeed. All these dangling objects will be made into lost
1036 * files by chkfs.exofs.
1037 */
1038 }
1039
1040 set_obj_created(oi);
1040 1041
1041 atomic_dec(&inode->i_count); 1042 atomic_dec(&inode->i_count);
1042 wake_up(&oi->i_wq); 1043 wake_up(&oi->i_wq);
@@ -1051,8 +1052,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1051 struct inode *inode; 1052 struct inode *inode;
1052 struct exofs_i_info *oi; 1053 struct exofs_i_info *oi;
1053 struct exofs_sb_info *sbi; 1054 struct exofs_sb_info *sbi;
1054 struct osd_request *or; 1055 struct exofs_io_state *ios;
1055 struct osd_obj_id obj;
1056 int ret; 1056 int ret;
1057 1057
1058 sb = dir->i_sb; 1058 sb = dir->i_sb;
@@ -1061,8 +1061,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1061 return ERR_PTR(-ENOMEM); 1061 return ERR_PTR(-ENOMEM);
1062 1062
1063 oi = exofs_i(inode); 1063 oi = exofs_i(inode);
1064 __oi_init(oi);
1064 1065
1065 init_waitqueue_head(&oi->i_wq);
1066 set_obj_2bcreated(oi); 1066 set_obj_2bcreated(oi);
1067 1067
1068 sbi = sb->s_fs_info; 1068 sbi = sb->s_fs_info;
@@ -1089,28 +1089,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1089 1089
1090 mark_inode_dirty(inode); 1090 mark_inode_dirty(inode);
1091 1091
1092 obj.partition = sbi->s_pid; 1092 ret = exofs_get_io_state(sbi, &ios);
1093 obj.id = inode->i_ino + EXOFS_OBJ_OFF; 1093 if (unlikely(ret)) {
1094 exofs_make_credential(oi->i_cred, &obj); 1094 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
1095 1095 return ERR_PTR(ret);
1096 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1097 if (unlikely(!or)) {
1098 EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
1099 return ERR_PTR(-ENOMEM);
1100 } 1096 }
1101 1097
1102 osd_req_create_object(or, &obj); 1098 ios->obj.id = exofs_oi_objno(oi);
1099 exofs_make_credential(oi->i_cred, &ios->obj);
1103 1100
1104 /* increment the refcount so that the inode will still be around when we 1101 /* increment the refcount so that the inode will still be around when we
1105 * reach the callback 1102 * reach the callback
1106 */ 1103 */
1107 atomic_inc(&inode->i_count); 1104 atomic_inc(&inode->i_count);
1108 1105
1109 ret = exofs_async_op(or, create_done, inode, oi->i_cred); 1106 ios->done = create_done;
1107 ios->private = inode;
1108 ios->cred = oi->i_cred;
1109 ret = exofs_sbi_create(ios);
1110 if (ret) { 1110 if (ret) {
1111 atomic_dec(&inode->i_count); 1111 atomic_dec(&inode->i_count);
1112 osd_end_request(or); 1112 exofs_put_io_state(ios);
1113 return ERR_PTR(-EIO); 1113 return ERR_PTR(ret);
1114 } 1114 }
1115 atomic_inc(&sbi->s_curr_pending); 1115 atomic_inc(&sbi->s_curr_pending);
1116 1116
@@ -1128,11 +1128,11 @@ struct updatei_args {
1128/* 1128/*
1129 * Callback function from exofs_update_inode(). 1129 * Callback function from exofs_update_inode().
1130 */ 1130 */
1131static void updatei_done(struct osd_request *or, void *p) 1131static void updatei_done(struct exofs_io_state *ios, void *p)
1132{ 1132{
1133 struct updatei_args *args = p; 1133 struct updatei_args *args = p;
1134 1134
1135 osd_end_request(or); 1135 exofs_put_io_state(ios);
1136 1136
1137 atomic_dec(&args->sbi->s_curr_pending); 1137 atomic_dec(&args->sbi->s_curr_pending);
1138 1138
@@ -1148,8 +1148,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1148 struct exofs_i_info *oi = exofs_i(inode); 1148 struct exofs_i_info *oi = exofs_i(inode);
1149 struct super_block *sb = inode->i_sb; 1149 struct super_block *sb = inode->i_sb;
1150 struct exofs_sb_info *sbi = sb->s_fs_info; 1150 struct exofs_sb_info *sbi = sb->s_fs_info;
1151 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1151 struct exofs_io_state *ios;
1152 struct osd_request *or;
1153 struct osd_attr attr; 1152 struct osd_attr attr;
1154 struct exofs_fcb *fcb; 1153 struct exofs_fcb *fcb;
1155 struct updatei_args *args; 1154 struct updatei_args *args;
@@ -1186,18 +1185,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1186 } else 1185 } else
1187 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1186 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1188 1187
1189 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1188 ret = exofs_get_io_state(sbi, &ios);
1190 if (unlikely(!or)) { 1189 if (unlikely(ret)) {
1191 EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); 1190 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
1192 ret = -ENOMEM;
1193 goto free_args; 1191 goto free_args;
1194 } 1192 }
1195 1193
1196 osd_req_set_attributes(or, &obj);
1197
1198 attr = g_attr_inode_data; 1194 attr = g_attr_inode_data;
1199 attr.val_ptr = fcb; 1195 attr.val_ptr = fcb;
1200 osd_req_add_set_attr_list(or, &attr, 1); 1196 ios->out_attr_len = 1;
1197 ios->out_attr = &attr;
1201 1198
1202 if (!obj_created(oi)) { 1199 if (!obj_created(oi)) {
1203 EXOFS_DBGMSG("!obj_created\n"); 1200 EXOFS_DBGMSG("!obj_created\n");
@@ -1206,22 +1203,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1206 EXOFS_DBGMSG("wait_event done\n"); 1203 EXOFS_DBGMSG("wait_event done\n");
1207 } 1204 }
1208 1205
1209 if (do_sync) { 1206 if (!do_sync) {
1210 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
1211 osd_end_request(or);
1212 goto free_args;
1213 } else {
1214 args->sbi = sbi; 1207 args->sbi = sbi;
1208 ios->done = updatei_done;
1209 ios->private = args;
1210 }
1215 1211
1216 ret = exofs_async_op(or, updatei_done, args, oi->i_cred); 1212 ret = exofs_oi_write(oi, ios);
1217 if (ret) { 1213 if (!do_sync && !ret) {
1218 osd_end_request(or);
1219 goto free_args;
1220 }
1221 atomic_inc(&sbi->s_curr_pending); 1214 atomic_inc(&sbi->s_curr_pending);
1222 goto out; /* deallocation in updatei_done */ 1215 goto out; /* deallocation in updatei_done */
1223 } 1216 }
1224 1217
1218 exofs_put_io_state(ios);
1225free_args: 1219free_args:
1226 kfree(args); 1220 kfree(args);
1227out: 1221out:
@@ -1238,11 +1232,12 @@ int exofs_write_inode(struct inode *inode, int wait)
1238 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1232 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1239 * do. 1233 * do.
1240 */ 1234 */
1241static void delete_done(struct osd_request *or, void *p) 1235static void delete_done(struct exofs_io_state *ios, void *p)
1242{ 1236{
1243 struct exofs_sb_info *sbi; 1237 struct exofs_sb_info *sbi = p;
1244 osd_end_request(or); 1238
1245 sbi = p; 1239 exofs_put_io_state(ios);
1240
1246 atomic_dec(&sbi->s_curr_pending); 1241 atomic_dec(&sbi->s_curr_pending);
1247} 1242}
1248 1243
@@ -1256,8 +1251,7 @@ void exofs_delete_inode(struct inode *inode)
1256 struct exofs_i_info *oi = exofs_i(inode); 1251 struct exofs_i_info *oi = exofs_i(inode);
1257 struct super_block *sb = inode->i_sb; 1252 struct super_block *sb = inode->i_sb;
1258 struct exofs_sb_info *sbi = sb->s_fs_info; 1253 struct exofs_sb_info *sbi = sb->s_fs_info;
1259 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1254 struct exofs_io_state *ios;
1260 struct osd_request *or;
1261 int ret; 1255 int ret;
1262 1256
1263 truncate_inode_pages(&inode->i_data, 0); 1257 truncate_inode_pages(&inode->i_data, 0);
@@ -1274,25 +1268,26 @@ void exofs_delete_inode(struct inode *inode)
1274 1268
1275 clear_inode(inode); 1269 clear_inode(inode);
1276 1270
1277 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1271 ret = exofs_get_io_state(sbi, &ios);
1278 if (unlikely(!or)) { 1272 if (unlikely(ret)) {
1279 EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); 1273 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1280 return; 1274 return;
1281 } 1275 }
1282 1276
1283 osd_req_remove_object(or, &obj);
1284
1285 /* if we are deleting an obj that hasn't been created yet, wait */ 1277 /* if we are deleting an obj that hasn't been created yet, wait */
1286 if (!obj_created(oi)) { 1278 if (!obj_created(oi)) {
1287 BUG_ON(!obj_2bcreated(oi)); 1279 BUG_ON(!obj_2bcreated(oi));
1288 wait_event(oi->i_wq, obj_created(oi)); 1280 wait_event(oi->i_wq, obj_created(oi));
1289 } 1281 }
1290 1282
1291 ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); 1283 ios->obj.id = exofs_oi_objno(oi);
1284 ios->done = delete_done;
1285 ios->private = sbi;
1286 ios->cred = oi->i_cred;
1287 ret = exofs_sbi_remove(ios);
1292 if (ret) { 1288 if (ret) {
1293 EXOFS_ERR( 1289 EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
1294 "ERROR: @exofs_delete_inode exofs_async_op failed\n"); 1290 exofs_put_io_state(ios);
1295 osd_end_request(or);
1296 return; 1291 return;
1297 } 1292 }
1298 atomic_inc(&sbi->s_curr_pending); 1293 atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
new file mode 100644
index 000000000000..5bad01fa1f9f
--- /dev/null
+++ b/fs/exofs/ios.c
@@ -0,0 +1,421 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
6 *
7 * This file is part of exofs.
8 *
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
14 *
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <scsi/scsi_device.h>
26
27#include "exofs.h"
28
29void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
30{
31 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
32}
33
34int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
35 u64 offset, void *p, unsigned length)
36{
37 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
38/* struct osd_sense_info osi = {.key = 0};*/
39 int ret;
40
41 if (unlikely(!or)) {
42 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
43 return -ENOMEM;
44 }
45 ret = osd_req_read_kern(or, obj, offset, p, length);
46 if (unlikely(ret)) {
47 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
48 goto out;
49 }
50
51 ret = osd_finalize_request(or, 0, cred, NULL);
52 if (unlikely(ret)) {
53 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
54 goto out;
55 }
56
57 ret = osd_execute_request(or);
58 if (unlikely(ret))
59 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
60 /* osd_req_decode_sense(or, ret); */
61
62out:
63 osd_end_request(or);
64 return ret;
65}
66
67int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
68{
69 struct exofs_io_state *ios;
70
71 /*TODO: Maybe use kmem_cache per sbi of size
72 * exofs_io_state_size(sbi->s_numdevs)
73 */
74 ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL);
75 if (unlikely(!ios)) {
76 *pios = NULL;
77 return -ENOMEM;
78 }
79
80 ios->sbi = sbi;
81 ios->obj.partition = sbi->s_pid;
82 *pios = ios;
83 return 0;
84}
85
86void exofs_put_io_state(struct exofs_io_state *ios)
87{
88 if (ios) {
89 unsigned i;
90
91 for (i = 0; i < ios->numdevs; i++) {
92 struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
93
94 if (per_dev->or)
95 osd_end_request(per_dev->or);
96 if (per_dev->bio)
97 bio_put(per_dev->bio);
98 }
99
100 kfree(ios);
101 }
102}
103
104static void _sync_done(struct exofs_io_state *ios, void *p)
105{
106 struct completion *waiting = p;
107
108 complete(waiting);
109}
110
111static void _last_io(struct kref *kref)
112{
113 struct exofs_io_state *ios = container_of(
114 kref, struct exofs_io_state, kref);
115
116 ios->done(ios, ios->private);
117}
118
119static void _done_io(struct osd_request *or, void *p)
120{
121 struct exofs_io_state *ios = p;
122
123 kref_put(&ios->kref, _last_io);
124}
125
126static int exofs_io_execute(struct exofs_io_state *ios)
127{
128 DECLARE_COMPLETION_ONSTACK(wait);
129 bool sync = (ios->done == NULL);
130 int i, ret;
131
132 if (sync) {
133 ios->done = _sync_done;
134 ios->private = &wait;
135 }
136
137 for (i = 0; i < ios->numdevs; i++) {
138 struct osd_request *or = ios->per_dev[i].or;
139 if (unlikely(!or))
140 continue;
141
142 ret = osd_finalize_request(or, 0, ios->cred, NULL);
143 if (unlikely(ret)) {
144 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n",
145 ret);
146 return ret;
147 }
148 }
149
150 kref_init(&ios->kref);
151
152 for (i = 0; i < ios->numdevs; i++) {
153 struct osd_request *or = ios->per_dev[i].or;
154 if (unlikely(!or))
155 continue;
156
157 kref_get(&ios->kref);
158 osd_execute_request_async(or, _done_io, ios);
159 }
160
161 kref_put(&ios->kref, _last_io);
162 ret = 0;
163
164 if (sync) {
165 wait_for_completion(&wait);
166 ret = exofs_check_io(ios, NULL);
167 }
168 return ret;
169}
170
171int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
172{
173 enum osd_err_priority acumulated_osd_err = 0;
174 int acumulated_lin_err = 0;
175 int i;
176
177 for (i = 0; i < ios->numdevs; i++) {
178 struct osd_sense_info osi;
179 int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi);
180
181 if (likely(!ret))
182 continue;
183
184 if (unlikely(ret == -EFAULT)) {
185 EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__);
186 /*FIXME: All the pages in this device range should:
187 * clear_highpage(page);
188 */
189 }
190
191 if (osi.osd_err_pri >= acumulated_osd_err) {
192 acumulated_osd_err = osi.osd_err_pri;
193 acumulated_lin_err = ret;
194 }
195 }
196
197 /* TODO: raid specific residual calculations */
198 if (resid) {
199 if (likely(!acumulated_lin_err))
200 *resid = 0;
201 else
202 *resid = ios->length;
203 }
204
205 return acumulated_lin_err;
206}
207
208int exofs_sbi_create(struct exofs_io_state *ios)
209{
210 int i, ret;
211
212 for (i = 0; i < ios->sbi->s_numdevs; i++) {
213 struct osd_request *or;
214
215 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
216 if (unlikely(!or)) {
217 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
218 ret = -ENOMEM;
219 goto out;
220 }
221 ios->per_dev[i].or = or;
222 ios->numdevs++;
223
224 osd_req_create_object(or, &ios->obj);
225 }
226 ret = exofs_io_execute(ios);
227
228out:
229 return ret;
230}
231
232int exofs_sbi_remove(struct exofs_io_state *ios)
233{
234 int i, ret;
235
236 for (i = 0; i < ios->sbi->s_numdevs; i++) {
237 struct osd_request *or;
238
239 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
240 if (unlikely(!or)) {
241 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
242 ret = -ENOMEM;
243 goto out;
244 }
245 ios->per_dev[i].or = or;
246 ios->numdevs++;
247
248 osd_req_remove_object(or, &ios->obj);
249 }
250 ret = exofs_io_execute(ios);
251
252out:
253 return ret;
254}
255
256int exofs_sbi_write(struct exofs_io_state *ios)
257{
258 int i, ret;
259
260 for (i = 0; i < ios->sbi->s_numdevs; i++) {
261 struct osd_request *or;
262
263 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
264 if (unlikely(!or)) {
265 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
266 ret = -ENOMEM;
267 goto out;
268 }
269 ios->per_dev[i].or = or;
270 ios->numdevs++;
271
272 if (ios->bio) {
273 struct bio *bio;
274
275 if (i != 0) {
276 bio = bio_kmalloc(GFP_KERNEL,
277 ios->bio->bi_max_vecs);
278 if (unlikely(!bio)) {
279 ret = -ENOMEM;
280 goto out;
281 }
282
283 __bio_clone(bio, ios->bio);
284 bio->bi_bdev = NULL;
285 bio->bi_next = NULL;
286 ios->per_dev[i].bio = bio;
287 } else {
288 bio = ios->bio;
289 }
290
291 osd_req_write(or, &ios->obj, ios->offset, bio,
292 ios->length);
293/* EXOFS_DBGMSG("write sync=%d\n", sync);*/
294 } else if (ios->kern_buff) {
295 osd_req_write_kern(or, &ios->obj, ios->offset,
296 ios->kern_buff, ios->length);
297/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/
298 } else {
299 osd_req_set_attributes(or, &ios->obj);
300/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/
301 }
302
303 if (ios->out_attr)
304 osd_req_add_set_attr_list(or, ios->out_attr,
305 ios->out_attr_len);
306
307 if (ios->in_attr)
308 osd_req_add_get_attr_list(or, ios->in_attr,
309 ios->in_attr_len);
310 }
311 ret = exofs_io_execute(ios);
312
313out:
314 return ret;
315}
316
317int exofs_sbi_read(struct exofs_io_state *ios)
318{
319 int i, ret;
320
321 for (i = 0; i < 1; i++) {
322 struct osd_request *or;
323 unsigned first_dev = (unsigned)ios->obj.id;
324
325 first_dev %= ios->sbi->s_numdevs;
326 or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL);
327 if (unlikely(!or)) {
328 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
329 ret = -ENOMEM;
330 goto out;
331 }
332 ios->per_dev[i].or = or;
333 ios->numdevs++;
334
335 if (ios->bio) {
336 osd_req_read(or, &ios->obj, ios->offset, ios->bio,
337 ios->length);
338/* EXOFS_DBGMSG("read sync=%d\n", sync);*/
339 } else if (ios->kern_buff) {
340 osd_req_read_kern(or, &ios->obj, ios->offset,
341 ios->kern_buff, ios->length);
342/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
343 } else {
344 osd_req_get_attributes(or, &ios->obj);
345/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
346 }
347
348 if (ios->out_attr)
349 osd_req_add_set_attr_list(or, ios->out_attr,
350 ios->out_attr_len);
351
352 if (ios->in_attr)
353 osd_req_add_get_attr_list(or, ios->in_attr,
354 ios->in_attr_len);
355 }
356 ret = exofs_io_execute(ios);
357
358out:
359 return ret;
360}
361
362int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
363{
364 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
365 void *iter = NULL;
366 int nelem;
367
368 do {
369 nelem = 1;
370 osd_req_decode_get_attr_list(ios->per_dev[0].or,
371 &cur_attr, &nelem, &iter);
372 if ((cur_attr.attr_page == attr->attr_page) &&
373 (cur_attr.attr_id == attr->attr_id)) {
374 attr->len = cur_attr.len;
375 attr->val_ptr = cur_attr.val_ptr;
376 return 0;
377 }
378 } while (iter);
379
380 return -EIO;
381}
382
383int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
384{
385 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
386 struct exofs_io_state *ios;
387 struct osd_attr attr;
388 __be64 newsize;
389 int i, ret;
390
391 if (exofs_get_io_state(sbi, &ios))
392 return -ENOMEM;
393
394 ios->obj.id = exofs_oi_objno(oi);
395 ios->cred = oi->i_cred;
396
397 newsize = cpu_to_be64(size);
398 attr = g_attr_logical_length;
399 attr.val_ptr = &newsize;
400
401 for (i = 0; i < sbi->s_numdevs; i++) {
402 struct osd_request *or;
403
404 or = osd_start_request(sbi->s_ods[i], GFP_KERNEL);
405 if (unlikely(!or)) {
406 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
407 ret = -ENOMEM;
408 goto out;
409 }
410 ios->per_dev[i].or = or;
411 ios->numdevs++;
412
413 osd_req_set_attributes(or, &ios->obj);
414 osd_req_add_set_attr_list(or, &attr, 1);
415 }
416 ret = exofs_io_execute(ios);
417
418out:
419 exofs_put_io_state(ios);
420 return ret;
421}
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
deleted file mode 100644
index 4372542df284..000000000000
--- a/fs/exofs/osd.c
+++ /dev/null
@@ -1,125 +0,0 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
6 *
7 * This file is part of exofs.
8 *
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
14 *
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <scsi/scsi_device.h>
26#include <scsi/osd_sense.h>
27
28#include "exofs.h"
29
30int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
31{
32 struct osd_sense_info osi;
33 int ret = osd_req_decode_sense(or, &osi);
34
35 if (ret) { /* translate to Linux codes */
36 if (osi.additional_code == scsi_invalid_field_in_cdb) {
37 if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE)
38 ret = -EFAULT;
39 if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID)
40 ret = -ENOENT;
41 else
42 ret = -EINVAL;
43 } else if (osi.additional_code == osd_quota_error)
44 ret = -ENOSPC;
45 else
46 ret = -EIO;
47 }
48
49 /* FIXME: should be included in osd_sense_info */
50 if (in_resid)
51 *in_resid = or->in.req ? or->in.req->resid_len : 0;
52
53 if (out_resid)
54 *out_resid = or->out.req ? or->out.req->resid_len : 0;
55
56 return ret;
57}
58
59void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
60{
61 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
62}
63
64/*
65 * Perform a synchronous OSD operation.
66 */
67int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
68{
69 int ret;
70
71 or->timeout = timeout;
72 ret = osd_finalize_request(or, 0, credential, NULL);
73 if (ret) {
74 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
75 return ret;
76 }
77
78 ret = osd_execute_request(or);
79
80 if (ret)
81 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
82 /* osd_req_decode_sense(or, ret); */
83 return ret;
84}
85
86/*
87 * Perform an asynchronous OSD operation.
88 */
89int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
90 void *caller_context, u8 *cred)
91{
92 int ret;
93
94 ret = osd_finalize_request(or, 0, cred, NULL);
95 if (ret) {
96 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
97 return ret;
98 }
99
100 ret = osd_execute_request_async(or, async_done, caller_context);
101
102 if (ret)
103 EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret);
104 return ret;
105}
106
107int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
108{
109 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
110 void *iter = NULL;
111 int nelem;
112
113 do {
114 nelem = 1;
115 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
116 if ((cur_attr.attr_page == attr->attr_page) &&
117 (cur_attr.attr_id == attr->attr_id)) {
118 attr->len = cur_attr.len;
119 attr->val_ptr = cur_attr.val_ptr;
120 return 0;
121 }
122 } while (iter);
123
124 return -EIO;
125}
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
new file mode 100644
index 000000000000..423033addd1f
--- /dev/null
+++ b/fs/exofs/pnfs.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2008, 2009
3 * Boaz Harrosh <bharrosh@panasas.com>
4 *
5 * This file is part of exofs.
6 *
7 * exofs is free software; you can redistribute it and/or modify it under the
8 * terms of the GNU General Public License version 2 as published by the Free
9 * Software Foundation.
10 *
11 */
12
13/* FIXME: Remove this file once pnfs hits mainline */
14
15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__
17
18#if defined(CONFIG_PNFS)
19
20
21/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
22#include "../nfs/objlayout/pnfs_osd_xdr.h"
23
24#else /* defined(CONFIG_PNFS) */
25
26enum pnfs_iomode {
27 IOMODE_READ = 1,
28 IOMODE_RW = 2,
29 IOMODE_ANY = 3,
30};
31
32/* Layout Structure */
33enum pnfs_osd_raid_algorithm4 {
34 PNFS_OSD_RAID_0 = 1,
35 PNFS_OSD_RAID_4 = 2,
36 PNFS_OSD_RAID_5 = 3,
37 PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
38};
39
40struct pnfs_osd_data_map {
41 u32 odm_num_comps;
42 u64 odm_stripe_unit;
43 u32 odm_group_width;
44 u32 odm_group_depth;
45 u32 odm_mirror_cnt;
46 u32 odm_raid_algorithm;
47};
48
49#endif /* else defined(CONFIG_PNFS) */
50
51#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9f500dec3b59..a1d1e77b12eb 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -203,49 +203,45 @@ int exofs_sync_fs(struct super_block *sb, int wait)
203{ 203{
204 struct exofs_sb_info *sbi; 204 struct exofs_sb_info *sbi;
205 struct exofs_fscb *fscb; 205 struct exofs_fscb *fscb;
206 struct osd_request *or; 206 struct exofs_io_state *ios;
207 struct osd_obj_id obj;
208 int ret = -ENOMEM; 207 int ret = -ENOMEM;
209 208
210 fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
211 if (!fscb) {
212 EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
213 return -ENOMEM;
214 }
215
216 lock_super(sb); 209 lock_super(sb);
217 sbi = sb->s_fs_info; 210 sbi = sb->s_fs_info;
211 fscb = &sbi->s_fscb;
212
213 ret = exofs_get_io_state(sbi, &ios);
214 if (ret)
215 goto out;
216
217 /* Note: We only write the changing part of the fscb, i.e. up to the
218 * fscb->s_dev_table_oid member. There is no read-modify-write
219 * here.
220 */
221 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
222 memset(fscb, 0, ios->length);
218 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 223 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
219 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 224 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
220 fscb->s_magic = cpu_to_le16(sb->s_magic); 225 fscb->s_magic = cpu_to_le16(sb->s_magic);
221 fscb->s_newfs = 0; 226 fscb->s_newfs = 0;
227 fscb->s_version = EXOFS_FSCB_VER;
222 228
223 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 229 ios->obj.id = EXOFS_SUPER_ID;
224 if (unlikely(!or)) { 230 ios->offset = 0;
225 EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); 231 ios->kern_buff = fscb;
226 goto out; 232 ios->cred = sbi->s_cred;
227 }
228 233
229 obj.partition = sbi->s_pid; 234 ret = exofs_sbi_write(ios);
230 obj.id = EXOFS_SUPER_ID;
231 ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
232 if (unlikely(ret)) { 235 if (unlikely(ret)) {
233 EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); 236 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
234 goto out;
235 }
236
237 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
238 if (unlikely(ret)) {
239 EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
240 goto out; 237 goto out;
241 } 238 }
242 sb->s_dirt = 0; 239 sb->s_dirt = 0;
243 240
244out: 241out:
245 if (or) 242 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
246 osd_end_request(or); 243 exofs_put_io_state(ios);
247 unlock_super(sb); 244 unlock_super(sb);
248 kfree(fscb);
249 return ret; 245 return ret;
250} 246}
251 247
@@ -257,6 +253,29 @@ static void exofs_write_super(struct super_block *sb)
257 sb->s_dirt = 0; 253 sb->s_dirt = 0;
258} 254}
259 255
256static void _exofs_print_device(const char *msg, const char *dev_path,
257 struct osd_dev *od, u64 pid)
258{
259 const struct osd_dev_info *odi = osduld_device_info(od);
260
261 printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n",
262 msg, dev_path ?: "", odi->osdname, _LLU(pid));
263}
264
265void exofs_free_sbi(struct exofs_sb_info *sbi)
266{
267 while (sbi->s_numdevs) {
268 int i = --sbi->s_numdevs;
269 struct osd_dev *od = sbi->s_ods[i];
270
271 if (od) {
272 sbi->s_ods[i] = NULL;
273 osduld_put_device(od);
274 }
275 }
276 kfree(sbi);
277}
278
260/* 279/*
261 * This function is called when the vfs is freeing the superblock. We just 280 * This function is called when the vfs is freeing the superblock. We just
262 * need to free our own part. 281 * need to free our own part.
@@ -279,11 +298,182 @@ static void exofs_put_super(struct super_block *sb)
279 msecs_to_jiffies(100)); 298 msecs_to_jiffies(100));
280 } 299 }
281 300
282 osduld_put_device(sbi->s_dev); 301 _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid);
283 kfree(sb->s_fs_info); 302
303 exofs_free_sbi(sbi);
284 sb->s_fs_info = NULL; 304 sb->s_fs_info = NULL;
285} 305}
286 306
307static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
308 struct exofs_device_table *dt)
309{
310 sbi->data_map.odm_num_comps =
311 le32_to_cpu(dt->dt_data_map.cb_num_comps);
312 sbi->data_map.odm_stripe_unit =
313 le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
314 sbi->data_map.odm_group_width =
315 le32_to_cpu(dt->dt_data_map.cb_group_width);
316 sbi->data_map.odm_group_depth =
317 le32_to_cpu(dt->dt_data_map.cb_group_depth);
318 sbi->data_map.odm_mirror_cnt =
319 le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
320 sbi->data_map.odm_raid_algorithm =
321 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
322
323/* FIXME: Hard coded mirror only for now. if not so do not mount */
324 if ((sbi->data_map.odm_num_comps != numdevs) ||
325 (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) ||
326 (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) ||
327 (sbi->data_map.odm_mirror_cnt != (numdevs - 1)))
328 return -EINVAL;
329 else
330 return 0;
331}
332
333/* @odi is valid only as long as @fscb_dev is valid */
334static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
335 struct osd_dev_info *odi)
336{
337 odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
338 memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
339
340 odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
341 odi->osdname = dt_dev->osdname;
342
343 /* FIXME support long names. Will need a _put function */
344 if (dt_dev->long_name_offset)
345 return -EINVAL;
346
347 /* Make sure osdname is printable!
348 * mkexofs should give us space for a null-terminator else the
349 * device-table is invalid.
350 */
351 if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname)))
352 odi->osdname_len = sizeof(dt_dev->osdname) - 1;
353 dt_dev->osdname[odi->osdname_len] = 0;
354
355 /* If it's all zeros something is bad we read past end-of-obj */
356 return !(odi->systemid_len || odi->osdname_len);
357}
358
359static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
360 unsigned table_count)
361{
362 struct exofs_sb_info *sbi = *psbi;
363 struct osd_dev *fscb_od;
364 struct osd_obj_id obj = {.partition = sbi->s_pid,
365 .id = EXOFS_DEVTABLE_ID};
366 struct exofs_device_table *dt;
367 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
368 sizeof(*dt);
369 unsigned numdevs, i;
370 int ret;
371
372 dt = kmalloc(table_bytes, GFP_KERNEL);
373 if (unlikely(!dt)) {
374 EXOFS_ERR("ERROR: allocating %x bytes for device table\n",
375 table_bytes);
376 return -ENOMEM;
377 }
378
379 fscb_od = sbi->s_ods[0];
380 sbi->s_ods[0] = NULL;
381 sbi->s_numdevs = 0;
382 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
383 if (unlikely(ret)) {
384 EXOFS_ERR("ERROR: reading device table\n");
385 goto out;
386 }
387
388 numdevs = le64_to_cpu(dt->dt_num_devices);
389 if (unlikely(!numdevs)) {
390 ret = -EINVAL;
391 goto out;
392 }
393 WARN_ON(table_count != numdevs);
394
395 ret = _read_and_match_data_map(sbi, numdevs, dt);
396 if (unlikely(ret))
397 goto out;
398
399 if (likely(numdevs > 1)) {
400 unsigned size = numdevs * sizeof(sbi->s_ods[0]);
401
402 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
403 if (unlikely(!sbi)) {
404 ret = -ENOMEM;
405 goto out;
406 }
407 memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0]));
408 *psbi = sbi;
409 }
410
411 for (i = 0; i < numdevs; i++) {
412 struct exofs_fscb fscb;
413 struct osd_dev_info odi;
414 struct osd_dev *od;
415
416 if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) {
417 EXOFS_ERR("ERROR: Read all-zeros device entry\n");
418 ret = -EINVAL;
419 goto out;
420 }
421
422 printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
423 i, odi.osdname);
424
425 /* On all devices the device table is identical. The user can
426 * specify any one of the participating devices on the command
427 * line. We always keep them in device-table order.
428 */
429 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
430 sbi->s_ods[i] = fscb_od;
431 ++sbi->s_numdevs;
432 fscb_od = NULL;
433 continue;
434 }
435
436 od = osduld_info_lookup(&odi);
437 if (unlikely(IS_ERR(od))) {
438 ret = PTR_ERR(od);
439 EXOFS_ERR("ERROR: device requested is not found "
440 "osd_name-%s =>%d\n", odi.osdname, ret);
441 goto out;
442 }
443
444 sbi->s_ods[i] = od;
445 ++sbi->s_numdevs;
446
447 /* Read the fscb of the other devices to make sure the FS
448 * partition is there.
449 */
450 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
451 sizeof(fscb));
452 if (unlikely(ret)) {
453 EXOFS_ERR("ERROR: Malformed participating device "
454 "error reading fscb osd_name-%s\n",
455 odi.osdname);
456 goto out;
457 }
458
459 /* TODO: verify other information is correct and FS-uuid
460 * matches. Benny what did you say about device table
461 * generation and old devices?
462 */
463 }
464
465out:
466 kfree(dt);
467 if (unlikely(!ret && fscb_od)) {
468 EXOFS_ERR(
469 "ERROR: Bad device-table container device not present\n");
470 osduld_put_device(fscb_od);
471 ret = -EINVAL;
472 }
473
474 return ret;
475}
476
287/* 477/*
288 * Read the superblock from the OSD and fill in the fields 478 * Read the superblock from the OSD and fill in the fields
289 */ 479 */
@@ -292,24 +482,25 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
292 struct inode *root; 482 struct inode *root;
293 struct exofs_mountopt *opts = data; 483 struct exofs_mountopt *opts = data;
294 struct exofs_sb_info *sbi; /*extended info */ 484 struct exofs_sb_info *sbi; /*extended info */
485 struct osd_dev *od; /* Master device */
295 struct exofs_fscb fscb; /*on-disk superblock info */ 486 struct exofs_fscb fscb; /*on-disk superblock info */
296 struct osd_request *or = NULL;
297 struct osd_obj_id obj; 487 struct osd_obj_id obj;
488 unsigned table_count;
298 int ret; 489 int ret;
299 490
300 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 491 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
301 if (!sbi) 492 if (!sbi)
302 return -ENOMEM; 493 return -ENOMEM;
303 sb->s_fs_info = sbi;
304 494
305 /* use mount options to fill superblock */ 495 /* use mount options to fill superblock */
306 sbi->s_dev = osduld_path_lookup(opts->dev_name); 496 od = osduld_path_lookup(opts->dev_name);
307 if (IS_ERR(sbi->s_dev)) { 497 if (IS_ERR(od)) {
308 ret = PTR_ERR(sbi->s_dev); 498 ret = PTR_ERR(od);
309 sbi->s_dev = NULL;
310 goto free_sbi; 499 goto free_sbi;
311 } 500 }
312 501
502 sbi->s_ods[0] = od;
503 sbi->s_numdevs = 1;
313 sbi->s_pid = opts->pid; 504 sbi->s_pid = opts->pid;
314 sbi->s_timeout = opts->timeout; 505 sbi->s_timeout = opts->timeout;
315 506
@@ -323,35 +514,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
323 sb->s_bdev = NULL; 514 sb->s_bdev = NULL;
324 sb->s_dev = 0; 515 sb->s_dev = 0;
325 516
326 /* read data from on-disk superblock object */
327 obj.partition = sbi->s_pid; 517 obj.partition = sbi->s_pid;
328 obj.id = EXOFS_SUPER_ID; 518 obj.id = EXOFS_SUPER_ID;
329 exofs_make_credential(sbi->s_cred, &obj); 519 exofs_make_credential(sbi->s_cred, &obj);
330 520
331 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 521 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
332 if (unlikely(!or)) { 522 if (unlikely(ret))
333 if (!silent)
334 EXOFS_ERR(
335 "exofs_fill_super: osd_start_request failed.\n");
336 ret = -ENOMEM;
337 goto free_sbi;
338 }
339 ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
340 if (unlikely(ret)) {
341 if (!silent)
342 EXOFS_ERR(
343 "exofs_fill_super: osd_req_read_kern failed.\n");
344 ret = -ENOMEM;
345 goto free_sbi;
346 }
347
348 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
349 if (unlikely(ret)) {
350 if (!silent)
351 EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
352 ret = -EIO;
353 goto free_sbi; 523 goto free_sbi;
354 }
355 524
356 sb->s_magic = le16_to_cpu(fscb.s_magic); 525 sb->s_magic = le16_to_cpu(fscb.s_magic);
357 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 526 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
@@ -364,12 +533,26 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
364 ret = -EINVAL; 533 ret = -EINVAL;
365 goto free_sbi; 534 goto free_sbi;
366 } 535 }
536 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
537 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
538 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
539 ret = -EINVAL;
540 goto free_sbi;
541 }
367 542
368 /* start generation numbers from a random point */ 543 /* start generation numbers from a random point */
369 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 544 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
370 spin_lock_init(&sbi->s_next_gen_lock); 545 spin_lock_init(&sbi->s_next_gen_lock);
371 546
547 table_count = le64_to_cpu(fscb.s_dev_table_count);
548 if (table_count) {
549 ret = exofs_read_lookup_dev_table(&sbi, table_count);
550 if (unlikely(ret))
551 goto free_sbi;
552 }
553
372 /* set up operation vectors */ 554 /* set up operation vectors */
555 sb->s_fs_info = sbi;
373 sb->s_op = &exofs_sops; 556 sb->s_op = &exofs_sops;
374 sb->s_export_op = &exofs_export_ops; 557 sb->s_export_op = &exofs_export_ops;
375 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); 558 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
@@ -395,16 +578,15 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
395 goto free_sbi; 578 goto free_sbi;
396 } 579 }
397 580
398 ret = 0; 581 _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0],
399out: 582 sbi->s_pid);
400 if (or) 583 return 0;
401 osd_end_request(or);
402 return ret;
403 584
404free_sbi: 585free_sbi:
405 osduld_put_device(sbi->s_dev); /* NULL safe */ 586 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
406 kfree(sbi); 587 opts->dev_name, sbi->s_pid, ret);
407 goto out; 588 exofs_free_sbi(sbi);
589 return ret;
408} 590}
409 591
410/* 592/*
@@ -433,7 +615,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
433{ 615{
434 struct super_block *sb = dentry->d_sb; 616 struct super_block *sb = dentry->d_sb;
435 struct exofs_sb_info *sbi = sb->s_fs_info; 617 struct exofs_sb_info *sbi = sb->s_fs_info;
436 struct osd_obj_id obj = {sbi->s_pid, 0}; 618 struct exofs_io_state *ios;
437 struct osd_attr attrs[] = { 619 struct osd_attr attrs[] = {
438 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 620 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
439 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), 621 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -442,32 +624,33 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
442 }; 624 };
443 uint64_t capacity = ULLONG_MAX; 625 uint64_t capacity = ULLONG_MAX;
444 uint64_t used = ULLONG_MAX; 626 uint64_t used = ULLONG_MAX;
445 struct osd_request *or;
446 uint8_t cred_a[OSD_CAP_LEN]; 627 uint8_t cred_a[OSD_CAP_LEN];
447 int ret; 628 int ret;
448 629
449 /* get used/capacity attributes */ 630 ret = exofs_get_io_state(sbi, &ios);
450 exofs_make_credential(cred_a, &obj); 631 if (ret) {
451 632 EXOFS_DBGMSG("exofs_get_io_state failed.\n");
452 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 633 return ret;
453 if (unlikely(!or)) {
454 EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
455 return -ENOMEM;
456 } 634 }
457 635
458 osd_req_get_attributes(or, &obj); 636 exofs_make_credential(cred_a, &ios->obj);
459 osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); 637 ios->cred = sbi->s_cred;
460 ret = exofs_sync_op(or, sbi->s_timeout, cred_a); 638 ios->in_attr = attrs;
639 ios->in_attr_len = ARRAY_SIZE(attrs);
640
641 ret = exofs_sbi_read(ios);
461 if (unlikely(ret)) 642 if (unlikely(ret))
462 goto out; 643 goto out;
463 644
464 ret = extract_attr_from_req(or, &attrs[0]); 645 ret = extract_attr_from_ios(ios, &attrs[0]);
465 if (likely(!ret)) 646 if (likely(!ret)) {
466 capacity = get_unaligned_be64(attrs[0].val_ptr); 647 capacity = get_unaligned_be64(attrs[0].val_ptr);
467 else 648 if (unlikely(!capacity))
649 capacity = ULLONG_MAX;
650 } else
468 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); 651 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
469 652
470 ret = extract_attr_from_req(or, &attrs[1]); 653 ret = extract_attr_from_ios(ios, &attrs[1]);
471 if (likely(!ret)) 654 if (likely(!ret))
472 used = get_unaligned_be64(attrs[1].val_ptr); 655 used = get_unaligned_be64(attrs[1].val_ptr);
473 else 656 else
@@ -476,15 +659,15 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
476 /* fill in the stats buffer */ 659 /* fill in the stats buffer */
477 buf->f_type = EXOFS_SUPER_MAGIC; 660 buf->f_type = EXOFS_SUPER_MAGIC;
478 buf->f_bsize = EXOFS_BLKSIZE; 661 buf->f_bsize = EXOFS_BLKSIZE;
479 buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); 662 buf->f_blocks = capacity >> 9;
480 buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); 663 buf->f_bfree = (capacity - used) >> 9;
481 buf->f_bavail = buf->f_bfree; 664 buf->f_bavail = buf->f_bfree;
482 buf->f_files = sbi->s_numfiles; 665 buf->f_files = sbi->s_numfiles;
483 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; 666 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
484 buf->f_namelen = EXOFS_NAME_LEN; 667 buf->f_namelen = EXOFS_NAME_LEN;
485 668
486out: 669out:
487 osd_end_request(or); 670 exofs_put_io_state(ios);
488 return ret; 671 return ret;
489} 672}
490 673