aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exofs')
-rw-r--r--fs/exofs/common.h22
-rw-r--r--fs/exofs/dir.c37
-rw-r--r--fs/exofs/exofs.h6
-rw-r--r--fs/exofs/file.c20
-rw-r--r--fs/exofs/inode.c126
-rw-r--r--fs/exofs/ios.c10
-rw-r--r--fs/exofs/namei.c10
-rw-r--r--fs/exofs/super.c211
8 files changed, 305 insertions, 137 deletions
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index f0d520312d8b..3bbd46956d77 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -53,10 +53,14 @@
53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
54 54
55/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
56/* Inode attrs */
56# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) 57# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
57# define EXOFS_ATTR_INODE_DATA 1 58# define EXOFS_ATTR_INODE_DATA 1
58# define EXOFS_ATTR_INODE_FILE_LAYOUT 2 59# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
59# define EXOFS_ATTR_INODE_DIR_LAYOUT 3 60# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
61/* Partition attrs */
62# define EXOFS_APAGE_SB_DATA (0xF0000000U + 3)
63# define EXOFS_ATTR_SB_STATS 1
60 64
61/* 65/*
62 * The maximum number of files we can have is limited by the size of the 66 * The maximum number of files we can have is limited by the size of the
@@ -86,8 +90,8 @@ enum {
86 */ 90 */
87enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; 91enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
88struct exofs_fscb { 92struct exofs_fscb {
89 __le64 s_nextid; /* Highest object ID used */ 93 __le64 s_nextid; /* Only used after mkfs */
90 __le64 s_numfiles; /* Number of files on fs */ 94 __le64 s_numfiles; /* Only used after mkfs */
91 __le32 s_version; /* == EXOFS_FSCB_VER */ 95 __le32 s_version; /* == EXOFS_FSCB_VER */
92 __le16 s_magic; /* Magic signature */ 96 __le16 s_magic; /* Magic signature */
93 __le16 s_newfs; /* Non-zero if this is a new fs */ 97 __le16 s_newfs; /* Non-zero if this is a new fs */
@@ -98,10 +102,20 @@ struct exofs_fscb {
98} __packed; 102} __packed;
99 103
100/* 104/*
105 * This struct is set on the FS partition's attributes.
106 * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together
107 * with the create command, to atomically persist the sb writeable information.
108 */
109struct exofs_sb_stats {
110 __le64 s_nextid; /* Highest object ID used */
111 __le64 s_numfiles; /* Number of files on fs */
112} __packed;
113
114/*
101 * Describes the raid used in the FS. It is part of the device table. 115 * Describes the raid used in the FS. It is part of the device table.
102 * This here is taken from the pNFS-objects definition. In exofs we 116 * This here is taken from the pNFS-objects definition. In exofs we
103 * use one raid policy through-out the filesystem. (NOTE: the funny 117 * use one raid policy through-out the filesystem. (NOTE: the funny
104 * alignment at begining. We take care of it at exofs_device_table. 118 * alignment at beginning. We take care of it at exofs_device_table.
105 */ 119 */
106struct exofs_dt_data_map { 120struct exofs_dt_data_map {
107 __le32 cb_num_comps; 121 __le32 cb_num_comps;
@@ -122,7 +136,7 @@ struct exofs_dt_device_info {
122 u8 systemid[OSD_SYSTEMID_LEN]; 136 u8 systemid[OSD_SYSTEMID_LEN];
123 __le64 long_name_offset; /* If !0 then offset-in-file */ 137 __le64 long_name_offset; /* If !0 then offset-in-file */
124 __le32 osdname_len; /* */ 138 __le32 osdname_len; /* */
125 u8 osdname[44]; /* Embbeded, Ususally an asci uuid */ 139 u8 osdname[44]; /* Embbeded, Usually an asci uuid */
126} __packed; 140} __packed;
127 141
128/* 142/*
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index d91e9d829bc1..d0941c6a1f72 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -124,7 +124,7 @@ out:
124 124
125Ebadsize: 125Ebadsize:
126 EXOFS_ERR("ERROR [exofs_check_page]: " 126 EXOFS_ERR("ERROR [exofs_check_page]: "
127 "size of directory #%lu is not a multiple of chunk size", 127 "size of directory(0x%lx) is not a multiple of chunk size\n",
128 dir->i_ino 128 dir->i_ino
129 ); 129 );
130 goto fail; 130 goto fail;
@@ -142,8 +142,8 @@ Espan:
142 goto bad_entry; 142 goto bad_entry;
143bad_entry: 143bad_entry:
144 EXOFS_ERR( 144 EXOFS_ERR(
145 "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - " 145 "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
146 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d", 146 "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, 147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
148 _LLU(le64_to_cpu(p->inode_no)), 148 _LLU(le64_to_cpu(p->inode_no)),
149 rec_len, p->name_len); 149 rec_len, p->name_len);
@@ -151,8 +151,8 @@ bad_entry:
151Eend: 151Eend:
152 p = (struct exofs_dir_entry *)(kaddr + offs); 152 p = (struct exofs_dir_entry *)(kaddr + offs);
153 EXOFS_ERR("ERROR [exofs_check_page]: " 153 EXOFS_ERR("ERROR [exofs_check_page]: "
154 "entry in directory #%lu spans the page boundary" 154 "entry in directory(0x%lx) spans the page boundary"
155 "offset=%lu, inode=%llu", 155 "offset=%lu, inode=0x%llx\n",
156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, 156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
157 _LLU(le64_to_cpu(p->inode_no))); 157 _LLU(le64_to_cpu(p->inode_no)));
158fail: 158fail:
@@ -261,9 +261,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
261 struct page *page = exofs_get_page(inode, n); 261 struct page *page = exofs_get_page(inode, n);
262 262
263 if (IS_ERR(page)) { 263 if (IS_ERR(page)) {
264 EXOFS_ERR("ERROR: " 264 EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
265 "bad page in #%lu", 265 inode->i_ino);
266 inode->i_ino);
267 filp->f_pos += PAGE_CACHE_SIZE - offset; 266 filp->f_pos += PAGE_CACHE_SIZE - offset;
268 return PTR_ERR(page); 267 return PTR_ERR(page);
269 } 268 }
@@ -283,7 +282,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
283 for (; (char *)de <= limit; de = exofs_next_entry(de)) { 282 for (; (char *)de <= limit; de = exofs_next_entry(de)) {
284 if (de->rec_len == 0) { 283 if (de->rec_len == 0) {
285 EXOFS_ERR("ERROR: " 284 EXOFS_ERR("ERROR: "
286 "zero-length directory entry"); 285 "zero-length entry in directory(0x%lx)\n",
286 inode->i_ino);
287 exofs_put_page(page); 287 exofs_put_page(page);
288 return -EIO; 288 return -EIO;
289 } 289 }
@@ -342,9 +342,9 @@ struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
342 kaddr += exofs_last_byte(dir, n) - reclen; 342 kaddr += exofs_last_byte(dir, n) - reclen;
343 while ((char *) de <= kaddr) { 343 while ((char *) de <= kaddr) {
344 if (de->rec_len == 0) { 344 if (de->rec_len == 0) {
345 EXOFS_ERR( 345 EXOFS_ERR("ERROR: zero-length entry in "
346 "ERROR: exofs_find_entry: " 346 "directory(0x%lx)\n",
347 "zero-length directory entry"); 347 dir->i_ino);
348 exofs_put_page(page); 348 exofs_put_page(page);
349 goto out; 349 goto out;
350 } 350 }
@@ -420,7 +420,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
420 err = exofs_write_begin(NULL, page->mapping, pos, len, 420 err = exofs_write_begin(NULL, page->mapping, pos, len,
421 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 421 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
422 if (err) 422 if (err)
423 EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n", 423 EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n",
424 err); 424 err);
425 425
426 de->inode_no = cpu_to_le64(inode->i_ino); 426 de->inode_no = cpu_to_le64(inode->i_ino);
@@ -472,7 +472,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
472 } 472 }
473 if (de->rec_len == 0) { 473 if (de->rec_len == 0) {
474 EXOFS_ERR("ERROR: exofs_add_link: " 474 EXOFS_ERR("ERROR: exofs_add_link: "
475 "zero-length directory entry"); 475 "zero-length entry in directory(0x%lx)\n",
476 inode->i_ino);
476 err = -EIO; 477 err = -EIO;
477 goto out_unlock; 478 goto out_unlock;
478 } 479 }
@@ -491,7 +492,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
491 exofs_put_page(page); 492 exofs_put_page(page);
492 } 493 }
493 494
494 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode); 495 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n",
496 dentry, inode->i_ino);
495 return -EINVAL; 497 return -EINVAL;
496 498
497got_it: 499got_it:
@@ -542,7 +544,8 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
542 while (de < dir) { 544 while (de < dir) {
543 if (de->rec_len == 0) { 545 if (de->rec_len == 0) {
544 EXOFS_ERR("ERROR: exofs_delete_entry:" 546 EXOFS_ERR("ERROR: exofs_delete_entry:"
545 "zero-length directory entry"); 547 "zero-length entry in directory(0x%lx)\n",
548 inode->i_ino);
546 err = -EIO; 549 err = -EIO;
547 goto out; 550 goto out;
548 } 551 }
@@ -556,7 +559,7 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
556 err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0, 559 err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0,
557 &page, NULL); 560 &page, NULL);
558 if (err) 561 if (err)
559 EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n", 562 EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILED => %d\n",
560 err); 563 err);
561 if (pde) 564 if (pde)
562 pde->rec_len = cpu_to_le16(to - from); 565 pde->rec_len = cpu_to_le16(to - from);
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 2dc925fa1010..c965806c2821 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -77,7 +77,7 @@ struct exofs_layout {
77 * our extension to the in-memory superblock 77 * our extension to the in-memory superblock
78 */ 78 */
79struct exofs_sb_info { 79struct exofs_sb_info {
80 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ 80 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
81 int s_timeout; /* timeout for OSD operations */ 81 int s_timeout; /* timeout for OSD operations */
82 uint64_t s_nextid; /* highest object ID used */ 82 uint64_t s_nextid; /* highest object ID used */
83 uint32_t s_numfiles; /* number of files on fs */ 83 uint32_t s_numfiles; /* number of files on fs */
@@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
256} 256}
257 257
258/* inode.c */ 258/* inode.c */
259unsigned exofs_max_io_pages(struct exofs_layout *layout,
260 unsigned expected_pages);
259int exofs_setattr(struct dentry *, struct iattr *); 261int exofs_setattr(struct dentry *, struct iattr *);
260int exofs_write_begin(struct file *file, struct address_space *mapping, 262int exofs_write_begin(struct file *file, struct address_space *mapping,
261 loff_t pos, unsigned len, unsigned flags, 263 loff_t pos, unsigned len, unsigned flags,
@@ -279,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
279 struct inode *); 281 struct inode *);
280 282
281/* super.c */ 283/* super.c */
282int exofs_sync_fs(struct super_block *sb, int wait); 284int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
283 285
284/********************* 286/*********************
285 * operation vectors * 287 * operation vectors *
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 68cb23e3bb98..45ca323d8363 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -45,26 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
45static int exofs_file_fsync(struct file *filp, int datasync) 45static int exofs_file_fsync(struct file *filp, int datasync)
46{ 46{
47 int ret; 47 int ret;
48 struct inode *inode = filp->f_mapping->host;
49 struct writeback_control wbc = {
50 .sync_mode = WB_SYNC_ALL,
51 .nr_to_write = 0, /* metadata-only; caller takes care of data */
52 };
53 struct super_block *sb;
54
55 if (!(inode->i_state & I_DIRTY))
56 return 0;
57 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
58 return 0;
59
60 ret = sync_inode(inode, &wbc);
61
62 /* This is a good place to write the sb */
63 /* TODO: Sechedule an sb-sync on create */
64 sb = inode->i_sb;
65 if (sb->s_dirt)
66 exofs_sync_fs(sb, 1);
67 48
49 ret = sync_inode_metadata(filp->f_mapping->host, 1);
68 return ret; 50 return ret;
69} 51}
70 52
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3eadd97324b1..8472c098445d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
43 PAGE_SIZE / sizeof(struct page *), 43 PAGE_SIZE / sizeof(struct page *),
44}; 44};
45 45
46unsigned exofs_max_io_pages(struct exofs_layout *layout,
47 unsigned expected_pages)
48{
49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
50
51 /* TODO: easily support bio chaining */
52 pages = min_t(unsigned, pages,
53 layout->group_width * BIO_MAX_PAGES_KMALLOC);
54 return pages;
55}
56
46struct page_collect { 57struct page_collect {
47 struct exofs_sb_info *sbi; 58 struct exofs_sb_info *sbi;
48 struct inode *inode; 59 struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
97 108
98static int pcol_try_alloc(struct page_collect *pcol) 109static int pcol_try_alloc(struct page_collect *pcol)
99{ 110{
100 unsigned pages = min_t(unsigned, pcol->expected_pages, 111 unsigned pages;
101 MAX_PAGES_KMALLOC);
102 112
103 if (!pcol->ios) { /* First time allocate io_state */ 113 if (!pcol->ios) { /* First time allocate io_state */
104 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 114 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
108 } 118 }
109 119
110 /* TODO: easily support bio chaining */ 120 /* TODO: easily support bio chaining */
111 pages = min_t(unsigned, pages, 121 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
112 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
113 122
114 for (; pages; pages >>= 1) { 123 for (; pages; pages >>= 1) {
115 pcol->pages = kmalloc(pages * sizeof(struct page *), 124 pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -185,7 +194,7 @@ static void update_write_page(struct page *page, int ret)
185/* Called at the end of reads, to optionally unlock pages and update their 194/* Called at the end of reads, to optionally unlock pages and update their
186 * status. 195 * status.
187 */ 196 */
188static int __readpages_done(struct page_collect *pcol, bool do_unlock) 197static int __readpages_done(struct page_collect *pcol)
189{ 198{
190 int i; 199 int i;
191 u64 resid; 200 u64 resid;
@@ -221,7 +230,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
221 page_stat ? "bad_bytes" : "good_bytes"); 230 page_stat ? "bad_bytes" : "good_bytes");
222 231
223 ret = update_read_page(page, page_stat); 232 ret = update_read_page(page, page_stat);
224 if (do_unlock) 233 if (!pcol->read_4_write)
225 unlock_page(page); 234 unlock_page(page);
226 length += PAGE_SIZE; 235 length += PAGE_SIZE;
227 } 236 }
@@ -236,7 +245,7 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
236{ 245{
237 struct page_collect *pcol = p; 246 struct page_collect *pcol = p;
238 247
239 __readpages_done(pcol, true); 248 __readpages_done(pcol);
240 atomic_dec(&pcol->sbi->s_curr_pending); 249 atomic_dec(&pcol->sbi->s_curr_pending);
241 kfree(pcol); 250 kfree(pcol);
242} 251}
@@ -257,7 +266,7 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
257 } 266 }
258} 267}
259 268
260static int read_exec(struct page_collect *pcol, bool is_sync) 269static int read_exec(struct page_collect *pcol)
261{ 270{
262 struct exofs_i_info *oi = exofs_i(pcol->inode); 271 struct exofs_i_info *oi = exofs_i(pcol->inode);
263 struct exofs_io_state *ios = pcol->ios; 272 struct exofs_io_state *ios = pcol->ios;
@@ -267,17 +276,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
267 if (!pcol->pages) 276 if (!pcol->pages)
268 return 0; 277 return 0;
269 278
270 /* see comment in _readpage() about sync reads */
271 WARN_ON(is_sync && (pcol->nr_pages != 1));
272
273 ios->pages = pcol->pages; 279 ios->pages = pcol->pages;
274 ios->nr_pages = pcol->nr_pages; 280 ios->nr_pages = pcol->nr_pages;
275 ios->length = pcol->length; 281 ios->length = pcol->length;
276 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 282 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
277 283
278 if (is_sync) { 284 if (pcol->read_4_write) {
279 exofs_oi_read(oi, pcol->ios); 285 exofs_oi_read(oi, pcol->ios);
280 return __readpages_done(pcol, false); 286 return __readpages_done(pcol);
281 } 287 }
282 288
283 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 289 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -303,7 +309,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
303 return 0; 309 return 0;
304 310
305err: 311err:
306 if (!is_sync) 312 if (!pcol->read_4_write)
307 _unlock_pcol_pages(pcol, ret, READ); 313 _unlock_pcol_pages(pcol, ret, READ);
308 314
309 pcol_free(pcol); 315 pcol_free(pcol);
@@ -353,10 +359,12 @@ static int readpage_strip(void *data, struct page *page)
353 359
354 if (!pcol->read_4_write) 360 if (!pcol->read_4_write)
355 unlock_page(page); 361 unlock_page(page);
356 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 362 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
357 " splitting\n", inode->i_ino, page->index); 363 "read_4_write=%d index=0x%lx end_index=0x%lx "
364 "splitting\n", inode->i_ino, len,
365 pcol->read_4_write, page->index, end_index);
358 366
359 return read_exec(pcol, false); 367 return read_exec(pcol);
360 } 368 }
361 369
362try_again: 370try_again:
@@ -366,7 +374,7 @@ try_again:
366 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 374 } else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
367 page->index)) { 375 page->index)) {
368 /* Discontinuity detected, split the request */ 376 /* Discontinuity detected, split the request */
369 ret = read_exec(pcol, false); 377 ret = read_exec(pcol);
370 if (unlikely(ret)) 378 if (unlikely(ret))
371 goto fail; 379 goto fail;
372 goto try_again; 380 goto try_again;
@@ -391,7 +399,7 @@ try_again:
391 page, len, pcol->nr_pages, pcol->length); 399 page, len, pcol->nr_pages, pcol->length);
392 400
393 /* split the request, and start again with current page */ 401 /* split the request, and start again with current page */
394 ret = read_exec(pcol, false); 402 ret = read_exec(pcol);
395 if (unlikely(ret)) 403 if (unlikely(ret))
396 goto fail; 404 goto fail;
397 405
@@ -420,27 +428,24 @@ static int exofs_readpages(struct file *file, struct address_space *mapping,
420 return ret; 428 return ret;
421 } 429 }
422 430
423 return read_exec(&pcol, false); 431 return read_exec(&pcol);
424} 432}
425 433
426static int _readpage(struct page *page, bool is_sync) 434static int _readpage(struct page *page, bool read_4_write)
427{ 435{
428 struct page_collect pcol; 436 struct page_collect pcol;
429 int ret; 437 int ret;
430 438
431 _pcol_init(&pcol, 1, page->mapping->host); 439 _pcol_init(&pcol, 1, page->mapping->host);
432 440
433 /* readpage_strip might call read_exec(,is_sync==false) at several 441 pcol.read_4_write = read_4_write;
434 * places but not if we have a single page.
435 */
436 pcol.read_4_write = is_sync;
437 ret = readpage_strip(&pcol, page); 442 ret = readpage_strip(&pcol, page);
438 if (ret) { 443 if (ret) {
439 EXOFS_ERR("_readpage => %d\n", ret); 444 EXOFS_ERR("_readpage => %d\n", ret);
440 return ret; 445 return ret;
441 } 446 }
442 447
443 return read_exec(&pcol, is_sync); 448 return read_exec(&pcol);
444} 449}
445 450
446/* 451/*
@@ -511,7 +516,7 @@ static int write_exec(struct page_collect *pcol)
511 516
512 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 517 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
513 if (!pcol_copy) { 518 if (!pcol_copy) {
514 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); 519 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
515 ret = -ENOMEM; 520 ret = -ENOMEM;
516 goto err; 521 goto err;
517 } 522 }
@@ -527,7 +532,7 @@ static int write_exec(struct page_collect *pcol)
527 532
528 ret = exofs_oi_write(oi, ios); 533 ret = exofs_oi_write(oi, ios);
529 if (unlikely(ret)) { 534 if (unlikely(ret)) {
530 EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); 535 EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
531 goto err; 536 goto err;
532 } 537 }
533 538
@@ -628,7 +633,7 @@ try_again:
628 /* split the request, next loop will start again */ 633 /* split the request, next loop will start again */
629 ret = write_exec(pcol); 634 ret = write_exec(pcol);
630 if (unlikely(ret)) { 635 if (unlikely(ret)) {
631 EXOFS_DBGMSG("write_exec faild => %d", ret); 636 EXOFS_DBGMSG("write_exec failed => %d", ret);
632 goto fail; 637 goto fail;
633 } 638 }
634 639
@@ -719,7 +724,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
719 ret = simple_write_begin(file, mapping, pos, len, flags, pagep, 724 ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
720 fsdata); 725 fsdata);
721 if (ret) { 726 if (ret) {
722 EXOFS_DBGMSG("simple_write_begin faild\n"); 727 EXOFS_DBGMSG("simple_write_begin failed\n");
723 goto out; 728 goto out;
724 } 729 }
725 730
@@ -728,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
728 733
729 /* read modify write */ 734 /* read modify write */
730 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 735 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
736 loff_t i_size = i_size_read(mapping->host);
737 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
738 size_t rlen;
739
740 if (page->index < end_index)
741 rlen = PAGE_CACHE_SIZE;
742 else if (page->index == end_index)
743 rlen = i_size & ~PAGE_CACHE_MASK;
744 else
745 rlen = 0;
746
747 if (!rlen) {
748 clear_highpage(page);
749 SetPageUptodate(page);
750 goto out;
751 }
752
731 ret = _readpage(page, true); 753 ret = _readpage(page, true);
732 if (ret) { 754 if (ret) {
733 /*SetPageError was done by _readpage. Is it ok?*/ 755 /*SetPageError was done by _readpage. Is it ok?*/
734 unlock_page(page); 756 unlock_page(page);
735 EXOFS_DBGMSG("__readpage_filler faild\n"); 757 EXOFS_DBGMSG("__readpage failed\n");
736 } 758 }
737 } 759 }
738out: 760out:
@@ -801,7 +823,6 @@ const struct address_space_operations exofs_aops = {
801 .direct_IO = NULL, /* TODO: Should be trivial to do */ 823 .direct_IO = NULL, /* TODO: Should be trivial to do */
802 824
803 /* With these NULL has special meaning or default is not exported */ 825 /* With these NULL has special meaning or default is not exported */
804 .sync_page = NULL,
805 .get_xip_mem = NULL, 826 .get_xip_mem = NULL,
806 .migratepage = NULL, 827 .migratepage = NULL,
807 .launder_page = NULL, 828 .launder_page = NULL,
@@ -1036,6 +1057,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1036 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1057 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1037 } 1058 }
1038 1059
1060 inode->i_mapping->backing_dev_info = sb->s_bdi;
1039 if (S_ISREG(inode->i_mode)) { 1061 if (S_ISREG(inode->i_mode)) {
1040 inode->i_op = &exofs_file_inode_operations; 1062 inode->i_op = &exofs_file_inode_operations;
1041 inode->i_fop = &exofs_file_operations; 1063 inode->i_fop = &exofs_file_operations;
@@ -1072,11 +1094,14 @@ bad_inode:
1072int __exofs_wait_obj_created(struct exofs_i_info *oi) 1094int __exofs_wait_obj_created(struct exofs_i_info *oi)
1073{ 1095{
1074 if (!obj_created(oi)) { 1096 if (!obj_created(oi)) {
1097 EXOFS_DBGMSG("!obj_created\n");
1075 BUG_ON(!obj_2bcreated(oi)); 1098 BUG_ON(!obj_2bcreated(oi));
1076 wait_event(oi->i_wq, obj_created(oi)); 1099 wait_event(oi->i_wq, obj_created(oi));
1100 EXOFS_DBGMSG("wait_event done\n");
1077 } 1101 }
1078 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1102 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1079} 1103}
1104
1080/* 1105/*
1081 * Callback function from exofs_new_inode(). The important thing is that we 1106 * Callback function from exofs_new_inode(). The important thing is that we
1082 * set the obj_created flag so that other methods know that the object exists on 1107 * set the obj_created flag so that other methods know that the object exists on
@@ -1095,7 +1120,7 @@ static void create_done(struct exofs_io_state *ios, void *p)
1095 atomic_dec(&sbi->s_curr_pending); 1120 atomic_dec(&sbi->s_curr_pending);
1096 1121
1097 if (unlikely(ret)) { 1122 if (unlikely(ret)) {
1098 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1123 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
1099 _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); 1124 _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
1100 /*TODO: When FS is corrupted creation can fail, object already 1125 /*TODO: When FS is corrupted creation can fail, object already
1101 * exist. Get rid of this asynchronous creation, if exist 1126 * exist. Get rid of this asynchronous creation, if exist
@@ -1107,7 +1132,6 @@ static void create_done(struct exofs_io_state *ios, void *p)
1107 1132
1108 set_obj_created(oi); 1133 set_obj_created(oi);
1109 1134
1110 atomic_dec(&inode->i_count);
1111 wake_up(&oi->i_wq); 1135 wake_up(&oi->i_wq);
1112} 1136}
1113 1137
@@ -1135,7 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1135 1159
1136 sbi = sb->s_fs_info; 1160 sbi = sb->s_fs_info;
1137 1161
1138 sb->s_dirt = 1; 1162 inode->i_mapping->backing_dev_info = sb->s_bdi;
1139 inode_init_owner(inode, dir, mode); 1163 inode_init_owner(inode, dir, mode);
1140 inode->i_ino = sbi->s_nextid++; 1164 inode->i_ino = sbi->s_nextid++;
1141 inode->i_blkbits = EXOFS_BLKSHIFT; 1165 inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1146,6 +1170,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1146 spin_unlock(&sbi->s_next_gen_lock); 1170 spin_unlock(&sbi->s_next_gen_lock);
1147 insert_inode_hash(inode); 1171 insert_inode_hash(inode);
1148 1172
1173 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
1174
1149 mark_inode_dirty(inode); 1175 mark_inode_dirty(inode);
1150 1176
1151 ret = exofs_get_io_state(&sbi->layout, &ios); 1177 ret = exofs_get_io_state(&sbi->layout, &ios);
@@ -1157,17 +1183,11 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1157 ios->obj.id = exofs_oi_objno(oi); 1183 ios->obj.id = exofs_oi_objno(oi);
1158 exofs_make_credential(oi->i_cred, &ios->obj); 1184 exofs_make_credential(oi->i_cred, &ios->obj);
1159 1185
1160 /* increment the refcount so that the inode will still be around when we
1161 * reach the callback
1162 */
1163 atomic_inc(&inode->i_count);
1164
1165 ios->done = create_done; 1186 ios->done = create_done;
1166 ios->private = inode; 1187 ios->private = inode;
1167 ios->cred = oi->i_cred; 1188 ios->cred = oi->i_cred;
1168 ret = exofs_sbi_create(ios); 1189 ret = exofs_sbi_create(ios);
1169 if (ret) { 1190 if (ret) {
1170 atomic_dec(&inode->i_count);
1171 exofs_put_io_state(ios); 1191 exofs_put_io_state(ios);
1172 return ERR_PTR(ret); 1192 return ERR_PTR(ret);
1173 } 1193 }
@@ -1215,7 +1235,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1215 1235
1216 args = kzalloc(sizeof(*args), GFP_KERNEL); 1236 args = kzalloc(sizeof(*args), GFP_KERNEL);
1217 if (!args) { 1237 if (!args) {
1218 EXOFS_DBGMSG("Faild kzalloc of args\n"); 1238 EXOFS_DBGMSG("Failed kzalloc of args\n");
1219 return -ENOMEM; 1239 return -ENOMEM;
1220 } 1240 }
1221 1241
@@ -1257,12 +1277,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1257 ios->out_attr_len = 1; 1277 ios->out_attr_len = 1;
1258 ios->out_attr = &attr; 1278 ios->out_attr = &attr;
1259 1279
1260 if (!obj_created(oi)) { 1280 wait_obj_created(oi);
1261 EXOFS_DBGMSG("!obj_created\n");
1262 BUG_ON(!obj_2bcreated(oi));
1263 wait_event(oi->i_wq, obj_created(oi));
1264 EXOFS_DBGMSG("wait_event done\n");
1265 }
1266 1281
1267 if (!do_sync) { 1282 if (!do_sync) {
1268 args->sbi = sbi; 1283 args->sbi = sbi;
@@ -1287,7 +1302,8 @@ out:
1287 1302
1288int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1303int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1289{ 1304{
1290 return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1305 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
1306 return exofs_update_inode(inode, 1);
1291} 1307}
1292 1308
1293/* 1309/*
@@ -1325,12 +1341,12 @@ void exofs_evict_inode(struct inode *inode)
1325 inode->i_size = 0; 1341 inode->i_size = 0;
1326 end_writeback(inode); 1342 end_writeback(inode);
1327 1343
1328 /* if we are deleting an obj that hasn't been created yet, wait */ 1344 /* if we are deleting an obj that hasn't been created yet, wait.
1329 if (!obj_created(oi)) { 1345 * This also makes sure that create_done cannot be called with an
1330 BUG_ON(!obj_2bcreated(oi)); 1346 * already evicted inode.
1331 wait_event(oi->i_wq, obj_created(oi)); 1347 */
1332 /* ignore the error attempt a remove anyway */ 1348 wait_obj_created(oi);
1333 } 1349 /* ignore the error, attempt a remove anyway */
1334 1350
1335 /* Now Remove the OSD objects */ 1351 /* Now Remove the OSD objects */
1336 ret = exofs_get_io_state(&sbi->layout, &ios); 1352 ret = exofs_get_io_state(&sbi->layout, &ios);
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 6550bf70e41d..f74a2ec027a6 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -55,7 +55,7 @@ int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
55 55
56 ret = osd_finalize_request(or, 0, cred, NULL); 56 ret = osd_finalize_request(or, 0, cred, NULL);
57 if (unlikely(ret)) { 57 if (unlikely(ret)) {
58 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); 58 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
59 goto out; 59 goto out;
60 } 60 }
61 61
@@ -79,7 +79,7 @@ int exofs_get_io_state(struct exofs_layout *layout,
79 */ 79 */
80 ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); 80 ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
81 if (unlikely(!ios)) { 81 if (unlikely(!ios)) {
82 EXOFS_DBGMSG("Faild kzalloc bytes=%d\n", 82 EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
83 exofs_io_state_size(layout->s_numdevs)); 83 exofs_io_state_size(layout->s_numdevs));
84 *pios = NULL; 84 *pios = NULL;
85 return -ENOMEM; 85 return -ENOMEM;
@@ -172,7 +172,7 @@ static int exofs_io_execute(struct exofs_io_state *ios)
172 172
173 ret = osd_finalize_request(or, 0, ios->cred, NULL); 173 ret = osd_finalize_request(or, 0, ios->cred, NULL);
174 if (unlikely(ret)) { 174 if (unlikely(ret)) {
175 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", 175 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
176 ret); 176 ret);
177 return ret; 177 return ret;
178 } 178 }
@@ -361,7 +361,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
361 361
362 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); 362 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
363 if (unlikely(!per_dev->bio)) { 363 if (unlikely(!per_dev->bio)) {
364 EXOFS_DBGMSG("Faild to allocate BIO size=%u\n", 364 EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
365 bio_size); 365 bio_size);
366 return -ENOMEM; 366 return -ENOMEM;
367 } 367 }
@@ -564,7 +564,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
564 master_dev->bio->bi_max_vecs); 564 master_dev->bio->bi_max_vecs);
565 if (unlikely(!bio)) { 565 if (unlikely(!bio)) {
566 EXOFS_DBGMSG( 566 EXOFS_DBGMSG(
567 "Faild to allocate BIO size=%u\n", 567 "Failed to allocate BIO size=%u\n",
568 master_dev->bio->bi_max_vecs); 568 master_dev->bio->bi_max_vecs);
569 ret = -ENOMEM; 569 ret = -ENOMEM;
570 goto out; 570 goto out;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b7dd0c236863..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
153 153
154 inode->i_ctime = CURRENT_TIME; 154 inode->i_ctime = CURRENT_TIME;
155 inode_inc_link_count(inode); 155 inode_inc_link_count(inode);
156 atomic_inc(&inode->i_count); 156 ihold(inode);
157 157
158 return exofs_add_nondir(dentry, inode); 158 return exofs_add_nondir(dentry, inode);
159} 159}
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af8..c57beddcc217 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -48,6 +48,7 @@
48 * struct to hold what we get from mount options 48 * struct to hold what we get from mount options
49 */ 49 */
50struct exofs_mountopt { 50struct exofs_mountopt {
51 bool is_osdname;
51 const char *dev_name; 52 const char *dev_name;
52 uint64_t pid; 53 uint64_t pid;
53 int timeout; 54 int timeout;
@@ -56,7 +57,7 @@ struct exofs_mountopt {
56/* 57/*
57 * exofs-specific mount-time options. 58 * exofs-specific mount-time options.
58 */ 59 */
59enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; 60enum { Opt_name, Opt_pid, Opt_to, Opt_err };
60 61
61/* 62/*
62 * Our mount-time options. These should ideally be 64-bit unsigned, but the 63 * Our mount-time options. These should ideally be 64-bit unsigned, but the
@@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
64 * sufficient for most applications now. 65 * sufficient for most applications now.
65 */ 66 */
66static match_table_t tokens = { 67static match_table_t tokens = {
68 {Opt_name, "osdname=%s"},
67 {Opt_pid, "pid=%u"}, 69 {Opt_pid, "pid=%u"},
68 {Opt_to, "to=%u"}, 70 {Opt_to, "to=%u"},
69 {Opt_err, NULL} 71 {Opt_err, NULL}
@@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
94 96
95 token = match_token(p, tokens, args); 97 token = match_token(p, tokens, args);
96 switch (token) { 98 switch (token) {
99 case Opt_name:
100 opts->dev_name = match_strdup(&args[0]);
101 if (unlikely(!opts->dev_name)) {
102 EXOFS_ERR("Error allocating dev_name");
103 return -ENOMEM;
104 }
105 opts->is_osdname = true;
106 break;
97 case Opt_pid: 107 case Opt_pid:
98 if (0 == match_strlcpy(str, &args[0], sizeof(str))) 108 if (0 == match_strlcpy(str, &args[0], sizeof(str)))
99 return -EINVAL; 109 return -EINVAL;
@@ -150,12 +160,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
150 return &oi->vfs_inode; 160 return &oi->vfs_inode;
151} 161}
152 162
163static void exofs_i_callback(struct rcu_head *head)
164{
165 struct inode *inode = container_of(head, struct inode, i_rcu);
166 INIT_LIST_HEAD(&inode->i_dentry);
167 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
168}
169
153/* 170/*
154 * Remove an inode from the cache 171 * Remove an inode from the cache
155 */ 172 */
156static void exofs_destroy_inode(struct inode *inode) 173static void exofs_destroy_inode(struct inode *inode)
157{ 174{
158 kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); 175 call_rcu(&inode->i_rcu, exofs_i_callback);
159} 176}
160 177
161/* 178/*
@@ -196,6 +213,101 @@ static void destroy_inodecache(void)
196static const struct super_operations exofs_sops; 213static const struct super_operations exofs_sops;
197static const struct export_operations exofs_export_ops; 214static const struct export_operations exofs_export_ops;
198 215
216static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
217 EXOFS_APAGE_SB_DATA,
218 EXOFS_ATTR_SB_STATS,
219 sizeof(struct exofs_sb_stats));
220
221static int __sbi_read_stats(struct exofs_sb_info *sbi)
222{
223 struct osd_attr attrs[] = {
224 [0] = g_attr_sb_stats,
225 };
226 struct exofs_io_state *ios;
227 int ret;
228
229 ret = exofs_get_io_state(&sbi->layout, &ios);
230 if (unlikely(ret)) {
231 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
232 return ret;
233 }
234
235 ios->cred = sbi->s_cred;
236
237 ios->in_attr = attrs;
238 ios->in_attr_len = ARRAY_SIZE(attrs);
239
240 ret = exofs_sbi_read(ios);
241 if (unlikely(ret)) {
242 EXOFS_ERR("Error reading super_block stats => %d\n", ret);
243 goto out;
244 }
245
246 ret = extract_attr_from_ios(ios, &attrs[0]);
247 if (ret) {
248 EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
249 goto out;
250 }
251 if (attrs[0].len) {
252 struct exofs_sb_stats *ess;
253
254 if (unlikely(attrs[0].len != sizeof(*ess))) {
255 EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
256 "size(%d) != expected(%zd)\n",
257 __func__, attrs[0].len, sizeof(*ess));
258 goto out;
259 }
260
261 ess = attrs[0].val_ptr;
262 sbi->s_nextid = le64_to_cpu(ess->s_nextid);
263 sbi->s_numfiles = le32_to_cpu(ess->s_numfiles);
264 }
265
266out:
267 exofs_put_io_state(ios);
268 return ret;
269}
270
271static void stats_done(struct exofs_io_state *ios, void *p)
272{
273 exofs_put_io_state(ios);
274 /* Good thanks nothing to do anymore */
275}
276
277/* Asynchronously write the stats attribute */
278int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
279{
280 struct osd_attr attrs[] = {
281 [0] = g_attr_sb_stats,
282 };
283 struct exofs_io_state *ios;
284 int ret;
285
286 ret = exofs_get_io_state(&sbi->layout, &ios);
287 if (unlikely(ret)) {
288 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
289 return ret;
290 }
291
292 sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid);
293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
294 attrs[0].val_ptr = &sbi->s_ess;
295
296 ios->cred = sbi->s_cred;
297 ios->done = stats_done;
298 ios->private = sbi;
299 ios->out_attr = attrs;
300 ios->out_attr_len = ARRAY_SIZE(attrs);
301
302 ret = exofs_sbi_write(ios);
303 if (unlikely(ret)) {
304 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
305 exofs_put_io_state(ios);
306 }
307
308 return ret;
309}
310
199/* 311/*
200 * Write the superblock to the OSD 312 * Write the superblock to the OSD
201 */ 313 */
@@ -206,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
206 struct exofs_io_state *ios; 318 struct exofs_io_state *ios;
207 int ret = -ENOMEM; 319 int ret = -ENOMEM;
208 320
209 lock_super(sb); 321 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
322 if (unlikely(!fscb))
323 return -ENOMEM;
324
210 sbi = sb->s_fs_info; 325 sbi = sb->s_fs_info;
211 fscb = &sbi->s_fscb;
212 326
327 /* NOTE: We no longer dirty the super_block anywhere in exofs. The
328 * reason we write the fscb here on unmount is so we can stay backwards
329 * compatible with fscb->s_version == 1. (What we are not compatible
330 * with is if a new version FS crashed and then we try to mount an old
331 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
332 * the writeable info is set in exofs_sbi_write_stats() above.
333 */
213 ret = exofs_get_io_state(&sbi->layout, &ios); 334 ret = exofs_get_io_state(&sbi->layout, &ios);
214 if (ret) 335 if (unlikely(ret))
215 goto out; 336 goto out;
216 337
217 /* Note: We only write the changing part of the fscb. .i.e upto the 338 lock_super(sb);
218 * the fscb->s_dev_table_oid member. There is no read-modify-write 339
219 * here.
220 */
221 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); 340 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
222 memset(fscb, 0, ios->length); 341 memset(fscb, 0, ios->length);
223 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 342 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -232,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
232 ios->cred = sbi->s_cred; 351 ios->cred = sbi->s_cred;
233 352
234 ret = exofs_sbi_write(ios); 353 ret = exofs_sbi_write(ios);
235 if (unlikely(ret)) { 354 if (unlikely(ret))
236 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 355 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
237 goto out; 356 else
238 } 357 sb->s_dirt = 0;
239 sb->s_dirt = 0; 358
240 359
360 unlock_super(sb);
241out: 361out:
242 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
243 exofs_put_io_state(ios); 363 exofs_put_io_state(ios);
244 unlock_super(sb); 364 kfree(fscb);
245 return ret; 365 return ret;
246} 366}
247 367
@@ -285,13 +405,14 @@ static void exofs_put_super(struct super_block *sb)
285 int num_pend; 405 int num_pend;
286 struct exofs_sb_info *sbi = sb->s_fs_info; 406 struct exofs_sb_info *sbi = sb->s_fs_info;
287 407
288 if (sb->s_dirt)
289 exofs_write_super(sb);
290
291 /* make sure there are no pending commands */ 408 /* make sure there are no pending commands */
292 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; 409 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
293 num_pend = atomic_read(&sbi->s_curr_pending)) { 410 num_pend = atomic_read(&sbi->s_curr_pending)) {
294 wait_queue_head_t wq; 411 wait_queue_head_t wq;
412
413 printk(KERN_NOTICE "%s: !!Pending operations in flight. "
414 "This is a BUG. please report to osd-dev@open-osd.org\n",
415 __func__);
295 init_waitqueue_head(&wq); 416 init_waitqueue_head(&wq);
296 wait_event_timeout(wq, 417 wait_event_timeout(wq,
297 (atomic_read(&sbi->s_curr_pending) == 0), 418 (atomic_read(&sbi->s_curr_pending) == 0),
@@ -383,6 +504,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
383 return 0; 504 return 0;
384} 505}
385 506
507static unsigned __ra_pages(struct exofs_layout *layout)
508{
509 const unsigned _MIN_RA = 32; /* min 128K read-ahead */
510 unsigned ra_pages = layout->group_width * layout->stripe_unit /
511 PAGE_SIZE;
512 unsigned max_io_pages = exofs_max_io_pages(layout, ~0);
513
514 ra_pages *= 2; /* two stripes */
515 if (ra_pages < _MIN_RA)
516 ra_pages = roundup(_MIN_RA, ra_pages / 2);
517
518 if (ra_pages > max_io_pages)
519 ra_pages = max_io_pages;
520
521 return ra_pages;
522}
523
386/* @odi is valid only as long as @fscb_dev is valid */ 524/* @odi is valid only as long as @fscb_dev is valid */
387static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, 525static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
388 struct osd_dev_info *odi) 526 struct osd_dev_info *odi)
@@ -488,7 +626,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
488 } 626 }
489 627
490 od = osduld_info_lookup(&odi); 628 od = osduld_info_lookup(&odi);
491 if (unlikely(IS_ERR(od))) { 629 if (IS_ERR(od)) {
492 ret = PTR_ERR(od); 630 ret = PTR_ERR(od);
493 EXOFS_ERR("ERROR: device requested is not found " 631 EXOFS_ERR("ERROR: device requested is not found "
494 "osd_name-%s =>%d\n", odi.osdname, ret); 632 "osd_name-%s =>%d\n", odi.osdname, ret);
@@ -551,9 +689,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
551 goto free_bdi; 689 goto free_bdi;
552 690
553 /* use mount options to fill superblock */ 691 /* use mount options to fill superblock */
554 od = osduld_path_lookup(opts->dev_name); 692 if (opts->is_osdname) {
693 struct osd_dev_info odi = {.systemid_len = 0};
694
695 odi.osdname_len = strlen(opts->dev_name);
696 odi.osdname = (u8 *)opts->dev_name;
697 od = osduld_info_lookup(&odi);
698 } else {
699 od = osduld_path_lookup(opts->dev_name);
700 }
555 if (IS_ERR(od)) { 701 if (IS_ERR(od)) {
556 ret = PTR_ERR(od); 702 ret = -EINVAL;
557 goto free_sbi; 703 goto free_sbi;
558 } 704 }
559 705
@@ -587,6 +733,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
587 goto free_sbi; 733 goto free_sbi;
588 734
589 sb->s_magic = le16_to_cpu(fscb.s_magic); 735 sb->s_magic = le16_to_cpu(fscb.s_magic);
736 /* NOTE: we read below to be backward compatible with old versions */
590 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 737 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
591 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); 738 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
592 739
@@ -597,7 +744,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
597 ret = -EINVAL; 744 ret = -EINVAL;
598 goto free_sbi; 745 goto free_sbi;
599 } 746 }
600 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { 747 if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
601 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", 748 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
602 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); 749 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
603 ret = -EINVAL; 750 ret = -EINVAL;
@@ -615,7 +762,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
615 goto free_sbi; 762 goto free_sbi;
616 } 763 }
617 764
765 __sbi_read_stats(sbi);
766
618 /* set up operation vectors */ 767 /* set up operation vectors */
768 sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
619 sb->s_bdi = &sbi->bdi; 769 sb->s_bdi = &sbi->bdi;
620 sb->s_fs_info = sbi; 770 sb->s_fs_info = sbi;
621 sb->s_op = &exofs_sops; 771 sb->s_op = &exofs_sops;
@@ -645,6 +795,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
645 795
646 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 796 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
647 sbi->layout.s_pid); 797 sbi->layout.s_pid);
798 if (opts->is_osdname)
799 kfree(opts->dev_name);
648 return 0; 800 return 0;
649 801
650free_sbi: 802free_sbi:
@@ -653,25 +805,28 @@ free_bdi:
653 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 805 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
654 opts->dev_name, sbi->layout.s_pid, ret); 806 opts->dev_name, sbi->layout.s_pid, ret);
655 exofs_free_sbi(sbi); 807 exofs_free_sbi(sbi);
808 if (opts->is_osdname)
809 kfree(opts->dev_name);
656 return ret; 810 return ret;
657} 811}
658 812
659/* 813/*
660 * Set up the superblock (calls exofs_fill_super eventually) 814 * Set up the superblock (calls exofs_fill_super eventually)
661 */ 815 */
662static int exofs_get_sb(struct file_system_type *type, 816static struct dentry *exofs_mount(struct file_system_type *type,
663 int flags, const char *dev_name, 817 int flags, const char *dev_name,
664 void *data, struct vfsmount *mnt) 818 void *data)
665{ 819{
666 struct exofs_mountopt opts; 820 struct exofs_mountopt opts;
667 int ret; 821 int ret;
668 822
669 ret = parse_options(data, &opts); 823 ret = parse_options(data, &opts);
670 if (ret) 824 if (ret)
671 return ret; 825 return ERR_PTR(ret);
672 826
673 opts.dev_name = dev_name; 827 if (!opts.dev_name)
674 return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); 828 opts.dev_name = dev_name;
829 return mount_nodev(type, flags, &opts, exofs_fill_super);
675} 830}
676 831
677/* 832/*
@@ -758,7 +913,7 @@ struct dentry *exofs_get_parent(struct dentry *child)
758 unsigned long ino = exofs_parent_ino(child); 913 unsigned long ino = exofs_parent_ino(child);
759 914
760 if (!ino) 915 if (!ino)
761 return NULL; 916 return ERR_PTR(-ESTALE);
762 917
763 return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino)); 918 return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
764} 919}
@@ -809,7 +964,7 @@ static const struct export_operations exofs_export_ops = {
809static struct file_system_type exofs_type = { 964static struct file_system_type exofs_type = {
810 .owner = THIS_MODULE, 965 .owner = THIS_MODULE,
811 .name = "exofs", 966 .name = "exofs",
812 .get_sb = exofs_get_sb, 967 .mount = exofs_mount,
813 .kill_sb = generic_shutdown_super, 968 .kill_sb = generic_shutdown_super,
814}; 969};
815 970