aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exofs/inode.c')
-rw-r--r--fs/exofs/inode.c126
1 files changed, 71 insertions, 55 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3eadd97324b1..8472c098445d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
43 PAGE_SIZE / sizeof(struct page *), 43 PAGE_SIZE / sizeof(struct page *),
44}; 44};
45 45
46unsigned exofs_max_io_pages(struct exofs_layout *layout,
47 unsigned expected_pages)
48{
49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
50
51 /* TODO: easily support bio chaining */
52 pages = min_t(unsigned, pages,
53 layout->group_width * BIO_MAX_PAGES_KMALLOC);
54 return pages;
55}
56
46struct page_collect { 57struct page_collect {
47 struct exofs_sb_info *sbi; 58 struct exofs_sb_info *sbi;
48 struct inode *inode; 59 struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
97 108
98static int pcol_try_alloc(struct page_collect *pcol) 109static int pcol_try_alloc(struct page_collect *pcol)
99{ 110{
100 unsigned pages = min_t(unsigned, pcol->expected_pages, 111 unsigned pages;
101 MAX_PAGES_KMALLOC);
102 112
103 if (!pcol->ios) { /* First time allocate io_state */ 113 if (!pcol->ios) { /* First time allocate io_state */
104 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 114 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
108 } 118 }
109 119
110 /* TODO: easily support bio chaining */ 120 /* TODO: easily support bio chaining */
111 pages = min_t(unsigned, pages, 121 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
112 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
113 122
114 for (; pages; pages >>= 1) { 123 for (; pages; pages >>= 1) {
115 pcol->pages = kmalloc(pages * sizeof(struct page *), 124 pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -185,7 +194,7 @@ static void update_write_page(struct page *page, int ret)
185/* Called at the end of reads, to optionally unlock pages and update their 194/* Called at the end of reads, to optionally unlock pages and update their
186 * status. 195 * status.
187 */ 196 */
188static int __readpages_done(struct page_collect *pcol, bool do_unlock) 197static int __readpages_done(struct page_collect *pcol)
189{ 198{
190 int i; 199 int i;
191 u64 resid; 200 u64 resid;
@@ -221,7 +230,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
221 page_stat ? "bad_bytes" : "good_bytes"); 230 page_stat ? "bad_bytes" : "good_bytes");
222 231
223 ret = update_read_page(page, page_stat); 232 ret = update_read_page(page, page_stat);
224 if (do_unlock) 233 if (!pcol->read_4_write)
225 unlock_page(page); 234 unlock_page(page);
226 length += PAGE_SIZE; 235 length += PAGE_SIZE;
227 } 236 }
@@ -236,7 +245,7 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
236{ 245{
237 struct page_collect *pcol = p; 246 struct page_collect *pcol = p;
238 247
239 __readpages_done(pcol, true); 248 __readpages_done(pcol);
240 atomic_dec(&pcol->sbi->s_curr_pending); 249 atomic_dec(&pcol->sbi->s_curr_pending);
241 kfree(pcol); 250 kfree(pcol);
242} 251}
@@ -257,7 +266,7 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
257 } 266 }
258} 267}
259 268
260static int read_exec(struct page_collect *pcol, bool is_sync) 269static int read_exec(struct page_collect *pcol)
261{ 270{
262 struct exofs_i_info *oi = exofs_i(pcol->inode); 271 struct exofs_i_info *oi = exofs_i(pcol->inode);
263 struct exofs_io_state *ios = pcol->ios; 272 struct exofs_io_state *ios = pcol->ios;
@@ -267,17 +276,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
267 if (!pcol->pages) 276 if (!pcol->pages)
268 return 0; 277 return 0;
269 278
270 /* see comment in _readpage() about sync reads */
271 WARN_ON(is_sync && (pcol->nr_pages != 1));
272
273 ios->pages = pcol->pages; 279 ios->pages = pcol->pages;
274 ios->nr_pages = pcol->nr_pages; 280 ios->nr_pages = pcol->nr_pages;
275 ios->length = pcol->length; 281 ios->length = pcol->length;
276 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 282 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
277 283
278 if (is_sync) { 284 if (pcol->read_4_write) {
279 exofs_oi_read(oi, pcol->ios); 285 exofs_oi_read(oi, pcol->ios);
280 return __readpages_done(pcol, false); 286 return __readpages_done(pcol);
281 } 287 }
282 288
283 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 289 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -303,7 +309,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
303 return 0; 309 return 0;
304 310
305err: 311err:
306 if (!is_sync) 312 if (!pcol->read_4_write)
307 _unlock_pcol_pages(pcol, ret, READ); 313 _unlock_pcol_pages(pcol, ret, READ);
308 314
309 pcol_free(pcol); 315 pcol_free(pcol);
@@ -353,10 +359,12 @@ static int readpage_strip(void *data, struct page *page)
353 359
354 if (!pcol->read_4_write) 360 if (!pcol->read_4_write)
355 unlock_page(page); 361 unlock_page(page);
356 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 362 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
357 " splitting\n", inode->i_ino, page->index); 363 "read_4_write=%d index=0x%lx end_index=0x%lx "
364 "splitting\n", inode->i_ino, len,
365 pcol->read_4_write, page->index, end_index);
358 366
359 return read_exec(pcol, false); 367 return read_exec(pcol);
360 } 368 }
361 369
362try_again: 370try_again:
@@ -366,7 +374,7 @@ try_again:
366 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 374 } else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
367 page->index)) { 375 page->index)) {
368 /* Discontinuity detected, split the request */ 376 /* Discontinuity detected, split the request */
369 ret = read_exec(pcol, false); 377 ret = read_exec(pcol);
370 if (unlikely(ret)) 378 if (unlikely(ret))
371 goto fail; 379 goto fail;
372 goto try_again; 380 goto try_again;
@@ -391,7 +399,7 @@ try_again:
391 page, len, pcol->nr_pages, pcol->length); 399 page, len, pcol->nr_pages, pcol->length);
392 400
393 /* split the request, and start again with current page */ 401 /* split the request, and start again with current page */
394 ret = read_exec(pcol, false); 402 ret = read_exec(pcol);
395 if (unlikely(ret)) 403 if (unlikely(ret))
396 goto fail; 404 goto fail;
397 405
@@ -420,27 +428,24 @@ static int exofs_readpages(struct file *file, struct address_space *mapping,
420 return ret; 428 return ret;
421 } 429 }
422 430
423 return read_exec(&pcol, false); 431 return read_exec(&pcol);
424} 432}
425 433
426static int _readpage(struct page *page, bool is_sync) 434static int _readpage(struct page *page, bool read_4_write)
427{ 435{
428 struct page_collect pcol; 436 struct page_collect pcol;
429 int ret; 437 int ret;
430 438
431 _pcol_init(&pcol, 1, page->mapping->host); 439 _pcol_init(&pcol, 1, page->mapping->host);
432 440
433 /* readpage_strip might call read_exec(,is_sync==false) at several 441 pcol.read_4_write = read_4_write;
434 * places but not if we have a single page.
435 */
436 pcol.read_4_write = is_sync;
437 ret = readpage_strip(&pcol, page); 442 ret = readpage_strip(&pcol, page);
438 if (ret) { 443 if (ret) {
439 EXOFS_ERR("_readpage => %d\n", ret); 444 EXOFS_ERR("_readpage => %d\n", ret);
440 return ret; 445 return ret;
441 } 446 }
442 447
443 return read_exec(&pcol, is_sync); 448 return read_exec(&pcol);
444} 449}
445 450
446/* 451/*
@@ -511,7 +516,7 @@ static int write_exec(struct page_collect *pcol)
511 516
512 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 517 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
513 if (!pcol_copy) { 518 if (!pcol_copy) {
514 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); 519 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
515 ret = -ENOMEM; 520 ret = -ENOMEM;
516 goto err; 521 goto err;
517 } 522 }
@@ -527,7 +532,7 @@ static int write_exec(struct page_collect *pcol)
527 532
528 ret = exofs_oi_write(oi, ios); 533 ret = exofs_oi_write(oi, ios);
529 if (unlikely(ret)) { 534 if (unlikely(ret)) {
530 EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); 535 EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
531 goto err; 536 goto err;
532 } 537 }
533 538
@@ -628,7 +633,7 @@ try_again:
628 /* split the request, next loop will start again */ 633 /* split the request, next loop will start again */
629 ret = write_exec(pcol); 634 ret = write_exec(pcol);
630 if (unlikely(ret)) { 635 if (unlikely(ret)) {
631 EXOFS_DBGMSG("write_exec faild => %d", ret); 636 EXOFS_DBGMSG("write_exec failed => %d", ret);
632 goto fail; 637 goto fail;
633 } 638 }
634 639
@@ -719,7 +724,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
719 ret = simple_write_begin(file, mapping, pos, len, flags, pagep, 724 ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
720 fsdata); 725 fsdata);
721 if (ret) { 726 if (ret) {
722 EXOFS_DBGMSG("simple_write_begin faild\n"); 727 EXOFS_DBGMSG("simple_write_begin failed\n");
723 goto out; 728 goto out;
724 } 729 }
725 730
@@ -728,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
728 733
729 /* read modify write */ 734 /* read modify write */
730 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 735 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
736 loff_t i_size = i_size_read(mapping->host);
737 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
738 size_t rlen;
739
740 if (page->index < end_index)
741 rlen = PAGE_CACHE_SIZE;
742 else if (page->index == end_index)
743 rlen = i_size & ~PAGE_CACHE_MASK;
744 else
745 rlen = 0;
746
747 if (!rlen) {
748 clear_highpage(page);
749 SetPageUptodate(page);
750 goto out;
751 }
752
731 ret = _readpage(page, true); 753 ret = _readpage(page, true);
732 if (ret) { 754 if (ret) {
733 /*SetPageError was done by _readpage. Is it ok?*/ 755 /*SetPageError was done by _readpage. Is it ok?*/
734 unlock_page(page); 756 unlock_page(page);
735 EXOFS_DBGMSG("__readpage_filler faild\n"); 757 EXOFS_DBGMSG("__readpage failed\n");
736 } 758 }
737 } 759 }
738out: 760out:
@@ -801,7 +823,6 @@ const struct address_space_operations exofs_aops = {
801 .direct_IO = NULL, /* TODO: Should be trivial to do */ 823 .direct_IO = NULL, /* TODO: Should be trivial to do */
802 824
803 /* With these NULL has special meaning or default is not exported */ 825 /* With these NULL has special meaning or default is not exported */
804 .sync_page = NULL,
805 .get_xip_mem = NULL, 826 .get_xip_mem = NULL,
806 .migratepage = NULL, 827 .migratepage = NULL,
807 .launder_page = NULL, 828 .launder_page = NULL,
@@ -1036,6 +1057,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1036 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1057 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1037 } 1058 }
1038 1059
1060 inode->i_mapping->backing_dev_info = sb->s_bdi;
1039 if (S_ISREG(inode->i_mode)) { 1061 if (S_ISREG(inode->i_mode)) {
1040 inode->i_op = &exofs_file_inode_operations; 1062 inode->i_op = &exofs_file_inode_operations;
1041 inode->i_fop = &exofs_file_operations; 1063 inode->i_fop = &exofs_file_operations;
@@ -1072,11 +1094,14 @@ bad_inode:
1072int __exofs_wait_obj_created(struct exofs_i_info *oi) 1094int __exofs_wait_obj_created(struct exofs_i_info *oi)
1073{ 1095{
1074 if (!obj_created(oi)) { 1096 if (!obj_created(oi)) {
1097 EXOFS_DBGMSG("!obj_created\n");
1075 BUG_ON(!obj_2bcreated(oi)); 1098 BUG_ON(!obj_2bcreated(oi));
1076 wait_event(oi->i_wq, obj_created(oi)); 1099 wait_event(oi->i_wq, obj_created(oi));
1100 EXOFS_DBGMSG("wait_event done\n");
1077 } 1101 }
1078 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1102 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1079} 1103}
1104
1080/* 1105/*
1081 * Callback function from exofs_new_inode(). The important thing is that we 1106 * Callback function from exofs_new_inode(). The important thing is that we
1082 * set the obj_created flag so that other methods know that the object exists on 1107 * set the obj_created flag so that other methods know that the object exists on
@@ -1095,7 +1120,7 @@ static void create_done(struct exofs_io_state *ios, void *p)
1095 atomic_dec(&sbi->s_curr_pending); 1120 atomic_dec(&sbi->s_curr_pending);
1096 1121
1097 if (unlikely(ret)) { 1122 if (unlikely(ret)) {
1098 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1123 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
1099 _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); 1124 _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
1100 /*TODO: When FS is corrupted creation can fail, object already 1125 /*TODO: When FS is corrupted creation can fail, object already
1101 * exist. Get rid of this asynchronous creation, if exist 1126 * exist. Get rid of this asynchronous creation, if exist
@@ -1107,7 +1132,6 @@ static void create_done(struct exofs_io_state *ios, void *p)
1107 1132
1108 set_obj_created(oi); 1133 set_obj_created(oi);
1109 1134
1110 atomic_dec(&inode->i_count);
1111 wake_up(&oi->i_wq); 1135 wake_up(&oi->i_wq);
1112} 1136}
1113 1137
@@ -1135,7 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1135 1159
1136 sbi = sb->s_fs_info; 1160 sbi = sb->s_fs_info;
1137 1161
1138 sb->s_dirt = 1; 1162 inode->i_mapping->backing_dev_info = sb->s_bdi;
1139 inode_init_owner(inode, dir, mode); 1163 inode_init_owner(inode, dir, mode);
1140 inode->i_ino = sbi->s_nextid++; 1164 inode->i_ino = sbi->s_nextid++;
1141 inode->i_blkbits = EXOFS_BLKSHIFT; 1165 inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1146,6 +1170,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1146 spin_unlock(&sbi->s_next_gen_lock); 1170 spin_unlock(&sbi->s_next_gen_lock);
1147 insert_inode_hash(inode); 1171 insert_inode_hash(inode);
1148 1172
1173 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
1174
1149 mark_inode_dirty(inode); 1175 mark_inode_dirty(inode);
1150 1176
1151 ret = exofs_get_io_state(&sbi->layout, &ios); 1177 ret = exofs_get_io_state(&sbi->layout, &ios);
@@ -1157,17 +1183,11 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1157 ios->obj.id = exofs_oi_objno(oi); 1183 ios->obj.id = exofs_oi_objno(oi);
1158 exofs_make_credential(oi->i_cred, &ios->obj); 1184 exofs_make_credential(oi->i_cred, &ios->obj);
1159 1185
1160 /* increment the refcount so that the inode will still be around when we
1161 * reach the callback
1162 */
1163 atomic_inc(&inode->i_count);
1164
1165 ios->done = create_done; 1186 ios->done = create_done;
1166 ios->private = inode; 1187 ios->private = inode;
1167 ios->cred = oi->i_cred; 1188 ios->cred = oi->i_cred;
1168 ret = exofs_sbi_create(ios); 1189 ret = exofs_sbi_create(ios);
1169 if (ret) { 1190 if (ret) {
1170 atomic_dec(&inode->i_count);
1171 exofs_put_io_state(ios); 1191 exofs_put_io_state(ios);
1172 return ERR_PTR(ret); 1192 return ERR_PTR(ret);
1173 } 1193 }
@@ -1215,7 +1235,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1215 1235
1216 args = kzalloc(sizeof(*args), GFP_KERNEL); 1236 args = kzalloc(sizeof(*args), GFP_KERNEL);
1217 if (!args) { 1237 if (!args) {
1218 EXOFS_DBGMSG("Faild kzalloc of args\n"); 1238 EXOFS_DBGMSG("Failed kzalloc of args\n");
1219 return -ENOMEM; 1239 return -ENOMEM;
1220 } 1240 }
1221 1241
@@ -1257,12 +1277,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1257 ios->out_attr_len = 1; 1277 ios->out_attr_len = 1;
1258 ios->out_attr = &attr; 1278 ios->out_attr = &attr;
1259 1279
1260 if (!obj_created(oi)) { 1280 wait_obj_created(oi);
1261 EXOFS_DBGMSG("!obj_created\n");
1262 BUG_ON(!obj_2bcreated(oi));
1263 wait_event(oi->i_wq, obj_created(oi));
1264 EXOFS_DBGMSG("wait_event done\n");
1265 }
1266 1281
1267 if (!do_sync) { 1282 if (!do_sync) {
1268 args->sbi = sbi; 1283 args->sbi = sbi;
@@ -1287,7 +1302,8 @@ out:
1287 1302
1288int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1303int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1289{ 1304{
1290 return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1305 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
1306 return exofs_update_inode(inode, 1);
1291} 1307}
1292 1308
1293/* 1309/*
@@ -1325,12 +1341,12 @@ void exofs_evict_inode(struct inode *inode)
1325 inode->i_size = 0; 1341 inode->i_size = 0;
1326 end_writeback(inode); 1342 end_writeback(inode);
1327 1343
1328 /* if we are deleting an obj that hasn't been created yet, wait */ 1344 /* if we are deleting an obj that hasn't been created yet, wait.
1329 if (!obj_created(oi)) { 1345 * This also makes sure that create_done cannot be called with an
1330 BUG_ON(!obj_2bcreated(oi)); 1346 * already evicted inode.
1331 wait_event(oi->i_wq, obj_created(oi)); 1347 */
1332 /* ignore the error attempt a remove anyway */ 1348 wait_obj_created(oi);
1333 } 1349 /* ignore the error, attempt a remove anyway */
1334 1350
1335 /* Now Remove the OSD objects */ 1351 /* Now Remove the OSD objects */
1336 ret = exofs_get_io_state(&sbi->layout, &ios); 1352 ret = exofs_get_io_state(&sbi->layout, &ios);