1 files changed, 71 insertions, 55 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3eadd97324b1..8472c098445d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
                PAGE_SIZE / sizeof(struct page *),
 };
+unsigned exofs_max_io_pages(struct exofs_layout *layout,
+                            unsigned expected_pages)
+{
+        unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
+        /* 2nd cap, per group_width. TODO: easily support bio chaining */
+        pages =  min_t(unsigned, pages,
+                       layout->group_width * BIO_MAX_PAGES_KMALLOC);
+        return pages;   /* min of expected_pages and both caps above */
+}
 struct page_collect {
        struct exofs_sb_info *sbi;
        struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
 static int pcol_try_alloc(struct page_collect *pcol)
 {
-        unsigned pages = min_t(unsigned, pcol->expected_pages,
+        unsigned pages;
-                          MAX_PAGES_KMALLOC);
        if (!pcol->ios) { /* First time allocate io_state */
                int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
        }
        /* TODO: easily support bio chaining */
-        pages =  min_t(unsigned, pages,
+        pages =  exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
-                       pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
        for (; pages; pages >>= 1) {
                pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -185,7 +194,7 @@ static void update_write_page(struct page *page, int ret)
 /* Called at the end of reads, to optionally unlock pages and update their
 * status.
 */
-static int __readpages_done(struct page_collect *pcol, bool do_unlock)
+static int __readpages_done(struct page_collect *pcol)
 {
        int i;
        u64 resid;
@@ -221,7 +230,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
                          page_stat ? "bad_bytes" : "good_bytes");
                ret = update_read_page(page, page_stat);
-                if (do_unlock)
+                if (!pcol->read_4_write)
                        unlock_page(page);
                length += PAGE_SIZE;
        }
@@ -236,7 +245,7 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
 {
        struct page_collect *pcol = p;
-        __readpages_done(pcol, true);
+        __readpages_done(pcol);
        atomic_dec(&pcol->sbi->s_curr_pending);
        kfree(pcol);
 }
@@ -257,7 +266,7 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
        }
 }
-static int read_exec(struct page_collect *pcol, bool is_sync)
+static int read_exec(struct page_collect *pcol)
 {
        struct exofs_i_info *oi = exofs_i(pcol->inode);
        struct exofs_io_state *ios = pcol->ios;
@@ -267,17 +276,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
        if (!pcol->pages)
                return 0;
-        /* see comment in _readpage() about sync reads */
-        WARN_ON(is_sync && (pcol->nr_pages != 1));
        ios->pages = pcol->pages;
        ios->nr_pages = pcol->nr_pages;
        ios->length = pcol->length;
        ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
-        if (is_sync) {
+        if (pcol->read_4_write) {
                exofs_oi_read(oi, pcol->ios);
-                return __readpages_done(pcol, false);
+                return __readpages_done(pcol);
        }
        pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -303,7 +309,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
        return 0;
 err:
-        if (!is_sync)
+        if (!pcol->read_4_write)
                _unlock_pcol_pages(pcol, ret, READ);
        pcol_free(pcol);
@@ -353,10 +359,12 @@ static int readpage_strip(void *data, struct page *page)
                if (!pcol->read_4_write)
                        unlock_page(page);
-                EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
+                EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
-                             " splitting\n", inode->i_ino, page->index);
+                             "read_4_write=%d index=0x%lx end_index=0x%lx "
+                             "splitting\n", inode->i_ino, len,
+                             pcol->read_4_write, page->index, end_index);
-                return read_exec(pcol, false);
+                return read_exec(pcol);
        }
 try_again:
@@ -366,7 +374,7 @@ try_again:
        } else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
                   page->index)) {
                /* Discontinuity detected, split the request */
-                ret = read_exec(pcol, false);
+                ret = read_exec(pcol);
                if (unlikely(ret))
                        goto fail;
                goto try_again;
@@ -391,7 +399,7 @@ try_again:
                          page, len, pcol->nr_pages, pcol->length);
                /* split the request, and start again with current page */
-                ret = read_exec(pcol, false);
+                ret = read_exec(pcol);
                if (unlikely(ret))
                        goto fail;
@@ -420,27 +428,24 @@ static int exofs_readpages(struct file *file, struct address_space *mapping,
                return ret;
        }
-        return read_exec(&pcol, false);
+        return read_exec(&pcol);
 }
-static int _readpage(struct page *page, bool is_sync)
+static int _readpage(struct page *page, bool read_4_write)
 {
        struct page_collect pcol;
        int ret;
        _pcol_init(&pcol, 1, page->mapping->host);
-        /* readpage_strip might call read_exec(,is_sync==false) at several
+        pcol.read_4_write = read_4_write;
-         * places but not if we have a single page.
-         */
-        pcol.read_4_write = is_sync;
        ret = readpage_strip(&pcol, page);
        if (ret) {
                EXOFS_ERR("_readpage => %d\n", ret);
                return ret;
        }
-        return read_exec(&pcol, is_sync);
+        return read_exec(&pcol);        /* mode is read from pcol.read_4_write */
 }
 /*
@@ -511,7 +516,7 @@ static int write_exec(struct page_collect *pcol)
        pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
        if (!pcol_copy) {
-                EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
+                EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
                ret = -ENOMEM;
                goto err;
        }
@@ -527,7 +532,7 @@ static int write_exec(struct page_collect *pcol)
        ret = exofs_oi_write(oi, ios);
        if (unlikely(ret)) {
-                EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
+                EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
                goto err;
        }
@@ -628,7 +633,7 @@ try_again:
                /* split the request, next loop will start again */
                ret = write_exec(pcol);
                if (unlikely(ret)) {
-                        EXOFS_DBGMSG("write_exec faild => %d", ret);
+                        EXOFS_DBGMSG("write_exec failed => %d", ret);
                        goto fail;
                }
@@ -719,7 +724,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
                ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
                                         fsdata);
                if (ret) {
-                        EXOFS_DBGMSG("simple_write_begin faild\n");
+                        EXOFS_DBGMSG("simple_write_begin failed\n");
                        goto out;
                }
@@ -728,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
         /* read modify write */
        if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+                loff_t i_size = i_size_read(mapping->host);
+                pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+                size_t rlen;
+                if (page->index < end_index)
+                        rlen = PAGE_CACHE_SIZE;
+                else if (page->index == end_index)
+                        rlen = i_size & ~PAGE_CACHE_MASK;
+                else
+                        rlen = 0;
+                if (!rlen) {
+                        clear_highpage(page);
+                        SetPageUptodate(page);
+                        goto out;
+                }
                ret = _readpage(page, true);
                if (ret) {
                        /*SetPageError was done by _readpage. Is it ok?*/
                        unlock_page(page);
-                        EXOFS_DBGMSG("__readpage_filler faild\n");
+                        EXOFS_DBGMSG("__readpage failed\n");
                }
        }
 out:
@@ -801,7 +823,6 @@ const struct address_space_operations exofs_aops = {
        .direct_IO      = NULL, /* TODO: Should be trivial to do */
        /* With these NULL has special meaning or default is not exported */
-        .sync_page      = NULL,
        .get_xip_mem    = NULL,
        .migratepage    = NULL,
        .launder_page   = NULL,
@@ -1036,6 +1057,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
                memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
        }
+        inode->i_mapping->backing_dev_info = sb->s_bdi;
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &exofs_file_inode_operations;
                inode->i_fop = &exofs_file_operations;
@@ -1072,11 +1094,14 @@ bad_inode:
 int __exofs_wait_obj_created(struct exofs_i_info *oi)
 {
        if (!obj_created(oi)) {
+                EXOFS_DBGMSG("!obj_created\n"); /* will sleep on oi->i_wq */
                BUG_ON(!obj_2bcreated(oi));
                wait_event(oi->i_wq, obj_created(oi));
+                EXOFS_DBGMSG("wait_event done\n"); /* now obj_created(oi) */
        }
        return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
 }
 /*
 * Callback function from exofs_new_inode().  The important thing is that we
 * set the obj_created flag so that other methods know that the object exists on
@@ -1095,7 +1120,7 @@ static void create_done(struct exofs_io_state *ios, void *p)
        atomic_dec(&sbi->s_curr_pending);
        if (unlikely(ret)) {
-                EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
+                EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
                          _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
                /*TODO: When FS is corrupted creation can fail, object already
                 * exist. Get rid of this asynchronous creation, if exist
@@ -1107,7 +1132,6 @@ static void create_done(struct exofs_io_state *ios, void *p)
        set_obj_created(oi);
-        atomic_dec(&inode->i_count);
        wake_up(&oi->i_wq);
 }
@@ -1135,7 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
        sbi = sb->s_fs_info;
-        sb->s_dirt = 1;
+        inode->i_mapping->backing_dev_info = sb->s_bdi;
        inode_init_owner(inode, dir, mode);
        inode->i_ino = sbi->s_nextid++;
        inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1146,6 +1170,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
        spin_unlock(&sbi->s_next_gen_lock);
        insert_inode_hash(inode);
+        exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
        mark_inode_dirty(inode);
        ret = exofs_get_io_state(&sbi->layout, &ios);
@@ -1157,17 +1183,11 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
        ios->obj.id = exofs_oi_objno(oi);
        exofs_make_credential(oi->i_cred, &ios->obj);
-        /* increment the refcount so that the inode will still be around when we
-         * reach the callback
-         */
-        atomic_inc(&inode->i_count);
        ios->done = create_done;
        ios->private = inode;
        ios->cred = oi->i_cred;
        ret = exofs_sbi_create(ios);
        if (ret) {
-                atomic_dec(&inode->i_count);
                exofs_put_io_state(ios);
                return ERR_PTR(ret);
        }
@@ -1215,7 +1235,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
        args = kzalloc(sizeof(*args), GFP_KERNEL);
        if (!args) {
-                EXOFS_DBGMSG("Faild kzalloc of args\n");
+                EXOFS_DBGMSG("Failed kzalloc of args\n");
                return -ENOMEM;
        }
@@ -1257,12 +1277,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
        ios->out_attr_len = 1;
        ios->out_attr = &attr;
-        if (!obj_created(oi)) {
+        wait_obj_created(oi);
-                EXOFS_DBGMSG("!obj_created\n");
-                BUG_ON(!obj_2bcreated(oi));
-                wait_event(oi->i_wq, obj_created(oi));
-                EXOFS_DBGMSG("wait_event done\n");
-        }
        if (!do_sync) {
                args->sbi = sbi;
@@ -1287,7 +1302,8 @@ out:
 int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-        return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+        /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
+        return exofs_update_inode(inode, 1);    /* do_sync=1; wbc is unused */
 }
 /*
@@ -1325,12 +1341,12 @@ void exofs_evict_inode(struct inode *inode)
        inode->i_size = 0;
        end_writeback(inode);
-        /* if we are deleting an obj that hasn't been created yet, wait */
+        /* if we are deleting an obj that hasn't been created yet, wait.
-        if (!obj_created(oi)) {
+         * This also makes sure that create_done cannot be called with an
-                BUG_ON(!obj_2bcreated(oi));
+         * already evicted inode.
-                wait_event(oi->i_wq, obj_created(oi));
+         */
-                /* ignore the error attempt a remove anyway */
+        wait_obj_created(oi);
-        }
+        /* ignore the error, attempt a remove anyway */
        /* Now Remove the OSD objects */
        ret = exofs_get_io_state(&sbi->layout, &ios);