Diffstat (limited to 'fs/ext4/inode.c')

 fs/ext4/inode.c | 467 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 301 insertions(+), 166 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c7fed5b18745..a2e7952bc5f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode,
 	return n;
 }
 
+static int __ext4_check_blockref(const char *function, struct inode *inode,
+				 unsigned int *p, unsigned int max) {
+
+	unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
+	unsigned int *bref = p;
+	while (bref < p+max) {
+		if (unlikely(*bref >= maxblocks)) {
+			ext4_error(inode->i_sb, function,
+				   "block reference %u >= max (%u) "
+				   "in inode #%lu, offset=%d",
+				   *bref, maxblocks,
+				   inode->i_ino, (int)(bref-p));
+			return -EIO;
+		}
+		bref++;
+	}
+	return 0;
+}
+
+
+#define ext4_check_indirect_blockref(inode, bh)                         \
+	__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data,   \
+			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))
+
+#define ext4_check_inode_blockref(inode)                                 \
+	__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data,    \
+			      EXT4_NDIR_BLOCKS)
+
 /**
  * ext4_get_branch - read the chain of indirect blocks leading to data
  * @inode: inode in question
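
[Commentary, not part of the patch: the new __ext4_check_blockref() rejects any on-disk block reference that points past the end of the filesystem, so a corrupted indirect block or inode cannot send the mapping code off to a bogus location. A minimal standalone sketch of the same bounds-check pattern (names here are illustrative; note the kernel helper compares the raw on-disk values, which are little-endian __le32, so a fully portable version would pass them through le32_to_cpu() first):

	#include <stdio.h>

	/* Reject any block reference >= the filesystem block count. */
	static int check_blockrefs(const unsigned int *refs, unsigned int n,
				   unsigned int maxblocks)
	{
		for (unsigned int i = 0; i < n; i++) {
			if (refs[i] >= maxblocks) {
				fprintf(stderr, "bad ref %u at offset %u\n",
					refs[i], i);
				return -1;	/* the kernel returns -EIO */
			}
		}
		return 0;
	}

	int main(void)
	{
		unsigned int refs[] = { 12, 7, 99999, 3 };

		/* fails: 99999 >= 1024 */
		return check_blockrefs(refs, 4, 1024) ? 1 : 0;
	}
]
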
@@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 	if (!p->key)
 		goto no_block;
 	while (--depth) {
-		bh = sb_bread(sb, le32_to_cpu(p->key));
-		if (!bh)
+		bh = sb_getblk(sb, le32_to_cpu(p->key));
+		if (unlikely(!bh))
 			goto failure;
+
+		if (!bh_uptodate_or_lock(bh)) {
+			if (bh_submit_read(bh) < 0) {
+				put_bh(bh);
+				goto failure;
+			}
+			/* validate block references */
+			if (ext4_check_indirect_blockref(inode, bh)) {
+				put_bh(bh);
+				goto failure;
+			}
+		}
+
 		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
 		/* Reader: end */
 		if (!p->key)
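
[Commentary, not part of the patch: switching from sb_bread() to sb_getblk() plus bh_uptodate_or_lock()/bh_submit_read() means an indirect block is validated exactly once, when it is actually read from disk; a buffer already uptodate in the cache skips both the I/O and the re-check. A rough sketch of that read-and-validate-once idiom (all names here are stand-ins):

	#include <stdbool.h>
	#include <stdio.h>

	struct cached_block {
		bool uptodate;
		unsigned int ref;
	};

	static int read_from_disk(struct cached_block *b)
	{
		b->ref = 42;			/* stand-in for bh_submit_read() */
		return 0;
	}

	static int get_block_checked(struct cached_block *b)
	{
		if (!b->uptodate) {		/* bh_uptodate_or_lock() */
			if (read_from_disk(b))
				return -1;
			if (b->ref >= 100000)	/* validate on first read only */
				return -1;
			b->uptodate = true;
		}
		return 0;			/* cached: no I/O, no re-check */
	}

	int main(void)
	{
		struct cached_block b = { 0 };

		printf("first:  %d\n", get_block_checked(&b));
		printf("cached: %d\n", get_block_checked(&b));
		return 0;
	}
]
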
@@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
 	ext4_fsblk_t bg_start;
 	ext4_fsblk_t last_block;
 	ext4_grpblk_t colour;
+	ext4_group_t block_group;
+	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
 
 	/* Try to find previous block */
 	for (p = ind->p - 1; p >= start; p--) {
@@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
 	 * It is going to be referred to from the inode itself? OK, just put it
 	 * into the same cylinder group then.
 	 */
-	bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
+	block_group = ei->i_block_group;
+	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+		block_group &= ~(flex_size-1);
+		if (S_ISREG(inode->i_mode))
+			block_group++;
+	}
+	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
 	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
+	/*
+	 * If we are doing delayed allocation, we don't need take
+	 * colour into account.
+	 */
+	if (test_opt(inode->i_sb, DELALLOC))
+		return bg_start;
+
 	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
 		colour = (current->pid % 16) *
 			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
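
[Commentary, not part of the patch: a worked example of the flex_bg rounding above. With flex_size = 16, an inode living in block group 37 has its allocation goal moved to the start of its flex group, 37 & ~15 = 32, and block_group++ then steers regular-file data to group 33; the assumption in this scheme is that the first group of a flex group is better left to directories and metadata:

	#include <stdio.h>

	int main(void)
	{
		unsigned int flex_size = 16, block_group = 37;
		int is_regular_file = 1;

		block_group &= ~(flex_size - 1);	/* 37 -> 32 */
		if (is_regular_file)
			block_group++;			/* 32 -> 33 */
		printf("allocation goal group: %u\n", block_group);
		return 0;
	}
]
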
@@ -975,6 +1031,17 @@ out:
 	return err;
 }
 
+qsize_t ext4_get_reserved_space(struct inode *inode)
+{
+	unsigned long long total;
+
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	total = EXT4_I(inode)->i_reserved_data_blocks +
+		EXT4_I(inode)->i_reserved_meta_blocks;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+	return total;
+}
 /*
  * Calculate the number of metadata blocks need to reserve
  * to allocate @blocks for non extent file based file
@@ -1036,8 +1103,21 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	/* update per-inode reservations */
 	BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
 	EXT4_I(inode)->i_reserved_data_blocks -= used;
-
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+	/*
+	 * free those over-booking quota for metadata blocks
+	 */
+	if (mdb_free)
+		vfs_dq_release_reservation_block(inode, mdb_free);
+
+	/*
+	 * If we have done all the pending block allocations and if
+	 * there aren't any writers on the inode, we can discard the
+	 * inode's preallocations.
+	 */
+	if (!total && (atomic_read(&inode->i_writecount) == 0))
+		ext4_discard_preallocations(inode);
 }
 
 /*
@@ -1553,8 +1633,8 @@ static int ext4_journalled_write_end(struct file *file,
 static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 {
 	int retries = 0;
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	unsigned long md_needed, mdblocks, total = 0;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	unsigned long md_needed, mdblocks, total = 0;
 
 	/*
 	 * recalculate the amount of metadata blocks to reserve
@@ -1570,12 +1650,23 @@ repeat:
 	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
 	total = md_needed + nrblocks;
 
+	/*
+	 * Make quota reservation here to prevent quota overflow
+	 * later. Real quota accounting is done at pages writeout
+	 * time.
+	 */
+	if (vfs_dq_reserve_block(inode, total)) {
+		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+		return -EDQUOT;
+	}
+
 	if (ext4_claim_free_blocks(sbi, total)) {
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
 		}
+		vfs_dq_release_reservation_block(inode, total);
 		return -ENOSPC;
 	}
 	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
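
[Commentary, not part of the patch: the ordering here matters: the quota reservation is taken first, and if the filesystem cannot supply the blocks (and the allocation will not be retried), the quota reservation is rolled back so the two counters never drift apart. A toy sketch of that reserve-then-claim-with-rollback pattern (illustrative names and counters):

	#include <stdio.h>

	static unsigned long quota_reserved;
	static unsigned long fs_free_blocks = 8;

	static int quota_reserve(unsigned long n)
	{
		quota_reserved += n;	/* could fail with -EDQUOT in real life */
		return 0;
	}

	static int claim_free_blocks(unsigned long n)
	{
		if (n > fs_free_blocks)
			return -1;			/* -ENOSPC */
		fs_free_blocks -= n;
		return 0;
	}

	static int reserve(unsigned long total)
	{
		if (quota_reserve(total))
			return -1;
		if (claim_free_blocks(total)) {
			quota_reserved -= total;	/* roll back */
			return -1;
		}
		return 0;
	}

	int main(void)
	{
		printf("reserve 4:  %d (quota %lu)\n", reserve(4), quota_reserved);
		printf("reserve 40: %d (quota %lu)\n", reserve(40), quota_reserved);
		return 0;
	}
]
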
@@ -1629,6 +1720,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
 	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+	vfs_dq_release_reservation_block(inode, release);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -1658,9 +1751,10 @@ static void ext4_da_page_release_reservation(struct page *page,
 
 struct mpage_da_data {
 	struct inode *inode;
-	struct buffer_head lbh;			/* extent of blocks */
+	sector_t b_blocknr;		/* start block number of extent */
+	size_t b_size;			/* size of extent */
+	unsigned long b_state;		/* state of the extent */
 	unsigned long first_page, next_page;	/* extent of pages */
-	get_block_t *get_block;
 	struct writeback_control *wbc;
 	int io_done;
 	int pages_written;
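
[Commentary, not part of the patch: replacing the embedded struct buffer_head with a bare (b_blocknr, b_size, b_state) triple shrinks mpage_da_data and drops the pretence that the accumulated extent is a real buffer. The cost is that the buffer_mapped()/buffer_delay() helpers no longer apply, so the code below tests the BH_* bits by hand. A small sketch of the equivalence (the BH_* values below are illustrative, not the kernel's):

	#include <stdio.h>

	enum { BH_Mapped = 5, BH_Delay = 9 };

	int main(void)
	{
		unsigned long b_state = (1UL << BH_Mapped) | (1UL << BH_Delay);

		int mapped = !!(b_state & (1 << BH_Mapped));
		int delay  = !!(b_state & (1 << BH_Delay));

		/* mapped && !delay would mean: already on disk, skip it */
		printf("mapped=%d delay=%d\n", mapped, delay);
		return 0;
	}
]
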
@@ -1674,7 +1768,6 @@ struct mpage_da_data {
  * @mpd->inode: inode
  * @mpd->first_page: first page of the extent
  * @mpd->next_page: page after the last page of the extent
- * @mpd->get_block: the filesystem's block mapper function
  *
  * By the time mpage_da_submit_io() is called we expect all blocks
  * to be allocated. this may be wrong if allocation failed.
@@ -1694,7 +1787,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 	/*
 	 * We need to start from the first_page to the next_page - 1
 	 * to make sure we also write the mapped dirty buffer_heads.
-	 * If we look at mpd->lbh.b_blocknr we would only be looking
+	 * If we look at mpd->b_blocknr we would only be looking
 	 * at the currently mapped buffer_heads.
 	 */
 	index = mpd->first_page;
@@ -1884,68 +1977,111 @@ static void ext4_print_free_blocks(struct inode *inode)
 	return;
 }
 
+#define EXT4_DELALLOC_RSVED 1
+static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
+				   struct buffer_head *bh_result, int create)
+{
+	int ret;
+	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+	loff_t disksize = EXT4_I(inode)->i_disksize;
+	handle_t *handle = NULL;
+
+	handle = ext4_journal_current_handle();
+	BUG_ON(!handle);
+	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
+	if (ret <= 0)
+		return ret;
+
+	bh_result->b_size = (ret << inode->i_blkbits);
+
+	if (ext4_should_order_data(inode)) {
+		int retval;
+		retval = ext4_jbd2_file_inode(handle, inode);
+		if (retval)
+			/*
+			 * Failed to add inode for ordered mode. Don't
+			 * update file size
+			 */
+			return retval;
+	}
+
+	/*
+	 * Update on-disk size along with block allocation we don't
+	 * use 'extend_disksize' as size may change within already
+	 * allocated block -bzzz
+	 */
+	disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
+	if (disksize > i_size_read(inode))
+		disksize = i_size_read(inode);
+	if (disksize > EXT4_I(inode)->i_disksize) {
+		ext4_update_i_disksize(inode, disksize);
+		ret = ext4_mark_inode_dirty(handle, inode);
+		return ret;
+	}
+	return 0;
+}
+
 /*
  * mpage_da_map_blocks - go through given space
  *
- * @mpd->lbh - bh describing space
- * @mpd->get_block - the filesystem's block mapper function
+ * @mpd - bh describing space
 *
 * The function skips space we know is already mapped to disk blocks.
 *
 */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
 	int err = 0;
 	struct buffer_head new;
-	struct buffer_head *lbh = &mpd->lbh;
 	sector_t next;
 
 	/*
 	 * We consider only non-mapped and non-allocated blocks
 	 */
-	if (buffer_mapped(lbh) && !buffer_delay(lbh))
+	if ((mpd->b_state & (1 << BH_Mapped)) &&
+		!(mpd->b_state & (1 << BH_Delay)))
 		return 0;
-	new.b_state = lbh->b_state;
+	new.b_state = mpd->b_state;
 	new.b_blocknr = 0;
-	new.b_size = lbh->b_size;
-	next = lbh->b_blocknr;
+	new.b_size = mpd->b_size;
+	next = mpd->b_blocknr;
 	/*
 	 * If we didn't accumulate anything
 	 * to write simply return
 	 */
 	if (!new.b_size)
 		return 0;
-	err = mpd->get_block(mpd->inode, next, &new, 1);
-	if (err) {
 
-		/* If get block returns with error
-		 * we simply return. Later writepage
-		 * will redirty the page and writepages
-		 * will find the dirty page again
+	err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
+	if (err) {
+		/*
+		 * If get block returns with error we simply
+		 * return. Later writepage will redirty the page and
+		 * writepages will find the dirty page again
 		 */
 		if (err == -EAGAIN)
 			return 0;
 
 		if (err == -ENOSPC &&
 		    ext4_count_free_blocks(mpd->inode->i_sb)) {
 			mpd->retval = err;
 			return 0;
 		}
 
 		/*
-		 * get block failure will cause us
-		 * to loop in writepages. Because
-		 * a_ops->writepage won't be able to
-		 * make progress. The page will be redirtied
-		 * by writepage and writepages will again
-		 * try to write the same.
+		 * get block failure will cause us to loop in
+		 * writepages, because a_ops->writepage won't be able
+		 * to make progress. The page will be redirtied by
+		 * writepage and writepages will again try to write
+		 * the same.
 		 */
 		printk(KERN_EMERG "%s block allocation failed for inode %lu "
 			"at logical offset %llu with max blocks "
 			"%zd with error %d\n",
 			__func__, mpd->inode->i_ino,
 			(unsigned long long)next,
-			lbh->b_size >> mpd->inode->i_blkbits, err);
+			mpd->b_size >> mpd->inode->i_blkbits, err);
 		printk(KERN_EMERG "This should not happen.!! "
 			"Data will be lost\n");
 		if (err == -ENOSPC) {
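
[Commentary, not part of the patch: a worked example of the i_disksize update in the relocated ext4_da_get_block_write() above. After mapping ret blocks starting at logical block iblock, the byte offset of the end of the mapped range is (iblock + ret) << blkbits, clamped to i_size so the on-disk size never claims more than the file actually holds:

	#include <stdio.h>

	int main(void)
	{
		long long iblock = 10, ret = 4, blkbits = 12;	/* 4K blocks */
		long long i_size = 50000;

		long long disksize = (iblock + ret) << blkbits;	/* 57344 */
		if (disksize > i_size)
			disksize = i_size;			/* clamp: 50000 */
		printf("i_disksize candidate: %lld\n", disksize);
		return 0;
	}
]
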
@@ -1953,7 +2089,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	}
 	/* invlaidate all the pages */
 	ext4_da_block_invalidatepages(mpd, next,
-			lbh->b_size >> mpd->inode->i_blkbits);
+			mpd->b_size >> mpd->inode->i_blkbits);
 	return err;
 }
 	BUG_ON(new.b_size == 0);
@@ -1965,7 +2101,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * If blocks are delayed marked, we need to
 	 * put actual blocknr and drop delayed bit
 	 */
-	if (buffer_delay(lbh) || buffer_unwritten(lbh))
+	if ((mpd->b_state & (1 << BH_Delay)) ||
+	    (mpd->b_state & (1 << BH_Unwritten)))
 		mpage_put_bnr_to_bhs(mpd, next, &new);
 
 	return 0;
@@ -1984,12 +2121,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
  * the function is used to collect contig. blocks in same state
  */
 static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
-				   sector_t logical, struct buffer_head *bh)
+				   sector_t logical, size_t b_size,
+				   unsigned long b_state)
 {
 	sector_t next;
-	size_t b_size = bh->b_size;
-	struct buffer_head *lbh = &mpd->lbh;
-	int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
+	int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
 
 	/* check if thereserved journal credits might overflow */
 	if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
@@ -2016,19 +2152,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	/*
 	 * First block in the extent
 	 */
-	if (lbh->b_size == 0) {
-		lbh->b_blocknr = logical;
-		lbh->b_size = b_size;
-		lbh->b_state = bh->b_state & BH_FLAGS;
+	if (mpd->b_size == 0) {
+		mpd->b_blocknr = logical;
+		mpd->b_size = b_size;
+		mpd->b_state = b_state & BH_FLAGS;
 		return;
 	}
 
-	next = lbh->b_blocknr + nrblocks;
+	next = mpd->b_blocknr + nrblocks;
 	/*
 	 * Can we merge the block to our big extent?
 	 */
-	if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
-		lbh->b_size += b_size;
+	if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
+		mpd->b_size += b_size;
 		return;
 	}
 
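
[Commentary, not part of the patch: the merge rule above is the heart of the extent collection: a new chunk is absorbed only if it begins exactly where the running extent ends and carries the same state bits; anything else forces the extent to be flushed. A compact sketch (illustrative types):

	#include <stdio.h>

	struct extent { unsigned long start, len, state; };

	static int try_merge(struct extent *e, unsigned long logical,
			     unsigned long len, unsigned long state)
	{
		if (e->len == 0) {		/* first block in the extent */
			e->start = logical;
			e->len = len;
			e->state = state;
			return 1;
		}
		if (logical == e->start + e->len && state == e->state) {
			e->len += len;		/* contiguous, same state */
			return 1;
		}
		return 0;			/* caller must flush the extent */
	}

	int main(void)
	{
		struct extent e = { 0, 0, 0 };

		printf("%d", try_merge(&e, 100, 1, 3));		/* 1: starts extent */
		printf("%d", try_merge(&e, 101, 1, 3));		/* 1: merges */
		printf("%d\n", try_merge(&e, 200, 1, 3));	/* 0: gap, no merge */
		return 0;
	}
]
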
@@ -2057,7 +2193,7 @@ static int __mpage_da_writepage(struct page *page,
 {
 	struct mpage_da_data *mpd = data;
 	struct inode *inode = mpd->inode;
-	struct buffer_head *bh, *head, fake;
+	struct buffer_head *bh, *head;
 	sector_t logical;
 
 	if (mpd->io_done) {
@@ -2099,9 +2235,9 @@ static int __mpage_da_writepage(struct page *page,
 		/*
 		 * ... and blocks
 		 */
-		mpd->lbh.b_size = 0;
-		mpd->lbh.b_state = 0;
-		mpd->lbh.b_blocknr = 0;
+		mpd->b_size = 0;
+		mpd->b_state = 0;
+		mpd->b_blocknr = 0;
 	}
 
 	mpd->next_page = page->index + 1;
@@ -2109,16 +2245,8 @@ static int __mpage_da_writepage(struct page *page,
 		(PAGE_CACHE_SHIFT - inode->i_blkbits);
 
 	if (!page_has_buffers(page)) {
-		/*
-		 * There is no attached buffer heads yet (mmap?)
-		 * we treat the page asfull of dirty blocks
-		 */
-		bh = &fake;
-		bh->b_size = PAGE_CACHE_SIZE;
-		bh->b_state = 0;
-		set_buffer_dirty(bh);
-		set_buffer_uptodate(bh);
-		mpage_add_bh_to_extent(mpd, logical, bh);
+		mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
+				       (1 << BH_Dirty) | (1 << BH_Uptodate));
 		if (mpd->io_done)
 			return MPAGE_DA_EXTENT_TAIL;
 	} else {
@@ -2136,8 +2264,10 @@ static int __mpage_da_writepage(struct page *page,
 			 * with the page in ext4_da_writepage
 			 */
 			if (buffer_dirty(bh) &&
 			    (!buffer_mapped(bh) || buffer_delay(bh))) {
-				mpage_add_bh_to_extent(mpd, logical, bh);
+				mpage_add_bh_to_extent(mpd, logical,
+						       bh->b_size,
+						       bh->b_state);
 				if (mpd->io_done)
 					return MPAGE_DA_EXTENT_TAIL;
 			} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
@@ -2149,9 +2279,8 @@ static int __mpage_da_writepage(struct page *page,
 				 * unmapped buffer_head later we need to
 				 * use the b_state flag of that buffer_head.
 				 */
-				if (mpd->lbh.b_size == 0)
-					mpd->lbh.b_state =
-						bh->b_state & BH_FLAGS;
+				if (mpd->b_size == 0)
+					mpd->b_state = bh->b_state & BH_FLAGS;
 			}
 			logical++;
 		} while ((bh = bh->b_this_page) != head);
@@ -2161,51 +2290,6 @@ static int __mpage_da_writepage(struct page *page,
 }
 
 /*
- * mpage_da_writepages - walk the list of dirty pages of the given
- * address space, allocates non-allocated blocks, maps newly-allocated
- * blocks to existing bhs and issue IO them
- *
- * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @get_block: the filesystem's block mapper function.
- *
- * This is a library function, which implements the writepages()
- * address_space_operation.
- */
-static int mpage_da_writepages(struct address_space *mapping,
-			       struct writeback_control *wbc,
-			       struct mpage_da_data *mpd)
-{
-	int ret;
-
-	if (!mpd->get_block)
-		return generic_writepages(mapping, wbc);
-
-	mpd->lbh.b_size = 0;
-	mpd->lbh.b_state = 0;
-	mpd->lbh.b_blocknr = 0;
-	mpd->first_page = 0;
-	mpd->next_page = 0;
-	mpd->io_done = 0;
-	mpd->pages_written = 0;
-	mpd->retval = 0;
-
-	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-	/*
-	 * Handle last extent of pages
-	 */
-	if (!mpd->io_done && mpd->next_page != mpd->first_page) {
-		if (mpage_da_map_blocks(mpd) == 0)
-			mpage_da_submit_io(mpd);
-
-		mpd->io_done = 1;
-		ret = MPAGE_DA_EXTENT_TAIL;
-	}
-	wbc->nr_to_write -= mpd->pages_written;
-	return ret;
-}
-
-/*
  * this is a special callback for ->write_begin() only
  * it's intention is to return mapped block or reserve space
  */
@@ -2244,51 +2328,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 
 	return ret;
 }
-#define EXT4_DELALLOC_RSVED 1
-static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create)
-{
-	int ret;
-	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	loff_t disksize = EXT4_I(inode)->i_disksize;
-	handle_t *handle = NULL;
-
-	handle = ext4_journal_current_handle();
-	BUG_ON(!handle);
-	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
-	if (ret > 0) {
-
-		bh_result->b_size = (ret << inode->i_blkbits);
-
-		if (ext4_should_order_data(inode)) {
-			int retval;
-			retval = ext4_jbd2_file_inode(handle, inode);
-			if (retval)
-				/*
-				 * Failed to add inode for ordered
-				 * mode. Don't update file size
-				 */
-				return retval;
-		}
-
-		/*
-		 * Update on-disk size along with block allocation
-		 * we don't use 'extend_disksize' as size may change
-		 * within already allocated block -bzzz
-		 */
-		disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > EXT4_I(inode)->i_disksize) {
-			ext4_update_i_disksize(inode, disksize);
-			ret = ext4_mark_inode_dirty(handle, inode);
-			return ret;
-		}
-		ret = 0;
-	}
-	return ret;
-}
 
 static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
 {
@@ -2539,8 +2578,38 @@ retry:
 			dump_stack();
 			goto out_writepages;
 		}
-		mpd.get_block = ext4_da_get_block_write;
-		ret = mpage_da_writepages(mapping, wbc, &mpd);
+
+		/*
+		 * Now call __mpage_da_writepage to find the next
+		 * contiguous region of logical blocks that need
+		 * blocks to be allocated by ext4. We don't actually
+		 * submit the blocks for I/O here, even though
+		 * write_cache_pages thinks it will, and will set the
+		 * pages as clean for write before calling
+		 * __mpage_da_writepage().
+		 */
+		mpd.b_size = 0;
+		mpd.b_state = 0;
+		mpd.b_blocknr = 0;
+		mpd.first_page = 0;
+		mpd.next_page = 0;
+		mpd.io_done = 0;
+		mpd.pages_written = 0;
+		mpd.retval = 0;
+		ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
+					&mpd);
+		/*
+		 * If we have a contigous extent of pages and we
+		 * haven't done the I/O yet, map the blocks and submit
+		 * them for I/O.
+		 */
+		if (!mpd.io_done && mpd.next_page != mpd.first_page) {
+			if (mpage_da_map_blocks(&mpd) == 0)
+				mpage_da_submit_io(&mpd);
+			mpd.io_done = 1;
+			ret = MPAGE_DA_EXTENT_TAIL;
+		}
+		wbc->nr_to_write -= mpd.pages_written;
 
 		ext4_journal_stop(handle);
 
@@ -2816,6 +2885,48 @@ out:
 	return;
 }
 
+/*
+ * Force all delayed allocation blocks to be allocated for a given inode.
+ */
+int ext4_alloc_da_blocks(struct inode *inode)
+{
+	if (!EXT4_I(inode)->i_reserved_data_blocks &&
+	    !EXT4_I(inode)->i_reserved_meta_blocks)
+		return 0;
+
+	/*
+	 * We do something simple for now. The filemap_flush() will
+	 * also start triggering a write of the data blocks, which is
+	 * not strictly speaking necessary (and for users of
+	 * laptop_mode, not even desirable). However, to do otherwise
+	 * would require replicating code paths in:
+	 *
+	 * ext4_da_writepages() ->
+	 *    write_cache_pages() ---> (via passed in callback function)
+	 *       __mpage_da_writepage() -->
+	 *          mpage_add_bh_to_extent()
+	 *          mpage_da_map_blocks()
+	 *
+	 * The problem is that write_cache_pages(), located in
+	 * mm/page-writeback.c, marks pages clean in preparation for
+	 * doing I/O, which is not desirable if we're not planning on
+	 * doing I/O at all.
+	 *
+	 * We could call write_cache_pages(), and then redirty all of
+	 * the pages by calling redirty_page_for_writeback() but that
+	 * would be ugly in the extreme. So instead we would need to
+	 * replicate parts of the code in the above functions,
+	 * simplifying them becuase we wouldn't actually intend to
+	 * write out the pages, but rather only collect contiguous
+	 * logical block extents, call the multi-block allocator, and
+	 * then update the buffer heads with the block allocations.
+	 *
+	 * For now, though, we'll cheat by calling filemap_flush(),
+	 * which will map the blocks, and start the I/O, but not
+	 * actually wait for the I/O to complete.
+	 */
+	return filemap_flush(inode->i_mapping);
+}
 
 /*
  * bmap() is special. It gets used by applications such as lilo and by
@@ -3838,6 +3949,9 @@ void ext4_truncate(struct inode *inode)
 	if (!ext4_can_truncate(inode))
 		return;
 
+	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
+
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		ext4_ext_truncate(inode);
 		return;
@@ -4080,12 +4194,7 @@ make_io:
 			unsigned num;
 
 			table = ext4_inode_table(sb, gdp);
-			/* Make sure s_inode_readahead_blks is a power of 2 */
-			while (EXT4_SB(sb)->s_inode_readahead_blks &
-			       (EXT4_SB(sb)->s_inode_readahead_blks-1))
-				EXT4_SB(sb)->s_inode_readahead_blks =
-					(EXT4_SB(sb)->s_inode_readahead_blks &
-					 (EXT4_SB(sb)->s_inode_readahead_blks-1));
+			/* s_inode_readahead_blks is always a power of 2 */
 			b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
 			if (table > b)
 				b = table;
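
[Commentary, not part of the patch: the deleted loop used to whittle an arbitrary s_inode_readahead_blks down to a power of two on every inode read; per the new comment, the invariant is now guaranteed wherever the value is set, so the hot path is left with a single mask. The mask works like this:

	#include <stdio.h>

	int main(void)
	{
		unsigned long n = 32;		/* power-of-2 readahead window */
		unsigned long block = 1234;

		/* rounds down to an n-block boundary: 1234 -> 1216 */
		printf("%lu -> %lu\n", block, block & ~(n - 1));
		return 0;
	}
]
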
@@ -4257,6 +4366,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	ei->i_disksize = inode->i_size;
 	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
 	ei->i_block_group = iloc.block_group;
+	ei->i_last_alloc_group = ~0;
 	/*
 	 * NOTE! The in-memory inode i_data array is in little-endian order
 	 * even on big-endian machines: we do NOT byteswap the block numbers!
@@ -4299,6 +4409,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 			(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
 	}
 
+	if (ei->i_flags & EXT4_EXTENTS_FL) {
+		/* Validate extent which is part of inode */
+		ret = ext4_ext_check_inode(inode);
+	} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		   (S_ISLNK(inode->i_mode) &&
+		    !ext4_inode_is_fast_symlink(inode))) {
+		/* Validate block references which are part of inode */
+		ret = ext4_check_inode_blockref(inode);
+	}
+	if (ret) {
+		brelse(bh);
+		goto bad_inode;
+	}
+
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext4_file_inode_operations;
 		inode->i_fop = &ext4_file_operations;
@@ -4315,7 +4439,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 			inode->i_op = &ext4_symlink_inode_operations;
 			ext4_set_aops(inode);
 		}
-	} else {
+	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+		   S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 		inode->i_op = &ext4_special_inode_operations;
 		if (raw_inode->i_block[0])
 			init_special_inode(inode, inode->i_mode,
@@ -4323,6 +4448,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		else
 			init_special_inode(inode, inode->i_mode,
 			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+	} else {
+		brelse(bh);
+		ret = -EIO;
+		ext4_error(inode->i_sb, __func__,
+			   "bogus i_mode (%o) for inode=%lu",
+			   inode->i_mode, inode->i_ino);
+		goto bad_inode;
 	}
 	brelse(iloc.bh);
 	ext4_set_inode_flags(inode);
@@ -4612,7 +4744,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
+		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
 		if (error) {
 			ext4_journal_stop(handle);
 			return error;
@@ -4991,7 +5123,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * i_size has been changed by generic_commit_write() and we thus need
  * to include the updated inode in the current transaction.
  *
- * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks
+ * Also, vfs_dq_alloc_block() will always dirty the inode when blocks
  * are allocated to the file.
  *
  * If the inode is marked synchronous, we don't honour that here - doing
@@ -5116,8 +5248,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
 	return !buffer_mapped(bh);
 }
 
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page = vmf->page;
 	loff_t size;
 	unsigned long len;
 	int ret = -EINVAL;
@@ -5169,6 +5302,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 		goto out_unlock;
 	ret = 0;
 out_unlock:
+	if (ret)
+		ret = VM_FAULT_SIGBUS;
 	up_read(&inode->i_alloc_sem);
 	return ret;
 }
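
[Commentary, not part of the patch: with the switch to the vm_fault-based signature, ->page_mkwrite() is expected to return VM_FAULT_* codes rather than -errno values, which is why every error path above is collapsed to VM_FAULT_SIGBUS before returning. A trivial sketch of that translation (constant value illustrative):

	#include <stdio.h>

	#define VM_FAULT_SIGBUS 0x0002

	static int page_mkwrite_result(int err)
	{
		return err ? VM_FAULT_SIGBUS : 0;
	}

	int main(void)
	{
		printf("ok: %d, error: %d\n",
		       page_mkwrite_result(0), page_mkwrite_result(-22));
		return 0;
	}
]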