[XFS] Fix memory corruption with small buffer reads

When we have multiple buffers in a single page for a blocksize == pagesize filesystem, we might overwrite the page contents if two callers hit it shortly after each other. To prevent that, we need to keep the page locked until I/O is completed and the page is marked uptodate.

Thanks to Eric Sandeen for triaging this bug and finding a reproducible testcase, and to Dave Chinner for additional advice.

This should fix kernel.org bz #10421.

Tested-by: Eric Sandeen <sandeen@sandeen.net>
SGI-PV: 981813
SGI-Modid: xfs-linux-melb:xfs-kern:31173a
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
author: Christoph Hellwig <hch@infradead.org> 2008-05-19 02:34:42 -0400
committer: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> 2008-05-23 04:12:49 -0400
commit: 6ab455eeaff6893cd06da33843e840d888cdc04a (patch)
tree: e7744d1580647ca3b08e829bcf976f2f60c49986
parent: c8f5f12e46f079a954d4f7163ba59dadee08ca26 (diff)
2 files changed, 39 insertions, 4 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5105015a75ad..98e0e86093b4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages(
                if (unlikely(page == NULL)) {
                        if (flags & XBF_READ_AHEAD) {
                                bp->b_page_count = i;
+                                for (i = 0; i < bp->b_page_count; i++)
+                                        unlock_page(bp->b_pages[i]);
                                return -ENOMEM;
                        }
@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages(
                ASSERT(!PagePrivate(page));
                if (!PageUptodate(page)) {
                        page_count--;
-                        if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
+                        if (blocksize >= PAGE_CACHE_SIZE) {
+                                if (flags & XBF_READ)
+                                        bp->b_flags |= _XBF_PAGE_LOCKED;
+                        } else if (!PagePrivate(page)) {
                                if (test_page_region(page, offset, nbytes))
                                        page_count++;
                        }
                }
-                unlock_page(page);
                bp->b_pages[i] = page;
                offset = 0;
        }
+        if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
+                for (i = 0; i < bp->b_page_count; i++)
+                        unlock_page(bp->b_pages[i]);
+        }
        if (page_count == bp->b_page_count)
                bp->b_flags |= XBF_DONE;
@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
        bp->b_count_desired = len;
        bp->b_buffer_length = buflen;
        bp->b_flags |= XBF_MAPPED;
+        bp->b_flags &= ~_XBF_PAGE_LOCKED;
        return 0;
 }
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
        xfs_buf_t               *bp,
        int                     schedule)
 {
-        if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+        if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+                bp->b_flags &= ~_XBF_PAGE_LOCKED;
                xfs_buf_ioend(bp, schedule);
+        }
 }
 STATIC void
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
+                if (bp->b_flags & _XBF_PAGE_LOCKED)
+                        unlock_page(page);
        } while (bvec >= bio->bi_io_vec);
        _xfs_buf_ioend(bp, 1);
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
         * filesystem block size is not smaller than the page size.
         */
        if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-            (bp->b_flags & XBF_READ) &&
+            ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
+              (XBF_READ|_XBF_PAGE_LOCKED)) &&
            (blocksize >= PAGE_CACHE_SIZE)) {
                bio = bio_alloc(GFP_NOIO, 1);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 841d7883528d..f948ec7ba9a4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -66,6 +66,25 @@ typedef enum {
        _XBF_PAGES = (1 << 18),     /* backed by refcounted pages          */
        _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
        _XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
+        /*
+         * Special flag for supporting metadata blocks smaller than a FSB.
+         *
+         * In this case we can have multiple xfs_buf_t on a single page and
+         * need to lock out concurrent xfs_buf_t readers as they only
+         * serialise access to the buffer.
+         *
+         * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+         * between reads of the page. Hence we can have one thread read the
+         * page and modify it, but then race with another thread that thinks
+         * the page is not up-to-date and hence reads it again.
+         *
+         * The result is that the first modification to the page is lost.
+         * This sort of AGF/AGI reading race can happen when unlinking inodes
+         * that require truncation and results in the AGI unlinked list
+         * modifications being lost.
+         */
+        _XBF_PAGE_LOCKED = (1 << 22),
 } xfs_buf_flags_t;
 typedef enum {
author	Christoph Hellwig <hch@infradead.org>	2008-05-19 02:34:42 -0400
committer	Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>	2008-05-23 04:12:49 -0400
commit	6ab455eeaff6893cd06da33843e840d888cdc04a (patch)
tree	e7744d1580647ca3b08e829bcf976f2f60c49986
parent	c8f5f12e46f079a954d4f7163ba59dadee08ca26 (diff)