Merge branch 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block: (149 commits)
  block: make sure that REQ_* types are seen even with CONFIG_BLOCK=n
  xen-blkfront: fix missing out label
  blkdev: fix blkdev_issue_zeroout return value
  block: update request stacking methods to support discards
  block: fix missing export of blk_types.h
  writeback: fix bad _bh spinlock nesting
  drbd: revert "delay probes", feature is being re-implemented differently
  drbd: Initialize all members of sync_conf to their defaults [Bugz 315]
  drbd: Disable delay probes for the upcomming release
  writeback: cleanup bdi_register
  writeback: add new tracepoints
  writeback: remove unnecessary init_timer call
  writeback: optimize periodic bdi thread wakeups
  writeback: prevent unnecessary bdi threads wakeups
  writeback: move bdi threads exiting logic to the forker thread
  writeback: restructure bdi forker loop a little
  writeback: move last_active to bdi
  writeback: do not remove bdi from bdi_list
  writeback: simplify bdi code a little
  writeback: do not lose wake-ups in bdi threads
  ...

Fixed up pretty trivial conflicts in drivers/block/virtio_blk.c and
drivers/scsi/scsi_error.c as per Jens.
author: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-10 18:22:42 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-10 18:22:42 -0400
commit: 2f9e825d3e0e2b407ae8f082de5c00afcf7378fb (patch)
tree: f8b3ee40674ce4acd5508a0a0bf52a30904caf6c /fs
parent: 7ae0dea900b027cd90e8a3e14deca9a19e17638b (diff)
parent: de75d60d5ea235e6e09f4962ab22541ce0fe176a (diff)
14 files changed, 147 insertions, 117 deletions
diff --git a/fs/bio.c b/fs/bio.c
index e7bf6ca64dcf..8abb2dfb2e7c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
        if (!bio)
                goto out_bmd;
-        bio->bi_rw |= (!write_to_vm << BIO_RW);
+        if (!write_to_vm)
+                bio->bi_rw |= REQ_WRITE;
        ret = 0;
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
         * set data direction, and check if mapped pages need bouncing
         */
        if (!write_to_vm)
-                bio->bi_rw |= (1 << BIO_RW);
+                bio->bi_rw |= REQ_WRITE;
        bio->bi_bdev = bdev;
        bio->bi_flags |= (1 << BIO_USER_MAPPED);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 451afbd543b5..66411463b734 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,13 +1346,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                return ret;
        }
-        lock_kernel();
 restart:
        ret = -ENXIO;
        disk = get_gendisk(bdev->bd_dev, &partno);
        if (!disk)
-                goto out_unlock_kernel;
+                goto out;
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
@@ -1432,7 +1431,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        if (for_part)
                bdev->bd_part_count++;
        mutex_unlock(&bdev->bd_mutex);
-        unlock_kernel();
        return 0;
 out_clear:
@@ -1445,9 +1443,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        bdev->bd_contains = NULL;
 out_unlock_bdev:
        mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
+ out:
-        unlock_kernel();
        if (disk)
                module_put(disk->fops->owner);
        put_disk(disk);
@@ -1516,7 +1512,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
        struct block_device *victim = NULL;
        mutex_lock_nested(&bdev->bd_mutex, for_part);
-        lock_kernel();
        if (for_part)
                bdev->bd_part_count--;
@@ -1541,7 +1536,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                        victim = bdev->bd_contains;
                bdev->bd_contains = NULL;
        }
-        unlock_kernel();
        mutex_unlock(&bdev->bd_mutex);
        bdput(bdev);
        if (victim)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 34f7c375567e..64f10082f048 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
        end_io_wq->work.func = end_workqueue_fn;
        end_io_wq->work.flags = 0;
-        if (bio->bi_rw & (1 << BIO_RW)) {
+        if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata)
                        btrfs_queue_worker(&fs_info->endio_meta_write_workers,
                                           &end_io_wq->work);
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        atomic_inc(&fs_info->nr_async_submits);
-        if (rw & (1 << BIO_RW_SYNCIO))
+        if (rw & REQ_SYNC)
                btrfs_set_work_high_prio(&async->work);
        btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                          bio, 1);
        BUG_ON(ret);
-        if (!(rw & (1 << BIO_RW))) {
+        if (!(rw & REQ_WRITE)) {
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
         * ram and up to date before trying to verify things.  For
         * blocksize <= pagesize, it is basically a noop
         */
-        if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+        if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
            !bio_ready_for_csum(bio)) {
                btrfs_queue_worker(&fs_info->endio_meta_workers,
                                   &end_io_wq->work);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8976c3343a96..c03864406af3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
-        if (!(rw & (1 << BIO_RW))) {
+        if (!(rw & REQ_WRITE)) {
                if (bio_flags & EXTENT_BIO_COMPRESSED) {
                        return btrfs_submit_compressed_read(inode, bio,
                                                    mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
        bio->bi_size = 0;
        bio_add_page(bio, page, failrec->len, start - page_offset(page));
-        if (failed_bio->bi_rw & (1 << BIO_RW))
+        if (failed_bio->bi_rw & REQ_WRITE)
                rw = WRITE;
        else
                rw = READ;
@@ -5647,7 +5647,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        struct bio_vec *bvec = bio->bi_io_vec;
        u64 start;
        int skip_sum;
-        int write = rw & (1 << BIO_RW);
+        int write = rw & REQ_WRITE;
        int ret = 0;
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d6e3af8be95b..dd318ff280b2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -258,7 +258,7 @@ loop_lock:
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
-                if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
+                if (cur->bi_rw & REQ_SYNC)
                        num_sync_run++;
                submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        int max_errors = 0;
        struct btrfs_multi_bio *multi = NULL;
-        if (multi_ret && !(rw & (1 << BIO_RW)))
+        if (multi_ret && !(rw & REQ_WRITE))
                stripes_allocated = 1;
 again:
        if (multi_ret) {
@@ -2687,7 +2687,7 @@ again:
                mirror_num = 0;
        /* if our multi bio struct is too small, back off and try again */
-        if (rw & (1 << BIO_RW)) {
+        if (rw & REQ_WRITE) {
                if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
                                 BTRFS_BLOCK_GROUP_DUP)) {
                        stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@ again:
                        max_errors = 1;
                }
        }
-        if (multi_ret && (rw & (1 << BIO_RW)) &&
+        if (multi_ret && (rw & REQ_WRITE) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
@@ -2733,7 +2733,7 @@ again:
        num_stripes = 1;
        stripe_index = 0;
        if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-                if (unplug_page || (rw & (1 << BIO_RW)))
+                if (unplug_page || (rw & REQ_WRITE))
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@ again:
                }
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-                if (rw & (1 << BIO_RW))
+                if (rw & REQ_WRITE)
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@ again:
                stripe_index = do_div(stripe_nr, factor);
                stripe_index *= map->sub_stripes;
-                if (unplug_page || (rw & (1 << BIO_RW)))
+                if (unplug_page || (rw & REQ_WRITE))
                        num_stripes = map->sub_stripes;
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
        struct btrfs_pending_bios *pending_bios;
        /* don't bother with additional async steps for reads, right now */
-        if (!(rw & (1 << BIO_RW))) {
+        if (!(rw & REQ_WRITE)) {
                bio_get(bio);
                submit_bio(rw, bio);
                bio_put(bio);
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
        bio->bi_rw |= rw;
        spin_lock(&device->io_lock);
-        if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
+        if (bio->bi_rw & REQ_SYNC)
                pending_bios = &device->pending_sync_bios;
        else
                pending_bios = &device->pending_bios;
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 66b9cf79c5ba..de89645777c7 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
                nbytes = req->uc_outSize; /* don't have more space! */
        }
        if (copy_from_user(req->uc_data, buf, nbytes)) {
-                req->uc_flags |= REQ_ABORT;
+                req->uc_flags |= CODA_REQ_ABORT;
                wake_up(&req->uc_sleep);
                retval = -EFAULT;
                goto out;
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
                retval = -EFAULT;
        
        /* If request was not a signal, enqueue and don't free */
-        if (!(req->uc_flags & REQ_ASYNC)) {
+        if (!(req->uc_flags & CODA_REQ_ASYNC)) {
-                req->uc_flags |= REQ_READ;
+                req->uc_flags |= CODA_REQ_READ;
                list_add_tail(&(req->uc_chain), &vcp->vc_processing);
                goto out;
        }
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
                list_del(&req->uc_chain);
                /* Async requests need to be freed here */
-                if (req->uc_flags & REQ_ASYNC) {
+                if (req->uc_flags & CODA_REQ_ASYNC) {
                        CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
                        kfree(req);
                        continue;
                }
-                req->uc_flags |= REQ_ABORT;
+                req->uc_flags |= CODA_REQ_ABORT;
                wake_up(&req->uc_sleep);
        }
        list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
                list_del(&req->uc_chain);
-                req->uc_flags |= REQ_ABORT;
+                req->uc_flags |= CODA_REQ_ABORT;
                wake_up(&req->uc_sleep);
        }
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f09c5ed76f6c..b8893ab6f9e6 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old)
                               (((r)->uc_opcode != CODA_CLOSE && \
                                 (r)->uc_opcode != CODA_STORE && \
                                 (r)->uc_opcode != CODA_RELEASE) || \
-                                (r)->uc_flags & REQ_READ))
+                                (r)->uc_flags & CODA_REQ_READ))
 static inline void coda_waitfor_upcall(struct upc_req *req)
 {
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
                        set_current_state(TASK_UNINTERRUPTIBLE);
                /* got a reply */
-                if (req->uc_flags & (REQ_WRITE | REQ_ABORT))
+                if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
                        break;
                if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp,
        coda_waitfor_upcall(req);
        /* Op went through, interrupt or not... */
-        if (req->uc_flags & REQ_WRITE) {
+        if (req->uc_flags & CODA_REQ_WRITE) {
                out = (union outputArgs *)req->uc_data;
                /* here we map positive Venus errors to kernel errors */
                error = -out->oh.result;
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp,
        }
        error = -EINTR;
-        if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) {
+        if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
                printk(KERN_WARNING "coda: Unexpected interruption.\n");
                goto exit;
        }
        /* Interrupted before venus read it. */
-        if (!(req->uc_flags & REQ_READ))
+        if (!(req->uc_flags & CODA_REQ_READ))
                goto exit;
        /* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp,
        sig_inputArgs->ih.opcode = CODA_SIGNAL;
        sig_inputArgs->ih.unique = req->uc_unique;
-        sig_req->uc_flags = REQ_ASYNC;
+        sig_req->uc_flags = CODA_REQ_ASYNC;
        sig_req->uc_opcode = sig_inputArgs->ih.opcode;
        sig_req->uc_unique = sig_inputArgs->ih.unique;
        sig_req->uc_inSize = sizeof(struct coda_in_hdr);
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 4337cad7777b..e2732203fa93 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -599,7 +599,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
                        } else {
                                bio = master_dev->bio;
                                /* FIXME: bio_set_dir() */
-                                bio->bi_rw |= (1 << BIO_RW);
+                                bio->bi_rw |= REQ_WRITE;
                        }
                        osd_req_write(or, &ios->obj, per_dev->offset, bio,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b7c7586caea1..2f76c4a081a2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
-#define inode_to_bdi(inode)     ((inode)->i_mapping->backing_dev_info)
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
 /*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
        struct completion *done;        /* set if the caller waits */
 };
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+#define inode_to_bdi(inode)     ((inode)->i_mapping->backing_dev_info)
+/*
+ * We don't actually have pdflush, but this one is exported through /proc...
+ */
+int nr_pdflush_threads;
 /**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
@@ -65,22 +74,21 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static void bdi_queue_work(struct backing_dev_info *bdi,
                struct wb_writeback_work *work)
 {
-        spin_lock(&bdi->wb_lock);
+        trace_writeback_queue(bdi, work);
-        list_add_tail(&work->list, &bdi->work_list);
-        spin_unlock(&bdi->wb_lock);
-        /*
+        spin_lock_bh(&bdi->wb_lock);
-         * If the default thread isn't there, make sure we add it. When
+        list_add_tail(&work->list, &bdi->work_list);
-         * it gets created and wakes up, we'll run this work.
+        if (bdi->wb.task) {
-         */
+                wake_up_process(bdi->wb.task);
-        if (unlikely(list_empty_careful(&bdi->wb_list)))
+        } else {
+                /*
+                 * The bdi thread isn't there, wake up the forker thread which
+                 * will create and run it.
+                 */
+                trace_writeback_nothread(bdi, work);
                wake_up_process(default_backing_dev_info.wb.task);
-        else {
-                struct bdi_writeback *wb = &bdi->wb;
-                if (wb->task)
-                        wake_up_process(wb->task);
        }
+        spin_unlock_bh(&bdi->wb_lock);
 }
 static void
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
         */
        work = kzalloc(sizeof(*work), GFP_ATOMIC);
        if (!work) {
-                if (bdi->wb.task)
+                if (bdi->wb.task) {
+                        trace_writeback_nowork(bdi);
                        wake_up_process(bdi->wb.task);
+                }
                return;
        }
@@ -643,10 +653,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                wbc.more_io = 0;
                wbc.nr_to_write = MAX_WRITEBACK_PAGES;
                wbc.pages_skipped = 0;
+                trace_wbc_writeback_start(&wbc, wb->bdi);
                if (work->sb)
                        __writeback_inodes_sb(work->sb, wb, &wbc);
                else
                        writeback_inodes_wb(wb, &wbc);
+                trace_wbc_writeback_written(&wbc, wb->bdi);
                work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
@@ -674,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (!list_empty(&wb->b_more_io))  {
                        inode = list_entry(wb->b_more_io.prev,
                                                struct inode, i_list);
+                        trace_wbc_writeback_wait(&wbc, wb->bdi);
                        inode_wait_for_writeback(inode);
                }
                spin_unlock(&inode_lock);
@@ -686,17 +701,17 @@ static long wb_writeback(struct bdi_writeback *wb,
 * Return the next wb_writeback_work struct that hasn't been processed yet.
 */
 static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+get_next_work_item(struct backing_dev_info *bdi)
 {
        struct wb_writeback_work *work = NULL;
-        spin_lock(&bdi->wb_lock);
+        spin_lock_bh(&bdi->wb_lock);
        if (!list_empty(&bdi->work_list)) {
                work = list_entry(bdi->work_list.next,
                                  struct wb_writeback_work, list);
                list_del_init(&work->list);
        }
-        spin_unlock(&bdi->wb_lock);
+        spin_unlock_bh(&bdi->wb_lock);
        return work;
 }
@@ -744,7 +759,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
        struct wb_writeback_work *work;
        long wrote = 0;
-        while ((work = get_next_work_item(bdi, wb)) != NULL) {
+        while ((work = get_next_work_item(bdi)) != NULL) {
                /*
                 * Override sync mode, in case we must wait for completion
                 * because this thread is exiting now.
@@ -752,6 +767,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
                if (force_wait)
                        work->sync_mode = WB_SYNC_ALL;
+                trace_writeback_exec(bdi, work);
                wrote += wb_writeback(wb, work);
                /*
@@ -776,47 +793,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 * Handle writeback of dirty data for the device backed by this bdi. Also
 * wakes up periodically and does kupdated style flushing.
 */
-int bdi_writeback_task(struct bdi_writeback *wb)
+int bdi_writeback_thread(void *data)
 {
-        unsigned long last_active = jiffies;
+        struct bdi_writeback *wb = data;
-        unsigned long wait_jiffies = -1UL;
+        struct backing_dev_info *bdi = wb->bdi;
        long pages_written;
+        current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+        set_freezable();
+        wb->last_active = jiffies;
+        /*
+         * Our parent may run at a different priority, just set us to normal
+         */
+        set_user_nice(current, 0);
+        trace_writeback_thread_start(bdi);
        while (!kthread_should_stop()) {
+                /*
+                 * Remove own delayed wake-up timer, since we are already awake
+                 * and we'll take care of the periodic write-back.
+                 */
+                del_timer(&wb->wakeup_timer);
                pages_written = wb_do_writeback(wb, 0);
+                trace_writeback_pages_written(pages_written);
                if (pages_written)
-                        last_active = jiffies;
+                        wb->last_active = jiffies;
-                else if (wait_jiffies != -1UL) {
-                        unsigned long max_idle;
-                        /*
+                set_current_state(TASK_INTERRUPTIBLE);
-                         * Longest period of inactivity that we tolerate. If we
+                if (!list_empty(&bdi->work_list)) {
-                         * see dirty data again later, the task will get
+                        __set_current_state(TASK_RUNNING);
-                         * recreated automatically.
+                        continue;
-                         */
-                        max_idle = max(5UL * 60 * HZ, wait_jiffies);
-                        if (time_after(jiffies, max_idle + last_active))
-                                break;
                }
-                if (dirty_writeback_interval) {
+                if (wb_has_dirty_io(wb) && dirty_writeback_interval)
-                        wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+                        schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-                        schedule_timeout_interruptible(wait_jiffies);
+                else {
-                } else {
+                        /*
-                        set_current_state(TASK_INTERRUPTIBLE);
+                         * We have nothing to do, so can go sleep without any
-                        if (list_empty_careful(&wb->bdi->work_list) &&
+                         * timeout and save power. When a work is queued or
-                            !kthread_should_stop())
+                         * something is made dirty - we will be woken up.
-                                schedule();
+                         */
-                        __set_current_state(TASK_RUNNING);
+                        schedule();
                }
                try_to_freeze();
        }
+        /* Flush any work that raced with us exiting */
+        if (!list_empty(&bdi->work_list))
+                wb_do_writeback(wb, 1);
+        trace_writeback_thread_stop(bdi);
        return 0;
 }
 /*
 * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
 * the whole world.
@@ -891,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
        struct super_block *sb = inode->i_sb;
+        struct backing_dev_info *bdi = NULL;
+        bool wakeup_bdi = false;
        /*
         * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -944,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                 * reposition it (that would break b_dirty time-ordering).
                 */
                if (!was_dirty) {
-                        struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+                        bdi = inode_to_bdi(inode);
-                        struct backing_dev_info *bdi = wb->bdi;
+                        if (bdi_cap_writeback_dirty(bdi)) {
-                        if (bdi_cap_writeback_dirty(bdi) &&
+                                WARN(!test_bit(BDI_registered, &bdi->state),
-                            !test_bit(BDI_registered, &bdi->state)) {
+                                     "bdi-%s not registered\n", bdi->name);
-                                WARN_ON(1);
-                                printk(KERN_ERR "bdi-%s not registered\n",
+                                /*
-                                                                bdi->name);
+                                 * If this is the first dirty inode for this
+                                 * bdi, we have to wake-up the corresponding
+                                 * bdi thread to make sure background
+                                 * write-back happens later.
+                                 */
+                                if (!wb_has_dirty_io(&bdi->wb))
+                                        wakeup_bdi = true;
                        }
                        inode->dirtied_when = jiffies;
-                        list_move(&inode->i_list, &wb->b_dirty);
+                        list_move(&inode->i_list, &bdi->wb.b_dirty);
                }
        }
 out:
        spin_unlock(&inode_lock);
+        if (wakeup_bdi)
+                bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6a857e24f947..cde1248a6225 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
                goto skip_barrier;
        get_bh(bh);
-        submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
+        submit_bh(WRITE_BARRIER | REQ_META, bh);
        wait_on_buffer(bh);
        if (buffer_eopnotsupp(bh)) {
                clear_buffer_eopnotsupp(bh);
@@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
                lock_buffer(bh);
 skip_barrier:
                get_bh(bh);
-                submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+                submit_bh(WRITE_SYNC | REQ_META, bh);
                wait_on_buffer(bh);
        }
        if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 18176d0b75d7..f3b071f921aa 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 {
        struct buffer_head *bh, *head;
        int nr_underway = 0;
-        int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
+        int write_op = REQ_META |
-                        WRITE_SYNC_PLUG : WRITE));
+                (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
        BUG_ON(!PageLocked(page));
        BUG_ON(!page_has_buffers(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        }
        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
-        submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+        submit_bh(READ_SYNC | REQ_META, bh);
        if (!(flags & DIO_WAIT))
                return 0;
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
        if (buffer_uptodate(first_bh))
                goto out;
        if (!buffer_locked(first_bh))
-                ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
+                ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
        dblock++;
        extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4f44bdeb2f03..4d4b1e8ac64c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -274,7 +274,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
        bio->bi_end_io = end_bio_io_page;
        bio->bi_private = page;
-        submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+        submit_bio(READ_SYNC | REQ_META, bio);
        wait_on_page_locked(page);
        bio_put(bio);
        if (!PageUptodate(page)) {
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2e6a2723b8fa..4588fb9e93df 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
                 * Last BIO is always sent through the following
                 * submission.
                 */
-                rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+                rw |= REQ_SYNC | REQ_UNPLUG;
                res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
        }
diff --git a/fs/splice.c b/fs/splice.c
index efdbfece9932..8f1dfaecc8f0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -399,17 +399,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                 * If the page isn't uptodate, we may need to start io on it
                 */
                if (!PageUptodate(page)) {
-                        /*
+                        lock_page(page);
-                         * If in nonblock mode then dont block on waiting
-                         * for an in-flight io page
-                         */
-                        if (flags & SPLICE_F_NONBLOCK) {
-                                if (!trylock_page(page)) {
-                                        error = -EAGAIN;
-                                        break;
-                                }
-                        } else
-                                lock_page(page);
                        /*
                         * Page was truncated, or invalidated by the
@@ -597,7 +587,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
        struct page *pages[PIPE_DEF_BUFFERS];
        struct partial_page partial[PIPE_DEF_BUFFERS];
        struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
-        pgoff_t index;
        ssize_t res;
        size_t this_len;
        int error;
@@ -621,7 +610,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
                        goto shrink_ret;
        }
-        index = *ppos >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
        nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
author	Linus Torvalds <torvalds@linux-foundation.org>	2010-08-10 18:22:42 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-08-10 18:22:42 -0400
commit	2f9e825d3e0e2b407ae8f082de5c00afcf7378fb (patch)
tree	f8b3ee40674ce4acd5508a0a0bf52a30904caf6c /fs
parent	7ae0dea900b027cd90e8a3e14deca9a19e17638b (diff)
parent	de75d60d5ea235e6e09f4962ab22541ce0fe176a (diff)