Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--  fs/fs-writeback.c  139
1 file changed, 43 insertions, 96 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ae0f438c2ee6..24489126f8ca 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,8 +53,6 @@ struct wb_writeback_work {
         unsigned int for_background:1;
         unsigned int for_sync:1;        /* sync(2) WB_SYNC_ALL writeback */
         unsigned int auto_free:1;       /* free on completion */
-        unsigned int single_wait:1;
-        unsigned int single_done:1;
         enum wb_reason reason;          /* why was writeback initiated? */
 
         struct list_head list;          /* pending work list */
@@ -178,14 +176,11 @@ static void wb_wakeup(struct bdi_writeback *wb)
 static void wb_queue_work(struct bdi_writeback *wb,
                           struct wb_writeback_work *work)
 {
-        trace_writeback_queue(wb->bdi, work);
+        trace_writeback_queue(wb, work);
 
         spin_lock_bh(&wb->work_lock);
-        if (!test_bit(WB_registered, &wb->state)) {
-                if (work->single_wait)
-                        work->single_done = 1;
+        if (!test_bit(WB_registered, &wb->state))
                 goto out_unlock;
-        }
         if (work->done)
                 atomic_inc(&work->done->cnt);
         list_add_tail(&work->list, &wb->work_list);
@@ -706,7 +701,7 @@ EXPORT_SYMBOL_GPL(wbc_account_io);
 
 /**
  * inode_congested - test whether an inode is congested
- * @inode: inode to test for congestion
+ * @inode: inode to test for congestion (may be NULL)
  * @cong_bits: mask of WB_[a]sync_congested bits to test
 *
 * Tests whether @inode is congested.  @cong_bits is the mask of congestion
@@ -716,6 +711,9 @@ EXPORT_SYMBOL_GPL(wbc_account_io);
  * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
  * associated with @inode is congested; otherwise, the root wb's congestion
  * state is used.
+ *
+ * @inode is allowed to be NULL as this function is often called on
+ * mapping->host which is NULL for the swapper space.
  */
 int inode_congested(struct inode *inode, int cong_bits)
 {
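
The added paragraph makes the NULL contract explicit: a caller may pass mapping->host directly, and a NULL inode falls back to the root wb's state. A minimal userspace model of that contract, for illustration only; the struct, root_congested_bits, and inode_congested_model() are hypothetical stand-ins, not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct inode {
        int wb_congested_bits;          /* per-inode writeback congestion state */
};

static int root_congested_bits;         /* stand-in for the root wb's state */

static bool inode_congested_model(const struct inode *inode, int cong_bits)
{
        if (!inode)                     /* e.g. swapper space: mapping->host == NULL */
                return root_congested_bits & cong_bits;
        return inode->wb_congested_bits & cong_bits;
}

int main(void)
{
        struct inode inode = { .wb_congested_bits = 0x1 };

        printf("%d\n", inode_congested_model(&inode, 0x1));    /* 1 */
        printf("%d\n", inode_congested_model(NULL, 0x1));      /* root state: 0 */
        return 0;
}
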
@@ -738,32 +736,6 @@ int inode_congested(struct inode *inode, int cong_bits)
 EXPORT_SYMBOL_GPL(inode_congested);
 
 /**
- * wb_wait_for_single_work - wait for completion of a single bdi_writeback_work
- * @bdi: bdi the work item was issued to
- * @work: work item to wait for
- *
- * Wait for the completion of @work which was issued to one of @bdi's
- * bdi_writeback's.  The caller must have set @work->single_wait before
- * issuing it.  This wait operates independently of
- * wb_wait_for_completion() and also disables automatic freeing of @work.
- */
-static void wb_wait_for_single_work(struct backing_dev_info *bdi,
-                                    struct wb_writeback_work *work)
-{
-        if (WARN_ON_ONCE(!work->single_wait))
-                return;
-
-        wait_event(bdi->wb_waitq, work->single_done);
-
-        /*
-         * Paired with smp_wmb() in wb_do_writeback() and ensures that all
-         * modifications to @work prior to assertion of ->single_done are
-         * visible to the caller once this function returns.
-         */
-        smp_rmb();
-}
-
-/**
  * wb_split_bdi_pages - split nr_pages to write according to bandwidth
  * @wb: target bdi_writeback to split @nr_pages to
  * @nr_pages: number of pages to write for the whole bdi
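
wb_split_bdi_pages(), whose kerneldoc appears above, gives each wb a share of @nr_pages proportional to its fraction of the bdi's total write bandwidth. A small standalone sketch of that proportional split, assuming round-up division and a pass-through when the wb's bandwidth covers the whole total; split_pages() is a hypothetical name and the exact edge-case handling is illustrative rather than the kernel's code.

#include <stdint.h>
#include <stdio.h>

/* share of nr_pages proportional to this wb's fraction of the total
 * write bandwidth, rounded up so we err toward writing more */
static long split_pages(long nr_pages, uint64_t this_bw, uint64_t tot_bw)
{
        if (!tot_bw || this_bw >= tot_bw)
                return nr_pages;
        return (long)(((uint64_t)nr_pages * this_bw + tot_bw - 1) / tot_bw);
}

int main(void)
{
        /* two writers at 75%/25% of total bandwidth splitting 1024 pages */
        printf("%ld\n", split_pages(1024, 75, 100));    /* 768 */
        printf("%ld\n", split_pages(1024, 25, 100));    /* 256 */
        return 0;
}
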
@@ -792,38 +764,6 @@ static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
 }
 
 /**
- * wb_clone_and_queue_work - clone a wb_writeback_work and issue it to a wb
- * @wb: target bdi_writeback
- * @base_work: source wb_writeback_work
- *
- * Try to make a clone of @base_work and issue it to @wb.  If cloning
- * succeeds, %true is returned; otherwise, @base_work is issued directly
- * and %false is returned.  In the latter case, the caller is required to
- * wait for @base_work's completion using wb_wait_for_single_work().
- *
- * A clone is auto-freed on completion.  @base_work never is.
- */
-static bool wb_clone_and_queue_work(struct bdi_writeback *wb,
-                                    struct wb_writeback_work *base_work)
-{
-        struct wb_writeback_work *work;
-
-        work = kmalloc(sizeof(*work), GFP_ATOMIC);
-        if (work) {
-                *work = *base_work;
-                work->auto_free = 1;
-                work->single_wait = 0;
-        } else {
-                work = base_work;
-                work->auto_free = 0;
-                work->single_wait = 1;
-        }
-        work->single_done = 0;
-        wb_queue_work(wb, work);
-        return work != base_work;
-}
-
-/**
  * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
  * @bdi: target backing_dev_info
  * @base_work: wb_writeback_work to issue
@@ -838,15 +778,19 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
                                   struct wb_writeback_work *base_work,
                                   bool skip_if_busy)
 {
-        long nr_pages = base_work->nr_pages;
-        int next_blkcg_id = 0;
+        int next_memcg_id = 0;
         struct bdi_writeback *wb;
         struct wb_iter iter;
 
         might_sleep();
 restart:
         rcu_read_lock();
-        bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) {
+        bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) {
+                DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
+                struct wb_writeback_work fallback_work;
+                struct wb_writeback_work *work;
+                long nr_pages;
+
                 /* SYNC_ALL writes out I_DIRTY_TIME too */
                 if (!wb_has_dirty_io(wb) &&
                     (base_work->sync_mode == WB_SYNC_NONE ||
@@ -855,13 +799,30 @@ restart:
                 if (skip_if_busy && writeback_in_progress(wb))
                         continue;
 
-                base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages);
-                if (!wb_clone_and_queue_work(wb, base_work)) {
-                        next_blkcg_id = wb->blkcg_css->id + 1;
-                        rcu_read_unlock();
-                        wb_wait_for_single_work(bdi, base_work);
-                        goto restart;
+                nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
+
+                work = kmalloc(sizeof(*work), GFP_ATOMIC);
+                if (work) {
+                        *work = *base_work;
+                        work->nr_pages = nr_pages;
+                        work->auto_free = 1;
+                        wb_queue_work(wb, work);
+                        continue;
                 }
+
+                /* alloc failed, execute synchronously using on-stack fallback */
+                work = &fallback_work;
+                *work = *base_work;
+                work->nr_pages = nr_pages;
+                work->auto_free = 0;
+                work->done = &fallback_work_done;
+
+                wb_queue_work(wb, work);
+
+                next_memcg_id = wb->memcg_css->id + 1;
+                rcu_read_unlock();
+                wb_wait_for_completion(bdi, &fallback_work_done);
+                goto restart;
         }
         rcu_read_unlock();
 }
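
The hunk above is the core of the change: instead of the single_wait/single_done handshake, each iteration tries to kmalloc() a clone, which the worker later frees via ->auto_free, and only when the allocation fails does it fall back to an on-stack work item that is waited out with wb_wait_for_completion() before the iteration restarts. Below is a compact single-threaded userspace sketch of those ownership rules; all names are hypothetical, and draining the queue stands in for the completion wait.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct work {
        long nr_pages;
        bool auto_free;                 /* consumer frees this item when set */
};

#define QUEUE_MAX 16
static struct work *queue[QUEUE_MAX];
static int queue_len;

static void queue_work(struct work *w)
{
        assert(queue_len < QUEUE_MAX);
        queue[queue_len++] = w;
}

/* consumer side: run each item, then release it per its ownership flag */
static void drain_queue(void)
{
        for (int i = 0; i < queue_len; i++) {
                printf("writing %ld pages\n", queue[i]->nr_pages);
                if (queue[i]->auto_free)
                        free(queue[i]);
        }
        queue_len = 0;
}

/* issuer side: clone if possible, otherwise reuse an on-stack fallback */
static void split_work(const struct work *base, int nr_targets)
{
        for (int i = 0; i < nr_targets; i++) {
                struct work fallback_work;
                struct work *work = malloc(sizeof(*work));

                if (work) {
                        *work = *base;
                        work->auto_free = true;
                        queue_work(work);
                        continue;       /* keep issuing without waiting */
                }

                /* alloc failed: issue the stack copy, then wait it out
                 * (drain_queue() stands in for wb_wait_for_completion()) */
                fallback_work = *base;
                fallback_work.auto_free = false;
                queue_work(&fallback_work);
                drain_queue();
        }
        drain_queue();
}

int main(void)
{
        struct work base = { .nr_pages = 1024, .auto_free = false };

        split_work(&base, 4);
        return 0;
}
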
@@ -902,8 +863,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
 
         if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
                 base_work->auto_free = 0;
-                base_work->single_wait = 0;
-                base_work->single_done = 0;
                 wb_queue_work(&bdi->wb, base_work);
         }
 }
@@ -924,7 +883,7 @@ void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
          */
         work = kzalloc(sizeof(*work), GFP_ATOMIC);
         if (!work) {
-                trace_writeback_nowork(wb->bdi);
+                trace_writeback_nowork(wb);
                 wb_wakeup(wb);
                 return;
         }
@@ -954,7 +913,7 @@ void wb_start_background_writeback(struct bdi_writeback *wb)
          * We just wake up the flusher thread. It will perform background
          * writeback as soon as there is no other work to do.
          */
-        trace_writeback_wake_background(wb->bdi);
+        trace_writeback_wake_background(wb);
         wb_wakeup(wb);
 }
 
@@ -1660,14 +1619,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                 } else if (work->for_background)
                         oldest_jif = jiffies;
 
-                trace_writeback_start(wb->bdi, work);
+                trace_writeback_start(wb, work);
                 if (list_empty(&wb->b_io))
                         queue_io(wb, work);
                 if (work->sb)
                         progress = writeback_sb_inodes(work->sb, wb, work);
                 else
                         progress = __writeback_inodes_wb(wb, work);
-                trace_writeback_written(wb->bdi, work);
+                trace_writeback_written(wb, work);
 
                 wb_update_bandwidth(wb, wb_start);
 
@@ -1692,7 +1651,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                  * we'll just busyloop.
                  */
                 if (!list_empty(&wb->b_more_io)) {
-                        trace_writeback_wait(wb->bdi, work);
+                        trace_writeback_wait(wb, work);
                         inode = wb_inode(wb->b_more_io.prev);
                         spin_lock(&inode->i_lock);
                         spin_unlock(&wb->list_lock);
@@ -1797,26 +1756,14 @@ static long wb_do_writeback(struct bdi_writeback *wb)
         set_bit(WB_writeback_running, &wb->state);
         while ((work = get_next_work_item(wb)) != NULL) {
                 struct wb_completion *done = work->done;
-                bool need_wake_up = false;
 
-                trace_writeback_exec(wb->bdi, work);
+                trace_writeback_exec(wb, work);
 
                 wrote += wb_writeback(wb, work);
 
-                if (work->single_wait) {
-                        WARN_ON_ONCE(work->auto_free);
-                        /* paired w/ rmb in wb_wait_for_single_work() */
-                        smp_wmb();
-                        work->single_done = 1;
-                        need_wake_up = true;
-                } else if (work->auto_free) {
+                if (work->auto_free)
                         kfree(work);
-                }
-
                 if (done && atomic_dec_and_test(&done->cnt))
-                        need_wake_up = true;
-
-                if (need_wake_up)
+                        wake_up_all(&wb->bdi->wb_waitq);
-                        wake_up_all(&wb->bdi->wb_waitq);
         }
 
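
With single_done gone, the only wakeup source left is the wb_completion counter: wb_queue_work() takes one reference per queued item (the atomic_inc earlier in this patch), wb_do_writeback() drops it with atomic_dec_and_test() and wakes the waitq, and wb_wait_for_completion() drops the initial reference and sleeps until the count reaches zero. A runnable pthread analogue of that counting scheme, assuming a single waiter and a dedicated condvar in place of the shared bdi->wb_waitq; the completion_* names are hypothetical.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct completion_count {
        atomic_int cnt;                 /* starts at 1: the waiter's own reference */
        pthread_mutex_t lock;
        pthread_cond_t waitq;
};

/* queue side: one reference per issued work item (cf. atomic_inc above) */
static void completion_get(struct completion_count *done)
{
        atomic_fetch_add(&done->cnt, 1);
}

/* exec side: drop a reference, wake waiters when the count hits zero */
static void completion_put(struct completion_count *done)
{
        if (atomic_fetch_sub(&done->cnt, 1) == 1) {
                pthread_mutex_lock(&done->lock);
                pthread_cond_broadcast(&done->waitq);
                pthread_mutex_unlock(&done->lock);
        }
}

/* wait side: drop the initial reference, then sleep until zero */
static void completion_wait(struct completion_count *done)
{
        completion_put(done);
        pthread_mutex_lock(&done->lock);
        while (atomic_load(&done->cnt) != 0)
                pthread_cond_wait(&done->waitq, &done->lock);
        pthread_mutex_unlock(&done->lock);
}

static struct completion_count done = {
        .cnt = 1,
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .waitq = PTHREAD_COND_INITIALIZER,
};

static void *worker(void *arg)
{
        (void)arg;                      /* the queued work would run here */
        completion_put(&done);
        return NULL;
}

int main(void)
{
        pthread_t t;

        completion_get(&done);          /* issue one work item */
        pthread_create(&t, NULL, worker, NULL);
        completion_wait(&done);         /* returns once the worker is done */
        puts("all queued work finished");
        pthread_join(&t, NULL);
        return 0;
}
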