diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 139 |
1 files changed, 43 insertions, 96 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ae0f438c2ee6..24489126f8ca 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -53,8 +53,6 @@ struct wb_writeback_work { | |||
53 | unsigned int for_background:1; | 53 | unsigned int for_background:1; |
54 | unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | 54 | unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ |
55 | unsigned int auto_free:1; /* free on completion */ | 55 | unsigned int auto_free:1; /* free on completion */ |
56 | unsigned int single_wait:1; | ||
57 | unsigned int single_done:1; | ||
58 | enum wb_reason reason; /* why was writeback initiated? */ | 56 | enum wb_reason reason; /* why was writeback initiated? */ |
59 | 57 | ||
60 | struct list_head list; /* pending work list */ | 58 | struct list_head list; /* pending work list */ |
@@ -178,14 +176,11 @@ static void wb_wakeup(struct bdi_writeback *wb) | |||
178 | static void wb_queue_work(struct bdi_writeback *wb, | 176 | static void wb_queue_work(struct bdi_writeback *wb, |
179 | struct wb_writeback_work *work) | 177 | struct wb_writeback_work *work) |
180 | { | 178 | { |
181 | trace_writeback_queue(wb->bdi, work); | 179 | trace_writeback_queue(wb, work); |
182 | 180 | ||
183 | spin_lock_bh(&wb->work_lock); | 181 | spin_lock_bh(&wb->work_lock); |
184 | if (!test_bit(WB_registered, &wb->state)) { | 182 | if (!test_bit(WB_registered, &wb->state)) |
185 | if (work->single_wait) | ||
186 | work->single_done = 1; | ||
187 | goto out_unlock; | 183 | goto out_unlock; |
188 | } | ||
189 | if (work->done) | 184 | if (work->done) |
190 | atomic_inc(&work->done->cnt); | 185 | atomic_inc(&work->done->cnt); |
191 | list_add_tail(&work->list, &wb->work_list); | 186 | list_add_tail(&work->list, &wb->work_list); |
@@ -706,7 +701,7 @@ EXPORT_SYMBOL_GPL(wbc_account_io); | |||
706 | 701 | ||
707 | /** | 702 | /** |
708 | * inode_congested - test whether an inode is congested | 703 | * inode_congested - test whether an inode is congested |
709 | * @inode: inode to test for congestion | 704 | * @inode: inode to test for congestion (may be NULL) |
710 | * @cong_bits: mask of WB_[a]sync_congested bits to test | 705 | * @cong_bits: mask of WB_[a]sync_congested bits to test |
711 | * | 706 | * |
712 | * Tests whether @inode is congested. @cong_bits is the mask of congestion | 707 | * Tests whether @inode is congested. @cong_bits is the mask of congestion |
@@ -716,6 +711,9 @@ EXPORT_SYMBOL_GPL(wbc_account_io); | |||
716 | * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg | 711 | * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg |
717 | * associated with @inode is congested; otherwise, the root wb's congestion | 712 | * associated with @inode is congested; otherwise, the root wb's congestion |
718 | * state is used. | 713 | * state is used. |
714 | * | ||
715 | * @inode is allowed to be NULL as this function is often called on | ||
716 | * mapping->host which is NULL for the swapper space. | ||
719 | */ | 717 | */ |
720 | int inode_congested(struct inode *inode, int cong_bits) | 718 | int inode_congested(struct inode *inode, int cong_bits) |
721 | { | 719 | { |
@@ -738,32 +736,6 @@ int inode_congested(struct inode *inode, int cong_bits) | |||
738 | EXPORT_SYMBOL_GPL(inode_congested); | 736 | EXPORT_SYMBOL_GPL(inode_congested); |
739 | 737 | ||
740 | /** | 738 | /** |
741 | * wb_wait_for_single_work - wait for completion of a single bdi_writeback_work | ||
742 | * @bdi: bdi the work item was issued to | ||
743 | * @work: work item to wait for | ||
744 | * | ||
745 | * Wait for the completion of @work which was issued to one of @bdi's | ||
746 | * bdi_writeback's. The caller must have set @work->single_wait before | ||
747 | * issuing it. This wait operates independently of | ||
748 | * wb_wait_for_completion() and also disables automatic freeing of @work. | ||
749 | */ | ||
750 | static void wb_wait_for_single_work(struct backing_dev_info *bdi, | ||
751 | struct wb_writeback_work *work) | ||
752 | { | ||
753 | if (WARN_ON_ONCE(!work->single_wait)) | ||
754 | return; | ||
755 | |||
756 | wait_event(bdi->wb_waitq, work->single_done); | ||
757 | |||
758 | /* | ||
759 | * Paired with smp_wmb() in wb_do_writeback() and ensures that all | ||
760 | * modifications to @work prior to assertion of ->single_done are | ||
761 | * visible to the caller once this function returns. | ||
762 | */ | ||
763 | smp_rmb(); | ||
764 | } | ||
765 | |||
766 | /** | ||
767 | * wb_split_bdi_pages - split nr_pages to write according to bandwidth | 739 | * wb_split_bdi_pages - split nr_pages to write according to bandwidth |
768 | * @wb: target bdi_writeback to split @nr_pages to | 740 | * @wb: target bdi_writeback to split @nr_pages to |
769 | * @nr_pages: number of pages to write for the whole bdi | 741 | * @nr_pages: number of pages to write for the whole bdi |
@@ -792,38 +764,6 @@ static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages) | |||
792 | } | 764 | } |
793 | 765 | ||
794 | /** | 766 | /** |
795 | * wb_clone_and_queue_work - clone a wb_writeback_work and issue it to a wb | ||
796 | * @wb: target bdi_writeback | ||
797 | * @base_work: source wb_writeback_work | ||
798 | * | ||
799 | * Try to make a clone of @base_work and issue it to @wb. If cloning | ||
800 | * succeeds, %true is returned; otherwise, @base_work is issued directly | ||
801 | * and %false is returned. In the latter case, the caller is required to | ||
802 | * wait for @base_work's completion using wb_wait_for_single_work(). | ||
803 | * | ||
804 | * A clone is auto-freed on completion. @base_work never is. | ||
805 | */ | ||
806 | static bool wb_clone_and_queue_work(struct bdi_writeback *wb, | ||
807 | struct wb_writeback_work *base_work) | ||
808 | { | ||
809 | struct wb_writeback_work *work; | ||
810 | |||
811 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | ||
812 | if (work) { | ||
813 | *work = *base_work; | ||
814 | work->auto_free = 1; | ||
815 | work->single_wait = 0; | ||
816 | } else { | ||
817 | work = base_work; | ||
818 | work->auto_free = 0; | ||
819 | work->single_wait = 1; | ||
820 | } | ||
821 | work->single_done = 0; | ||
822 | wb_queue_work(wb, work); | ||
823 | return work != base_work; | ||
824 | } | ||
825 | |||
826 | /** | ||
827 | * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi | 767 | * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi |
828 | * @bdi: target backing_dev_info | 768 | * @bdi: target backing_dev_info |
829 | * @base_work: wb_writeback_work to issue | 769 | * @base_work: wb_writeback_work to issue |
@@ -838,15 +778,19 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, | |||
838 | struct wb_writeback_work *base_work, | 778 | struct wb_writeback_work *base_work, |
839 | bool skip_if_busy) | 779 | bool skip_if_busy) |
840 | { | 780 | { |
841 | long nr_pages = base_work->nr_pages; | 781 | int next_memcg_id = 0; |
842 | int next_blkcg_id = 0; | ||
843 | struct bdi_writeback *wb; | 782 | struct bdi_writeback *wb; |
844 | struct wb_iter iter; | 783 | struct wb_iter iter; |
845 | 784 | ||
846 | might_sleep(); | 785 | might_sleep(); |
847 | restart: | 786 | restart: |
848 | rcu_read_lock(); | 787 | rcu_read_lock(); |
849 | bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) { | 788 | bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) { |
789 | DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done); | ||
790 | struct wb_writeback_work fallback_work; | ||
791 | struct wb_writeback_work *work; | ||
792 | long nr_pages; | ||
793 | |||
850 | /* SYNC_ALL writes out I_DIRTY_TIME too */ | 794 | /* SYNC_ALL writes out I_DIRTY_TIME too */ |
851 | if (!wb_has_dirty_io(wb) && | 795 | if (!wb_has_dirty_io(wb) && |
852 | (base_work->sync_mode == WB_SYNC_NONE || | 796 | (base_work->sync_mode == WB_SYNC_NONE || |
@@ -855,13 +799,30 @@ restart: | |||
855 | if (skip_if_busy && writeback_in_progress(wb)) | 799 | if (skip_if_busy && writeback_in_progress(wb)) |
856 | continue; | 800 | continue; |
857 | 801 | ||
858 | base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages); | 802 | nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages); |
859 | if (!wb_clone_and_queue_work(wb, base_work)) { | 803 | |
860 | next_blkcg_id = wb->blkcg_css->id + 1; | 804 | work = kmalloc(sizeof(*work), GFP_ATOMIC); |
861 | rcu_read_unlock(); | 805 | if (work) { |
862 | wb_wait_for_single_work(bdi, base_work); | 806 | *work = *base_work; |
863 | goto restart; | 807 | work->nr_pages = nr_pages; |
808 | work->auto_free = 1; | ||
809 | wb_queue_work(wb, work); | ||
810 | continue; | ||
864 | } | 811 | } |
812 | |||
813 | /* alloc failed, execute synchronously using on-stack fallback */ | ||
814 | work = &fallback_work; | ||
815 | *work = *base_work; | ||
816 | work->nr_pages = nr_pages; | ||
817 | work->auto_free = 0; | ||
818 | work->done = &fallback_work_done; | ||
819 | |||
820 | wb_queue_work(wb, work); | ||
821 | |||
822 | next_memcg_id = wb->memcg_css->id + 1; | ||
823 | rcu_read_unlock(); | ||
824 | wb_wait_for_completion(bdi, &fallback_work_done); | ||
825 | goto restart; | ||
865 | } | 826 | } |
866 | rcu_read_unlock(); | 827 | rcu_read_unlock(); |
867 | } | 828 | } |
@@ -902,8 +863,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, | |||
902 | 863 | ||
903 | if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { | 864 | if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { |
904 | base_work->auto_free = 0; | 865 | base_work->auto_free = 0; |
905 | base_work->single_wait = 0; | ||
906 | base_work->single_done = 0; | ||
907 | wb_queue_work(&bdi->wb, base_work); | 866 | wb_queue_work(&bdi->wb, base_work); |
908 | } | 867 | } |
909 | } | 868 | } |
@@ -924,7 +883,7 @@ void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, | |||
924 | */ | 883 | */ |
925 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 884 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
926 | if (!work) { | 885 | if (!work) { |
927 | trace_writeback_nowork(wb->bdi); | 886 | trace_writeback_nowork(wb); |
928 | wb_wakeup(wb); | 887 | wb_wakeup(wb); |
929 | return; | 888 | return; |
930 | } | 889 | } |
@@ -954,7 +913,7 @@ void wb_start_background_writeback(struct bdi_writeback *wb) | |||
954 | * We just wake up the flusher thread. It will perform background | 913 | * We just wake up the flusher thread. It will perform background |
955 | * writeback as soon as there is no other work to do. | 914 | * writeback as soon as there is no other work to do. |
956 | */ | 915 | */ |
957 | trace_writeback_wake_background(wb->bdi); | 916 | trace_writeback_wake_background(wb); |
958 | wb_wakeup(wb); | 917 | wb_wakeup(wb); |
959 | } | 918 | } |
960 | 919 | ||
@@ -1660,14 +1619,14 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
1660 | } else if (work->for_background) | 1619 | } else if (work->for_background) |
1661 | oldest_jif = jiffies; | 1620 | oldest_jif = jiffies; |
1662 | 1621 | ||
1663 | trace_writeback_start(wb->bdi, work); | 1622 | trace_writeback_start(wb, work); |
1664 | if (list_empty(&wb->b_io)) | 1623 | if (list_empty(&wb->b_io)) |
1665 | queue_io(wb, work); | 1624 | queue_io(wb, work); |
1666 | if (work->sb) | 1625 | if (work->sb) |
1667 | progress = writeback_sb_inodes(work->sb, wb, work); | 1626 | progress = writeback_sb_inodes(work->sb, wb, work); |
1668 | else | 1627 | else |
1669 | progress = __writeback_inodes_wb(wb, work); | 1628 | progress = __writeback_inodes_wb(wb, work); |
1670 | trace_writeback_written(wb->bdi, work); | 1629 | trace_writeback_written(wb, work); |
1671 | 1630 | ||
1672 | wb_update_bandwidth(wb, wb_start); | 1631 | wb_update_bandwidth(wb, wb_start); |
1673 | 1632 | ||
@@ -1692,7 +1651,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
1692 | * we'll just busyloop. | 1651 | * we'll just busyloop. |
1693 | */ | 1652 | */ |
1694 | if (!list_empty(&wb->b_more_io)) { | 1653 | if (!list_empty(&wb->b_more_io)) { |
1695 | trace_writeback_wait(wb->bdi, work); | 1654 | trace_writeback_wait(wb, work); |
1696 | inode = wb_inode(wb->b_more_io.prev); | 1655 | inode = wb_inode(wb->b_more_io.prev); |
1697 | spin_lock(&inode->i_lock); | 1656 | spin_lock(&inode->i_lock); |
1698 | spin_unlock(&wb->list_lock); | 1657 | spin_unlock(&wb->list_lock); |
@@ -1797,26 +1756,14 @@ static long wb_do_writeback(struct bdi_writeback *wb) | |||
1797 | set_bit(WB_writeback_running, &wb->state); | 1756 | set_bit(WB_writeback_running, &wb->state); |
1798 | while ((work = get_next_work_item(wb)) != NULL) { | 1757 | while ((work = get_next_work_item(wb)) != NULL) { |
1799 | struct wb_completion *done = work->done; | 1758 | struct wb_completion *done = work->done; |
1800 | bool need_wake_up = false; | ||
1801 | 1759 | ||
1802 | trace_writeback_exec(wb->bdi, work); | 1760 | trace_writeback_exec(wb, work); |
1803 | 1761 | ||
1804 | wrote += wb_writeback(wb, work); | 1762 | wrote += wb_writeback(wb, work); |
1805 | 1763 | ||
1806 | if (work->single_wait) { | 1764 | if (work->auto_free) |
1807 | WARN_ON_ONCE(work->auto_free); | ||
1808 | /* paired w/ rmb in wb_wait_for_single_work() */ | ||
1809 | smp_wmb(); | ||
1810 | work->single_done = 1; | ||
1811 | need_wake_up = true; | ||
1812 | } else if (work->auto_free) { | ||
1813 | kfree(work); | 1765 | kfree(work); |
1814 | } | ||
1815 | |||
1816 | if (done && atomic_dec_and_test(&done->cnt)) | 1766 | if (done && atomic_dec_and_test(&done->cnt)) |
1817 | need_wake_up = true; | ||
1818 | |||
1819 | if (need_wake_up) | ||
1820 | wake_up_all(&wb->bdi->wb_waitq); | 1767 | wake_up_all(&wb->bdi->wb_waitq); |
1821 | } | 1768 | } |
1822 | 1769 | ||