Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c       24
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c   27
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h   24
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c    129
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c     228
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h       2
6 files changed, 194 insertions, 240 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5ea402023ebd..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -293,7 +293,6 @@ xfs_buf_allocate_memory(
         size_t nbytes, offset;
         gfp_t gfp_mask = xb_to_gfp(flags);
         unsigned short page_count, i;
-        pgoff_t first;
         xfs_off_t end;
         int error;

@@ -333,7 +332,6 @@ use_alloc_page:
                 return error;

         offset = bp->b_offset;
-        first = bp->b_file_offset >> PAGE_SHIFT;
         bp->b_flags |= _XBF_PAGES;

         for (i = 0; i < bp->b_page_count; i++) {
@@ -657,8 +655,6 @@ xfs_buf_readahead(
         xfs_off_t ioff,
         size_t isize)
 {
-        struct backing_dev_info *bdi;
-
         if (bdi_read_congested(target->bt_bdi))
                 return;

@@ -919,8 +915,6 @@ xfs_buf_lock(

         if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
                 xfs_log_force(bp->b_target->bt_mount, 0);
-        if (atomic_read(&bp->b_io_remaining))
-                blk_flush_plug(current);
         down(&bp->b_sema);
         XB_SET_OWNER(bp);

@@ -1309,8 +1303,6 @@ xfs_buf_iowait(
 {
         trace_xfs_buf_iowait(bp, _RET_IP_);

-        if (atomic_read(&bp->b_io_remaining))
-                blk_flush_plug(current);
         wait_for_completion(&bp->b_iowait);

         trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1747,8 +1739,8 @@ xfsbufd(
         do {
                 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
                 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-                int count = 0;
                 struct list_head tmp;
+                struct blk_plug plug;

                 if (unlikely(freezing(current))) {
                         set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1764,16 +1756,15 @@ xfsbufd(

                 xfs_buf_delwri_split(target, &tmp, age);
                 list_sort(NULL, &tmp, xfs_buf_cmp);
+
+                blk_start_plug(&plug);
                 while (!list_empty(&tmp)) {
                         struct xfs_buf *bp;
                         bp = list_first_entry(&tmp, struct xfs_buf, b_list);
                         list_del_init(&bp->b_list);
                         xfs_bdstrat_cb(bp);
-                        count++;
                 }
-                if (count)
-                        blk_flush_plug(current);
-
+                blk_finish_plug(&plug);
         } while (!kthread_should_stop());

         return 0;
@@ -1793,6 +1784,7 @@ xfs_flush_buftarg(
         int pincount = 0;
         LIST_HEAD(tmp_list);
         LIST_HEAD(wait_list);
+        struct blk_plug plug;

         xfs_buf_runall_queues(xfsconvertd_workqueue);
         xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1807,6 +1799,8 @@ xfs_flush_buftarg(
          * we do that after issuing all the IO.
          */
         list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+        blk_start_plug(&plug);
         while (!list_empty(&tmp_list)) {
                 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
                 ASSERT(target == bp->b_target);
@@ -1817,10 +1811,10 @@ xfs_flush_buftarg(
                 }
                 xfs_bdstrat_cb(bp);
         }
+        blk_finish_plug(&plug);

         if (wait) {
-                /* Expedite and wait for IO to complete. */
-                blk_flush_plug(current);
+                /* Wait for IO to complete. */
                 while (!list_empty(&wait_list)) {
                         bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

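[Not part of the patch above: the xfs_buf.c hunks drop the explicit blk_flush_plug() calls and instead hold an on-stack plug across a whole batch of buffer submissions. A minimal sketch of that plugging pattern follows; my_buf, my_submit_one and my_submit_list are invented names, only blk_start_plug()/blk_finish_plug() are the real block-layer calls.]

#include <linux/blkdev.h>
#include <linux/list.h>

struct my_buf {                         /* stand-in for struct xfs_buf */
        struct list_head b_list;
};

static void my_submit_one(struct my_buf *bp)
{
        /* ... start this buffer's I/O here ... */
}

/* Plug once around the whole batch instead of flushing per submission. */
static void my_submit_list(struct list_head *batch)
{
        struct blk_plug plug;

        blk_start_plug(&plug);          /* I/O queued below sits on the task plug */
        while (!list_empty(batch)) {
                struct my_buf *bp = list_first_entry(batch, struct my_buf, b_list);

                list_del_init(&bp->b_list);
                my_submit_one(bp);
        }
        blk_finish_plug(&plug);         /* single flush after the batch */
}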
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 508e06fd7d1e..3ca795609113 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -28,53 +28,47 @@
 /*
  * XFS logging functions
  */
-static int
+static void
 __xfs_printk(
         const char *level,
         const struct xfs_mount *mp,
         struct va_format *vaf)
 {
         if (mp && mp->m_fsname)
-                return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-        return printk("%sXFS: %pV\n", level, vaf);
+                printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+        printk("%sXFS: %pV\n", level, vaf);
 }

-int xfs_printk(
+void xfs_printk(
         const char *level,
         const struct xfs_mount *mp,
         const char *fmt, ...)
 {
         struct va_format vaf;
         va_list args;
-        int r;

         va_start(args, fmt);

         vaf.fmt = fmt;
         vaf.va = &args;

-        r = __xfs_printk(level, mp, &vaf);
+        __xfs_printk(level, mp, &vaf);
         va_end(args);
-
-        return r;
 }

 #define define_xfs_printk_level(func, kern_level) \
-int func(const struct xfs_mount *mp, const char *fmt, ...) \
+void func(const struct xfs_mount *mp, const char *fmt, ...) \
 { \
         struct va_format vaf; \
         va_list args; \
-        int r; \
 \
         va_start(args, fmt); \
 \
         vaf.fmt = fmt; \
         vaf.va = &args; \
 \
-        r = __xfs_printk(kern_level, mp, &vaf); \
+        __xfs_printk(kern_level, mp, &vaf); \
         va_end(args); \
-\
-        return r; \
 } \

 define_xfs_printk_level(xfs_emerg, KERN_EMERG);
@@ -88,7 +82,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO);
 define_xfs_printk_level(xfs_debug, KERN_DEBUG);
 #endif

-int
+void
 xfs_alert_tag(
         const struct xfs_mount *mp,
         int panic_tag,
@@ -97,7 +91,6 @@ xfs_alert_tag(
         struct va_format vaf;
         va_list args;
         int do_panic = 0;
-        int r;

         if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
                 xfs_printk(KERN_ALERT, mp,
@@ -110,12 +103,10 @@ xfs_alert_tag(
         vaf.fmt = fmt;
         vaf.va = &args;

-        r = __xfs_printk(KERN_ALERT, mp, &vaf);
+        __xfs_printk(KERN_ALERT, mp, &vaf);
         va_end(args);

         BUG_ON(do_panic);
-
-        return r;
 }

 void
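[Not part of the patch above: __xfs_printk() relies on the kernel's struct va_format / %pV forwarding so a single printk() can prepend a prefix without formatting the message twice. A sketch of that pattern under an invented name (my_log); the prefix argument and the log text are illustrative only.]

#include <linux/kernel.h>

static void my_log(const char *prefix, const char *fmt, ...)
        __attribute__ ((format (printf, 2, 3)));

/* Format once: %pV expands the bundled fmt/args inside the outer printk(). */
static void my_log(const char *prefix, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        printk("%s: %pV\n", prefix, &vaf);
        va_end(args);
}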
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index e77ffa16745b..f1b3fc1b6c4e 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,32 +3,34 @@

 struct xfs_mount;

-extern int xfs_printk(const char *level, const struct xfs_mount *mp,
+extern void xfs_printk(const char *level, const struct xfs_mount *mp,
                 const char *fmt, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert_tag(const struct xfs_mount *mp, int tag,
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
                 const char *fmt, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));

 #ifdef DEBUG
-extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
 #else
-#define xfs_debug(mp, fmt, ...) (0)
+static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
 #endif

 extern void assfail(char *expr, char *f, int l);
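[Not part of the patch above: with every message helper now returning void, the non-DEBUG xfs_debug() stub becomes an empty static inline rather than "#define xfs_debug(mp, fmt, ...) (0)". A hypothetical call site, sketched only to show one practical difference: the inline keeps the arguments evaluated and checked against the prototype in non-DEBUG builds, where the old macro discarded them entirely.]

/* Hypothetical caller, for illustration only. */
static void example(struct xfs_mount *mp, int error)
{
        if (error)
                xfs_debug(mp, "lookup failed, error %d", error);
        /*
         * In the old non-DEBUG build the line above expanded to the bare
         * statement "(0);" and "error" was never evaluated.  With the empty
         * static inline the call compiles the same way in both
         * configurations.
         */
}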
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1ba5c451da36..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -816,75 +816,6 @@ xfs_setup_devices(
         return 0;
 }

-/*
- * XFS AIL push thread support
- */
-void
-xfsaild_wakeup(
-        struct xfs_ail *ailp,
-        xfs_lsn_t threshold_lsn)
-{
-        /* only ever move the target forwards */
-        if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
-                ailp->xa_target = threshold_lsn;
-                wake_up_process(ailp->xa_task);
-        }
-}
-
-STATIC int
-xfsaild(
-        void *data)
-{
-        struct xfs_ail *ailp = data;
-        xfs_lsn_t last_pushed_lsn = 0;
-        long tout = 0; /* milliseconds */
-
-        while (!kthread_should_stop()) {
-                /*
-                 * for short sleeps indicating congestion, don't allow us to
-                 * get woken early. Otherwise all we do is bang on the AIL lock
-                 * without making progress.
-                 */
-                if (tout && tout <= 20)
-                        __set_current_state(TASK_KILLABLE);
-                else
-                        __set_current_state(TASK_INTERRUPTIBLE);
-                schedule_timeout(tout ?
-                                 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
-
-                /* swsusp */
-                try_to_freeze();
-
-                ASSERT(ailp->xa_mount->m_log);
-                if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-                        continue;
-
-                tout = xfsaild_push(ailp, &last_pushed_lsn);
-        }
-
-        return 0;
-} /* xfsaild */
-
-int
-xfsaild_start(
-        struct xfs_ail *ailp)
-{
-        ailp->xa_target = 0;
-        ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
-                                    ailp->xa_mount->m_fsname);
-        if (IS_ERR(ailp->xa_task))
-                return -PTR_ERR(ailp->xa_task);
-        return 0;
-}
-
-void
-xfsaild_stop(
-        struct xfs_ail *ailp)
-{
-        kthread_stop(ailp->xa_task);
-}
-
-
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -1191,22 +1122,12 @@ xfs_fs_sync_fs(
                 return -error;

         if (laptop_mode) {
-                int prev_sync_seq = mp->m_sync_seq;
-
                 /*
                  * The disk must be active because we're syncing.
                  * We schedule xfssyncd now (now that the disk is
                  * active) instead of later (when it might not be).
                  */
-                wake_up_process(mp->m_sync_task);
-                /*
-                 * We have to wait for the sync iteration to complete.
-                 * If we don't, the disk activity caused by the sync
-                 * will come after the sync is completed, and that
-                 * triggers another sync from laptop mode.
-                 */
-                wait_event(mp->m_wait_single_sync_task,
-                                mp->m_sync_seq != prev_sync_seq);
+                flush_delayed_work_sync(&mp->m_sync_work);
         }

         return 0;
@@ -1490,9 +1411,6 @@ xfs_fs_fill_super(
         spin_lock_init(&mp->m_sb_lock);
         mutex_init(&mp->m_growlock);
         atomic_set(&mp->m_active_trans, 0);
-        INIT_LIST_HEAD(&mp->m_sync_list);
-        spin_lock_init(&mp->m_sync_lock);
-        init_waitqueue_head(&mp->m_wait_single_sync_task);

         mp->m_super = sb;
         sb->s_fs_info = mp;
@@ -1799,6 +1717,38 @@ xfs_destroy_zones(void)
 }

 STATIC int __init
+xfs_init_workqueues(void)
+{
+        /*
+         * max_active is set to 8 to give enough concurency to allow
+         * multiple work operations on each CPU to run. This allows multiple
+         * filesystems to be running sync work concurrently, and scales with
+         * the number of CPUs in the system.
+         */
+        xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+        if (!xfs_syncd_wq)
+                goto out;
+
+        xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+        if (!xfs_ail_wq)
+                goto out_destroy_syncd;
+
+        return 0;
+
+out_destroy_syncd:
+        destroy_workqueue(xfs_syncd_wq);
+out:
+        return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+        destroy_workqueue(xfs_ail_wq);
+        destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
 init_xfs_fs(void)
 {
         int error;
@@ -1813,10 +1763,14 @@ init_xfs_fs(void)
         if (error)
                 goto out;

-        error = xfs_mru_cache_init();
+        error = xfs_init_workqueues();
         if (error)
                 goto out_destroy_zones;

+        error = xfs_mru_cache_init();
+        if (error)
+                goto out_destroy_wq;
+
         error = xfs_filestream_init();
         if (error)
                 goto out_mru_cache_uninit;
@@ -1833,6 +1787,10 @@ init_xfs_fs(void)
         if (error)
                 goto out_cleanup_procfs;

+        error = xfs_init_workqueues();
+        if (error)
+                goto out_sysctl_unregister;
+
         vfs_initquota();

         error = register_filesystem(&xfs_fs_type);
@@ -1850,6 +1808,8 @@ init_xfs_fs(void)
         xfs_filestream_uninit();
  out_mru_cache_uninit:
         xfs_mru_cache_uninit();
+ out_destroy_wq:
+        xfs_destroy_workqueues();
  out_destroy_zones:
         xfs_destroy_zones();
  out:
@@ -1866,6 +1826,7 @@ exit_xfs_fs(void)
         xfs_buf_terminate();
         xfs_filestream_uninit();
         xfs_mru_cache_uninit();
+        xfs_destroy_workqueues();
         xfs_destroy_zones();
 }

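[Not part of the patch above: in xfs_fs_sync_fs() the wake_up_process()/wait_event() handshake on m_sync_seq is replaced by a single flush_delayed_work_sync() on the new m_sync_work item, as the hunk shows. A sketch of that delayed-work usage under invented names (my_ctx, my_worker, my_sync_now); flush_delayed_work_sync() is the API of this kernel era and is exactly the call the patch uses.]

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct my_ctx {
        struct delayed_work work;
};

static void my_worker(struct work_struct *work)
{
        struct my_ctx *ctx = container_of(to_delayed_work(work),
                                          struct my_ctx, work);

        pr_debug("periodic sync pass for %p\n", ctx);
        /* ... write back dirty metadata here ... */
}

static void my_init(struct my_ctx *ctx)
{
        INIT_DELAYED_WORK(&ctx->work, my_worker);
        queue_delayed_work(system_wq, &ctx->work, msecs_to_jiffies(30000));
}

/* laptop_mode-style "sync now": run any pending pass immediately and wait. */
static void my_sync_now(struct my_ctx *ctx)
{
        flush_delayed_work_sync(&ctx->work);
}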
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 9cf35a688f53..e4f9c1b0836c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -39,6 +40,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>

+struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
+
 /*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
         xfs_unmountfs_writesb(mp);
 }

-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-        struct xfs_mount *mp,
-        void *data,
-        void (*syncer)(struct xfs_mount *, void *),
-        struct completion *completion)
-{
-        struct xfs_sync_work *work;
-
-        work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-        INIT_LIST_HEAD(&work->w_list);
-        work->w_syncer = syncer;
-        work->w_data = data;
-        work->w_mount = mp;
-        work->w_completion = completion;
-        spin_lock(&mp->m_sync_lock);
-        list_add_tail(&work->w_list, &mp->m_sync_list);
-        spin_unlock(&mp->m_sync_lock);
-        wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-        struct xfs_mount *mp,
-        void *arg)
-{
-        struct inode *inode = arg;
-        xfs_sync_data(mp, SYNC_TRYLOCK);
-        xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-        iput(inode);
-}
-
-void
-xfs_flush_inodes(
-        xfs_inode_t *ip)
-{
-        struct inode *inode = VFS_I(ip);
-        DECLARE_COMPLETION_ONSTACK(completion);
-
-        igrab(inode);
-        xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-        wait_for_completion(&completion);
-        xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
+static void
+xfs_syncd_queue_sync(
+        struct xfs_mount *mp)
+{
+        queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+                                msecs_to_jiffies(xfs_syncd_centisecs * 10));
 }

 /*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
  */
 STATIC void
 xfs_sync_worker(
-        struct xfs_mount *mp,
-        void *unused)
+        struct work_struct *work)
 {
+        struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                        struct xfs_mount, m_sync_work);
         int error;

         if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
                         error = xfs_fs_log_dummy(mp);
                 else
                         xfs_log_force(mp, 0);
-                xfs_reclaim_inodes(mp, 0);
                 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+                /* start pushing all the metadata that is currently dirty */
+                xfs_ail_push_all(mp->m_ail);
         }
-        mp->m_sync_seq++;
-        wake_up(&mp->m_wait_single_sync_task);
+
+        /* queue us up again */
+        xfs_syncd_queue_sync(mp);
 }

-STATIC int
-xfssyncd(
-        void *arg)
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+        struct xfs_mount *mp)
 {
-        struct xfs_mount *mp = arg;
-        long timeleft;
-        xfs_sync_work_t *work, *n;
-        LIST_HEAD (tmp);
-
-        set_freezable();
-        timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-        for (;;) {
-                if (list_empty(&mp->m_sync_list))
-                        timeleft = schedule_timeout_interruptible(timeleft);
-                /* swsusp */
-                try_to_freeze();
-                if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-                        break;

-                spin_lock(&mp->m_sync_lock);
-                /*
-                 * We can get woken by laptop mode, to do a sync -
-                 * that's the (only!) case where the list would be
-                 * empty with time remaining.
-                 */
-                if (!timeleft || list_empty(&mp->m_sync_list)) {
-                        if (!timeleft)
-                                timeleft = xfs_syncd_centisecs *
-                                                        msecs_to_jiffies(10);
-                        INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-                        list_add_tail(&mp->m_sync_work.w_list,
-                                        &mp->m_sync_list);
-                }
-                list_splice_init(&mp->m_sync_list, &tmp);
-                spin_unlock(&mp->m_sync_lock);
+        /*
+         * We can have inodes enter reclaim after we've shut down the syncd
+         * workqueue during unmount, so don't allow reclaim work to be queued
+         * during unmount.
+         */
+        if (!(mp->m_super->s_flags & MS_ACTIVE))
+                return;

-                list_for_each_entry_safe(work, n, &tmp, w_list) {
-                        (*work->w_syncer)(mp, work->w_data);
-                        list_del(&work->w_list);
-                        if (work == &mp->m_sync_work)
-                                continue;
-                        if (work->w_completion)
-                                complete(work->w_completion);
-                        kmem_free(work);
-                }
+        rcu_read_lock();
+        if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+                queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+                        msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
         }
+        rcu_read_unlock();
+}

-        return 0;
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+        struct work_struct *work)
+{
+        struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                        struct xfs_mount, m_reclaim_work);
+
+        xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+        xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+        struct xfs_inode *ip)
+{
+        struct xfs_mount *mp = ip->i_mount;
+
+        queue_work(xfs_syncd_wq, &mp->m_flush_work);
+        flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+        struct work_struct *work)
+{
+        struct xfs_mount *mp = container_of(work,
+                                        struct xfs_mount, m_flush_work);
+
+        xfs_sync_data(mp, SYNC_TRYLOCK);
+        xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 }

 int
 xfs_syncd_init(
         struct xfs_mount *mp)
 {
-        mp->m_sync_work.w_syncer = xfs_sync_worker;
-        mp->m_sync_work.w_mount = mp;
-        mp->m_sync_work.w_completion = NULL;
-        mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-        if (IS_ERR(mp->m_sync_task))
-                return -PTR_ERR(mp->m_sync_task);
+        INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+        INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+        INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+        xfs_syncd_queue_sync(mp);
+        xfs_syncd_queue_reclaim(mp);
+
         return 0;
 }

@@ -582,7 +569,9 @@ void
 xfs_syncd_stop(
         struct xfs_mount *mp)
 {
-        kthread_stop(mp->m_sync_task);
+        cancel_delayed_work_sync(&mp->m_sync_work);
+        cancel_delayed_work_sync(&mp->m_reclaim_work);
+        cancel_work_sync(&mp->m_flush_work);
 }

 void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
                         XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
                         XFS_ICI_RECLAIM_TAG);
                 spin_unlock(&ip->i_mount->m_perag_lock);
+
+                /* schedule periodic background inode reclaim */
+                xfs_syncd_queue_reclaim(ip->i_mount);
+
                 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
                                                         -1, _RET_IP_);
         }
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes(
 }

 /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doiing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
  */
 static int
 xfs_reclaim_inode_shrink(
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink(

         mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
         if (nr_to_scan) {
+                /* kick background reclaimer and push the AIL */
+                xfs_syncd_queue_reclaim(mp);
+                xfs_ail_push_all(mp->m_ail);
+
                 if (!(gfp_mask & __GFP_FS))
                         return -1;

-                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+                                        &nr_to_scan);
                 /* terminate if we don't exhaust the scan */
                 if (nr_to_scan > 0)
                         return -1;
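[Not part of the patch above: xfs_sync_worker() and xfs_reclaim_worker() use the self-arming pattern, where the handler does one pass and then queues itself again, so the period is measured from the end of the previous pass rather than from a fixed timer tick. A generic sketch of that pattern under invented names (my_wq, my_work, my_period_ms); the alloc_workqueue() flags mirror the ones the patch uses for xfssyncd.]

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static struct workqueue_struct *my_wq;
static struct delayed_work my_work;
static const unsigned int my_period_ms = 30000;

static void my_periodic(struct work_struct *work)
{
        /* ... one pass of background work ... */

        /* re-arm: the next pass starts my_period_ms after this one finished */
        queue_delayed_work(my_wq, &my_work, msecs_to_jiffies(my_period_ms));
}

static int my_start(void)
{
        my_wq = alloc_workqueue("my_wq", WQ_CPU_INTENSIVE, 8);
        if (!my_wq)
                return -ENOMEM;

        INIT_DELAYED_WORK(&my_work, my_periodic);
        queue_delayed_work(my_wq, &my_work, msecs_to_jiffies(my_period_ms));
        return 0;
}

static void my_stop(void)
{
        /* copes with a self-requeueing handler: cancels pending work and waits */
        cancel_delayed_work_sync(&my_work);
        destroy_workqueue(my_wq);
}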
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
 #define SYNC_WAIT 0x0001 /* wait for i/o to complete */
 #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */

+extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
+
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
