diff options
Diffstat (limited to 'fs/xfs')
28 files changed, 551 insertions, 527 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 52dbd14260ba..79ce38be15a1 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1295,7 +1295,7 @@ xfs_get_blocks_direct( | |||
1295 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1295 | * If the private argument is non-NULL __xfs_get_blocks signals us that we |
1296 | * need to issue a transaction to convert the range from unwritten to written | 1296 | * need to issue a transaction to convert the range from unwritten to written |
1297 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | 1297 | * extents. In case this is regular synchronous I/O we just call xfs_end_io |
1298 | * to do this and we are done. But in case this was a successfull AIO | 1298 | * to do this and we are done. But in case this was a successful AIO |
1299 | * request this handler is called from interrupt context, from which we | 1299 | * request this handler is called from interrupt context, from which we |
1300 | * can't start transactions. In that case offload the I/O completion to | 1300 | * can't start transactions. In that case offload the I/O completion to |
1301 | * the workqueues we also use for buffered I/O completion. | 1301 | * the workqueues we also use for buffered I/O completion. |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 596bb2c9de42..9ef9ed2cfe2e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -120,7 +120,7 @@ xfs_buf_lru_add( | |||
120 | * The unlocked check is safe here because it only occurs when there are not | 120 | * The unlocked check is safe here because it only occurs when there are not |
121 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | 121 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there |
122 | * to optimise the shrinker removing the buffer from the LRU and calling | 122 | * to optimise the shrinker removing the buffer from the LRU and calling |
123 | * xfs_buf_free(). i.e. it removes an unneccessary round trip on the | 123 | * xfs_buf_free(). i.e. it removes an unnecessary round trip on the |
124 | * bt_lru_lock. | 124 | * bt_lru_lock. |
125 | */ | 125 | */ |
126 | STATIC void | 126 | STATIC void |
@@ -293,7 +293,6 @@ xfs_buf_allocate_memory( | |||
293 | size_t nbytes, offset; | 293 | size_t nbytes, offset; |
294 | gfp_t gfp_mask = xb_to_gfp(flags); | 294 | gfp_t gfp_mask = xb_to_gfp(flags); |
295 | unsigned short page_count, i; | 295 | unsigned short page_count, i; |
296 | pgoff_t first; | ||
297 | xfs_off_t end; | 296 | xfs_off_t end; |
298 | int error; | 297 | int error; |
299 | 298 | ||
@@ -333,7 +332,6 @@ use_alloc_page: | |||
333 | return error; | 332 | return error; |
334 | 333 | ||
335 | offset = bp->b_offset; | 334 | offset = bp->b_offset; |
336 | first = bp->b_file_offset >> PAGE_SHIFT; | ||
337 | bp->b_flags |= _XBF_PAGES; | 335 | bp->b_flags |= _XBF_PAGES; |
338 | 336 | ||
339 | for (i = 0; i < bp->b_page_count; i++) { | 337 | for (i = 0; i < bp->b_page_count; i++) { |
@@ -380,7 +378,7 @@ out_free_pages: | |||
380 | } | 378 | } |
381 | 379 | ||
382 | /* | 380 | /* |
383 | * Map buffer into kernel address-space if nessecary. | 381 | * Map buffer into kernel address-space if necessary. |
384 | */ | 382 | */ |
385 | STATIC int | 383 | STATIC int |
386 | _xfs_buf_map_pages( | 384 | _xfs_buf_map_pages( |
@@ -657,8 +655,6 @@ xfs_buf_readahead( | |||
657 | xfs_off_t ioff, | 655 | xfs_off_t ioff, |
658 | size_t isize) | 656 | size_t isize) |
659 | { | 657 | { |
660 | struct backing_dev_info *bdi; | ||
661 | |||
662 | if (bdi_read_congested(target->bt_bdi)) | 658 | if (bdi_read_congested(target->bt_bdi)) |
663 | return; | 659 | return; |
664 | 660 | ||
@@ -919,8 +915,6 @@ xfs_buf_lock( | |||
919 | 915 | ||
920 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 916 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
921 | xfs_log_force(bp->b_target->bt_mount, 0); | 917 | xfs_log_force(bp->b_target->bt_mount, 0); |
922 | if (atomic_read(&bp->b_io_remaining)) | ||
923 | blk_flush_plug(current); | ||
924 | down(&bp->b_sema); | 918 | down(&bp->b_sema); |
925 | XB_SET_OWNER(bp); | 919 | XB_SET_OWNER(bp); |
926 | 920 | ||
@@ -1309,8 +1303,6 @@ xfs_buf_iowait( | |||
1309 | { | 1303 | { |
1310 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1304 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1311 | 1305 | ||
1312 | if (atomic_read(&bp->b_io_remaining)) | ||
1313 | blk_flush_plug(current); | ||
1314 | wait_for_completion(&bp->b_iowait); | 1306 | wait_for_completion(&bp->b_iowait); |
1315 | 1307 | ||
1316 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1308 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
@@ -1747,8 +1739,8 @@ xfsbufd( | |||
1747 | do { | 1739 | do { |
1748 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1740 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1749 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1741 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); |
1750 | int count = 0; | ||
1751 | struct list_head tmp; | 1742 | struct list_head tmp; |
1743 | struct blk_plug plug; | ||
1752 | 1744 | ||
1753 | if (unlikely(freezing(current))) { | 1745 | if (unlikely(freezing(current))) { |
1754 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1746 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
@@ -1764,16 +1756,15 @@ xfsbufd( | |||
1764 | 1756 | ||
1765 | xfs_buf_delwri_split(target, &tmp, age); | 1757 | xfs_buf_delwri_split(target, &tmp, age); |
1766 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1758 | list_sort(NULL, &tmp, xfs_buf_cmp); |
1759 | |||
1760 | blk_start_plug(&plug); | ||
1767 | while (!list_empty(&tmp)) { | 1761 | while (!list_empty(&tmp)) { |
1768 | struct xfs_buf *bp; | 1762 | struct xfs_buf *bp; |
1769 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | 1763 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); |
1770 | list_del_init(&bp->b_list); | 1764 | list_del_init(&bp->b_list); |
1771 | xfs_bdstrat_cb(bp); | 1765 | xfs_bdstrat_cb(bp); |
1772 | count++; | ||
1773 | } | 1766 | } |
1774 | if (count) | 1767 | blk_finish_plug(&plug); |
1775 | blk_flush_plug(current); | ||
1776 | |||
1777 | } while (!kthread_should_stop()); | 1768 | } while (!kthread_should_stop()); |
1778 | 1769 | ||
1779 | return 0; | 1770 | return 0; |
@@ -1793,6 +1784,7 @@ xfs_flush_buftarg( | |||
1793 | int pincount = 0; | 1784 | int pincount = 0; |
1794 | LIST_HEAD(tmp_list); | 1785 | LIST_HEAD(tmp_list); |
1795 | LIST_HEAD(wait_list); | 1786 | LIST_HEAD(wait_list); |
1787 | struct blk_plug plug; | ||
1796 | 1788 | ||
1797 | xfs_buf_runall_queues(xfsconvertd_workqueue); | 1789 | xfs_buf_runall_queues(xfsconvertd_workqueue); |
1798 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1790 | xfs_buf_runall_queues(xfsdatad_workqueue); |
@@ -1807,6 +1799,8 @@ xfs_flush_buftarg( | |||
1807 | * we do that after issuing all the IO. | 1799 | * we do that after issuing all the IO. |
1808 | */ | 1800 | */ |
1809 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | 1801 | list_sort(NULL, &tmp_list, xfs_buf_cmp); |
1802 | |||
1803 | blk_start_plug(&plug); | ||
1810 | while (!list_empty(&tmp_list)) { | 1804 | while (!list_empty(&tmp_list)) { |
1811 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | 1805 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); |
1812 | ASSERT(target == bp->b_target); | 1806 | ASSERT(target == bp->b_target); |
@@ -1817,10 +1811,10 @@ xfs_flush_buftarg( | |||
1817 | } | 1811 | } |
1818 | xfs_bdstrat_cb(bp); | 1812 | xfs_bdstrat_cb(bp); |
1819 | } | 1813 | } |
1814 | blk_finish_plug(&plug); | ||
1820 | 1815 | ||
1821 | if (wait) { | 1816 | if (wait) { |
1822 | /* Expedite and wait for IO to complete. */ | 1817 | /* Wait for IO to complete. */ |
1823 | blk_flush_plug(current); | ||
1824 | while (!list_empty(&wait_list)) { | 1818 | while (!list_empty(&wait_list)) { |
1825 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1819 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
1826 | 1820 | ||
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 52aadfbed132..f4213ba1ff85 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -381,7 +381,7 @@ xfs_aio_write_isize_update( | |||
381 | 381 | ||
382 | /* | 382 | /* |
383 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | 383 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then |
384 | * part of the I/O may have been written to disk before the error occured. In | 384 | * part of the I/O may have been written to disk before the error occurred. In |
385 | * this case the on-disk file size may have been adjusted beyond the in-memory | 385 | * this case the on-disk file size may have been adjusted beyond the in-memory |
386 | * file size and now needs to be truncated back. | 386 | * file size and now needs to be truncated back. |
387 | */ | 387 | */ |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 9ff7fc603d2f..dd21784525a8 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -70,7 +70,7 @@ xfs_synchronize_times( | |||
70 | 70 | ||
71 | /* | 71 | /* |
72 | * If the linux inode is valid, mark it dirty. | 72 | * If the linux inode is valid, mark it dirty. |
73 | * Used when commiting a dirty inode into a transaction so that | 73 | * Used when committing a dirty inode into a transaction so that |
74 | * the inode will get written back by the linux code | 74 | * the inode will get written back by the linux code |
75 | */ | 75 | */ |
76 | void | 76 | void |
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c index 508e06fd7d1e..3ca795609113 100644 --- a/fs/xfs/linux-2.6/xfs_message.c +++ b/fs/xfs/linux-2.6/xfs_message.c | |||
@@ -28,53 +28,47 @@ | |||
28 | /* | 28 | /* |
29 | * XFS logging functions | 29 | * XFS logging functions |
30 | */ | 30 | */ |
31 | static int | 31 | static void |
32 | __xfs_printk( | 32 | __xfs_printk( |
33 | const char *level, | 33 | const char *level, |
34 | const struct xfs_mount *mp, | 34 | const struct xfs_mount *mp, |
35 | struct va_format *vaf) | 35 | struct va_format *vaf) |
36 | { | 36 | { |
37 | if (mp && mp->m_fsname) | 37 | if (mp && mp->m_fsname) |
38 | return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); | 38 | printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); |
39 | return printk("%sXFS: %pV\n", level, vaf); | 39 | printk("%sXFS: %pV\n", level, vaf); |
40 | } | 40 | } |
41 | 41 | ||
42 | int xfs_printk( | 42 | void xfs_printk( |
43 | const char *level, | 43 | const char *level, |
44 | const struct xfs_mount *mp, | 44 | const struct xfs_mount *mp, |
45 | const char *fmt, ...) | 45 | const char *fmt, ...) |
46 | { | 46 | { |
47 | struct va_format vaf; | 47 | struct va_format vaf; |
48 | va_list args; | 48 | va_list args; |
49 | int r; | ||
50 | 49 | ||
51 | va_start(args, fmt); | 50 | va_start(args, fmt); |
52 | 51 | ||
53 | vaf.fmt = fmt; | 52 | vaf.fmt = fmt; |
54 | vaf.va = &args; | 53 | vaf.va = &args; |
55 | 54 | ||
56 | r = __xfs_printk(level, mp, &vaf); | 55 | __xfs_printk(level, mp, &vaf); |
57 | va_end(args); | 56 | va_end(args); |
58 | |||
59 | return r; | ||
60 | } | 57 | } |
61 | 58 | ||
62 | #define define_xfs_printk_level(func, kern_level) \ | 59 | #define define_xfs_printk_level(func, kern_level) \ |
63 | int func(const struct xfs_mount *mp, const char *fmt, ...) \ | 60 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ |
64 | { \ | 61 | { \ |
65 | struct va_format vaf; \ | 62 | struct va_format vaf; \ |
66 | va_list args; \ | 63 | va_list args; \ |
67 | int r; \ | ||
68 | \ | 64 | \ |
69 | va_start(args, fmt); \ | 65 | va_start(args, fmt); \ |
70 | \ | 66 | \ |
71 | vaf.fmt = fmt; \ | 67 | vaf.fmt = fmt; \ |
72 | vaf.va = &args; \ | 68 | vaf.va = &args; \ |
73 | \ | 69 | \ |
74 | r = __xfs_printk(kern_level, mp, &vaf); \ | 70 | __xfs_printk(kern_level, mp, &vaf); \ |
75 | va_end(args); \ | 71 | va_end(args); \ |
76 | \ | ||
77 | return r; \ | ||
78 | } \ | 72 | } \ |
79 | 73 | ||
80 | define_xfs_printk_level(xfs_emerg, KERN_EMERG); | 74 | define_xfs_printk_level(xfs_emerg, KERN_EMERG); |
@@ -88,7 +82,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO); | |||
88 | define_xfs_printk_level(xfs_debug, KERN_DEBUG); | 82 | define_xfs_printk_level(xfs_debug, KERN_DEBUG); |
89 | #endif | 83 | #endif |
90 | 84 | ||
91 | int | 85 | void |
92 | xfs_alert_tag( | 86 | xfs_alert_tag( |
93 | const struct xfs_mount *mp, | 87 | const struct xfs_mount *mp, |
94 | int panic_tag, | 88 | int panic_tag, |
@@ -97,7 +91,6 @@ xfs_alert_tag( | |||
97 | struct va_format vaf; | 91 | struct va_format vaf; |
98 | va_list args; | 92 | va_list args; |
99 | int do_panic = 0; | 93 | int do_panic = 0; |
100 | int r; | ||
101 | 94 | ||
102 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { | 95 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { |
103 | xfs_printk(KERN_ALERT, mp, | 96 | xfs_printk(KERN_ALERT, mp, |
@@ -110,12 +103,10 @@ xfs_alert_tag( | |||
110 | vaf.fmt = fmt; | 103 | vaf.fmt = fmt; |
111 | vaf.va = &args; | 104 | vaf.va = &args; |
112 | 105 | ||
113 | r = __xfs_printk(KERN_ALERT, mp, &vaf); | 106 | __xfs_printk(KERN_ALERT, mp, &vaf); |
114 | va_end(args); | 107 | va_end(args); |
115 | 108 | ||
116 | BUG_ON(do_panic); | 109 | BUG_ON(do_panic); |
117 | |||
118 | return r; | ||
119 | } | 110 | } |
120 | 111 | ||
121 | void | 112 | void |
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h index e77ffa16745b..f1b3fc1b6c4e 100644 --- a/fs/xfs/linux-2.6/xfs_message.h +++ b/fs/xfs/linux-2.6/xfs_message.h | |||
@@ -3,32 +3,34 @@ | |||
3 | 3 | ||
4 | struct xfs_mount; | 4 | struct xfs_mount; |
5 | 5 | ||
6 | extern int xfs_printk(const char *level, const struct xfs_mount *mp, | 6 | extern void xfs_printk(const char *level, const struct xfs_mount *mp, |
7 | const char *fmt, ...) | 7 | const char *fmt, ...) |
8 | __attribute__ ((format (printf, 3, 4))); | 8 | __attribute__ ((format (printf, 3, 4))); |
9 | extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) | 9 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) |
10 | __attribute__ ((format (printf, 2, 3))); | 10 | __attribute__ ((format (printf, 2, 3))); |
11 | extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) | 11 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) |
12 | __attribute__ ((format (printf, 2, 3))); | 12 | __attribute__ ((format (printf, 2, 3))); |
13 | extern int xfs_alert_tag(const struct xfs_mount *mp, int tag, | 13 | extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, |
14 | const char *fmt, ...) | 14 | const char *fmt, ...) |
15 | __attribute__ ((format (printf, 3, 4))); | 15 | __attribute__ ((format (printf, 3, 4))); |
16 | extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) | 16 | extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) |
17 | __attribute__ ((format (printf, 2, 3))); | 17 | __attribute__ ((format (printf, 2, 3))); |
18 | extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...) | 18 | extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) |
19 | __attribute__ ((format (printf, 2, 3))); | 19 | __attribute__ ((format (printf, 2, 3))); |
20 | extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) | 20 | extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) |
21 | __attribute__ ((format (printf, 2, 3))); | 21 | __attribute__ ((format (printf, 2, 3))); |
22 | extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) | 22 | extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) |
23 | __attribute__ ((format (printf, 2, 3))); | 23 | __attribute__ ((format (printf, 2, 3))); |
24 | extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...) | 24 | extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) |
25 | __attribute__ ((format (printf, 2, 3))); | 25 | __attribute__ ((format (printf, 2, 3))); |
26 | 26 | ||
27 | #ifdef DEBUG | 27 | #ifdef DEBUG |
28 | extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | 28 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) |
29 | __attribute__ ((format (printf, 2, 3))); | 29 | __attribute__ ((format (printf, 2, 3))); |
30 | #else | 30 | #else |
31 | #define xfs_debug(mp, fmt, ...) (0) | 31 | static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) |
32 | { | ||
33 | } | ||
32 | #endif | 34 | #endif |
33 | 35 | ||
34 | extern void assfail(char *expr, char *f, int l); | 36 | extern void assfail(char *expr, char *f, int l); |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1ba5c451da36..b38e58d02299 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -816,75 +816,6 @@ xfs_setup_devices( | |||
816 | return 0; | 816 | return 0; |
817 | } | 817 | } |
818 | 818 | ||
819 | /* | ||
820 | * XFS AIL push thread support | ||
821 | */ | ||
822 | void | ||
823 | xfsaild_wakeup( | ||
824 | struct xfs_ail *ailp, | ||
825 | xfs_lsn_t threshold_lsn) | ||
826 | { | ||
827 | /* only ever move the target forwards */ | ||
828 | if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { | ||
829 | ailp->xa_target = threshold_lsn; | ||
830 | wake_up_process(ailp->xa_task); | ||
831 | } | ||
832 | } | ||
833 | |||
834 | STATIC int | ||
835 | xfsaild( | ||
836 | void *data) | ||
837 | { | ||
838 | struct xfs_ail *ailp = data; | ||
839 | xfs_lsn_t last_pushed_lsn = 0; | ||
840 | long tout = 0; /* milliseconds */ | ||
841 | |||
842 | while (!kthread_should_stop()) { | ||
843 | /* | ||
844 | * for short sleeps indicating congestion, don't allow us to | ||
845 | * get woken early. Otherwise all we do is bang on the AIL lock | ||
846 | * without making progress. | ||
847 | */ | ||
848 | if (tout && tout <= 20) | ||
849 | __set_current_state(TASK_KILLABLE); | ||
850 | else | ||
851 | __set_current_state(TASK_INTERRUPTIBLE); | ||
852 | schedule_timeout(tout ? | ||
853 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
854 | |||
855 | /* swsusp */ | ||
856 | try_to_freeze(); | ||
857 | |||
858 | ASSERT(ailp->xa_mount->m_log); | ||
859 | if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) | ||
860 | continue; | ||
861 | |||
862 | tout = xfsaild_push(ailp, &last_pushed_lsn); | ||
863 | } | ||
864 | |||
865 | return 0; | ||
866 | } /* xfsaild */ | ||
867 | |||
868 | int | ||
869 | xfsaild_start( | ||
870 | struct xfs_ail *ailp) | ||
871 | { | ||
872 | ailp->xa_target = 0; | ||
873 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | ||
874 | ailp->xa_mount->m_fsname); | ||
875 | if (IS_ERR(ailp->xa_task)) | ||
876 | return -PTR_ERR(ailp->xa_task); | ||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | void | ||
881 | xfsaild_stop( | ||
882 | struct xfs_ail *ailp) | ||
883 | { | ||
884 | kthread_stop(ailp->xa_task); | ||
885 | } | ||
886 | |||
887 | |||
888 | /* Catch misguided souls that try to use this interface on XFS */ | 819 | /* Catch misguided souls that try to use this interface on XFS */ |
889 | STATIC struct inode * | 820 | STATIC struct inode * |
890 | xfs_fs_alloc_inode( | 821 | xfs_fs_alloc_inode( |
@@ -1191,22 +1122,12 @@ xfs_fs_sync_fs( | |||
1191 | return -error; | 1122 | return -error; |
1192 | 1123 | ||
1193 | if (laptop_mode) { | 1124 | if (laptop_mode) { |
1194 | int prev_sync_seq = mp->m_sync_seq; | ||
1195 | |||
1196 | /* | 1125 | /* |
1197 | * The disk must be active because we're syncing. | 1126 | * The disk must be active because we're syncing. |
1198 | * We schedule xfssyncd now (now that the disk is | 1127 | * We schedule xfssyncd now (now that the disk is |
1199 | * active) instead of later (when it might not be). | 1128 | * active) instead of later (when it might not be). |
1200 | */ | 1129 | */ |
1201 | wake_up_process(mp->m_sync_task); | 1130 | flush_delayed_work_sync(&mp->m_sync_work); |
1202 | /* | ||
1203 | * We have to wait for the sync iteration to complete. | ||
1204 | * If we don't, the disk activity caused by the sync | ||
1205 | * will come after the sync is completed, and that | ||
1206 | * triggers another sync from laptop mode. | ||
1207 | */ | ||
1208 | wait_event(mp->m_wait_single_sync_task, | ||
1209 | mp->m_sync_seq != prev_sync_seq); | ||
1210 | } | 1131 | } |
1211 | 1132 | ||
1212 | return 0; | 1133 | return 0; |
@@ -1490,9 +1411,6 @@ xfs_fs_fill_super( | |||
1490 | spin_lock_init(&mp->m_sb_lock); | 1411 | spin_lock_init(&mp->m_sb_lock); |
1491 | mutex_init(&mp->m_growlock); | 1412 | mutex_init(&mp->m_growlock); |
1492 | atomic_set(&mp->m_active_trans, 0); | 1413 | atomic_set(&mp->m_active_trans, 0); |
1493 | INIT_LIST_HEAD(&mp->m_sync_list); | ||
1494 | spin_lock_init(&mp->m_sync_lock); | ||
1495 | init_waitqueue_head(&mp->m_wait_single_sync_task); | ||
1496 | 1414 | ||
1497 | mp->m_super = sb; | 1415 | mp->m_super = sb; |
1498 | sb->s_fs_info = mp; | 1416 | sb->s_fs_info = mp; |
@@ -1799,6 +1717,38 @@ xfs_destroy_zones(void) | |||
1799 | } | 1717 | } |
1800 | 1718 | ||
1801 | STATIC int __init | 1719 | STATIC int __init |
1720 | xfs_init_workqueues(void) | ||
1721 | { | ||
1722 | /* | ||
1723 | * max_active is set to 8 to give enough concurency to allow | ||
1724 | * multiple work operations on each CPU to run. This allows multiple | ||
1725 | * filesystems to be running sync work concurrently, and scales with | ||
1726 | * the number of CPUs in the system. | ||
1727 | */ | ||
1728 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | ||
1729 | if (!xfs_syncd_wq) | ||
1730 | goto out; | ||
1731 | |||
1732 | xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); | ||
1733 | if (!xfs_ail_wq) | ||
1734 | goto out_destroy_syncd; | ||
1735 | |||
1736 | return 0; | ||
1737 | |||
1738 | out_destroy_syncd: | ||
1739 | destroy_workqueue(xfs_syncd_wq); | ||
1740 | out: | ||
1741 | return -ENOMEM; | ||
1742 | } | ||
1743 | |||
1744 | STATIC void | ||
1745 | xfs_destroy_workqueues(void) | ||
1746 | { | ||
1747 | destroy_workqueue(xfs_ail_wq); | ||
1748 | destroy_workqueue(xfs_syncd_wq); | ||
1749 | } | ||
1750 | |||
1751 | STATIC int __init | ||
1802 | init_xfs_fs(void) | 1752 | init_xfs_fs(void) |
1803 | { | 1753 | { |
1804 | int error; | 1754 | int error; |
@@ -1813,10 +1763,14 @@ init_xfs_fs(void) | |||
1813 | if (error) | 1763 | if (error) |
1814 | goto out; | 1764 | goto out; |
1815 | 1765 | ||
1816 | error = xfs_mru_cache_init(); | 1766 | error = xfs_init_workqueues(); |
1817 | if (error) | 1767 | if (error) |
1818 | goto out_destroy_zones; | 1768 | goto out_destroy_zones; |
1819 | 1769 | ||
1770 | error = xfs_mru_cache_init(); | ||
1771 | if (error) | ||
1772 | goto out_destroy_wq; | ||
1773 | |||
1820 | error = xfs_filestream_init(); | 1774 | error = xfs_filestream_init(); |
1821 | if (error) | 1775 | if (error) |
1822 | goto out_mru_cache_uninit; | 1776 | goto out_mru_cache_uninit; |
@@ -1833,6 +1787,10 @@ init_xfs_fs(void) | |||
1833 | if (error) | 1787 | if (error) |
1834 | goto out_cleanup_procfs; | 1788 | goto out_cleanup_procfs; |
1835 | 1789 | ||
1790 | error = xfs_init_workqueues(); | ||
1791 | if (error) | ||
1792 | goto out_sysctl_unregister; | ||
1793 | |||
1836 | vfs_initquota(); | 1794 | vfs_initquota(); |
1837 | 1795 | ||
1838 | error = register_filesystem(&xfs_fs_type); | 1796 | error = register_filesystem(&xfs_fs_type); |
@@ -1850,6 +1808,8 @@ init_xfs_fs(void) | |||
1850 | xfs_filestream_uninit(); | 1808 | xfs_filestream_uninit(); |
1851 | out_mru_cache_uninit: | 1809 | out_mru_cache_uninit: |
1852 | xfs_mru_cache_uninit(); | 1810 | xfs_mru_cache_uninit(); |
1811 | out_destroy_wq: | ||
1812 | xfs_destroy_workqueues(); | ||
1853 | out_destroy_zones: | 1813 | out_destroy_zones: |
1854 | xfs_destroy_zones(); | 1814 | xfs_destroy_zones(); |
1855 | out: | 1815 | out: |
@@ -1866,6 +1826,7 @@ exit_xfs_fs(void) | |||
1866 | xfs_buf_terminate(); | 1826 | xfs_buf_terminate(); |
1867 | xfs_filestream_uninit(); | 1827 | xfs_filestream_uninit(); |
1868 | xfs_mru_cache_uninit(); | 1828 | xfs_mru_cache_uninit(); |
1829 | xfs_destroy_workqueues(); | ||
1869 | xfs_destroy_zones(); | 1830 | xfs_destroy_zones(); |
1870 | } | 1831 | } |
1871 | 1832 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 594cd822d84d..e4f9c1b0836c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_trans_priv.h" | ||
25 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
@@ -39,6 +40,8 @@ | |||
39 | #include <linux/kthread.h> | 40 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
41 | 42 | ||
43 | struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
44 | |||
42 | /* | 45 | /* |
43 | * The inode lookup is done in batches to keep the amount of lock traffic and | 46 | * The inode lookup is done in batches to keep the amount of lock traffic and |
44 | * radix tree lookups to a minimum. The batch size is a trade off between | 47 | * radix tree lookups to a minimum. The batch size is a trade off between |
@@ -401,7 +404,7 @@ xfs_quiesce_fs( | |||
401 | /* | 404 | /* |
402 | * Second stage of a quiesce. The data is already synced, now we have to take | 405 | * Second stage of a quiesce. The data is already synced, now we have to take |
403 | * care of the metadata. New transactions are already blocked, so we need to | 406 | * care of the metadata. New transactions are already blocked, so we need to |
404 | * wait for any remaining transactions to drain out before proceding. | 407 | * wait for any remaining transactions to drain out before proceeding. |
405 | */ | 408 | */ |
406 | void | 409 | void |
407 | xfs_quiesce_attr( | 410 | xfs_quiesce_attr( |
@@ -431,62 +434,12 @@ xfs_quiesce_attr( | |||
431 | xfs_unmountfs_writesb(mp); | 434 | xfs_unmountfs_writesb(mp); |
432 | } | 435 | } |
433 | 436 | ||
434 | /* | 437 | static void |
435 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | 438 | xfs_syncd_queue_sync( |
436 | * Doing this has two advantages: | 439 | struct xfs_mount *mp) |
437 | * - It saves on stack space, which is tight in certain situations | ||
438 | * - It can be used (with care) as a mechanism to avoid deadlocks. | ||
439 | * Flushing while allocating in a full filesystem requires both. | ||
440 | */ | ||
441 | STATIC void | ||
442 | xfs_syncd_queue_work( | ||
443 | struct xfs_mount *mp, | ||
444 | void *data, | ||
445 | void (*syncer)(struct xfs_mount *, void *), | ||
446 | struct completion *completion) | ||
447 | { | ||
448 | struct xfs_sync_work *work; | ||
449 | |||
450 | work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); | ||
451 | INIT_LIST_HEAD(&work->w_list); | ||
452 | work->w_syncer = syncer; | ||
453 | work->w_data = data; | ||
454 | work->w_mount = mp; | ||
455 | work->w_completion = completion; | ||
456 | spin_lock(&mp->m_sync_lock); | ||
457 | list_add_tail(&work->w_list, &mp->m_sync_list); | ||
458 | spin_unlock(&mp->m_sync_lock); | ||
459 | wake_up_process(mp->m_sync_task); | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | * Flush delayed allocate data, attempting to free up reserved space | ||
464 | * from existing allocations. At this point a new allocation attempt | ||
465 | * has failed with ENOSPC and we are in the process of scratching our | ||
466 | * heads, looking about for more room... | ||
467 | */ | ||
468 | STATIC void | ||
469 | xfs_flush_inodes_work( | ||
470 | struct xfs_mount *mp, | ||
471 | void *arg) | ||
472 | { | ||
473 | struct inode *inode = arg; | ||
474 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
475 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
476 | iput(inode); | ||
477 | } | ||
478 | |||
479 | void | ||
480 | xfs_flush_inodes( | ||
481 | xfs_inode_t *ip) | ||
482 | { | 440 | { |
483 | struct inode *inode = VFS_I(ip); | 441 | queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, |
484 | DECLARE_COMPLETION_ONSTACK(completion); | 442 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); |
485 | |||
486 | igrab(inode); | ||
487 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); | ||
488 | wait_for_completion(&completion); | ||
489 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); | ||
490 | } | 443 | } |
491 | 444 | ||
492 | /* | 445 | /* |
@@ -496,9 +449,10 @@ xfs_flush_inodes( | |||
496 | */ | 449 | */ |
497 | STATIC void | 450 | STATIC void |
498 | xfs_sync_worker( | 451 | xfs_sync_worker( |
499 | struct xfs_mount *mp, | 452 | struct work_struct *work) |
500 | void *unused) | ||
501 | { | 453 | { |
454 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
455 | struct xfs_mount, m_sync_work); | ||
502 | int error; | 456 | int error; |
503 | 457 | ||
504 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 458 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
@@ -508,73 +462,106 @@ xfs_sync_worker( | |||
508 | error = xfs_fs_log_dummy(mp); | 462 | error = xfs_fs_log_dummy(mp); |
509 | else | 463 | else |
510 | xfs_log_force(mp, 0); | 464 | xfs_log_force(mp, 0); |
511 | xfs_reclaim_inodes(mp, 0); | ||
512 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | 465 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); |
466 | |||
467 | /* start pushing all the metadata that is currently dirty */ | ||
468 | xfs_ail_push_all(mp->m_ail); | ||
513 | } | 469 | } |
514 | mp->m_sync_seq++; | 470 | |
515 | wake_up(&mp->m_wait_single_sync_task); | 471 | /* queue us up again */ |
472 | xfs_syncd_queue_sync(mp); | ||
516 | } | 473 | } |
517 | 474 | ||
518 | STATIC int | 475 | /* |
519 | xfssyncd( | 476 | * Queue a new inode reclaim pass if there are reclaimable inodes and there |
520 | void *arg) | 477 | * isn't a reclaim pass already in progress. By default it runs every 5s based |
478 | * on the xfs syncd work default of 30s. Perhaps this should have it's own | ||
479 | * tunable, but that can be done if this method proves to be ineffective or too | ||
480 | * aggressive. | ||
481 | */ | ||
482 | static void | ||
483 | xfs_syncd_queue_reclaim( | ||
484 | struct xfs_mount *mp) | ||
521 | { | 485 | { |
522 | struct xfs_mount *mp = arg; | ||
523 | long timeleft; | ||
524 | xfs_sync_work_t *work, *n; | ||
525 | LIST_HEAD (tmp); | ||
526 | |||
527 | set_freezable(); | ||
528 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | ||
529 | for (;;) { | ||
530 | if (list_empty(&mp->m_sync_list)) | ||
531 | timeleft = schedule_timeout_interruptible(timeleft); | ||
532 | /* swsusp */ | ||
533 | try_to_freeze(); | ||
534 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) | ||
535 | break; | ||
536 | 486 | ||
537 | spin_lock(&mp->m_sync_lock); | 487 | /* |
538 | /* | 488 | * We can have inodes enter reclaim after we've shut down the syncd |
539 | * We can get woken by laptop mode, to do a sync - | 489 | * workqueue during unmount, so don't allow reclaim work to be queued |
540 | * that's the (only!) case where the list would be | 490 | * during unmount. |
541 | * empty with time remaining. | 491 | */ |
542 | */ | 492 | if (!(mp->m_super->s_flags & MS_ACTIVE)) |
543 | if (!timeleft || list_empty(&mp->m_sync_list)) { | 493 | return; |
544 | if (!timeleft) | ||
545 | timeleft = xfs_syncd_centisecs * | ||
546 | msecs_to_jiffies(10); | ||
547 | INIT_LIST_HEAD(&mp->m_sync_work.w_list); | ||
548 | list_add_tail(&mp->m_sync_work.w_list, | ||
549 | &mp->m_sync_list); | ||
550 | } | ||
551 | list_splice_init(&mp->m_sync_list, &tmp); | ||
552 | spin_unlock(&mp->m_sync_lock); | ||
553 | 494 | ||
554 | list_for_each_entry_safe(work, n, &tmp, w_list) { | 495 | rcu_read_lock(); |
555 | (*work->w_syncer)(mp, work->w_data); | 496 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
556 | list_del(&work->w_list); | 497 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, |
557 | if (work == &mp->m_sync_work) | 498 | msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); |
558 | continue; | ||
559 | if (work->w_completion) | ||
560 | complete(work->w_completion); | ||
561 | kmem_free(work); | ||
562 | } | ||
563 | } | 499 | } |
500 | rcu_read_unlock(); | ||
501 | } | ||
564 | 502 | ||
565 | return 0; | 503 | /* |
504 | * This is a fast pass over the inode cache to try to get reclaim moving on as | ||
505 | * many inodes as possible in a short period of time. It kicks itself every few | ||
506 | * seconds, as well as being kicked by the inode cache shrinker when memory | ||
507 | * goes low. It scans as quickly as possible avoiding locked inodes or those | ||
508 | * already being flushed, and once done schedules a future pass. | ||
509 | */ | ||
510 | STATIC void | ||
511 | xfs_reclaim_worker( | ||
512 | struct work_struct *work) | ||
513 | { | ||
514 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
515 | struct xfs_mount, m_reclaim_work); | ||
516 | |||
517 | xfs_reclaim_inodes(mp, SYNC_TRYLOCK); | ||
518 | xfs_syncd_queue_reclaim(mp); | ||
519 | } | ||
520 | |||
521 | /* | ||
522 | * Flush delayed allocate data, attempting to free up reserved space | ||
523 | * from existing allocations. At this point a new allocation attempt | ||
524 | * has failed with ENOSPC and we are in the process of scratching our | ||
525 | * heads, looking about for more room. | ||
526 | * | ||
527 | * Queue a new data flush if there isn't one already in progress and | ||
528 | * wait for completion of the flush. This means that we only ever have one | ||
529 | * inode flush in progress no matter how many ENOSPC events are occurring and | ||
530 | * so will prevent the system from bogging down due to every concurrent | ||
531 | * ENOSPC event scanning all the active inodes in the system for writeback. | ||
532 | */ | ||
533 | void | ||
534 | xfs_flush_inodes( | ||
535 | struct xfs_inode *ip) | ||
536 | { | ||
537 | struct xfs_mount *mp = ip->i_mount; | ||
538 | |||
539 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | ||
540 | flush_work_sync(&mp->m_flush_work); | ||
541 | } | ||
542 | |||
543 | STATIC void | ||
544 | xfs_flush_worker( | ||
545 | struct work_struct *work) | ||
546 | { | ||
547 | struct xfs_mount *mp = container_of(work, | ||
548 | struct xfs_mount, m_flush_work); | ||
549 | |||
550 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
551 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
566 | } | 552 | } |
567 | 553 | ||
568 | int | 554 | int |
569 | xfs_syncd_init( | 555 | xfs_syncd_init( |
570 | struct xfs_mount *mp) | 556 | struct xfs_mount *mp) |
571 | { | 557 | { |
572 | mp->m_sync_work.w_syncer = xfs_sync_worker; | 558 | INIT_WORK(&mp->m_flush_work, xfs_flush_worker); |
573 | mp->m_sync_work.w_mount = mp; | 559 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); |
574 | mp->m_sync_work.w_completion = NULL; | 560 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
575 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); | 561 | |
576 | if (IS_ERR(mp->m_sync_task)) | 562 | xfs_syncd_queue_sync(mp); |
577 | return -PTR_ERR(mp->m_sync_task); | 563 | xfs_syncd_queue_reclaim(mp); |
564 | |||
578 | return 0; | 565 | return 0; |
579 | } | 566 | } |
580 | 567 | ||
@@ -582,7 +569,9 @@ void | |||
582 | xfs_syncd_stop( | 569 | xfs_syncd_stop( |
583 | struct xfs_mount *mp) | 570 | struct xfs_mount *mp) |
584 | { | 571 | { |
585 | kthread_stop(mp->m_sync_task); | 572 | cancel_delayed_work_sync(&mp->m_sync_work); |
573 | cancel_delayed_work_sync(&mp->m_reclaim_work); | ||
574 | cancel_work_sync(&mp->m_flush_work); | ||
586 | } | 575 | } |
587 | 576 | ||
588 | void | 577 | void |
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag( | |||
601 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | 590 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), |
602 | XFS_ICI_RECLAIM_TAG); | 591 | XFS_ICI_RECLAIM_TAG); |
603 | spin_unlock(&ip->i_mount->m_perag_lock); | 592 | spin_unlock(&ip->i_mount->m_perag_lock); |
593 | |||
594 | /* schedule periodic background inode reclaim */ | ||
595 | xfs_syncd_queue_reclaim(ip->i_mount); | ||
596 | |||
604 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, | 597 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, |
605 | -1, _RET_IP_); | 598 | -1, _RET_IP_); |
606 | } | 599 | } |
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes( | |||
1017 | } | 1010 | } |
1018 | 1011 | ||
1019 | /* | 1012 | /* |
1020 | * Shrinker infrastructure. | 1013 | * Inode cache shrinker. |
1014 | * | ||
1015 | * When called we make sure that there is a background (fast) inode reclaim in | ||
1016 | * progress, while we will throttle the speed of reclaim via doiing synchronous | ||
1017 | * reclaim of inodes. That means if we come across dirty inodes, we wait for | ||
1018 | * them to be cleaned, which we hope will not be very long due to the | ||
1019 | * background walker having already kicked the IO off on those dirty inodes. | ||
1021 | */ | 1020 | */ |
1022 | static int | 1021 | static int |
1023 | xfs_reclaim_inode_shrink( | 1022 | xfs_reclaim_inode_shrink( |
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink( | |||
1032 | 1031 | ||
1033 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | 1032 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); |
1034 | if (nr_to_scan) { | 1033 | if (nr_to_scan) { |
1034 | /* kick background reclaimer and push the AIL */ | ||
1035 | xfs_syncd_queue_reclaim(mp); | ||
1036 | xfs_ail_push_all(mp->m_ail); | ||
1037 | |||
1035 | if (!(gfp_mask & __GFP_FS)) | 1038 | if (!(gfp_mask & __GFP_FS)) |
1036 | return -1; | 1039 | return -1; |
1037 | 1040 | ||
1038 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); | 1041 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, |
1042 | &nr_to_scan); | ||
1039 | /* terminate if we don't exhaust the scan */ | 1043 | /* terminate if we don't exhaust the scan */ |
1040 | if (nr_to_scan > 0) | 1044 | if (nr_to_scan > 0) |
1041 | return -1; | 1045 | return -1; |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 32ba6628290c..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work { | |||
32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
34 | 34 | ||
35 | extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
36 | |||
35 | int xfs_syncd_init(struct xfs_mount *mp); | 37 | int xfs_syncd_init(struct xfs_mount *mp); |
36 | void xfs_syncd_stop(struct xfs_mount *mp); | 38 | void xfs_syncd_stop(struct xfs_mount *mp); |
37 | 39 | ||
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 7e2416478503..6fa214603819 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -600,7 +600,7 @@ xfs_qm_dqread( | |||
600 | 600 | ||
601 | /* | 601 | /* |
602 | * Reservation counters are defined as reservation plus current usage | 602 | * Reservation counters are defined as reservation plus current usage |
603 | * to avoid having to add everytime. | 603 | * to avoid having to add every time. |
604 | */ | 604 | */ |
605 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); | 605 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); |
606 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); | 606 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 254ee062bd7d..69228aa8605a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -461,12 +461,10 @@ xfs_qm_dqflush_all( | |||
461 | struct xfs_quotainfo *q = mp->m_quotainfo; | 461 | struct xfs_quotainfo *q = mp->m_quotainfo; |
462 | int recl; | 462 | int recl; |
463 | struct xfs_dquot *dqp; | 463 | struct xfs_dquot *dqp; |
464 | int niters; | ||
465 | int error; | 464 | int error; |
466 | 465 | ||
467 | if (!q) | 466 | if (!q) |
468 | return 0; | 467 | return 0; |
469 | niters = 0; | ||
470 | again: | 468 | again: |
471 | mutex_lock(&q->qi_dqlist_lock); | 469 | mutex_lock(&q->qi_dqlist_lock); |
472 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { | 470 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { |
@@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs( | |||
1314 | { | 1312 | { |
1315 | xfs_buf_t *bp; | 1313 | xfs_buf_t *bp; |
1316 | int error; | 1314 | int error; |
1317 | int notcommitted; | ||
1318 | int incr; | ||
1319 | int type; | 1315 | int type; |
1320 | 1316 | ||
1321 | ASSERT(blkcnt > 0); | 1317 | ASSERT(blkcnt > 0); |
1322 | notcommitted = 0; | ||
1323 | incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? | ||
1324 | XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; | ||
1325 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 1318 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
1326 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); | 1319 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); |
1327 | error = 0; | 1320 | error = 0; |
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index c9446f1c726d..567b29b9f1b3 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone; | |||
65 | * block in the dquot/xqm code. | 65 | * block in the dquot/xqm code. |
66 | */ | 66 | */ |
67 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 | 67 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 |
68 | /* | ||
69 | * When doing a quotacheck, we log dquot clusters of this many FSBs at most | ||
70 | * in a single transaction. We don't want to ask for too huge a log reservation. | ||
71 | */ | ||
72 | #define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 | ||
73 | 68 | ||
74 | typedef xfs_dqhash_t xfs_dqlist_t; | 69 | typedef xfs_dqhash_t xfs_dqlist_t; |
75 | 70 | ||
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 774d7ec6df8e..a0a829addca9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -134,7 +134,7 @@ xfs_qm_newmount( | |||
134 | */ | 134 | */ |
135 | if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { | 135 | if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { |
136 | /* | 136 | /* |
137 | * If an error occured, qm_mount_quotas code | 137 | * If an error occurred, qm_mount_quotas code |
138 | * has already disabled quotas. So, just finish | 138 | * has already disabled quotas. So, just finish |
139 | * mounting, and get on with the boring life | 139 | * mounting, and get on with the boring life |
140 | * without disk quotas. | 140 | * without disk quotas. |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c82f06778a27..2dadb15d5ca9 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -172,7 +172,7 @@ xfs_qm_scall_quotaoff( | |||
172 | /* | 172 | /* |
173 | * Next we make the changes in the quota flag in the mount struct. | 173 | * Next we make the changes in the quota flag in the mount struct. |
174 | * This isn't protected by a particular lock directly, because we | 174 | * This isn't protected by a particular lock directly, because we |
175 | * don't want to take a mrlock everytime we depend on quotas being on. | 175 | * don't want to take a mrlock every time we depend on quotas being on. |
176 | */ | 176 | */ |
177 | mp->m_qflags &= ~(flags); | 177 | mp->m_qflags &= ~(flags); |
178 | 178 | ||
@@ -313,14 +313,12 @@ xfs_qm_scall_quotaon( | |||
313 | { | 313 | { |
314 | int error; | 314 | int error; |
315 | uint qf; | 315 | uint qf; |
316 | uint accflags; | ||
317 | __int64_t sbflags; | 316 | __int64_t sbflags; |
318 | 317 | ||
319 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); | 318 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); |
320 | /* | 319 | /* |
321 | * Switching on quota accounting must be done at mount time. | 320 | * Switching on quota accounting must be done at mount time. |
322 | */ | 321 | */ |
323 | accflags = flags & XFS_ALL_QUOTA_ACCT; | ||
324 | flags &= ~(XFS_ALL_QUOTA_ACCT); | 322 | flags &= ~(XFS_ALL_QUOTA_ACCT); |
325 | 323 | ||
326 | sbflags = 0; | 324 | sbflags = 0; |
@@ -354,7 +352,7 @@ xfs_qm_scall_quotaon( | |||
354 | return XFS_ERROR(EINVAL); | 352 | return XFS_ERROR(EINVAL); |
355 | } | 353 | } |
356 | /* | 354 | /* |
357 | * If everything's upto-date incore, then don't waste time. | 355 | * If everything's up to-date incore, then don't waste time. |
358 | */ | 356 | */ |
359 | if ((mp->m_qflags & flags) == flags) | 357 | if ((mp->m_qflags & flags) == flags) |
360 | return XFS_ERROR(EEXIST); | 358 | return XFS_ERROR(EEXIST); |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 4bc3c649aee4..27d64d752eab 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -2395,17 +2395,33 @@ xfs_free_extent( | |||
2395 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); | 2395 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); |
2396 | args.tp = tp; | 2396 | args.tp = tp; |
2397 | args.mp = tp->t_mountp; | 2397 | args.mp = tp->t_mountp; |
2398 | |||
2399 | /* | ||
2400 | * validate that the block number is legal - the enables us to detect | ||
2401 | * and handle a silent filesystem corruption rather than crashing. | ||
2402 | */ | ||
2398 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); | 2403 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); |
2399 | ASSERT(args.agno < args.mp->m_sb.sb_agcount); | 2404 | if (args.agno >= args.mp->m_sb.sb_agcount) |
2405 | return EFSCORRUPTED; | ||
2406 | |||
2400 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); | 2407 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); |
2408 | if (args.agbno >= args.mp->m_sb.sb_agblocks) | ||
2409 | return EFSCORRUPTED; | ||
2410 | |||
2401 | args.pag = xfs_perag_get(args.mp, args.agno); | 2411 | args.pag = xfs_perag_get(args.mp, args.agno); |
2402 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) | 2412 | ASSERT(args.pag); |
2413 | |||
2414 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); | ||
2415 | if (error) | ||
2403 | goto error0; | 2416 | goto error0; |
2404 | #ifdef DEBUG | 2417 | |
2405 | ASSERT(args.agbp != NULL); | 2418 | /* validate the extent size is legal now we have the agf locked */ |
2406 | ASSERT((args.agbno + len) <= | 2419 | if (args.agbno + len > |
2407 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); | 2420 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { |
2408 | #endif | 2421 | error = EFSCORRUPTED; |
2422 | goto error0; | ||
2423 | } | ||
2424 | |||
2409 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2425 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2410 | error0: | 2426 | error0: |
2411 | xfs_perag_put(args.pag); | 2427 | xfs_perag_put(args.pag); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e5413d96f1af..7b7e005e3dcc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -992,7 +992,7 @@ xfs_buf_iodone_callbacks( | |||
992 | lasttarg = XFS_BUF_TARGET(bp); | 992 | lasttarg = XFS_BUF_TARGET(bp); |
993 | 993 | ||
994 | /* | 994 | /* |
995 | * If the write was asynchronous then noone will be looking for the | 995 | * If the write was asynchronous then no one will be looking for the |
996 | * error. Clear the error state and write the buffer out again. | 996 | * error. Clear the error state and write the buffer out again. |
997 | * | 997 | * |
998 | * During sync or umount we'll write all pending buffers again | 998 | * During sync or umount we'll write all pending buffers again |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 742c8330994a..a37480a6e023 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2789,7 +2789,7 @@ xfs_iflush( | |||
2789 | 2789 | ||
2790 | /* | 2790 | /* |
2791 | * We can't flush the inode until it is unpinned, so wait for it if we | 2791 | * We can't flush the inode until it is unpinned, so wait for it if we |
2792 | * are allowed to block. We know noone new can pin it, because we are | 2792 | * are allowed to block. We know no one new can pin it, because we are |
2793 | * holding the inode lock shared and you need to hold it exclusively to | 2793 | * holding the inode lock shared and you need to hold it exclusively to |
2794 | * pin the inode. | 2794 | * pin the inode. |
2795 | * | 2795 | * |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f753200cef8d..ff4e2a30227d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -111,7 +111,7 @@ struct xfs_imap { | |||
111 | * Generally, we do not want to hold the i_rlock while holding the | 111 | * Generally, we do not want to hold the i_rlock while holding the |
112 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. | 112 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. |
113 | * | 113 | * |
114 | * xfs_iptr_t contains all the inode fields upto and including the | 114 | * xfs_iptr_t contains all the inode fields up to and including the |
115 | * i_mnext and i_mprev fields, it is used as a marker in the inode | 115 | * i_mnext and i_mprev fields, it is used as a marker in the inode |
116 | * chain off the mount structure by xfs_sync calls. | 116 | * chain off the mount structure by xfs_sync calls. |
117 | */ | 117 | */ |
@@ -336,7 +336,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
336 | 336 | ||
337 | /* | 337 | /* |
338 | * Project quota id helpers (previously projid was 16bit only | 338 | * Project quota id helpers (previously projid was 16bit only |
339 | * and using two 16bit values to hold new 32bit projid was choosen | 339 | * and using two 16bit values to hold new 32bit projid was chosen |
340 | * to retain compatibility with "old" filesystems). | 340 | * to retain compatibility with "old" filesystems). |
341 | */ | 341 | */ |
342 | static inline prid_t | 342 | static inline prid_t |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 46cc40131d4a..576fdfe81d60 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -198,6 +198,41 @@ xfs_inode_item_size( | |||
198 | } | 198 | } |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * xfs_inode_item_format_extents - convert in-core extents to on-disk form | ||
202 | * | ||
203 | * For either the data or attr fork in extent format, we need to endian convert | ||
204 | * the in-core extent as we place them into the on-disk inode. In this case, we | ||
205 | * need to do this conversion before we write the extents into the log. Because | ||
206 | * we don't have the disk inode to write into here, we allocate a buffer and | ||
207 | * format the extents into it via xfs_iextents_copy(). We free the buffer in | ||
208 | * the unlock routine after the copy for the log has been made. | ||
209 | * | ||
210 | * In the case of the data fork, the in-core and on-disk fork sizes can be | ||
211 | * different due to delayed allocation extents. We only log on-disk extents | ||
212 | * here, so always use the physical fork size to determine the size of the | ||
213 | * buffer we need to allocate. | ||
214 | */ | ||
215 | STATIC void | ||
216 | xfs_inode_item_format_extents( | ||
217 | struct xfs_inode *ip, | ||
218 | struct xfs_log_iovec *vecp, | ||
219 | int whichfork, | ||
220 | int type) | ||
221 | { | ||
222 | xfs_bmbt_rec_t *ext_buffer; | ||
223 | |||
224 | ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); | ||
225 | if (whichfork == XFS_DATA_FORK) | ||
226 | ip->i_itemp->ili_extents_buf = ext_buffer; | ||
227 | else | ||
228 | ip->i_itemp->ili_aextents_buf = ext_buffer; | ||
229 | |||
230 | vecp->i_addr = ext_buffer; | ||
231 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); | ||
232 | vecp->i_type = type; | ||
233 | } | ||
234 | |||
235 | /* | ||
201 | * This is called to fill in the vector of log iovecs for the | 236 | * This is called to fill in the vector of log iovecs for the |
202 | * given inode log item. It fills the first item with an inode | 237 | * given inode log item. It fills the first item with an inode |
203 | * log format structure, the second with the on-disk inode structure, | 238 | * log format structure, the second with the on-disk inode structure, |
@@ -213,7 +248,6 @@ xfs_inode_item_format( | |||
213 | struct xfs_inode *ip = iip->ili_inode; | 248 | struct xfs_inode *ip = iip->ili_inode; |
214 | uint nvecs; | 249 | uint nvecs; |
215 | size_t data_bytes; | 250 | size_t data_bytes; |
216 | xfs_bmbt_rec_t *ext_buffer; | ||
217 | xfs_mount_t *mp; | 251 | xfs_mount_t *mp; |
218 | 252 | ||
219 | vecp->i_addr = &iip->ili_format; | 253 | vecp->i_addr = &iip->ili_format; |
@@ -320,22 +354,8 @@ xfs_inode_item_format( | |||
320 | } else | 354 | } else |
321 | #endif | 355 | #endif |
322 | { | 356 | { |
323 | /* | 357 | xfs_inode_item_format_extents(ip, vecp, |
324 | * There are delayed allocation extents | 358 | XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); |
325 | * in the inode, or we need to convert | ||
326 | * the extents to on disk format. | ||
327 | * Use xfs_iextents_copy() | ||
328 | * to copy only the real extents into | ||
329 | * a separate buffer. We'll free the | ||
330 | * buffer in the unlock routine. | ||
331 | */ | ||
332 | ext_buffer = kmem_alloc(ip->i_df.if_bytes, | ||
333 | KM_SLEEP); | ||
334 | iip->ili_extents_buf = ext_buffer; | ||
335 | vecp->i_addr = ext_buffer; | ||
336 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | ||
337 | XFS_DATA_FORK); | ||
338 | vecp->i_type = XLOG_REG_TYPE_IEXT; | ||
339 | } | 359 | } |
340 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | 360 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); |
341 | iip->ili_format.ilf_dsize = vecp->i_len; | 361 | iip->ili_format.ilf_dsize = vecp->i_len; |
@@ -445,19 +465,12 @@ xfs_inode_item_format( | |||
445 | */ | 465 | */ |
446 | vecp->i_addr = ip->i_afp->if_u1.if_extents; | 466 | vecp->i_addr = ip->i_afp->if_u1.if_extents; |
447 | vecp->i_len = ip->i_afp->if_bytes; | 467 | vecp->i_len = ip->i_afp->if_bytes; |
468 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
448 | #else | 469 | #else |
449 | ASSERT(iip->ili_aextents_buf == NULL); | 470 | ASSERT(iip->ili_aextents_buf == NULL); |
450 | /* | 471 | xfs_inode_item_format_extents(ip, vecp, |
451 | * Need to endian flip before logging | 472 | XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); |
452 | */ | ||
453 | ext_buffer = kmem_alloc(ip->i_afp->if_bytes, | ||
454 | KM_SLEEP); | ||
455 | iip->ili_aextents_buf = ext_buffer; | ||
456 | vecp->i_addr = ext_buffer; | ||
457 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | ||
458 | XFS_ATTR_FORK); | ||
459 | #endif | 473 | #endif |
460 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
461 | iip->ili_format.ilf_asize = vecp->i_len; | 474 | iip->ili_format.ilf_asize = vecp->i_len; |
462 | vecp++; | 475 | vecp++; |
463 | nvecs++; | 476 | nvecs++; |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index dc1882adaf54..751e94fe1f77 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -204,7 +204,6 @@ xfs_bulkstat( | |||
204 | xfs_agi_t *agi; /* agi header data */ | 204 | xfs_agi_t *agi; /* agi header data */ |
205 | xfs_agino_t agino; /* inode # in allocation group */ | 205 | xfs_agino_t agino; /* inode # in allocation group */ |
206 | xfs_agnumber_t agno; /* allocation group number */ | 206 | xfs_agnumber_t agno; /* allocation group number */ |
207 | xfs_daddr_t bno; /* inode cluster start daddr */ | ||
208 | int chunkidx; /* current index into inode chunk */ | 207 | int chunkidx; /* current index into inode chunk */ |
209 | int clustidx; /* current index into inode cluster */ | 208 | int clustidx; /* current index into inode cluster */ |
210 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 209 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
@@ -463,7 +462,6 @@ xfs_bulkstat( | |||
463 | mp->m_sb.sb_inopblog); | 462 | mp->m_sb.sb_inopblog); |
464 | } | 463 | } |
465 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 464 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
466 | bno = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
467 | /* | 465 | /* |
468 | * Skip if this inode is free. | 466 | * Skip if this inode is free. |
469 | */ | 467 | */ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 25efa9b8a602..b612ce4520ae 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
761 | break; | 761 | break; |
762 | case XLOG_STATE_COVER_NEED: | 762 | case XLOG_STATE_COVER_NEED: |
763 | case XLOG_STATE_COVER_NEED2: | 763 | case XLOG_STATE_COVER_NEED2: |
764 | if (!xfs_trans_ail_tail(log->l_ailp) && | 764 | if (!xfs_ail_min_lsn(log->l_ailp) && |
765 | xlog_iclogs_empty(log)) { | 765 | xlog_iclogs_empty(log)) { |
766 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | 766 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) |
767 | log->l_covered_state = XLOG_STATE_COVER_DONE; | 767 | log->l_covered_state = XLOG_STATE_COVER_DONE; |
@@ -801,7 +801,7 @@ xlog_assign_tail_lsn( | |||
801 | xfs_lsn_t tail_lsn; | 801 | xfs_lsn_t tail_lsn; |
802 | struct log *log = mp->m_log; | 802 | struct log *log = mp->m_log; |
803 | 803 | ||
804 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); | 804 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); |
805 | if (!tail_lsn) | 805 | if (!tail_lsn) |
806 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); | 806 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
807 | 807 | ||
@@ -1239,7 +1239,7 @@ xlog_grant_push_ail( | |||
1239 | * the filesystem is shutting down. | 1239 | * the filesystem is shutting down. |
1240 | */ | 1240 | */ |
1241 | if (!XLOG_FORCED_SHUTDOWN(log)) | 1241 | if (!XLOG_FORCED_SHUTDOWN(log)) |
1242 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | 1242 | xfs_ail_push(log->l_ailp, threshold_lsn); |
1243 | } | 1243 | } |
1244 | 1244 | ||
1245 | /* | 1245 | /* |
@@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr( | |||
3407 | xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); | 3407 | xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); |
3408 | } | 3408 | } |
3409 | 3409 | ||
3410 | /* | ||
3411 | * Check to make sure the grant write head didn't just over lap the tail. If | ||
3412 | * the cycles are the same, we can't be overlapping. Otherwise, make sure that | ||
3413 | * the cycles differ by exactly one and check the byte count. | ||
3414 | * | ||
3415 | * This check is run unlocked, so can give false positives. Rather than assert | ||
3416 | * on failures, use a warn-once flag and a panic tag to allow the admin to | ||
3417 | * determine if they want to panic the machine when such an error occurs. For | ||
3418 | * debug kernels this will have the same effect as using an assert but, unlinke | ||
3419 | * an assert, it can be turned off at runtime. | ||
3420 | */ | ||
3410 | STATIC void | 3421 | STATIC void |
3411 | xlog_verify_grant_tail( | 3422 | xlog_verify_grant_tail( |
3412 | struct log *log) | 3423 | struct log *log) |
@@ -3414,17 +3425,22 @@ xlog_verify_grant_tail( | |||
3414 | int tail_cycle, tail_blocks; | 3425 | int tail_cycle, tail_blocks; |
3415 | int cycle, space; | 3426 | int cycle, space; |
3416 | 3427 | ||
3417 | /* | ||
3418 | * Check to make sure the grant write head didn't just over lap the | ||
3419 | * tail. If the cycles are the same, we can't be overlapping. | ||
3420 | * Otherwise, make sure that the cycles differ by exactly one and | ||
3421 | * check the byte count. | ||
3422 | */ | ||
3423 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); | 3428 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); |
3424 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); | 3429 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); |
3425 | if (tail_cycle != cycle) { | 3430 | if (tail_cycle != cycle) { |
3426 | ASSERT(cycle - 1 == tail_cycle); | 3431 | if (cycle - 1 != tail_cycle && |
3427 | ASSERT(space <= BBTOB(tail_blocks)); | 3432 | !(log->l_flags & XLOG_TAIL_WARN)) { |
3433 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, | ||
3434 | "%s: cycle - 1 != tail_cycle", __func__); | ||
3435 | log->l_flags |= XLOG_TAIL_WARN; | ||
3436 | } | ||
3437 | |||
3438 | if (space > BBTOB(tail_blocks) && | ||
3439 | !(log->l_flags & XLOG_TAIL_WARN)) { | ||
3440 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, | ||
3441 | "%s: space > BBTOB(tail_blocks)", __func__); | ||
3442 | log->l_flags |= XLOG_TAIL_WARN; | ||
3443 | } | ||
3428 | } | 3444 | } |
3429 | } | 3445 | } |
3430 | 3446 | ||
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 15dbf1f9c2be..5864850e9e34 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i) | |||
144 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 144 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
145 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 145 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
146 | shutdown */ | 146 | shutdown */ |
147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ | ||
147 | 148 | ||
148 | #ifdef __KERNEL__ | 149 | #ifdef __KERNEL__ |
149 | /* | 150 | /* |
@@ -570,7 +571,7 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | |||
570 | * When we crack an atomic LSN, we sample it first so that the value will not | 571 | * When we crack an atomic LSN, we sample it first so that the value will not |
571 | * change while we are cracking it into the component values. This means we | 572 | * change while we are cracking it into the component values. This means we |
572 | * will always get consistent component values to work from. This should always | 573 | * will always get consistent component values to work from. This should always |
573 | * be used to smaple and crack LSNs taht are stored and updated in atomic | 574 | * be used to sample and crack LSNs that are stored and updated in atomic |
574 | * variables. | 575 | * variables. |
575 | */ | 576 | */ |
576 | static inline void | 577 | static inline void |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0c4a5618e7af..5cc464a17c93 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -101,7 +101,7 @@ xlog_get_bp( | |||
101 | /* | 101 | /* |
102 | * We do log I/O in units of log sectors (a power-of-2 | 102 | * We do log I/O in units of log sectors (a power-of-2 |
103 | * multiple of the basic block size), so we round up the | 103 | * multiple of the basic block size), so we round up the |
104 | * requested size to acommodate the basic blocks required | 104 | * requested size to accommodate the basic blocks required |
105 | * for complete log sectors. | 105 | * for complete log sectors. |
106 | * | 106 | * |
107 | * In addition, the buffer may be used for a non-sector- | 107 | * In addition, the buffer may be used for a non-sector- |
@@ -112,7 +112,7 @@ xlog_get_bp( | |||
112 | * an issue. Nor will this be a problem if the log I/O is | 112 | * an issue. Nor will this be a problem if the log I/O is |
113 | * done in basic blocks (sector size 1). But otherwise we | 113 | * done in basic blocks (sector size 1). But otherwise we |
114 | * extend the buffer by one extra log sector to ensure | 114 | * extend the buffer by one extra log sector to ensure |
115 | * there's space to accomodate this possiblility. | 115 | * there's space to accommodate this possibility. |
116 | */ | 116 | */ |
117 | if (nbblks > 1 && log->l_sectBBsize > 1) | 117 | if (nbblks > 1 && log->l_sectBBsize > 1) |
118 | nbblks += log->l_sectBBsize; | 118 | nbblks += log->l_sectBBsize; |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a62e8971539d..19af0ab0d0c6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -203,12 +203,9 @@ typedef struct xfs_mount { | |||
203 | struct mutex m_icsb_mutex; /* balancer sync lock */ | 203 | struct mutex m_icsb_mutex; /* balancer sync lock */ |
204 | #endif | 204 | #endif |
205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
206 | struct task_struct *m_sync_task; /* generalised sync thread */ | 206 | struct delayed_work m_sync_work; /* background sync work */ |
207 | xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ | 207 | struct delayed_work m_reclaim_work; /* background inode reclaim */ |
208 | struct list_head m_sync_list; /* sync thread work item list */ | 208 | struct work_struct m_flush_work; /* background inode flush */ |
209 | spinlock_t m_sync_lock; /* work item list lock */ | ||
210 | int m_sync_seq; /* sync thread generation no. */ | ||
211 | wait_queue_head_t m_wait_single_sync_task; | ||
212 | __int64_t m_update_flags; /* sb flags we need to update | 209 | __int64_t m_update_flags; /* sb flags we need to update |
213 | on the next remount,rw */ | 210 | on the next remount,rw */ |
214 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 211 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 12aff9584e29..acdb92f14d51 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -28,74 +28,138 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | 30 | ||
31 | STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); | 31 | struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ |
32 | STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); | ||
33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); | ||
34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); | ||
35 | 32 | ||
36 | #ifdef DEBUG | 33 | #ifdef DEBUG |
37 | STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); | 34 | /* |
38 | #else | 35 | * Check that the list is sorted as it should be. |
36 | */ | ||
37 | STATIC void | ||
38 | xfs_ail_check( | ||
39 | struct xfs_ail *ailp, | ||
40 | xfs_log_item_t *lip) | ||
41 | { | ||
42 | xfs_log_item_t *prev_lip; | ||
43 | |||
44 | if (list_empty(&ailp->xa_ail)) | ||
45 | return; | ||
46 | |||
47 | /* | ||
48 | * Check the next and previous entries are valid. | ||
49 | */ | ||
50 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
51 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); | ||
52 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
53 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
54 | |||
55 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); | ||
56 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
57 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | ||
58 | |||
59 | |||
60 | #ifdef XFS_TRANS_DEBUG | ||
61 | /* | ||
62 | * Walk the list checking lsn ordering, and that every entry has the | ||
63 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it | ||
64 | * when specifically debugging the transaction subsystem. | ||
65 | */ | ||
66 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
67 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { | ||
68 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
69 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
70 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
71 | prev_lip = lip; | ||
72 | } | ||
73 | #endif /* XFS_TRANS_DEBUG */ | ||
74 | } | ||
75 | #else /* !DEBUG */ | ||
39 | #define xfs_ail_check(a,l) | 76 | #define xfs_ail_check(a,l) |
40 | #endif /* DEBUG */ | 77 | #endif /* DEBUG */ |
41 | 78 | ||
79 | /* | ||
80 | * Return a pointer to the first item in the AIL. If the AIL is empty, then | ||
81 | * return NULL. | ||
82 | */ | ||
83 | static xfs_log_item_t * | ||
84 | xfs_ail_min( | ||
85 | struct xfs_ail *ailp) | ||
86 | { | ||
87 | if (list_empty(&ailp->xa_ail)) | ||
88 | return NULL; | ||
89 | |||
90 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Return a pointer to the last item in the AIL. If the AIL is empty, then | ||
95 | * return NULL. | ||
96 | */ | ||
97 | static xfs_log_item_t * | ||
98 | xfs_ail_max( | ||
99 | struct xfs_ail *ailp) | ||
100 | { | ||
101 | if (list_empty(&ailp->xa_ail)) | ||
102 | return NULL; | ||
103 | |||
104 | return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Return a pointer to the item which follows the given item in the AIL. If | ||
109 | * the given item is the last item in the list, then return NULL. | ||
110 | */ | ||
111 | static xfs_log_item_t * | ||
112 | xfs_ail_next( | ||
113 | struct xfs_ail *ailp, | ||
114 | xfs_log_item_t *lip) | ||
115 | { | ||
116 | if (lip->li_ail.next == &ailp->xa_ail) | ||
117 | return NULL; | ||
118 | |||
119 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
120 | } | ||
42 | 121 | ||
43 | /* | 122 | /* |
44 | * This is called by the log manager code to determine the LSN | 123 | * This is called by the log manager code to determine the LSN of the tail of |
45 | * of the tail of the log. This is exactly the LSN of the first | 124 | * the log. This is exactly the LSN of the first item in the AIL. If the AIL |
46 | * item in the AIL. If the AIL is empty, then this function | 125 | * is empty, then this function returns 0. |
47 | * returns 0. | ||
48 | * | 126 | * |
49 | * We need the AIL lock in order to get a coherent read of the | 127 | * We need the AIL lock in order to get a coherent read of the lsn of the last |
50 | * lsn of the last item in the AIL. | 128 | * item in the AIL. |
51 | */ | 129 | */ |
52 | xfs_lsn_t | 130 | xfs_lsn_t |
53 | xfs_trans_ail_tail( | 131 | xfs_ail_min_lsn( |
54 | struct xfs_ail *ailp) | 132 | struct xfs_ail *ailp) |
55 | { | 133 | { |
56 | xfs_lsn_t lsn; | 134 | xfs_lsn_t lsn = 0; |
57 | xfs_log_item_t *lip; | 135 | xfs_log_item_t *lip; |
58 | 136 | ||
59 | spin_lock(&ailp->xa_lock); | 137 | spin_lock(&ailp->xa_lock); |
60 | lip = xfs_ail_min(ailp); | 138 | lip = xfs_ail_min(ailp); |
61 | if (lip == NULL) { | 139 | if (lip) |
62 | lsn = (xfs_lsn_t)0; | ||
63 | } else { | ||
64 | lsn = lip->li_lsn; | 140 | lsn = lip->li_lsn; |
65 | } | ||
66 | spin_unlock(&ailp->xa_lock); | 141 | spin_unlock(&ailp->xa_lock); |
67 | 142 | ||
68 | return lsn; | 143 | return lsn; |
69 | } | 144 | } |
70 | 145 | ||
71 | /* | 146 | /* |
72 | * xfs_trans_push_ail | 147 | * Return the maximum lsn held in the AIL, or zero if the AIL is empty. |
73 | * | ||
74 | * This routine is called to move the tail of the AIL forward. It does this by | ||
75 | * trying to flush items in the AIL whose lsns are below the given | ||
76 | * threshold_lsn. | ||
77 | * | ||
78 | * the push is run asynchronously in a separate thread, so we return the tail | ||
79 | * of the log right now instead of the tail after the push. This means we will | ||
80 | * either continue right away, or we will sleep waiting on the async thread to | ||
81 | * do its work. | ||
82 | * | ||
83 | * We do this unlocked - we only need to know whether there is anything in the | ||
84 | * AIL at the time we are called. We don't need to access the contents of | ||
85 | * any of the objects, so the lock is not needed. | ||
86 | */ | 148 | */ |
87 | void | 149 | static xfs_lsn_t |
88 | xfs_trans_ail_push( | 150 | xfs_ail_max_lsn( |
89 | struct xfs_ail *ailp, | 151 | struct xfs_ail *ailp) |
90 | xfs_lsn_t threshold_lsn) | ||
91 | { | 152 | { |
92 | xfs_log_item_t *lip; | 153 | xfs_lsn_t lsn = 0; |
154 | xfs_log_item_t *lip; | ||
93 | 155 | ||
94 | lip = xfs_ail_min(ailp); | 156 | spin_lock(&ailp->xa_lock); |
95 | if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 157 | lip = xfs_ail_max(ailp); |
96 | if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) | 158 | if (lip) |
97 | xfsaild_wakeup(ailp, threshold_lsn); | 159 | lsn = lip->li_lsn; |
98 | } | 160 | spin_unlock(&ailp->xa_lock); |
161 | |||
162 | return lsn; | ||
99 | } | 163 | } |
100 | 164 | ||
101 | /* | 165 | /* |
@@ -236,16 +300,57 @@ out: | |||
236 | } | 300 | } |
237 | 301 | ||
238 | /* | 302 | /* |
239 | * xfsaild_push does the work of pushing on the AIL. Returning a timeout of | 303 | * splice the log item list into the AIL at the given LSN. |
240 | * zero indicates that the caller should sleep until woken. | ||
241 | */ | 304 | */ |
242 | long | 305 | static void |
243 | xfsaild_push( | 306 | xfs_ail_splice( |
244 | struct xfs_ail *ailp, | 307 | struct xfs_ail *ailp, |
245 | xfs_lsn_t *last_lsn) | 308 | struct list_head *list, |
309 | xfs_lsn_t lsn) | ||
246 | { | 310 | { |
247 | long tout = 0; | 311 | xfs_log_item_t *next_lip; |
248 | xfs_lsn_t last_pushed_lsn = *last_lsn; | 312 | |
313 | /* If the list is empty, just insert the item. */ | ||
314 | if (list_empty(&ailp->xa_ail)) { | ||
315 | list_splice(list, &ailp->xa_ail); | ||
316 | return; | ||
317 | } | ||
318 | |||
319 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | ||
320 | if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) | ||
321 | break; | ||
322 | } | ||
323 | |||
324 | ASSERT(&next_lip->li_ail == &ailp->xa_ail || | ||
325 | XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); | ||
326 | |||
327 | list_splice_init(list, &next_lip->li_ail); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * Delete the given item from the AIL. Return a pointer to the item. | ||
332 | */ | ||
333 | static void | ||
334 | xfs_ail_delete( | ||
335 | struct xfs_ail *ailp, | ||
336 | xfs_log_item_t *lip) | ||
337 | { | ||
338 | xfs_ail_check(ailp, lip); | ||
339 | list_del(&lip->li_ail); | ||
340 | xfs_trans_ail_cursor_clear(ailp, lip); | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself | ||
345 | * to run at a later time if there is more work to do to complete the push. | ||
346 | */ | ||
347 | STATIC void | ||
348 | xfs_ail_worker( | ||
349 | struct work_struct *work) | ||
350 | { | ||
351 | struct xfs_ail *ailp = container_of(to_delayed_work(work), | ||
352 | struct xfs_ail, xa_work); | ||
353 | long tout; | ||
249 | xfs_lsn_t target = ailp->xa_target; | 354 | xfs_lsn_t target = ailp->xa_target; |
250 | xfs_lsn_t lsn; | 355 | xfs_lsn_t lsn; |
251 | xfs_log_item_t *lip; | 356 | xfs_log_item_t *lip; |
@@ -256,15 +361,15 @@ xfsaild_push( | |||
256 | 361 | ||
257 | spin_lock(&ailp->xa_lock); | 362 | spin_lock(&ailp->xa_lock); |
258 | xfs_trans_ail_cursor_init(ailp, cur); | 363 | xfs_trans_ail_cursor_init(ailp, cur); |
259 | lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); | 364 | lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); |
260 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { | 365 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { |
261 | /* | 366 | /* |
262 | * AIL is empty or our push has reached the end. | 367 | * AIL is empty or our push has reached the end. |
263 | */ | 368 | */ |
264 | xfs_trans_ail_cursor_done(ailp, cur); | 369 | xfs_trans_ail_cursor_done(ailp, cur); |
265 | spin_unlock(&ailp->xa_lock); | 370 | spin_unlock(&ailp->xa_lock); |
266 | *last_lsn = 0; | 371 | ailp->xa_last_pushed_lsn = 0; |
267 | return tout; | 372 | return; |
268 | } | 373 | } |
269 | 374 | ||
270 | XFS_STATS_INC(xs_push_ail); | 375 | XFS_STATS_INC(xs_push_ail); |
@@ -301,13 +406,13 @@ xfsaild_push( | |||
301 | case XFS_ITEM_SUCCESS: | 406 | case XFS_ITEM_SUCCESS: |
302 | XFS_STATS_INC(xs_push_ail_success); | 407 | XFS_STATS_INC(xs_push_ail_success); |
303 | IOP_PUSH(lip); | 408 | IOP_PUSH(lip); |
304 | last_pushed_lsn = lsn; | 409 | ailp->xa_last_pushed_lsn = lsn; |
305 | break; | 410 | break; |
306 | 411 | ||
307 | case XFS_ITEM_PUSHBUF: | 412 | case XFS_ITEM_PUSHBUF: |
308 | XFS_STATS_INC(xs_push_ail_pushbuf); | 413 | XFS_STATS_INC(xs_push_ail_pushbuf); |
309 | IOP_PUSHBUF(lip); | 414 | IOP_PUSHBUF(lip); |
310 | last_pushed_lsn = lsn; | 415 | ailp->xa_last_pushed_lsn = lsn; |
311 | push_xfsbufd = 1; | 416 | push_xfsbufd = 1; |
312 | break; | 417 | break; |
313 | 418 | ||
@@ -319,7 +424,7 @@ xfsaild_push( | |||
319 | 424 | ||
320 | case XFS_ITEM_LOCKED: | 425 | case XFS_ITEM_LOCKED: |
321 | XFS_STATS_INC(xs_push_ail_locked); | 426 | XFS_STATS_INC(xs_push_ail_locked); |
322 | last_pushed_lsn = lsn; | 427 | ailp->xa_last_pushed_lsn = lsn; |
323 | stuck++; | 428 | stuck++; |
324 | break; | 429 | break; |
325 | 430 | ||
@@ -374,9 +479,23 @@ xfsaild_push( | |||
374 | wake_up_process(mp->m_ddev_targp->bt_task); | 479 | wake_up_process(mp->m_ddev_targp->bt_task); |
375 | } | 480 | } |
376 | 481 | ||
482 | /* assume we have more work to do in a short while */ | ||
483 | tout = 10; | ||
377 | if (!count) { | 484 | if (!count) { |
378 | /* We're past our target or empty, so idle */ | 485 | /* We're past our target or empty, so idle */ |
379 | last_pushed_lsn = 0; | 486 | ailp->xa_last_pushed_lsn = 0; |
487 | |||
488 | /* | ||
489 | * Check for an updated push target before clearing the | ||
490 | * XFS_AIL_PUSHING_BIT. If the target changed, we've got more | ||
491 | * work to do. Wait a bit longer before starting that work. | ||
492 | */ | ||
493 | smp_rmb(); | ||
494 | if (ailp->xa_target == target) { | ||
495 | clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); | ||
496 | return; | ||
497 | } | ||
498 | tout = 50; | ||
380 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | 499 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { |
381 | /* | 500 | /* |
382 | * We reached the target so wait a bit longer for I/O to | 501 | * We reached the target so wait a bit longer for I/O to |
@@ -384,7 +503,7 @@ xfsaild_push( | |||
384 | * start the next scan from the start of the AIL. | 503 | * start the next scan from the start of the AIL. |
385 | */ | 504 | */ |
386 | tout = 50; | 505 | tout = 50; |
387 | last_pushed_lsn = 0; | 506 | ailp->xa_last_pushed_lsn = 0; |
388 | } else if ((stuck * 100) / count > 90) { | 507 | } else if ((stuck * 100) / count > 90) { |
389 | /* | 508 | /* |
390 | * Either there is a lot of contention on the AIL or we | 509 | * Either there is a lot of contention on the AIL or we |
@@ -396,14 +515,61 @@ xfsaild_push( | |||
396 | * continuing from where we were. | 515 | * continuing from where we were. |
397 | */ | 516 | */ |
398 | tout = 20; | 517 | tout = 20; |
399 | } else { | ||
400 | /* more to do, but wait a short while before continuing */ | ||
401 | tout = 10; | ||
402 | } | 518 | } |
403 | *last_lsn = last_pushed_lsn; | 519 | |
404 | return tout; | 520 | /* There is more to do, requeue us. */ |
521 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, | ||
522 | msecs_to_jiffies(tout)); | ||
523 | } | ||
524 | |||
525 | /* | ||
526 | * This routine is called to move the tail of the AIL forward. It does this by | ||
527 | * trying to flush items in the AIL whose lsns are below the given | ||
528 | * threshold_lsn. | ||
529 | * | ||
530 | * The push is run asynchronously in a workqueue, which means the caller needs | ||
531 | * to handle waiting on the async flush for space to become available. | ||
532 | * We don't want to interrupt any push that is in progress, hence we only queue | ||
533 | * work if we set the pushing bit approriately. | ||
534 | * | ||
535 | * We do this unlocked - we only need to know whether there is anything in the | ||
536 | * AIL at the time we are called. We don't need to access the contents of | ||
537 | * any of the objects, so the lock is not needed. | ||
538 | */ | ||
539 | void | ||
540 | xfs_ail_push( | ||
541 | struct xfs_ail *ailp, | ||
542 | xfs_lsn_t threshold_lsn) | ||
543 | { | ||
544 | xfs_log_item_t *lip; | ||
545 | |||
546 | lip = xfs_ail_min(ailp); | ||
547 | if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || | ||
548 | XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) | ||
549 | return; | ||
550 | |||
551 | /* | ||
552 | * Ensure that the new target is noticed in push code before it clears | ||
553 | * the XFS_AIL_PUSHING_BIT. | ||
554 | */ | ||
555 | smp_wmb(); | ||
556 | ailp->xa_target = threshold_lsn; | ||
557 | if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | ||
558 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); | ||
405 | } | 559 | } |
406 | 560 | ||
561 | /* | ||
562 | * Push out all items in the AIL immediately | ||
563 | */ | ||
564 | void | ||
565 | xfs_ail_push_all( | ||
566 | struct xfs_ail *ailp) | ||
567 | { | ||
568 | xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); | ||
569 | |||
570 | if (threshold_lsn) | ||
571 | xfs_ail_push(ailp, threshold_lsn); | ||
572 | } | ||
407 | 573 | ||
408 | /* | 574 | /* |
409 | * This is to be called when an item is unlocked that may have | 575 | * This is to be called when an item is unlocked that may have |
@@ -615,7 +781,6 @@ xfs_trans_ail_init( | |||
615 | xfs_mount_t *mp) | 781 | xfs_mount_t *mp) |
616 | { | 782 | { |
617 | struct xfs_ail *ailp; | 783 | struct xfs_ail *ailp; |
618 | int error; | ||
619 | 784 | ||
620 | ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); | 785 | ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); |
621 | if (!ailp) | 786 | if (!ailp) |
@@ -624,15 +789,9 @@ xfs_trans_ail_init( | |||
624 | ailp->xa_mount = mp; | 789 | ailp->xa_mount = mp; |
625 | INIT_LIST_HEAD(&ailp->xa_ail); | 790 | INIT_LIST_HEAD(&ailp->xa_ail); |
626 | spin_lock_init(&ailp->xa_lock); | 791 | spin_lock_init(&ailp->xa_lock); |
627 | error = xfsaild_start(ailp); | 792 | INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); |
628 | if (error) | ||
629 | goto out_free_ailp; | ||
630 | mp->m_ail = ailp; | 793 | mp->m_ail = ailp; |
631 | return 0; | 794 | return 0; |
632 | |||
633 | out_free_ailp: | ||
634 | kmem_free(ailp); | ||
635 | return error; | ||
636 | } | 795 | } |
637 | 796 | ||
638 | void | 797 | void |
@@ -641,124 +800,6 @@ xfs_trans_ail_destroy( | |||
641 | { | 800 | { |
642 | struct xfs_ail *ailp = mp->m_ail; | 801 | struct xfs_ail *ailp = mp->m_ail; |
643 | 802 | ||
644 | xfsaild_stop(ailp); | 803 | cancel_delayed_work_sync(&ailp->xa_work); |
645 | kmem_free(ailp); | 804 | kmem_free(ailp); |
646 | } | 805 | } |
647 | |||
648 | /* | ||
649 | * splice the log item list into the AIL at the given LSN. | ||
650 | */ | ||
651 | STATIC void | ||
652 | xfs_ail_splice( | ||
653 | struct xfs_ail *ailp, | ||
654 | struct list_head *list, | ||
655 | xfs_lsn_t lsn) | ||
656 | { | ||
657 | xfs_log_item_t *next_lip; | ||
658 | |||
659 | /* | ||
660 | * If the list is empty, just insert the item. | ||
661 | */ | ||
662 | if (list_empty(&ailp->xa_ail)) { | ||
663 | list_splice(list, &ailp->xa_ail); | ||
664 | return; | ||
665 | } | ||
666 | |||
667 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | ||
668 | if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) | ||
669 | break; | ||
670 | } | ||
671 | |||
672 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | ||
673 | (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); | ||
674 | |||
675 | list_splice_init(list, &next_lip->li_ail); | ||
676 | return; | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * Delete the given item from the AIL. Return a pointer to the item. | ||
681 | */ | ||
682 | STATIC void | ||
683 | xfs_ail_delete( | ||
684 | struct xfs_ail *ailp, | ||
685 | xfs_log_item_t *lip) | ||
686 | { | ||
687 | xfs_ail_check(ailp, lip); | ||
688 | list_del(&lip->li_ail); | ||
689 | xfs_trans_ail_cursor_clear(ailp, lip); | ||
690 | } | ||
691 | |||
692 | /* | ||
693 | * Return a pointer to the first item in the AIL. | ||
694 | * If the AIL is empty, then return NULL. | ||
695 | */ | ||
696 | STATIC xfs_log_item_t * | ||
697 | xfs_ail_min( | ||
698 | struct xfs_ail *ailp) | ||
699 | { | ||
700 | if (list_empty(&ailp->xa_ail)) | ||
701 | return NULL; | ||
702 | |||
703 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * Return a pointer to the item which follows | ||
708 | * the given item in the AIL. If the given item | ||
709 | * is the last item in the list, then return NULL. | ||
710 | */ | ||
711 | STATIC xfs_log_item_t * | ||
712 | xfs_ail_next( | ||
713 | struct xfs_ail *ailp, | ||
714 | xfs_log_item_t *lip) | ||
715 | { | ||
716 | if (lip->li_ail.next == &ailp->xa_ail) | ||
717 | return NULL; | ||
718 | |||
719 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
720 | } | ||
721 | |||
722 | #ifdef DEBUG | ||
723 | /* | ||
724 | * Check that the list is sorted as it should be. | ||
725 | */ | ||
726 | STATIC void | ||
727 | xfs_ail_check( | ||
728 | struct xfs_ail *ailp, | ||
729 | xfs_log_item_t *lip) | ||
730 | { | ||
731 | xfs_log_item_t *prev_lip; | ||
732 | |||
733 | if (list_empty(&ailp->xa_ail)) | ||
734 | return; | ||
735 | |||
736 | /* | ||
737 | * Check the next and previous entries are valid. | ||
738 | */ | ||
739 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
740 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); | ||
741 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
742 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
743 | |||
744 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); | ||
745 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
746 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | ||
747 | |||
748 | |||
749 | #ifdef XFS_TRANS_DEBUG | ||
750 | /* | ||
751 | * Walk the list checking lsn ordering, and that every entry has the | ||
752 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it | ||
753 | * when specifically debugging the transaction subsystem. | ||
754 | */ | ||
755 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
756 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { | ||
757 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
758 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
759 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
760 | prev_lip = lip; | ||
761 | } | ||
762 | #endif /* XFS_TRANS_DEBUG */ | ||
763 | } | ||
764 | #endif /* DEBUG */ | ||
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 16084d8ea231..048b0c689d3e 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -81,7 +81,7 @@ xfs_trans_ijoin( | |||
81 | * | 81 | * |
82 | * | 82 | * |
83 | * Grabs a reference to the inode which will be dropped when the transaction | 83 | * Grabs a reference to the inode which will be dropped when the transaction |
84 | * is commited. The inode will also be unlocked at that point. The inode | 84 | * is committed. The inode will also be unlocked at that point. The inode |
85 | * must be locked, and it cannot be associated with any transaction. | 85 | * must be locked, and it cannot be associated with any transaction. |
86 | */ | 86 | */ |
87 | void | 87 | void |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 35162c238fa3..6b164e9e9a1f 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -65,16 +65,22 @@ struct xfs_ail_cursor { | |||
65 | struct xfs_ail { | 65 | struct xfs_ail { |
66 | struct xfs_mount *xa_mount; | 66 | struct xfs_mount *xa_mount; |
67 | struct list_head xa_ail; | 67 | struct list_head xa_ail; |
68 | uint xa_gen; | ||
69 | struct task_struct *xa_task; | ||
70 | xfs_lsn_t xa_target; | 68 | xfs_lsn_t xa_target; |
71 | struct xfs_ail_cursor xa_cursors; | 69 | struct xfs_ail_cursor xa_cursors; |
72 | spinlock_t xa_lock; | 70 | spinlock_t xa_lock; |
71 | struct delayed_work xa_work; | ||
72 | xfs_lsn_t xa_last_pushed_lsn; | ||
73 | unsigned long xa_flags; | ||
73 | }; | 74 | }; |
74 | 75 | ||
76 | #define XFS_AIL_PUSHING_BIT 0 | ||
77 | |||
75 | /* | 78 | /* |
76 | * From xfs_trans_ail.c | 79 | * From xfs_trans_ail.c |
77 | */ | 80 | */ |
81 | |||
82 | extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ | ||
83 | |||
78 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, | 84 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
79 | struct xfs_log_item **log_items, int nr_items, | 85 | struct xfs_log_item **log_items, int nr_items, |
80 | xfs_lsn_t lsn) __releases(ailp->xa_lock); | 86 | xfs_lsn_t lsn) __releases(ailp->xa_lock); |
@@ -98,12 +104,13 @@ xfs_trans_ail_delete( | |||
98 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | 104 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); |
99 | } | 105 | } |
100 | 106 | ||
101 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); | 107 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); |
108 | void xfs_ail_push_all(struct xfs_ail *); | ||
109 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); | ||
110 | |||
102 | void xfs_trans_unlocked_item(struct xfs_ail *, | 111 | void xfs_trans_unlocked_item(struct xfs_ail *, |
103 | xfs_log_item_t *); | 112 | xfs_log_item_t *); |
104 | 113 | ||
105 | xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); | ||
106 | |||
107 | struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, | 114 | struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, |
108 | struct xfs_ail_cursor *cur, | 115 | struct xfs_ail_cursor *cur, |
109 | xfs_lsn_t lsn); | 116 | xfs_lsn_t lsn); |
@@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, | |||
112 | void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, | 119 | void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, |
113 | struct xfs_ail_cursor *cur); | 120 | struct xfs_ail_cursor *cur); |
114 | 121 | ||
115 | long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); | ||
116 | void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); | ||
117 | int xfsaild_start(struct xfs_ail *); | ||
118 | void xfsaild_stop(struct xfs_ail *); | ||
119 | |||
120 | #if BITS_PER_LONG != 64 | 122 | #if BITS_PER_LONG != 64 |
121 | static inline void | 123 | static inline void |
122 | xfs_trans_ail_copy_lsn( | 124 | xfs_trans_ail_copy_lsn( |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c48b4217ec47..b7a5fe7c52c8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -953,7 +953,7 @@ xfs_release( | |||
953 | * If we previously truncated this file and removed old data | 953 | * If we previously truncated this file and removed old data |
954 | * in the process, we want to initiate "early" writeout on | 954 | * in the process, we want to initiate "early" writeout on |
955 | * the last close. This is an attempt to combat the notorious | 955 | * the last close. This is an attempt to combat the notorious |
956 | * NULL files problem which is particularly noticable from a | 956 | * NULL files problem which is particularly noticeable from a |
957 | * truncate down, buffered (re-)write (delalloc), followed by | 957 | * truncate down, buffered (re-)write (delalloc), followed by |
958 | * a crash. What we are effectively doing here is | 958 | * a crash. What we are effectively doing here is |
959 | * significantly reducing the time window where we'd otherwise | 959 | * significantly reducing the time window where we'd otherwise |
@@ -982,7 +982,7 @@ xfs_release( | |||
982 | * | 982 | * |
983 | * Further, check if the inode is being opened, written and | 983 | * Further, check if the inode is being opened, written and |
984 | * closed frequently and we have delayed allocation blocks | 984 | * closed frequently and we have delayed allocation blocks |
985 | * oustanding (e.g. streaming writes from the NFS server), | 985 | * outstanding (e.g. streaming writes from the NFS server), |
986 | * truncating the blocks past EOF will cause fragmentation to | 986 | * truncating the blocks past EOF will cause fragmentation to |
987 | * occur. | 987 | * occur. |
988 | * | 988 | * |