diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-23 18:19:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-23 18:19:16 -0400 |
commit | a77febbef105554c5a37241cf903f48ab7bc03c7 (patch) | |
tree | d6f96d9d5837312ce6dc2db9f3fc93a243eec87b /fs | |
parent | 42cd71bf1e3a081b3150018bbf448cb6c8a844a5 (diff) | |
parent | bf59170a66bc3eaf3ee513aa6ce9774aa2ab5188 (diff) |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: obey minleft values during extent allocation correctly
xfs: reset buffer pointers before freeing them
xfs: avoid getting stuck during async inode flushes
xfs: fix xfs_itruncate_start tracing
xfs: fix duplicate workqueue initialisation
xfs: kill off xfs_printk()
xfs: fix race condition in AIL push trigger
xfs: make AIL target updates and compares 32bit safe.
xfs: always push the AIL to the target
xfs: exit AIL push work correctly when AIL is empty
xfs: ensure reclaim cursor is reset correctly at end of AG
xfs: add an x86 compat handler for XFS_IOC_ZERO_RANGE
xfs: fix compiler warning in xfs_trace.h
xfs: cleanup duplicate initializations
xfs: reduce the number of pagb_lock roundtrips in xfs_alloc_clear_busy
xfs: exact busy extent tracking
xfs: do not immediately reuse busy extent ranges
xfs: optimize AGFL refills
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 22 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl32.c | 3 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl32.h | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_linux.h | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_message.c | 20 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_message.h | 7 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 4 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 76 | ||||
-rw-r--r-- | fs/xfs/xfs_ag.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.c | 844 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc_btree.c | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_dfrag.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_log.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 75 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_types.h | 2 |
25 files changed, 721 insertions, 417 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 9ef9ed2cfe2e..52b2b5da566e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/migrate.h> | 33 | #include <linux/migrate.h> |
34 | #include <linux/backing-dev.h> | 34 | #include <linux/backing-dev.h> |
35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
36 | #include <linux/list_sort.h> | ||
37 | 36 | ||
38 | #include "xfs_sb.h" | 37 | #include "xfs_sb.h" |
39 | #include "xfs_inum.h" | 38 | #include "xfs_inum.h" |
@@ -709,6 +708,27 @@ xfs_buf_get_empty( | |||
709 | return bp; | 708 | return bp; |
710 | } | 709 | } |
711 | 710 | ||
711 | /* | ||
712 | * Return a buffer allocated as an empty buffer and associated to external | ||
713 | * memory via xfs_buf_associate_memory() back to it's empty state. | ||
714 | */ | ||
715 | void | ||
716 | xfs_buf_set_empty( | ||
717 | struct xfs_buf *bp, | ||
718 | size_t len) | ||
719 | { | ||
720 | if (bp->b_pages) | ||
721 | _xfs_buf_free_pages(bp); | ||
722 | |||
723 | bp->b_pages = NULL; | ||
724 | bp->b_page_count = 0; | ||
725 | bp->b_addr = NULL; | ||
726 | bp->b_file_offset = 0; | ||
727 | bp->b_buffer_length = bp->b_count_desired = len; | ||
728 | bp->b_bn = XFS_BUF_DADDR_NULL; | ||
729 | bp->b_flags &= ~XBF_MAPPED; | ||
730 | } | ||
731 | |||
712 | static inline struct page * | 732 | static inline struct page * |
713 | mem_to_page( | 733 | mem_to_page( |
714 | void *addr) | 734 | void *addr) |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a9a1c4512645..50a7d5fb3b73 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | |||
178 | xfs_buf_flags_t); | 178 | xfs_buf_flags_t); |
179 | 179 | ||
180 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 180 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); |
181 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); | ||
181 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | 182 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); |
182 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 183 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
183 | extern void xfs_buf_hold(xfs_buf_t *); | 184 | extern void xfs_buf_hold(xfs_buf_t *); |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b3486dfa5520..54e623bfbb85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl( | |||
586 | case XFS_IOC_RESVSP_32: | 586 | case XFS_IOC_RESVSP_32: |
587 | case XFS_IOC_UNRESVSP_32: | 587 | case XFS_IOC_UNRESVSP_32: |
588 | case XFS_IOC_RESVSP64_32: | 588 | case XFS_IOC_RESVSP64_32: |
589 | case XFS_IOC_UNRESVSP64_32: { | 589 | case XFS_IOC_UNRESVSP64_32: |
590 | case XFS_IOC_ZERO_RANGE_32: { | ||
590 | struct xfs_flock64 bf; | 591 | struct xfs_flock64 bf; |
591 | 592 | ||
592 | if (xfs_compat_flock64_copyin(&bf, arg)) | 593 | if (xfs_compat_flock64_copyin(&bf, arg)) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 08b605792a99..80f4060e8970 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h | |||
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 { | |||
184 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) | 184 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) |
185 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) | 185 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) |
186 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) | 186 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) |
187 | #define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) | ||
187 | 188 | ||
188 | typedef struct compat_xfs_fsop_geom_v1 { | 189 | typedef struct compat_xfs_fsop_geom_v1 { |
189 | __u32 blocksize; /* filesystem (data) block size */ | 190 | __u32 blocksize; /* filesystem (data) block size */ |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 244be9cbfe78..8633521b3b2e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -70,6 +70,7 @@ | |||
70 | #include <linux/ctype.h> | 70 | #include <linux/ctype.h> |
71 | #include <linux/writeback.h> | 71 | #include <linux/writeback.h> |
72 | #include <linux/capability.h> | 72 | #include <linux/capability.h> |
73 | #include <linux/list_sort.h> | ||
73 | 74 | ||
74 | #include <asm/page.h> | 75 | #include <asm/page.h> |
75 | #include <asm/div64.h> | 76 | #include <asm/div64.h> |
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c index 9f76cceb678d..bd672def95ac 100644 --- a/fs/xfs/linux-2.6/xfs_message.c +++ b/fs/xfs/linux-2.6/xfs_message.c | |||
@@ -41,23 +41,6 @@ __xfs_printk( | |||
41 | printk("%sXFS: %pV\n", level, vaf); | 41 | printk("%sXFS: %pV\n", level, vaf); |
42 | } | 42 | } |
43 | 43 | ||
44 | void xfs_printk( | ||
45 | const char *level, | ||
46 | const struct xfs_mount *mp, | ||
47 | const char *fmt, ...) | ||
48 | { | ||
49 | struct va_format vaf; | ||
50 | va_list args; | ||
51 | |||
52 | va_start(args, fmt); | ||
53 | |||
54 | vaf.fmt = fmt; | ||
55 | vaf.va = &args; | ||
56 | |||
57 | __xfs_printk(level, mp, &vaf); | ||
58 | va_end(args); | ||
59 | } | ||
60 | |||
61 | #define define_xfs_printk_level(func, kern_level) \ | 44 | #define define_xfs_printk_level(func, kern_level) \ |
62 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ | 45 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ |
63 | { \ | 46 | { \ |
@@ -95,8 +78,7 @@ xfs_alert_tag( | |||
95 | int do_panic = 0; | 78 | int do_panic = 0; |
96 | 79 | ||
97 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { | 80 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { |
98 | xfs_printk(KERN_ALERT, mp, | 81 | xfs_alert(mp, "Transforming an alert into a BUG."); |
99 | "XFS: Transforming an alert into a BUG."); | ||
100 | do_panic = 1; | 82 | do_panic = 1; |
101 | } | 83 | } |
102 | 84 | ||
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h index f1b3fc1b6c4e..7fb7ea007672 100644 --- a/fs/xfs/linux-2.6/xfs_message.h +++ b/fs/xfs/linux-2.6/xfs_message.h | |||
@@ -3,9 +3,6 @@ | |||
3 | 3 | ||
4 | struct xfs_mount; | 4 | struct xfs_mount; |
5 | 5 | ||
6 | extern void xfs_printk(const char *level, const struct xfs_mount *mp, | ||
7 | const char *fmt, ...) | ||
8 | __attribute__ ((format (printf, 3, 4))); | ||
9 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) | 6 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) |
10 | __attribute__ ((format (printf, 2, 3))); | 7 | __attribute__ ((format (printf, 2, 3))); |
11 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) | 8 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) |
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) | |||
28 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | 25 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) |
29 | __attribute__ ((format (printf, 2, 3))); | 26 | __attribute__ ((format (printf, 2, 3))); |
30 | #else | 27 | #else |
31 | static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | 28 | static inline void |
29 | __attribute__ ((format (printf, 2, 3))) | ||
30 | xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
32 | { | 31 | { |
33 | } | 32 | } |
34 | #endif | 33 | #endif |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b38e58d02299..b0aa59e51fd0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1787,10 +1787,6 @@ init_xfs_fs(void) | |||
1787 | if (error) | 1787 | if (error) |
1788 | goto out_cleanup_procfs; | 1788 | goto out_cleanup_procfs; |
1789 | 1789 | ||
1790 | error = xfs_init_workqueues(); | ||
1791 | if (error) | ||
1792 | goto out_sysctl_unregister; | ||
1793 | |||
1794 | vfs_initquota(); | 1790 | vfs_initquota(); |
1795 | 1791 | ||
1796 | error = register_filesystem(&xfs_fs_type); | 1792 | error = register_filesystem(&xfs_fs_type); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3e898a48122d..cb1bb2080e44 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -267,6 +267,16 @@ xfs_sync_inode_attr( | |||
267 | 267 | ||
268 | error = xfs_iflush(ip, flags); | 268 | error = xfs_iflush(ip, flags); |
269 | 269 | ||
270 | /* | ||
271 | * We don't want to try again on non-blocking flushes that can't run | ||
272 | * again immediately. If an inode really must be written, then that's | ||
273 | * what the SYNC_WAIT flag is for. | ||
274 | */ | ||
275 | if (error == EAGAIN) { | ||
276 | ASSERT(!(flags & SYNC_WAIT)); | ||
277 | error = 0; | ||
278 | } | ||
279 | |||
270 | out_unlock: | 280 | out_unlock: |
271 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 281 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
272 | return error; | 282 | return error; |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 2d0bcb479075..d48b7a579ae1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap, | |||
1151 | 1151 | ||
1152 | ); | 1152 | ); |
1153 | 1153 | ||
1154 | #define XFS_BUSY_SYNC \ | 1154 | DECLARE_EVENT_CLASS(xfs_busy_class, |
1155 | { 0, "async" }, \ | ||
1156 | { 1, "sync" } | ||
1157 | |||
1158 | TRACE_EVENT(xfs_alloc_busy, | ||
1159 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, | ||
1160 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), | ||
1161 | TP_ARGS(trans, agno, agbno, len, sync), | ||
1162 | TP_STRUCT__entry( | ||
1163 | __field(dev_t, dev) | ||
1164 | __field(struct xfs_trans *, tp) | ||
1165 | __field(int, tid) | ||
1166 | __field(xfs_agnumber_t, agno) | ||
1167 | __field(xfs_agblock_t, agbno) | ||
1168 | __field(xfs_extlen_t, len) | ||
1169 | __field(int, sync) | ||
1170 | ), | ||
1171 | TP_fast_assign( | ||
1172 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1173 | __entry->tp = trans; | ||
1174 | __entry->tid = trans->t_ticket->t_tid; | ||
1175 | __entry->agno = agno; | ||
1176 | __entry->agbno = agbno; | ||
1177 | __entry->len = len; | ||
1178 | __entry->sync = sync; | ||
1179 | ), | ||
1180 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", | ||
1181 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1182 | __entry->tp, | ||
1183 | __entry->tid, | ||
1184 | __entry->agno, | ||
1185 | __entry->agbno, | ||
1186 | __entry->len, | ||
1187 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) | ||
1188 | |||
1189 | ); | ||
1190 | |||
1191 | TRACE_EVENT(xfs_alloc_unbusy, | ||
1192 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1155 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1193 | xfs_agblock_t agbno, xfs_extlen_t len), | 1156 | xfs_agblock_t agbno, xfs_extlen_t len), |
1194 | TP_ARGS(mp, agno, agbno, len), | 1157 | TP_ARGS(mp, agno, agbno, len), |
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy, | |||
1210 | __entry->agbno, | 1173 | __entry->agbno, |
1211 | __entry->len) | 1174 | __entry->len) |
1212 | ); | 1175 | ); |
1176 | #define DEFINE_BUSY_EVENT(name) \ | ||
1177 | DEFINE_EVENT(xfs_busy_class, name, \ | ||
1178 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
1179 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
1180 | TP_ARGS(mp, agno, agbno, len)) | ||
1181 | DEFINE_BUSY_EVENT(xfs_alloc_busy); | ||
1182 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); | ||
1183 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); | ||
1184 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); | ||
1185 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); | ||
1213 | 1186 | ||
1214 | #define XFS_BUSY_STATES \ | 1187 | TRACE_EVENT(xfs_alloc_busy_trim, |
1215 | { 0, "missing" }, \ | ||
1216 | { 1, "found" } | ||
1217 | |||
1218 | TRACE_EVENT(xfs_alloc_busysearch, | ||
1219 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1188 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1220 | xfs_agblock_t agbno, xfs_extlen_t len, int found), | 1189 | xfs_agblock_t agbno, xfs_extlen_t len, |
1221 | TP_ARGS(mp, agno, agbno, len, found), | 1190 | xfs_agblock_t tbno, xfs_extlen_t tlen), |
1191 | TP_ARGS(mp, agno, agbno, len, tbno, tlen), | ||
1222 | TP_STRUCT__entry( | 1192 | TP_STRUCT__entry( |
1223 | __field(dev_t, dev) | 1193 | __field(dev_t, dev) |
1224 | __field(xfs_agnumber_t, agno) | 1194 | __field(xfs_agnumber_t, agno) |
1225 | __field(xfs_agblock_t, agbno) | 1195 | __field(xfs_agblock_t, agbno) |
1226 | __field(xfs_extlen_t, len) | 1196 | __field(xfs_extlen_t, len) |
1227 | __field(int, found) | 1197 | __field(xfs_agblock_t, tbno) |
1198 | __field(xfs_extlen_t, tlen) | ||
1228 | ), | 1199 | ), |
1229 | TP_fast_assign( | 1200 | TP_fast_assign( |
1230 | __entry->dev = mp->m_super->s_dev; | 1201 | __entry->dev = mp->m_super->s_dev; |
1231 | __entry->agno = agno; | 1202 | __entry->agno = agno; |
1232 | __entry->agbno = agbno; | 1203 | __entry->agbno = agbno; |
1233 | __entry->len = len; | 1204 | __entry->len = len; |
1234 | __entry->found = found; | 1205 | __entry->tbno = tbno; |
1206 | __entry->tlen = tlen; | ||
1235 | ), | 1207 | ), |
1236 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", | 1208 | TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", |
1237 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1209 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1238 | __entry->agno, | 1210 | __entry->agno, |
1239 | __entry->agbno, | 1211 | __entry->agbno, |
1240 | __entry->len, | 1212 | __entry->len, |
1241 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1213 | __entry->tbno, |
1214 | __entry->tlen) | ||
1242 | ); | 1215 | ); |
1243 | 1216 | ||
1244 | TRACE_EVENT(xfs_trans_commit_lsn, | 1217 | TRACE_EVENT(xfs_trans_commit_lsn, |
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
1418 | __entry->wasfromfl, | 1391 | __entry->wasfromfl, |
1419 | __entry->isfl, | 1392 | __entry->isfl, |
1420 | __entry->userdata, | 1393 | __entry->userdata, |
1421 | __entry->firstblock) | 1394 | (unsigned long long)__entry->firstblock) |
1422 | ) | 1395 | ) |
1423 | 1396 | ||
1424 | #define DEFINE_ALLOC_EVENT(name) \ | 1397 | #define DEFINE_ALLOC_EVENT(name) \ |
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | |||
1433 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); | 1406 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); |
1434 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); | 1407 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); |
1435 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); | 1408 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); |
1409 | DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); | ||
1410 | DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); | ||
1436 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); | 1411 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); |
1437 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); | 1412 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); |
1438 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); | 1413 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); |
1439 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); | 1414 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); |
1440 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); | 1415 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); |
1416 | DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); | ||
1441 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); | 1417 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); |
1442 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); | 1418 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); |
1443 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); | 1419 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 58632cc17f2d..da0a561ffba2 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -187,7 +187,6 @@ struct xfs_busy_extent { | |||
187 | xfs_agnumber_t agno; | 187 | xfs_agnumber_t agno; |
188 | xfs_agblock_t bno; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | 189 | xfs_extlen_t length; |
190 | xlog_tid_t tid; /* transaction that created this */ | ||
191 | }; | 190 | }; |
192 | 191 | ||
193 | /* | 192 | /* |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 27d64d752eab..acdced86413c 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -41,19 +41,13 @@ | |||
41 | #define XFSA_FIXUP_BNO_OK 1 | 41 | #define XFSA_FIXUP_BNO_OK 1 |
42 | #define XFSA_FIXUP_CNT_OK 2 | 42 | #define XFSA_FIXUP_CNT_OK 2 |
43 | 43 | ||
44 | /* | ||
45 | * Prototypes for per-ag allocation routines | ||
46 | */ | ||
47 | |||
48 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); | 44 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); |
49 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); | 45 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); |
50 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); | 46 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); |
51 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | 47 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, |
52 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); | 48 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); |
53 | 49 | STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, | |
54 | /* | 50 | xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); |
55 | * Internal functions. | ||
56 | */ | ||
57 | 51 | ||
58 | /* | 52 | /* |
59 | * Lookup the record equal to [bno, len] in the btree given by cur. | 53 | * Lookup the record equal to [bno, len] in the btree given by cur. |
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned( | |||
154 | xfs_extlen_t *reslen) /* result length */ | 148 | xfs_extlen_t *reslen) /* result length */ |
155 | { | 149 | { |
156 | xfs_agblock_t bno; | 150 | xfs_agblock_t bno; |
157 | xfs_extlen_t diff; | ||
158 | xfs_extlen_t len; | 151 | xfs_extlen_t len; |
159 | 152 | ||
160 | if (args->alignment > 1 && foundlen >= args->minlen) { | 153 | /* Trim busy sections out of found extent */ |
161 | bno = roundup(foundbno, args->alignment); | 154 | xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); |
162 | diff = bno - foundbno; | 155 | |
163 | len = diff >= foundlen ? 0 : foundlen - diff; | 156 | if (args->alignment > 1 && len >= args->minlen) { |
157 | xfs_agblock_t aligned_bno = roundup(bno, args->alignment); | ||
158 | xfs_extlen_t diff = aligned_bno - bno; | ||
159 | |||
160 | *resbno = aligned_bno; | ||
161 | *reslen = diff >= len ? 0 : len - diff; | ||
164 | } else { | 162 | } else { |
165 | bno = foundbno; | 163 | *resbno = bno; |
166 | len = foundlen; | 164 | *reslen = len; |
167 | } | 165 | } |
168 | *resbno = bno; | ||
169 | *reslen = len; | ||
170 | } | 166 | } |
171 | 167 | ||
172 | /* | 168 | /* |
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft( | |||
280 | return 1; | 276 | return 1; |
281 | agf = XFS_BUF_TO_AGF(args->agbp); | 277 | agf = XFS_BUF_TO_AGF(args->agbp); |
282 | diff = be32_to_cpu(agf->agf_freeblks) | 278 | diff = be32_to_cpu(agf->agf_freeblks) |
283 | + be32_to_cpu(agf->agf_flcount) | ||
284 | - args->len - args->minleft; | 279 | - args->len - args->minleft; |
285 | if (diff >= 0) | 280 | if (diff >= 0) |
286 | return 1; | 281 | return 1; |
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent( | |||
541 | if (error) | 536 | if (error) |
542 | return error; | 537 | return error; |
543 | 538 | ||
544 | /* | 539 | ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, |
545 | * Search the busylist for these blocks and mark the | 540 | args->agbno, args->len)); |
546 | * transaction as synchronous if blocks are found. This | ||
547 | * avoids the need to block due to a synchronous log | ||
548 | * force to ensure correct ordering as the synchronous | ||
549 | * transaction will guarantee that for us. | ||
550 | */ | ||
551 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
552 | args->agbno, args->len)) | ||
553 | xfs_trans_set_sync(args->tp); | ||
554 | } | 541 | } |
555 | 542 | ||
556 | if (!args->isfl) { | 543 | if (!args->isfl) { |
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact( | |||
577 | { | 564 | { |
578 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ | 565 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ |
579 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ | 566 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ |
580 | xfs_agblock_t end; /* end of allocated extent */ | ||
581 | int error; | 567 | int error; |
582 | xfs_agblock_t fbno; /* start block of found extent */ | 568 | xfs_agblock_t fbno; /* start block of found extent */ |
583 | xfs_agblock_t fend; /* end block of found extent */ | ||
584 | xfs_extlen_t flen; /* length of found extent */ | 569 | xfs_extlen_t flen; /* length of found extent */ |
570 | xfs_agblock_t tbno; /* start block of trimmed extent */ | ||
571 | xfs_extlen_t tlen; /* length of trimmed extent */ | ||
572 | xfs_agblock_t tend; /* end block of trimmed extent */ | ||
573 | xfs_agblock_t end; /* end of allocated extent */ | ||
585 | int i; /* success/failure of operation */ | 574 | int i; /* success/failure of operation */ |
586 | xfs_agblock_t maxend; /* end of maximal extent */ | ||
587 | xfs_agblock_t minend; /* end of minimal extent */ | ||
588 | xfs_extlen_t rlen; /* length of returned extent */ | 575 | xfs_extlen_t rlen; /* length of returned extent */ |
589 | 576 | ||
590 | ASSERT(args->alignment == 1); | 577 | ASSERT(args->alignment == 1); |
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact( | |||
614 | goto error0; | 601 | goto error0; |
615 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 602 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
616 | ASSERT(fbno <= args->agbno); | 603 | ASSERT(fbno <= args->agbno); |
617 | minend = args->agbno + args->minlen; | ||
618 | maxend = args->agbno + args->maxlen; | ||
619 | fend = fbno + flen; | ||
620 | 604 | ||
621 | /* | 605 | /* |
622 | * Give up if the freespace isn't long enough for the minimum request. | 606 | * Check for overlapping busy extents. |
607 | */ | ||
608 | xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); | ||
609 | |||
610 | /* | ||
611 | * Give up if the start of the extent is busy, or the freespace isn't | ||
612 | * long enough for the minimum request. | ||
623 | */ | 613 | */ |
624 | if (fend < minend) | 614 | if (tbno > args->agbno) |
615 | goto not_found; | ||
616 | if (tlen < args->minlen) | ||
617 | goto not_found; | ||
618 | tend = tbno + tlen; | ||
619 | if (tend < args->agbno + args->minlen) | ||
625 | goto not_found; | 620 | goto not_found; |
626 | 621 | ||
627 | /* | 622 | /* |
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact( | |||
630 | * | 625 | * |
631 | * Fix the length according to mod and prod if given. | 626 | * Fix the length according to mod and prod if given. |
632 | */ | 627 | */ |
633 | end = XFS_AGBLOCK_MIN(fend, maxend); | 628 | end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); |
634 | args->len = end - args->agbno; | 629 | args->len = end - args->agbno; |
635 | xfs_alloc_fix_len(args); | 630 | xfs_alloc_fix_len(args); |
636 | if (!xfs_alloc_fix_minleft(args)) | 631 | if (!xfs_alloc_fix_minleft(args)) |
637 | goto not_found; | 632 | goto not_found; |
638 | 633 | ||
639 | rlen = args->len; | 634 | rlen = args->len; |
640 | ASSERT(args->agbno + rlen <= fend); | 635 | ASSERT(args->agbno + rlen <= tend); |
641 | end = args->agbno + rlen; | 636 | end = args->agbno + rlen; |
642 | 637 | ||
643 | /* | 638 | /* |
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent( | |||
686 | struct xfs_btree_cur **scur, /* searching cursor */ | 681 | struct xfs_btree_cur **scur, /* searching cursor */ |
687 | xfs_agblock_t gdiff, /* difference for search comparison */ | 682 | xfs_agblock_t gdiff, /* difference for search comparison */ |
688 | xfs_agblock_t *sbno, /* extent found by search */ | 683 | xfs_agblock_t *sbno, /* extent found by search */ |
689 | xfs_extlen_t *slen, | 684 | xfs_extlen_t *slen, /* extent length */ |
690 | xfs_extlen_t *slena, /* aligned length */ | 685 | xfs_agblock_t *sbnoa, /* aligned extent found by search */ |
686 | xfs_extlen_t *slena, /* aligned extent length */ | ||
691 | int dir) /* 0 = search right, 1 = search left */ | 687 | int dir) /* 0 = search right, 1 = search left */ |
692 | { | 688 | { |
693 | xfs_agblock_t bno; | ||
694 | xfs_agblock_t new; | 689 | xfs_agblock_t new; |
695 | xfs_agblock_t sdiff; | 690 | xfs_agblock_t sdiff; |
696 | int error; | 691 | int error; |
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent( | |||
708 | if (error) | 703 | if (error) |
709 | goto error0; | 704 | goto error0; |
710 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 705 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
711 | xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); | 706 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); |
712 | 707 | ||
713 | /* | 708 | /* |
714 | * The good extent is closer than this one. | 709 | * The good extent is closer than this one. |
715 | */ | 710 | */ |
716 | if (!dir) { | 711 | if (!dir) { |
717 | if (bno >= args->agbno + gdiff) | 712 | if (*sbnoa >= args->agbno + gdiff) |
718 | goto out_use_good; | 713 | goto out_use_good; |
719 | } else { | 714 | } else { |
720 | if (bno <= args->agbno - gdiff) | 715 | if (*sbnoa <= args->agbno - gdiff) |
721 | goto out_use_good; | 716 | goto out_use_good; |
722 | } | 717 | } |
723 | 718 | ||
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent( | |||
729 | xfs_alloc_fix_len(args); | 724 | xfs_alloc_fix_len(args); |
730 | 725 | ||
731 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 726 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
732 | args->alignment, *sbno, | 727 | args->alignment, *sbnoa, |
733 | *slen, &new); | 728 | *slena, &new); |
734 | 729 | ||
735 | /* | 730 | /* |
736 | * Choose closer size and invalidate other cursor. | 731 | * Choose closer size and invalidate other cursor. |
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near( | |||
780 | xfs_agblock_t gtbnoa; /* aligned ... */ | 775 | xfs_agblock_t gtbnoa; /* aligned ... */ |
781 | xfs_extlen_t gtdiff; /* difference to right side entry */ | 776 | xfs_extlen_t gtdiff; /* difference to right side entry */ |
782 | xfs_extlen_t gtlen; /* length of right side entry */ | 777 | xfs_extlen_t gtlen; /* length of right side entry */ |
783 | xfs_extlen_t gtlena = 0; /* aligned ... */ | 778 | xfs_extlen_t gtlena; /* aligned ... */ |
784 | xfs_agblock_t gtnew; /* useful start bno of right side */ | 779 | xfs_agblock_t gtnew; /* useful start bno of right side */ |
785 | int error; /* error code */ | 780 | int error; /* error code */ |
786 | int i; /* result code, temporary */ | 781 | int i; /* result code, temporary */ |
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near( | |||
789 | xfs_agblock_t ltbnoa; /* aligned ... */ | 784 | xfs_agblock_t ltbnoa; /* aligned ... */ |
790 | xfs_extlen_t ltdiff; /* difference to left side entry */ | 785 | xfs_extlen_t ltdiff; /* difference to left side entry */ |
791 | xfs_extlen_t ltlen; /* length of left side entry */ | 786 | xfs_extlen_t ltlen; /* length of left side entry */ |
792 | xfs_extlen_t ltlena = 0; /* aligned ... */ | 787 | xfs_extlen_t ltlena; /* aligned ... */ |
793 | xfs_agblock_t ltnew; /* useful start bno of left side */ | 788 | xfs_agblock_t ltnew; /* useful start bno of left side */ |
794 | xfs_extlen_t rlen; /* length of returned extent */ | 789 | xfs_extlen_t rlen; /* length of returned extent */ |
790 | int forced = 0; | ||
795 | #if defined(DEBUG) && defined(__KERNEL__) | 791 | #if defined(DEBUG) && defined(__KERNEL__) |
796 | /* | 792 | /* |
797 | * Randomly don't execute the first algorithm. | 793 | * Randomly don't execute the first algorithm. |
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near( | |||
800 | 796 | ||
801 | dofirst = random32() & 1; | 797 | dofirst = random32() & 1; |
802 | #endif | 798 | #endif |
799 | |||
800 | restart: | ||
801 | bno_cur_lt = NULL; | ||
802 | bno_cur_gt = NULL; | ||
803 | ltlen = 0; | ||
804 | gtlena = 0; | ||
805 | ltlena = 0; | ||
806 | |||
803 | /* | 807 | /* |
804 | * Get a cursor for the by-size btree. | 808 | * Get a cursor for the by-size btree. |
805 | */ | 809 | */ |
806 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 810 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
807 | args->agno, XFS_BTNUM_CNT); | 811 | args->agno, XFS_BTNUM_CNT); |
808 | ltlen = 0; | 812 | |
809 | bno_cur_lt = bno_cur_gt = NULL; | ||
810 | /* | 813 | /* |
811 | * See if there are any free extents as big as maxlen. | 814 | * See if there are any free extents as big as maxlen. |
812 | */ | 815 | */ |
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near( | |||
822 | goto error0; | 825 | goto error0; |
823 | if (i == 0 || ltlen == 0) { | 826 | if (i == 0 || ltlen == 0) { |
824 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 827 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
828 | trace_xfs_alloc_near_noentry(args); | ||
825 | return 0; | 829 | return 0; |
826 | } | 830 | } |
827 | ASSERT(i == 1); | 831 | ASSERT(i == 1); |
828 | } | 832 | } |
829 | args->wasfromfl = 0; | 833 | args->wasfromfl = 0; |
834 | |||
830 | /* | 835 | /* |
831 | * First algorithm. | 836 | * First algorithm. |
832 | * If the requested extent is large wrt the freespaces available | 837 | * If the requested extent is large wrt the freespaces available |
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near( | |||
890 | if (args->len < blen) | 895 | if (args->len < blen) |
891 | continue; | 896 | continue; |
892 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 897 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
893 | args->alignment, ltbno, ltlen, <new); | 898 | args->alignment, ltbnoa, ltlena, <new); |
894 | if (ltnew != NULLAGBLOCK && | 899 | if (ltnew != NULLAGBLOCK && |
895 | (args->len > blen || ltdiff < bdiff)) { | 900 | (args->len > blen || ltdiff < bdiff)) { |
896 | bdiff = ltdiff; | 901 | bdiff = ltdiff; |
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near( | |||
1042 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1047 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
1043 | xfs_alloc_fix_len(args); | 1048 | xfs_alloc_fix_len(args); |
1044 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1049 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1045 | args->alignment, ltbno, ltlen, <new); | 1050 | args->alignment, ltbnoa, ltlena, <new); |
1046 | 1051 | ||
1047 | error = xfs_alloc_find_best_extent(args, | 1052 | error = xfs_alloc_find_best_extent(args, |
1048 | &bno_cur_lt, &bno_cur_gt, | 1053 | &bno_cur_lt, &bno_cur_gt, |
1049 | ltdiff, >bno, >len, >lena, | 1054 | ltdiff, >bno, >len, |
1055 | >bnoa, >lena, | ||
1050 | 0 /* search right */); | 1056 | 0 /* search right */); |
1051 | } else { | 1057 | } else { |
1052 | ASSERT(gtlena >= args->minlen); | 1058 | ASSERT(gtlena >= args->minlen); |
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near( | |||
1057 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1063 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
1058 | xfs_alloc_fix_len(args); | 1064 | xfs_alloc_fix_len(args); |
1059 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1065 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1060 | args->alignment, gtbno, gtlen, >new); | 1066 | args->alignment, gtbnoa, gtlena, >new); |
1061 | 1067 | ||
1062 | error = xfs_alloc_find_best_extent(args, | 1068 | error = xfs_alloc_find_best_extent(args, |
1063 | &bno_cur_gt, &bno_cur_lt, | 1069 | &bno_cur_gt, &bno_cur_lt, |
1064 | gtdiff, <bno, <len, <lena, | 1070 | gtdiff, <bno, <len, |
1071 | <bnoa, <lena, | ||
1065 | 1 /* search left */); | 1072 | 1 /* search left */); |
1066 | } | 1073 | } |
1067 | 1074 | ||
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near( | |||
1073 | * If we couldn't get anything, give up. | 1080 | * If we couldn't get anything, give up. |
1074 | */ | 1081 | */ |
1075 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { | 1082 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { |
1083 | if (!forced++) { | ||
1084 | trace_xfs_alloc_near_busy(args); | ||
1085 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1086 | goto restart; | ||
1087 | } | ||
1088 | |||
1076 | trace_xfs_alloc_size_neither(args); | 1089 | trace_xfs_alloc_size_neither(args); |
1077 | args->agbno = NULLAGBLOCK; | 1090 | args->agbno = NULLAGBLOCK; |
1078 | return 0; | 1091 | return 0; |
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near( | |||
1107 | return 0; | 1120 | return 0; |
1108 | } | 1121 | } |
1109 | rlen = args->len; | 1122 | rlen = args->len; |
1110 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, | 1123 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, |
1111 | ltlen, <new); | 1124 | ltbnoa, ltlena, <new); |
1112 | ASSERT(ltnew >= ltbno); | 1125 | ASSERT(ltnew >= ltbno); |
1113 | ASSERT(ltnew + rlen <= ltbno + ltlen); | 1126 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); |
1114 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1127 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
1115 | args->agbno = ltnew; | 1128 | args->agbno = ltnew; |
1129 | |||
1116 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, | 1130 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, |
1117 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) | 1131 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) |
1118 | goto error0; | 1132 | goto error0; |
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size( | |||
1155 | int i; /* temp status variable */ | 1169 | int i; /* temp status variable */ |
1156 | xfs_agblock_t rbno; /* returned block number */ | 1170 | xfs_agblock_t rbno; /* returned block number */ |
1157 | xfs_extlen_t rlen; /* length of returned extent */ | 1171 | xfs_extlen_t rlen; /* length of returned extent */ |
1172 | int forced = 0; | ||
1158 | 1173 | ||
1174 | restart: | ||
1159 | /* | 1175 | /* |
1160 | * Allocate and initialize a cursor for the by-size btree. | 1176 | * Allocate and initialize a cursor for the by-size btree. |
1161 | */ | 1177 | */ |
1162 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 1178 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
1163 | args->agno, XFS_BTNUM_CNT); | 1179 | args->agno, XFS_BTNUM_CNT); |
1164 | bno_cur = NULL; | 1180 | bno_cur = NULL; |
1181 | |||
1165 | /* | 1182 | /* |
1166 | * Look for an entry >= maxlen+alignment-1 blocks. | 1183 | * Look for an entry >= maxlen+alignment-1 blocks. |
1167 | */ | 1184 | */ |
1168 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, | 1185 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, |
1169 | args->maxlen + args->alignment - 1, &i))) | 1186 | args->maxlen + args->alignment - 1, &i))) |
1170 | goto error0; | 1187 | goto error0; |
1188 | |||
1171 | /* | 1189 | /* |
1172 | * If none, then pick up the last entry in the tree unless the | 1190 | * If none or we have busy extents that we cannot allocate from, then |
1173 | * tree is empty. | 1191 | * we have to settle for a smaller extent. In the case that there are |
1192 | * no large extents, this will return the last entry in the tree unless | ||
1193 | * the tree is empty. In the case that there are only busy large | ||
1194 | * extents, this will return the largest small extent unless there | ||
1195 | * are no smaller extents available. | ||
1174 | */ | 1196 | */ |
1175 | if (!i) { | 1197 | if (!i || forced > 1) { |
1176 | if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, | 1198 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, |
1177 | &flen, &i))) | 1199 | &fbno, &flen, &i); |
1200 | if (error) | ||
1178 | goto error0; | 1201 | goto error0; |
1179 | if (i == 0 || flen == 0) { | 1202 | if (i == 0 || flen == 0) { |
1180 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1203 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size( | |||
1182 | return 0; | 1205 | return 0; |
1183 | } | 1206 | } |
1184 | ASSERT(i == 1); | 1207 | ASSERT(i == 1); |
1208 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
1209 | } else { | ||
1210 | /* | ||
1211 | * Search for a non-busy extent that is large enough. | ||
1212 | * If we are at low space, don't check, or if we fall of | ||
1213 | * the end of the btree, turn off the busy check and | ||
1214 | * restart. | ||
1215 | */ | ||
1216 | for (;;) { | ||
1217 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | ||
1218 | if (error) | ||
1219 | goto error0; | ||
1220 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1221 | |||
1222 | xfs_alloc_compute_aligned(args, fbno, flen, | ||
1223 | &rbno, &rlen); | ||
1224 | |||
1225 | if (rlen >= args->maxlen) | ||
1226 | break; | ||
1227 | |||
1228 | error = xfs_btree_increment(cnt_cur, 0, &i); | ||
1229 | if (error) | ||
1230 | goto error0; | ||
1231 | if (i == 0) { | ||
1232 | /* | ||
1233 | * Our only valid extents must have been busy. | ||
1234 | * Make it unbusy by forcing the log out and | ||
1235 | * retrying. If we've been here before, forcing | ||
1236 | * the log isn't making the extents available, | ||
1237 | * which means they have probably been freed in | ||
1238 | * this transaction. In that case, we have to | ||
1239 | * give up on them and we'll attempt a minlen | ||
1240 | * allocation the next time around. | ||
1241 | */ | ||
1242 | xfs_btree_del_cursor(cnt_cur, | ||
1243 | XFS_BTREE_NOERROR); | ||
1244 | trace_xfs_alloc_size_busy(args); | ||
1245 | if (!forced++) | ||
1246 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1247 | goto restart; | ||
1248 | } | ||
1249 | } | ||
1185 | } | 1250 | } |
1186 | /* | 1251 | |
1187 | * There's a freespace as big as maxlen+alignment-1, get it. | ||
1188 | */ | ||
1189 | else { | ||
1190 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) | ||
1191 | goto error0; | ||
1192 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1193 | } | ||
1194 | /* | 1252 | /* |
1195 | * In the first case above, we got the last entry in the | 1253 | * In the first case above, we got the last entry in the |
1196 | * by-size btree. Now we check to see if the space hits maxlen | 1254 | * by-size btree. Now we check to see if the space hits maxlen |
1197 | * once aligned; if not, we search left for something better. | 1255 | * once aligned; if not, we search left for something better. |
1198 | * This can't happen in the second case above. | 1256 | * This can't happen in the second case above. |
1199 | */ | 1257 | */ |
1200 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
1201 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1258 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1202 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1259 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1203 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | 1260 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); |
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size( | |||
1251 | * Fix up the length. | 1308 | * Fix up the length. |
1252 | */ | 1309 | */ |
1253 | args->len = rlen; | 1310 | args->len = rlen; |
1254 | xfs_alloc_fix_len(args); | 1311 | if (rlen < args->minlen) { |
1255 | if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { | 1312 | if (!forced++) { |
1256 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1313 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1257 | trace_xfs_alloc_size_nominleft(args); | 1314 | trace_xfs_alloc_size_busy(args); |
1258 | args->agbno = NULLAGBLOCK; | 1315 | xfs_log_force(args->mp, XFS_LOG_SYNC); |
1259 | return 0; | 1316 | goto restart; |
1317 | } | ||
1318 | goto out_nominleft; | ||
1260 | } | 1319 | } |
1320 | xfs_alloc_fix_len(args); | ||
1321 | |||
1322 | if (!xfs_alloc_fix_minleft(args)) | ||
1323 | goto out_nominleft; | ||
1261 | rlen = args->len; | 1324 | rlen = args->len; |
1262 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); | 1325 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); |
1263 | /* | 1326 | /* |
@@ -1287,6 +1350,12 @@ error0: | |||
1287 | if (bno_cur) | 1350 | if (bno_cur) |
1288 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | 1351 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); |
1289 | return error; | 1352 | return error; |
1353 | |||
1354 | out_nominleft: | ||
1355 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1356 | trace_xfs_alloc_size_nominleft(args); | ||
1357 | args->agbno = NULLAGBLOCK; | ||
1358 | return 0; | ||
1290 | } | 1359 | } |
1291 | 1360 | ||
1292 | /* | 1361 | /* |
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small( | |||
1326 | if (error) | 1395 | if (error) |
1327 | goto error0; | 1396 | goto error0; |
1328 | if (fbno != NULLAGBLOCK) { | 1397 | if (fbno != NULLAGBLOCK) { |
1398 | xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, | ||
1399 | args->userdata); | ||
1400 | |||
1329 | if (args->userdata) { | 1401 | if (args->userdata) { |
1330 | xfs_buf_t *bp; | 1402 | xfs_buf_t *bp; |
1331 | 1403 | ||
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent( | |||
1617 | 1689 | ||
1618 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | 1690 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); |
1619 | 1691 | ||
1620 | /* | ||
1621 | * Since blocks move to the free list without the coordination | ||
1622 | * used in xfs_bmap_finish, we can't allow block to be available | ||
1623 | * for reallocation and non-transaction writing (user data) | ||
1624 | * until we know that the transaction that moved it to the free | ||
1625 | * list is permanently on disk. We track the blocks by declaring | ||
1626 | * these blocks as "busy"; the busy list is maintained on a per-ag | ||
1627 | * basis and each transaction records which entries should be removed | ||
1628 | * when the iclog commits to disk. If a busy block is allocated, | ||
1629 | * the iclog is pushed up to the LSN that freed the block. | ||
1630 | */ | ||
1631 | xfs_alloc_busy_insert(tp, agno, bno, len); | ||
1632 | return 0; | 1692 | return 0; |
1633 | 1693 | ||
1634 | error0: | 1694 | error0: |
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist( | |||
1923 | xfs_alloc_log_agf(tp, agbp, logflags); | 1983 | xfs_alloc_log_agf(tp, agbp, logflags); |
1924 | *bnop = bno; | 1984 | *bnop = bno; |
1925 | 1985 | ||
1926 | /* | ||
1927 | * As blocks are freed, they are added to the per-ag busy list and | ||
1928 | * remain there until the freeing transaction is committed to disk. | ||
1929 | * Now that we have allocated blocks, this list must be searched to see | ||
1930 | * if a block is being reused. If one is, then the freeing transaction | ||
1931 | * must be pushed to disk before this transaction. | ||
1932 | * | ||
1933 | * We do this by setting the current transaction to a sync transaction | ||
1934 | * which guarantees that the freeing transaction is on disk before this | ||
1935 | * transaction. This is done instead of a synchronous log force here so | ||
1936 | * that we don't sit and wait with the AGF locked in the transaction | ||
1937 | * during the log force. | ||
1938 | */ | ||
1939 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) | ||
1940 | xfs_trans_set_sync(tp); | ||
1941 | return 0; | 1986 | return 0; |
1942 | } | 1987 | } |
1943 | 1988 | ||
@@ -2423,105 +2468,13 @@ xfs_free_extent( | |||
2423 | } | 2468 | } |
2424 | 2469 | ||
2425 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2471 | if (!error) | ||
2472 | xfs_alloc_busy_insert(tp, args.agno, args.agbno, len); | ||
2426 | error0: | 2473 | error0: |
2427 | xfs_perag_put(args.pag); | 2474 | xfs_perag_put(args.pag); |
2428 | return error; | 2475 | return error; |
2429 | } | 2476 | } |
2430 | 2477 | ||
2431 | |||
2432 | /* | ||
2433 | * AG Busy list management | ||
2434 | * The busy list contains block ranges that have been freed but whose | ||
2435 | * transactions have not yet hit disk. If any block listed in a busy | ||
2436 | * list is reused, the transaction that freed it must be forced to disk | ||
2437 | * before continuing to use the block. | ||
2438 | * | ||
2439 | * xfs_alloc_busy_insert - add to the per-ag busy list | ||
2440 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list | ||
2441 | * xfs_alloc_busy_search - search for a busy extent | ||
2442 | */ | ||
2443 | |||
2444 | /* | ||
2445 | * Insert a new extent into the busy tree. | ||
2446 | * | ||
2447 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2448 | * there can be multiple overlapping ranges in the busy extent tree but only | ||
2449 | * ever one entry at a given start block. The reason for this is that | ||
2450 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2451 | * allocated and freed again before the first transaction commit is on disk. | ||
2452 | * If the exact same start block is freed a second time, we have to wait for | ||
2453 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2454 | * There are two main cases we have to handle here. | ||
2455 | * | ||
2456 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2457 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2458 | * to the freelist, then allocated from the free list, then freed again. In | ||
2459 | * this case, the second extxpnet free is what triggers the duplicate and as | ||
2460 | * such the transaction IDs should match. Because the extent was allocated in | ||
2461 | * this transaction, the transaction must be marked as synchronous. This is | ||
2462 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2463 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2464 | * This serves to catch violations of the second case quite effectively. | ||
2465 | * | ||
2466 | * The second case is where the free/alloc/free occur in different | ||
2467 | * transactions. In this case, the thread freeing the extent the second time | ||
2468 | * can't mark the extent busy immediately because it is already tracked in a | ||
2469 | * transaction that may be committing. When the log commit for the existing | ||
2470 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2471 | * allow the second busy insert to continue using that busy extent structure, | ||
2472 | * it can be freed before this transaction is safely in the log. Hence our | ||
2473 | * only option in this case is to force the log to remove the existing busy | ||
2474 | * extent from the list before we insert the new one with the current | ||
2475 | * transaction ID. | ||
2476 | * | ||
2477 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2478 | * transactions is most easily described with a timeline: | ||
2479 | * | ||
2480 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2481 | * xact alloc | ||
2482 | * free X | ||
2483 | * mark busy | ||
2484 | * commit xact | ||
2485 | * free xact | ||
2486 | * xact alloc | ||
2487 | * alloc X | ||
2488 | * busy search | ||
2489 | * mark xact sync | ||
2490 | * commit xact | ||
2491 | * free xact | ||
2492 | * force log | ||
2493 | * checkpoint starts | ||
2494 | * .... | ||
2495 | * xact alloc | ||
2496 | * free X | ||
2497 | * mark busy | ||
2498 | * finds match | ||
2499 | * *** KABOOM! *** | ||
2500 | * .... | ||
2501 | * log IO completes | ||
2502 | * unbusy X | ||
2503 | * checkpoint completes | ||
2504 | * | ||
2505 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2506 | * the checkpoint completes, and the busy extent it matched will have been | ||
2507 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2508 | * | ||
2509 | * However, to ensure this matching process is robust, we need to use the | ||
2510 | * transaction ID for identifying transaction, as delayed logging results in | ||
2511 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
2512 | * extent is active for a lot longer than the transaction. Hence the | ||
2513 | * transaction structure can be freed and reallocated, then mark the same | ||
2514 | * extent busy again in the new transaction. In this case the new transaction | ||
2515 | * will have a different tid but can have the same address, and hence we need | ||
2516 | * to check against the tid. | ||
2517 | * | ||
2518 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2519 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2520 | * ability to pin the current checkpoint in memory until this transaction | ||
2521 | * commits to ensure that both the original free and the current one combine | ||
2522 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2523 | * different, however, we still need to wait on a log force. | ||
2524 | */ | ||
2525 | void | 2478 | void |
2526 | xfs_alloc_busy_insert( | 2479 | xfs_alloc_busy_insert( |
2527 | struct xfs_trans *tp, | 2480 | struct xfs_trans *tp, |
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert( | |||
2533 | struct xfs_busy_extent *busyp; | 2486 | struct xfs_busy_extent *busyp; |
2534 | struct xfs_perag *pag; | 2487 | struct xfs_perag *pag; |
2535 | struct rb_node **rbp; | 2488 | struct rb_node **rbp; |
2536 | struct rb_node *parent; | 2489 | struct rb_node *parent = NULL; |
2537 | int match; | ||
2538 | |||
2539 | 2490 | ||
2540 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); | 2491 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2541 | if (!new) { | 2492 | if (!new) { |
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert( | |||
2544 | * block, make this a synchronous transaction to insure that | 2495 | * block, make this a synchronous transaction to insure that |
2545 | * the block is not reused before this transaction commits. | 2496 | * the block is not reused before this transaction commits. |
2546 | */ | 2497 | */ |
2547 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | 2498 | trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); |
2548 | xfs_trans_set_sync(tp); | 2499 | xfs_trans_set_sync(tp); |
2549 | return; | 2500 | return; |
2550 | } | 2501 | } |
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert( | |||
2552 | new->agno = agno; | 2503 | new->agno = agno; |
2553 | new->bno = bno; | 2504 | new->bno = bno; |
2554 | new->length = len; | 2505 | new->length = len; |
2555 | new->tid = xfs_log_get_trans_ident(tp); | ||
2556 | |||
2557 | INIT_LIST_HEAD(&new->list); | 2506 | INIT_LIST_HEAD(&new->list); |
2558 | 2507 | ||
2559 | /* trace before insert to be able to see failed inserts */ | 2508 | /* trace before insert to be able to see failed inserts */ |
2560 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); | 2509 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); |
2561 | 2510 | ||
2562 | pag = xfs_perag_get(tp->t_mountp, new->agno); | 2511 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2563 | restart: | ||
2564 | spin_lock(&pag->pagb_lock); | 2512 | spin_lock(&pag->pagb_lock); |
2565 | rbp = &pag->pagb_tree.rb_node; | 2513 | rbp = &pag->pagb_tree.rb_node; |
2566 | parent = NULL; | 2514 | while (*rbp) { |
2567 | busyp = NULL; | ||
2568 | match = 0; | ||
2569 | while (*rbp && match >= 0) { | ||
2570 | parent = *rbp; | 2515 | parent = *rbp; |
2571 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | 2516 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); |
2572 | 2517 | ||
2573 | if (new->bno < busyp->bno) { | 2518 | if (new->bno < busyp->bno) { |
2574 | /* may overlap, but exact start block is lower */ | ||
2575 | rbp = &(*rbp)->rb_left; | 2519 | rbp = &(*rbp)->rb_left; |
2576 | if (new->bno + new->length > busyp->bno) | 2520 | ASSERT(new->bno + new->length <= busyp->bno); |
2577 | match = busyp->tid == new->tid ? 1 : -1; | ||
2578 | } else if (new->bno > busyp->bno) { | 2521 | } else if (new->bno > busyp->bno) { |
2579 | /* may overlap, but exact start block is higher */ | ||
2580 | rbp = &(*rbp)->rb_right; | 2522 | rbp = &(*rbp)->rb_right; |
2581 | if (bno < busyp->bno + busyp->length) | 2523 | ASSERT(bno >= busyp->bno + busyp->length); |
2582 | match = busyp->tid == new->tid ? 1 : -1; | ||
2583 | } else { | 2524 | } else { |
2584 | match = busyp->tid == new->tid ? 1 : -1; | 2525 | ASSERT(0); |
2585 | break; | ||
2586 | } | 2526 | } |
2587 | } | 2527 | } |
2588 | if (match < 0) { | ||
2589 | /* overlap marked busy in different transaction */ | ||
2590 | spin_unlock(&pag->pagb_lock); | ||
2591 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2592 | goto restart; | ||
2593 | } | ||
2594 | if (match > 0) { | ||
2595 | /* | ||
2596 | * overlap marked busy in same transaction. Update if exact | ||
2597 | * start block match, otherwise combine the busy extents into | ||
2598 | * a single range. | ||
2599 | */ | ||
2600 | if (busyp->bno == new->bno) { | ||
2601 | busyp->length = max(busyp->length, new->length); | ||
2602 | spin_unlock(&pag->pagb_lock); | ||
2603 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2604 | xfs_perag_put(pag); | ||
2605 | kmem_free(new); | ||
2606 | return; | ||
2607 | } | ||
2608 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2609 | new->length = max(busyp->bno + busyp->length, | ||
2610 | new->bno + new->length) - | ||
2611 | min(busyp->bno, new->bno); | ||
2612 | new->bno = min(busyp->bno, new->bno); | ||
2613 | } else | ||
2614 | busyp = NULL; | ||
2615 | 2528 | ||
2616 | rb_link_node(&new->rb_node, parent, rbp); | 2529 | rb_link_node(&new->rb_node, parent, rbp); |
2617 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | 2530 | rb_insert_color(&new->rb_node, &pag->pagb_tree); |
@@ -2619,7 +2532,6 @@ restart: | |||
2619 | list_add(&new->list, &tp->t_busy); | 2532 | list_add(&new->list, &tp->t_busy); |
2620 | spin_unlock(&pag->pagb_lock); | 2533 | spin_unlock(&pag->pagb_lock); |
2621 | xfs_perag_put(pag); | 2534 | xfs_perag_put(pag); |
2622 | kmem_free(busyp); | ||
2623 | } | 2535 | } |
2624 | 2536 | ||
2625 | /* | 2537 | /* |
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search( | |||
2668 | } | 2580 | } |
2669 | } | 2581 | } |
2670 | spin_unlock(&pag->pagb_lock); | 2582 | spin_unlock(&pag->pagb_lock); |
2671 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2672 | xfs_perag_put(pag); | 2583 | xfs_perag_put(pag); |
2673 | return match; | 2584 | return match; |
2674 | } | 2585 | } |
2675 | 2586 | ||
2587 | /* | ||
2588 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
2589 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
2590 | * extent, the overlapping portion can be made unbusy and used for the | ||
2591 | * allocation. We can't split a busy extent because we can't modify a | ||
2592 | * transaction/CIL context busy list, but we can update an entries block | ||
2593 | * number or length. | ||
2594 | * | ||
2595 | * Returns true if the extent can safely be reused, or false if the search | ||
2596 | * needs to be restarted. | ||
2597 | */ | ||
2598 | STATIC bool | ||
2599 | xfs_alloc_busy_update_extent( | ||
2600 | struct xfs_mount *mp, | ||
2601 | struct xfs_perag *pag, | ||
2602 | struct xfs_busy_extent *busyp, | ||
2603 | xfs_agblock_t fbno, | ||
2604 | xfs_extlen_t flen, | ||
2605 | bool userdata) | ||
2606 | { | ||
2607 | xfs_agblock_t fend = fbno + flen; | ||
2608 | xfs_agblock_t bbno = busyp->bno; | ||
2609 | xfs_agblock_t bend = bbno + busyp->length; | ||
2610 | |||
2611 | /* | ||
2612 | * If there is a busy extent overlapping a user allocation, we have | ||
2613 | * no choice but to force the log and retry the search. | ||
2614 | * | ||
2615 | * Fortunately this does not happen during normal operation, but | ||
2616 | * only if the filesystem is very low on space and has to dip into | ||
2617 | * the AGFL for normal allocations. | ||
2618 | */ | ||
2619 | if (userdata) | ||
2620 | goto out_force_log; | ||
2621 | |||
2622 | if (bbno < fbno && bend > fend) { | ||
2623 | /* | ||
2624 | * Case 1: | ||
2625 | * bbno bend | ||
2626 | * +BBBBBBBBBBBBBBBBB+ | ||
2627 | * +---------+ | ||
2628 | * fbno fend | ||
2629 | */ | ||
2630 | |||
2631 | /* | ||
2632 | * We would have to split the busy extent to be able to track | ||
2633 | * it correct, which we cannot do because we would have to | ||
2634 | * modify the list of busy extents attached to the transaction | ||
2635 | * or CIL context, which is immutable. | ||
2636 | * | ||
2637 | * Force out the log to clear the busy extent and retry the | ||
2638 | * search. | ||
2639 | */ | ||
2640 | goto out_force_log; | ||
2641 | } else if (bbno >= fbno && bend <= fend) { | ||
2642 | /* | ||
2643 | * Case 2: | ||
2644 | * bbno bend | ||
2645 | * +BBBBBBBBBBBBBBBBB+ | ||
2646 | * +-----------------+ | ||
2647 | * fbno fend | ||
2648 | * | ||
2649 | * Case 3: | ||
2650 | * bbno bend | ||
2651 | * +BBBBBBBBBBBBBBBBB+ | ||
2652 | * +--------------------------+ | ||
2653 | * fbno fend | ||
2654 | * | ||
2655 | * Case 4: | ||
2656 | * bbno bend | ||
2657 | * +BBBBBBBBBBBBBBBBB+ | ||
2658 | * +--------------------------+ | ||
2659 | * fbno fend | ||
2660 | * | ||
2661 | * Case 5: | ||
2662 | * bbno bend | ||
2663 | * +BBBBBBBBBBBBBBBBB+ | ||
2664 | * +-----------------------------------+ | ||
2665 | * fbno fend | ||
2666 | * | ||
2667 | */ | ||
2668 | |||
2669 | /* | ||
2670 | * The busy extent is fully covered by the extent we are | ||
2671 | * allocating, and can simply be removed from the rbtree. | ||
2672 | * However we cannot remove it from the immutable list | ||
2673 | * tracking busy extents in the transaction or CIL context, | ||
2674 | * so set the length to zero to mark it invalid. | ||
2675 | * | ||
2676 | * We also need to restart the busy extent search from the | ||
2677 | * tree root, because erasing the node can rearrange the | ||
2678 | * tree topology. | ||
2679 | */ | ||
2680 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2681 | busyp->length = 0; | ||
2682 | return false; | ||
2683 | } else if (fend < bend) { | ||
2684 | /* | ||
2685 | * Case 6: | ||
2686 | * bbno bend | ||
2687 | * +BBBBBBBBBBBBBBBBB+ | ||
2688 | * +---------+ | ||
2689 | * fbno fend | ||
2690 | * | ||
2691 | * Case 7: | ||
2692 | * bbno bend | ||
2693 | * +BBBBBBBBBBBBBBBBB+ | ||
2694 | * +------------------+ | ||
2695 | * fbno fend | ||
2696 | * | ||
2697 | */ | ||
2698 | busyp->bno = fend; | ||
2699 | } else if (bbno < fbno) { | ||
2700 | /* | ||
2701 | * Case 8: | ||
2702 | * bbno bend | ||
2703 | * +BBBBBBBBBBBBBBBBB+ | ||
2704 | * +-------------+ | ||
2705 | * fbno fend | ||
2706 | * | ||
2707 | * Case 9: | ||
2708 | * bbno bend | ||
2709 | * +BBBBBBBBBBBBBBBBB+ | ||
2710 | * +----------------------+ | ||
2711 | * fbno fend | ||
2712 | */ | ||
2713 | busyp->length = fbno - busyp->bno; | ||
2714 | } else { | ||
2715 | ASSERT(0); | ||
2716 | } | ||
2717 | |||
2718 | trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
2719 | return true; | ||
2720 | |||
2721 | out_force_log: | ||
2722 | spin_unlock(&pag->pagb_lock); | ||
2723 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
2724 | trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); | ||
2725 | spin_lock(&pag->pagb_lock); | ||
2726 | return false; | ||
2727 | } | ||
2728 | |||
2729 | |||
2730 | /* | ||
2731 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
2732 | */ | ||
2676 | void | 2733 | void |
2677 | xfs_alloc_busy_clear( | 2734 | xfs_alloc_busy_reuse( |
2678 | struct xfs_mount *mp, | 2735 | struct xfs_mount *mp, |
2679 | struct xfs_busy_extent *busyp) | 2736 | xfs_agnumber_t agno, |
2737 | xfs_agblock_t fbno, | ||
2738 | xfs_extlen_t flen, | ||
2739 | bool userdata) | ||
2680 | { | 2740 | { |
2681 | struct xfs_perag *pag; | 2741 | struct xfs_perag *pag; |
2742 | struct rb_node *rbp; | ||
2682 | 2743 | ||
2683 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, | 2744 | ASSERT(flen > 0); |
2684 | busyp->length); | ||
2685 | 2745 | ||
2686 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, | 2746 | pag = xfs_perag_get(mp, agno); |
2687 | busyp->length) == 1); | 2747 | spin_lock(&pag->pagb_lock); |
2748 | restart: | ||
2749 | rbp = pag->pagb_tree.rb_node; | ||
2750 | while (rbp) { | ||
2751 | struct xfs_busy_extent *busyp = | ||
2752 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2753 | xfs_agblock_t bbno = busyp->bno; | ||
2754 | xfs_agblock_t bend = bbno + busyp->length; | ||
2688 | 2755 | ||
2689 | list_del_init(&busyp->list); | 2756 | if (fbno + flen <= bbno) { |
2757 | rbp = rbp->rb_left; | ||
2758 | continue; | ||
2759 | } else if (fbno >= bend) { | ||
2760 | rbp = rbp->rb_right; | ||
2761 | continue; | ||
2762 | } | ||
2690 | 2763 | ||
2691 | pag = xfs_perag_get(mp, busyp->agno); | 2764 | if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, |
2692 | spin_lock(&pag->pagb_lock); | 2765 | userdata)) |
2693 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | 2766 | goto restart; |
2767 | } | ||
2694 | spin_unlock(&pag->pagb_lock); | 2768 | spin_unlock(&pag->pagb_lock); |
2695 | xfs_perag_put(pag); | 2769 | xfs_perag_put(pag); |
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * For a given extent [fbno, flen], search the busy extent list to find a | ||
2774 | * subset of the extent that is not busy. If *rlen is smaller than | ||
2775 | * args->minlen no suitable extent could be found, and the higher level | ||
2776 | * code needs to force out the log and retry the allocation. | ||
2777 | */ | ||
2778 | STATIC void | ||
2779 | xfs_alloc_busy_trim( | ||
2780 | struct xfs_alloc_arg *args, | ||
2781 | xfs_agblock_t bno, | ||
2782 | xfs_extlen_t len, | ||
2783 | xfs_agblock_t *rbno, | ||
2784 | xfs_extlen_t *rlen) | ||
2785 | { | ||
2786 | xfs_agblock_t fbno; | ||
2787 | xfs_extlen_t flen; | ||
2788 | struct rb_node *rbp; | ||
2789 | |||
2790 | ASSERT(len > 0); | ||
2696 | 2791 | ||
2792 | spin_lock(&args->pag->pagb_lock); | ||
2793 | restart: | ||
2794 | fbno = bno; | ||
2795 | flen = len; | ||
2796 | rbp = args->pag->pagb_tree.rb_node; | ||
2797 | while (rbp && flen >= args->minlen) { | ||
2798 | struct xfs_busy_extent *busyp = | ||
2799 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2800 | xfs_agblock_t fend = fbno + flen; | ||
2801 | xfs_agblock_t bbno = busyp->bno; | ||
2802 | xfs_agblock_t bend = bbno + busyp->length; | ||
2803 | |||
2804 | if (fend <= bbno) { | ||
2805 | rbp = rbp->rb_left; | ||
2806 | continue; | ||
2807 | } else if (fbno >= bend) { | ||
2808 | rbp = rbp->rb_right; | ||
2809 | continue; | ||
2810 | } | ||
2811 | |||
2812 | /* | ||
2813 | * If this is a metadata allocation, try to reuse the busy | ||
2814 | * extent instead of trimming the allocation. | ||
2815 | */ | ||
2816 | if (!args->userdata) { | ||
2817 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, | ||
2818 | busyp, fbno, flen, | ||
2819 | false)) | ||
2820 | goto restart; | ||
2821 | continue; | ||
2822 | } | ||
2823 | |||
2824 | if (bbno <= fbno) { | ||
2825 | /* start overlap */ | ||
2826 | |||
2827 | /* | ||
2828 | * Case 1: | ||
2829 | * bbno bend | ||
2830 | * +BBBBBBBBBBBBBBBBB+ | ||
2831 | * +---------+ | ||
2832 | * fbno fend | ||
2833 | * | ||
2834 | * Case 2: | ||
2835 | * bbno bend | ||
2836 | * +BBBBBBBBBBBBBBBBB+ | ||
2837 | * +-------------+ | ||
2838 | * fbno fend | ||
2839 | * | ||
2840 | * Case 3: | ||
2841 | * bbno bend | ||
2842 | * +BBBBBBBBBBBBBBBBB+ | ||
2843 | * +-------------+ | ||
2844 | * fbno fend | ||
2845 | * | ||
2846 | * Case 4: | ||
2847 | * bbno bend | ||
2848 | * +BBBBBBBBBBBBBBBBB+ | ||
2849 | * +-----------------+ | ||
2850 | * fbno fend | ||
2851 | * | ||
2852 | * No unbusy region in extent, return failure. | ||
2853 | */ | ||
2854 | if (fend <= bend) | ||
2855 | goto fail; | ||
2856 | |||
2857 | /* | ||
2858 | * Case 5: | ||
2859 | * bbno bend | ||
2860 | * +BBBBBBBBBBBBBBBBB+ | ||
2861 | * +----------------------+ | ||
2862 | * fbno fend | ||
2863 | * | ||
2864 | * Case 6: | ||
2865 | * bbno bend | ||
2866 | * +BBBBBBBBBBBBBBBBB+ | ||
2867 | * +--------------------------+ | ||
2868 | * fbno fend | ||
2869 | * | ||
2870 | * Needs to be trimmed to: | ||
2871 | * +-------+ | ||
2872 | * fbno fend | ||
2873 | */ | ||
2874 | fbno = bend; | ||
2875 | } else if (bend >= fend) { | ||
2876 | /* end overlap */ | ||
2877 | |||
2878 | /* | ||
2879 | * Case 7: | ||
2880 | * bbno bend | ||
2881 | * +BBBBBBBBBBBBBBBBB+ | ||
2882 | * +------------------+ | ||
2883 | * fbno fend | ||
2884 | * | ||
2885 | * Case 8: | ||
2886 | * bbno bend | ||
2887 | * +BBBBBBBBBBBBBBBBB+ | ||
2888 | * +--------------------------+ | ||
2889 | * fbno fend | ||
2890 | * | ||
2891 | * Needs to be trimmed to: | ||
2892 | * +-------+ | ||
2893 | * fbno fend | ||
2894 | */ | ||
2895 | fend = bbno; | ||
2896 | } else { | ||
2897 | /* middle overlap */ | ||
2898 | |||
2899 | /* | ||
2900 | * Case 9: | ||
2901 | * bbno bend | ||
2902 | * +BBBBBBBBBBBBBBBBB+ | ||
2903 | * +-----------------------------------+ | ||
2904 | * fbno fend | ||
2905 | * | ||
2906 | * Can be trimmed to: | ||
2907 | * +-------+ OR +-------+ | ||
2908 | * fbno fend fbno fend | ||
2909 | * | ||
2910 | * Backward allocation leads to significant | ||
2911 | * fragmentation of directories, which degrades | ||
2912 | * directory performance, therefore we always want to | ||
2913 | * choose the option that produces forward allocation | ||
2914 | * patterns. | ||
2915 | * Preferring the lower bno extent will make the next | ||
2916 | * request use "fend" as the start of the next | ||
2917 | * allocation; if the segment is no longer busy at | ||
2918 | * that point, we'll get a contiguous allocation, but | ||
2919 | * even if it is still busy, we will get a forward | ||
2920 | * allocation. | ||
2921 | * We try to avoid choosing the segment at "bend", | ||
2922 | * because that can lead to the next allocation | ||
2923 | * taking the segment at "fbno", which would be a | ||
2924 | * backward allocation. We only use the segment at | ||
2925 | * "fbno" if it is much larger than the current | ||
2926 | * requested size, because in that case there's a | ||
2927 | * good chance subsequent allocations will be | ||
2928 | * contiguous. | ||
2929 | */ | ||
2930 | if (bbno - fbno >= args->maxlen) { | ||
2931 | /* left candidate fits perfect */ | ||
2932 | fend = bbno; | ||
2933 | } else if (fend - bend >= args->maxlen * 4) { | ||
2934 | /* right candidate has enough free space */ | ||
2935 | fbno = bend; | ||
2936 | } else if (bbno - fbno >= args->minlen) { | ||
2937 | /* left candidate fits minimum requirement */ | ||
2938 | fend = bbno; | ||
2939 | } else { | ||
2940 | goto fail; | ||
2941 | } | ||
2942 | } | ||
2943 | |||
2944 | flen = fend - fbno; | ||
2945 | } | ||
2946 | spin_unlock(&args->pag->pagb_lock); | ||
2947 | |||
2948 | if (fbno != bno || flen != len) { | ||
2949 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, | ||
2950 | fbno, flen); | ||
2951 | } | ||
2952 | *rbno = fbno; | ||
2953 | *rlen = flen; | ||
2954 | return; | ||
2955 | fail: | ||
2956 | /* | ||
2957 | * Return a zero extent length as failure indications. All callers | ||
2958 | * re-check if the trimmed extent satisfies the minlen requirement. | ||
2959 | */ | ||
2960 | spin_unlock(&args->pag->pagb_lock); | ||
2961 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | ||
2962 | *rbno = fbno; | ||
2963 | *rlen = 0; | ||
2964 | } | ||
2965 | |||
2966 | static void | ||
2967 | xfs_alloc_busy_clear_one( | ||
2968 | struct xfs_mount *mp, | ||
2969 | struct xfs_perag *pag, | ||
2970 | struct xfs_busy_extent *busyp) | ||
2971 | { | ||
2972 | if (busyp->length) { | ||
2973 | trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno, | ||
2974 | busyp->length); | ||
2975 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2976 | } | ||
2977 | |||
2978 | list_del_init(&busyp->list); | ||
2697 | kmem_free(busyp); | 2979 | kmem_free(busyp); |
2698 | } | 2980 | } |
2981 | |||
2982 | void | ||
2983 | xfs_alloc_busy_clear( | ||
2984 | struct xfs_mount *mp, | ||
2985 | struct list_head *list) | ||
2986 | { | ||
2987 | struct xfs_busy_extent *busyp, *n; | ||
2988 | struct xfs_perag *pag = NULL; | ||
2989 | xfs_agnumber_t agno = NULLAGNUMBER; | ||
2990 | |||
2991 | list_for_each_entry_safe(busyp, n, list, list) { | ||
2992 | if (busyp->agno != agno) { | ||
2993 | if (pag) { | ||
2994 | spin_unlock(&pag->pagb_lock); | ||
2995 | xfs_perag_put(pag); | ||
2996 | } | ||
2997 | pag = xfs_perag_get(mp, busyp->agno); | ||
2998 | spin_lock(&pag->pagb_lock); | ||
2999 | agno = busyp->agno; | ||
3000 | } | ||
3001 | |||
3002 | xfs_alloc_busy_clear_one(mp, pag, busyp); | ||
3003 | } | ||
3004 | |||
3005 | if (pag) { | ||
3006 | spin_unlock(&pag->pagb_lock); | ||
3007 | xfs_perag_put(pag); | ||
3008 | } | ||
3009 | } | ||
3010 | |||
3011 | /* | ||
3012 | * Callback for list_sort to sort busy extents by the AG they reside in. | ||
3013 | */ | ||
3014 | int | ||
3015 | xfs_busy_extent_ag_cmp( | ||
3016 | void *priv, | ||
3017 | struct list_head *a, | ||
3018 | struct list_head *b) | ||
3019 | { | ||
3020 | return container_of(a, struct xfs_busy_extent, list)->agno - | ||
3021 | container_of(b, struct xfs_busy_extent, list)->agno; | ||
3022 | } | ||
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index d0b3bc72005b..240ad288f2f9 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, | |||
140 | xfs_agblock_t bno, xfs_extlen_t len); | 140 | xfs_agblock_t bno, xfs_extlen_t len); |
141 | 141 | ||
142 | void | 142 | void |
143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); | 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list); |
144 | 144 | ||
145 | int | 145 | int |
146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | 146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
147 | xfs_agblock_t bno, xfs_extlen_t len); | 147 | xfs_agblock_t bno, xfs_extlen_t len); |
148 | |||
149 | void | ||
150 | xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
151 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | ||
152 | |||
153 | int | ||
154 | xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | ||
155 | |||
156 | static inline void xfs_alloc_busy_sort(struct list_head *list) | ||
157 | { | ||
158 | list_sort(NULL, list, xfs_busy_extent_ag_cmp); | ||
159 | } | ||
160 | |||
148 | #endif /* __KERNEL__ */ | 161 | #endif /* __KERNEL__ */ |
149 | 162 | ||
150 | /* | 163 | /* |
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 3916925e2584..8b469d53599f 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block( | |||
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
97 | 97 | ||
98 | xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); | ||
99 | |||
98 | xfs_trans_agbtree_delta(cur->bc_tp, 1); | 100 | xfs_trans_agbtree_delta(cur->bc_tp, 1); |
99 | new->s = cpu_to_be32(bno); | 101 | new->s = cpu_to_be32(bno); |
100 | 102 | ||
@@ -118,17 +120,6 @@ xfs_allocbt_free_block( | |||
118 | if (error) | 120 | if (error) |
119 | return error; | 121 | return error; |
120 | 122 | ||
121 | /* | ||
122 | * Since blocks move to the free list without the coordination used in | ||
123 | * xfs_bmap_finish, we can't allow block to be available for | ||
124 | * reallocation and non-transaction writing (user data) until we know | ||
125 | * that the transaction that moved it to the free list is permanently | ||
126 | * on disk. We track the blocks by declaring these blocks as "busy"; | ||
127 | * the busy list is maintained on a per-ag basis and each transaction | ||
128 | * records which entries should be removed when the iclog commits to | ||
129 | * disk. If a busy block is allocated, the iclog is pushed up to the | ||
130 | * LSN that freed the block. | ||
131 | */ | ||
132 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 123 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
133 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 124 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
134 | return 0; | 125 | return 0; |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index be628677c288..9a84a85c03b1 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -202,7 +202,7 @@ xfs_swap_extents( | |||
202 | xfs_inode_t *tip, /* tmp inode */ | 202 | xfs_inode_t *tip, /* tmp inode */ |
203 | xfs_swapext_t *sxp) | 203 | xfs_swapext_t *sxp) |
204 | { | 204 | { |
205 | xfs_mount_t *mp; | 205 | xfs_mount_t *mp = ip->i_mount; |
206 | xfs_trans_t *tp; | 206 | xfs_trans_t *tp; |
207 | xfs_bstat_t *sbp = &sxp->sx_stat; | 207 | xfs_bstat_t *sbp = &sxp->sx_stat; |
208 | xfs_ifork_t *tempifp, *ifp, *tifp; | 208 | xfs_ifork_t *tempifp, *ifp, *tifp; |
@@ -212,16 +212,12 @@ xfs_swap_extents( | |||
212 | int taforkblks = 0; | 212 | int taforkblks = 0; |
213 | __uint64_t tmp; | 213 | __uint64_t tmp; |
214 | 214 | ||
215 | mp = ip->i_mount; | ||
216 | |||
217 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | 215 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); |
218 | if (!tempifp) { | 216 | if (!tempifp) { |
219 | error = XFS_ERROR(ENOMEM); | 217 | error = XFS_ERROR(ENOMEM); |
220 | goto out; | 218 | goto out; |
221 | } | 219 | } |
222 | 220 | ||
223 | sbp = &sxp->sx_stat; | ||
224 | |||
225 | /* | 221 | /* |
226 | * we have to do two separate lock calls here to keep lockdep | 222 | * we have to do two separate lock calls here to keep lockdep |
227 | * happy. If we try to get all the locks in one call, lock will | 223 | * happy. If we try to get all the locks in one call, lock will |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d11ce613d692..c8e3349c287c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1354,7 +1354,7 @@ xfs_itruncate_start( | |||
1354 | return 0; | 1354 | return 0; |
1355 | } | 1355 | } |
1356 | last_byte = xfs_file_last_byte(ip); | 1356 | last_byte = xfs_file_last_byte(ip); |
1357 | trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); | 1357 | trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); |
1358 | if (last_byte > toss_start) { | 1358 | if (last_byte > toss_start) { |
1359 | if (flags & XFS_ITRUNC_DEFINITE) { | 1359 | if (flags & XFS_ITRUNC_DEFINITE) { |
1360 | xfs_tosspages(ip, toss_start, | 1360 | xfs_tosspages(ip, toss_start, |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 576fdfe81d60..09983a3344a5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -970,7 +970,6 @@ xfs_iflush_abort( | |||
970 | { | 970 | { |
971 | xfs_inode_log_item_t *iip = ip->i_itemp; | 971 | xfs_inode_log_item_t *iip = ip->i_itemp; |
972 | 972 | ||
973 | iip = ip->i_itemp; | ||
974 | if (iip) { | 973 | if (iip) { |
975 | struct xfs_ail *ailp = iip->ili_item.li_ailp; | 974 | struct xfs_ail *ailp = iip->ili_item.li_ailp; |
976 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 975 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b612ce4520ae..211930246f20 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log) | |||
1449 | 1449 | ||
1450 | xlog_cil_destroy(log); | 1450 | xlog_cil_destroy(log); |
1451 | 1451 | ||
1452 | /* | ||
1453 | * always need to ensure that the extra buffer does not point to memory | ||
1454 | * owned by another log buffer before we free it. | ||
1455 | */ | ||
1456 | xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); | ||
1457 | xfs_buf_free(log->l_xbuf); | ||
1458 | |||
1452 | iclog = log->l_iclog; | 1459 | iclog = log->l_iclog; |
1453 | for (i=0; i<log->l_iclog_bufs; i++) { | 1460 | for (i=0; i<log->l_iclog_bufs; i++) { |
1454 | xfs_buf_free(iclog->ic_bp); | 1461 | xfs_buf_free(iclog->ic_bp); |
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log) | |||
1458 | } | 1465 | } |
1459 | spinlock_destroy(&log->l_icloglock); | 1466 | spinlock_destroy(&log->l_icloglock); |
1460 | 1467 | ||
1461 | xfs_buf_free(log->l_xbuf); | ||
1462 | log->l_mp->m_log = NULL; | 1468 | log->l_mp->m_log = NULL; |
1463 | kmem_free(log); | 1469 | kmem_free(log); |
1464 | } /* xlog_dealloc_log */ | 1470 | } /* xlog_dealloc_log */ |
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get( | |||
3248 | return ticket; | 3254 | return ticket; |
3249 | } | 3255 | } |
3250 | 3256 | ||
3251 | xlog_tid_t | ||
3252 | xfs_log_get_trans_ident( | ||
3253 | struct xfs_trans *tp) | ||
3254 | { | ||
3255 | return tp->t_ticket->t_tid; | ||
3256 | } | ||
3257 | |||
3258 | /* | 3257 | /* |
3259 | * Allocate and initialise a new log ticket. | 3258 | * Allocate and initialise a new log ticket. |
3260 | */ | 3259 | */ |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 3bd3291ef8d2..78c9039994af 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *); | |||
189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); | 189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
191 | 191 | ||
192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | ||
193 | |||
194 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 192 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
195 | struct xfs_log_vec *log_vector, | 193 | struct xfs_log_vec *log_vector, |
196 | xfs_lsn_t *commit_lsn, int flags); | 194 | xfs_lsn_t *commit_lsn, int flags); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9ca59be08977..7d56e88a3f0e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -361,13 +361,12 @@ xlog_cil_committed( | |||
361 | int abort) | 361 | int abort) |
362 | { | 362 | { |
363 | struct xfs_cil_ctx *ctx = args; | 363 | struct xfs_cil_ctx *ctx = args; |
364 | struct xfs_busy_extent *busyp, *n; | ||
365 | 364 | ||
366 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, | 365 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
367 | ctx->start_lsn, abort); | 366 | ctx->start_lsn, abort); |
368 | 367 | ||
369 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | 368 | xfs_alloc_busy_sort(&ctx->busy_extents); |
370 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | 369 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents); |
371 | 370 | ||
372 | spin_lock(&ctx->cil->xc_cil_lock); | 371 | spin_lock(&ctx->cil->xc_cil_lock); |
373 | list_del(&ctx->committing); | 372 | list_del(&ctx->committing); |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 5864850e9e34..2d3b6a498d63 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i) | |||
146 | shutdown */ | 146 | shutdown */ |
147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ | 147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ |
148 | 148 | ||
149 | typedef __uint32_t xlog_tid_t; | ||
150 | |||
149 | #ifdef __KERNEL__ | 151 | #ifdef __KERNEL__ |
150 | /* | 152 | /* |
151 | * Below are states for covering allocation transactions. | 153 | * Below are states for covering allocation transactions. |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 5cc464a17c93..04142caedb2b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -205,6 +205,35 @@ xlog_bread( | |||
205 | } | 205 | } |
206 | 206 | ||
207 | /* | 207 | /* |
208 | * Read at an offset into the buffer. Returns with the buffer in it's original | ||
209 | * state regardless of the result of the read. | ||
210 | */ | ||
211 | STATIC int | ||
212 | xlog_bread_offset( | ||
213 | xlog_t *log, | ||
214 | xfs_daddr_t blk_no, /* block to read from */ | ||
215 | int nbblks, /* blocks to read */ | ||
216 | xfs_buf_t *bp, | ||
217 | xfs_caddr_t offset) | ||
218 | { | ||
219 | xfs_caddr_t orig_offset = XFS_BUF_PTR(bp); | ||
220 | int orig_len = bp->b_buffer_length; | ||
221 | int error, error2; | ||
222 | |||
223 | error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks)); | ||
224 | if (error) | ||
225 | return error; | ||
226 | |||
227 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | ||
228 | |||
229 | /* must reset buffer pointer even on error */ | ||
230 | error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len); | ||
231 | if (error) | ||
232 | return error; | ||
233 | return error2; | ||
234 | } | ||
235 | |||
236 | /* | ||
208 | * Write out the buffer at the given block for the given number of blocks. | 237 | * Write out the buffer at the given block for the given number of blocks. |
209 | * The buffer is kept locked across the write and is returned locked. | 238 | * The buffer is kept locked across the write and is returned locked. |
210 | * This can only be used for synchronous log writes. | 239 | * This can only be used for synchronous log writes. |
@@ -1229,20 +1258,12 @@ xlog_write_log_records( | |||
1229 | */ | 1258 | */ |
1230 | ealign = round_down(end_block, sectbb); | 1259 | ealign = round_down(end_block, sectbb); |
1231 | if (j == 0 && (start_block + endcount > ealign)) { | 1260 | if (j == 0 && (start_block + endcount > ealign)) { |
1232 | offset = XFS_BUF_PTR(bp); | 1261 | offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block); |
1233 | balign = BBTOB(ealign - start_block); | 1262 | error = xlog_bread_offset(log, ealign, sectbb, |
1234 | error = XFS_BUF_SET_PTR(bp, offset + balign, | 1263 | bp, offset); |
1235 | BBTOB(sectbb)); | ||
1236 | if (error) | 1264 | if (error) |
1237 | break; | 1265 | break; |
1238 | 1266 | ||
1239 | error = xlog_bread_noalign(log, ealign, sectbb, bp); | ||
1240 | if (error) | ||
1241 | break; | ||
1242 | |||
1243 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
1244 | if (error) | ||
1245 | break; | ||
1246 | } | 1267 | } |
1247 | 1268 | ||
1248 | offset = xlog_align(log, start_block, endcount, bp); | 1269 | offset = xlog_align(log, start_block, endcount, bp); |
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass( | |||
3448 | * - order is important. | 3469 | * - order is important. |
3449 | */ | 3470 | */ |
3450 | wrapped_hblks = hblks - split_hblks; | 3471 | wrapped_hblks = hblks - split_hblks; |
3451 | error = XFS_BUF_SET_PTR(hbp, | 3472 | error = xlog_bread_offset(log, 0, |
3452 | offset + BBTOB(split_hblks), | 3473 | wrapped_hblks, hbp, |
3453 | BBTOB(hblks - split_hblks)); | 3474 | offset + BBTOB(split_hblks)); |
3454 | if (error) | ||
3455 | goto bread_err2; | ||
3456 | |||
3457 | error = xlog_bread_noalign(log, 0, | ||
3458 | wrapped_hblks, hbp); | ||
3459 | if (error) | ||
3460 | goto bread_err2; | ||
3461 | |||
3462 | error = XFS_BUF_SET_PTR(hbp, offset, | ||
3463 | BBTOB(hblks)); | ||
3464 | if (error) | 3475 | if (error) |
3465 | goto bread_err2; | 3476 | goto bread_err2; |
3466 | } | 3477 | } |
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass( | |||
3511 | * _first_, then the log start (LR header end) | 3522 | * _first_, then the log start (LR header end) |
3512 | * - order is important. | 3523 | * - order is important. |
3513 | */ | 3524 | */ |
3514 | error = XFS_BUF_SET_PTR(dbp, | 3525 | error = xlog_bread_offset(log, 0, |
3515 | offset + BBTOB(split_bblks), | 3526 | bblks - split_bblks, hbp, |
3516 | BBTOB(bblks - split_bblks)); | 3527 | offset + BBTOB(split_bblks)); |
3517 | if (error) | ||
3518 | goto bread_err2; | ||
3519 | |||
3520 | error = xlog_bread_noalign(log, wrapped_hblks, | ||
3521 | bblks - split_bblks, | ||
3522 | dbp); | ||
3523 | if (error) | ||
3524 | goto bread_err2; | ||
3525 | |||
3526 | error = XFS_BUF_SET_PTR(dbp, offset, h_size); | ||
3527 | if (error) | 3528 | if (error) |
3528 | goto bread_err2; | 3529 | goto bread_err2; |
3529 | } | 3530 | } |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index bb3f9a7b24ed..b49b82363d20 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch( | |||
1900 | uint nmsb, | 1900 | uint nmsb, |
1901 | int rsvd) | 1901 | int rsvd) |
1902 | { | 1902 | { |
1903 | xfs_mod_sb_t *msbp = &msb[0]; | 1903 | xfs_mod_sb_t *msbp; |
1904 | int error = 0; | 1904 | int error = 0; |
1905 | 1905 | ||
1906 | /* | 1906 | /* |
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch( | |||
1910 | * changes will be atomic. | 1910 | * changes will be atomic. |
1911 | */ | 1911 | */ |
1912 | spin_lock(&mp->m_sb_lock); | 1912 | spin_lock(&mp->m_sb_lock); |
1913 | for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { | 1913 | for (msbp = msb; msbp < (msb + nmsb); msbp++) { |
1914 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || | 1914 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || |
1915 | msbp->msb_field > XFS_SBS_FDBLOCKS); | 1915 | msbp->msb_field > XFS_SBS_FDBLOCKS); |
1916 | 1916 | ||
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 76922793f64f..d1f24858ccc4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -608,10 +608,8 @@ STATIC void | |||
608 | xfs_trans_free( | 608 | xfs_trans_free( |
609 | struct xfs_trans *tp) | 609 | struct xfs_trans *tp) |
610 | { | 610 | { |
611 | struct xfs_busy_extent *busyp, *n; | 611 | xfs_alloc_busy_sort(&tp->t_busy); |
612 | 612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy); | |
613 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
614 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
615 | 613 | ||
616 | atomic_dec(&tp->t_mountp->m_active_trans); | 614 | atomic_dec(&tp->t_mountp->m_active_trans); |
617 | xfs_trans_free_dqinfo(tp); | 615 | xfs_trans_free_dqinfo(tp); |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 26d1867d8156..65584b55607d 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ | |||
73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ | 73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ |
74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | 74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ |
75 | 75 | ||
76 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
77 | |||
78 | /* | 76 | /* |
79 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 77 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
80 | * Disk based types: | 78 | * Disk based types: |