diff options
81 files changed, 1315 insertions, 1089 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 16bb9a328678..da031b93e182 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
@@ -3,10 +3,10 @@ | |||
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
4 | * All Rights Reserved. | 4 | * All Rights Reserved. |
5 | */ | 5 | */ |
6 | #include <linux/sched/mm.h> | 6 | #include "xfs.h" |
7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
8 | #include "kmem.h" | ||
9 | #include "xfs_message.h" | 8 | #include "xfs_message.h" |
9 | #include "xfs_trace.h" | ||
10 | 10 | ||
11 | void * | 11 | void * |
12 | kmem_alloc(size_t size, xfs_km_flags_t flags) | 12 | kmem_alloc(size_t size, xfs_km_flags_t flags) |
@@ -15,9 +15,11 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) | |||
15 | gfp_t lflags = kmem_flags_convert(flags); | 15 | gfp_t lflags = kmem_flags_convert(flags); |
16 | void *ptr; | 16 | void *ptr; |
17 | 17 | ||
18 | trace_kmem_alloc(size, flags, _RET_IP_); | ||
19 | |||
18 | do { | 20 | do { |
19 | ptr = kmalloc(size, lflags); | 21 | ptr = kmalloc(size, lflags); |
20 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | 22 | if (ptr || (flags & KM_MAYFAIL)) |
21 | return ptr; | 23 | return ptr; |
22 | if (!(++retries % 100)) | 24 | if (!(++retries % 100)) |
23 | xfs_err(NULL, | 25 | xfs_err(NULL, |
@@ -28,28 +30,24 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) | |||
28 | } while (1); | 30 | } while (1); |
29 | } | 31 | } |
30 | 32 | ||
31 | void * | 33 | |
32 | kmem_alloc_large(size_t size, xfs_km_flags_t flags) | 34 | /* |
35 | * __vmalloc() will allocate data pages and auxiliary structures (e.g. | ||
36 | * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context here. Hence | ||
37 | * we need to tell memory reclaim that we are in such a context via | ||
38 | * PF_MEMALLOC_NOFS to prevent memory reclaim re-entering the filesystem here | ||
39 | * and potentially deadlocking. | ||
40 | */ | ||
41 | static void * | ||
42 | __kmem_vmalloc(size_t size, xfs_km_flags_t flags) | ||
33 | { | 43 | { |
34 | unsigned nofs_flag = 0; | 44 | unsigned nofs_flag = 0; |
35 | void *ptr; | 45 | void *ptr; |
36 | gfp_t lflags; | 46 | gfp_t lflags = kmem_flags_convert(flags); |
37 | |||
38 | ptr = kmem_alloc(size, flags | KM_MAYFAIL); | ||
39 | if (ptr) | ||
40 | return ptr; | ||
41 | 47 | ||
42 | /* | ||
43 | * __vmalloc() will allocate data pages and auxiliary structures (e.g. | ||
44 | * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context | ||
45 | * here. Hence we need to tell memory reclaim that we are in such a | ||
46 | * context via PF_MEMALLOC_NOFS to prevent memory reclaim re-entering | ||
47 | * the filesystem here and potentially deadlocking. | ||
48 | */ | ||
49 | if (flags & KM_NOFS) | 48 | if (flags & KM_NOFS) |
50 | nofs_flag = memalloc_nofs_save(); | 49 | nofs_flag = memalloc_nofs_save(); |
51 | 50 | ||
52 | lflags = kmem_flags_convert(flags); | ||
53 | ptr = __vmalloc(size, lflags, PAGE_KERNEL); | 51 | ptr = __vmalloc(size, lflags, PAGE_KERNEL); |
54 | 52 | ||
55 | if (flags & KM_NOFS) | 53 | if (flags & KM_NOFS) |
@@ -58,6 +56,44 @@ kmem_alloc_large(size_t size, xfs_km_flags_t flags) | |||
58 | return ptr; | 56 | return ptr; |
59 | } | 57 | } |
60 | 58 | ||
59 | /* | ||
60 | * Same as kmem_alloc_large, except we guarantee the buffer returned is aligned | ||
61 | * to the @align_mask. We only guarantee alignment up to page size, we'll clamp | ||
62 | * alignment at page size if it is larger. vmalloc always returns a PAGE_SIZE | ||
63 | * aligned region. | ||
64 | */ | ||
65 | void * | ||
66 | kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags) | ||
67 | { | ||
68 | void *ptr; | ||
69 | |||
70 | trace_kmem_alloc_io(size, flags, _RET_IP_); | ||
71 | |||
72 | if (WARN_ON_ONCE(align_mask >= PAGE_SIZE)) | ||
73 | align_mask = PAGE_SIZE - 1; | ||
74 | |||
75 | ptr = kmem_alloc(size, flags | KM_MAYFAIL); | ||
76 | if (ptr) { | ||
77 | if (!((uintptr_t)ptr & align_mask)) | ||
78 | return ptr; | ||
79 | kfree(ptr); | ||
80 | } | ||
81 | return __kmem_vmalloc(size, flags); | ||
82 | } | ||
83 | |||
84 | void * | ||
85 | kmem_alloc_large(size_t size, xfs_km_flags_t flags) | ||
86 | { | ||
87 | void *ptr; | ||
88 | |||
89 | trace_kmem_alloc_large(size, flags, _RET_IP_); | ||
90 | |||
91 | ptr = kmem_alloc(size, flags | KM_MAYFAIL); | ||
92 | if (ptr) | ||
93 | return ptr; | ||
94 | return __kmem_vmalloc(size, flags); | ||
95 | } | ||
96 | |||
61 | void * | 97 | void * |
62 | kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags) | 98 | kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags) |
63 | { | 99 | { |
@@ -65,9 +101,11 @@ kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags) | |||
65 | gfp_t lflags = kmem_flags_convert(flags); | 101 | gfp_t lflags = kmem_flags_convert(flags); |
66 | void *ptr; | 102 | void *ptr; |
67 | 103 | ||
104 | trace_kmem_realloc(newsize, flags, _RET_IP_); | ||
105 | |||
68 | do { | 106 | do { |
69 | ptr = krealloc(old, newsize, lflags); | 107 | ptr = krealloc(old, newsize, lflags); |
70 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | 108 | if (ptr || (flags & KM_MAYFAIL)) |
71 | return ptr; | 109 | return ptr; |
72 | if (!(++retries % 100)) | 110 | if (!(++retries % 100)) |
73 | xfs_err(NULL, | 111 | xfs_err(NULL, |
@@ -85,9 +123,10 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) | |||
85 | gfp_t lflags = kmem_flags_convert(flags); | 123 | gfp_t lflags = kmem_flags_convert(flags); |
86 | void *ptr; | 124 | void *ptr; |
87 | 125 | ||
126 | trace_kmem_zone_alloc(kmem_cache_size(zone), flags, _RET_IP_); | ||
88 | do { | 127 | do { |
89 | ptr = kmem_cache_alloc(zone, lflags); | 128 | ptr = kmem_cache_alloc(zone, lflags); |
90 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | 129 | if (ptr || (flags & KM_MAYFAIL)) |
91 | return ptr; | 130 | return ptr; |
92 | if (!(++retries % 100)) | 131 | if (!(++retries % 100)) |
93 | xfs_err(NULL, | 132 | xfs_err(NULL, |
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 267655acd426..8170d95cf930 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h | |||
@@ -16,8 +16,6 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | typedef unsigned __bitwise xfs_km_flags_t; | 18 | typedef unsigned __bitwise xfs_km_flags_t; |
19 | #define KM_SLEEP ((__force xfs_km_flags_t)0x0001u) | ||
20 | #define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) | ||
21 | #define KM_NOFS ((__force xfs_km_flags_t)0x0004u) | 19 | #define KM_NOFS ((__force xfs_km_flags_t)0x0004u) |
22 | #define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) | 20 | #define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) |
23 | #define KM_ZERO ((__force xfs_km_flags_t)0x0010u) | 21 | #define KM_ZERO ((__force xfs_km_flags_t)0x0010u) |
@@ -32,15 +30,11 @@ kmem_flags_convert(xfs_km_flags_t flags) | |||
32 | { | 30 | { |
33 | gfp_t lflags; | 31 | gfp_t lflags; |
34 | 32 | ||
35 | BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO)); | 33 | BUG_ON(flags & ~(KM_NOFS|KM_MAYFAIL|KM_ZERO)); |
36 | 34 | ||
37 | if (flags & KM_NOSLEEP) { | 35 | lflags = GFP_KERNEL | __GFP_NOWARN; |
38 | lflags = GFP_ATOMIC | __GFP_NOWARN; | 36 | if (flags & KM_NOFS) |
39 | } else { | 37 | lflags &= ~__GFP_FS; |
40 | lflags = GFP_KERNEL | __GFP_NOWARN; | ||
41 | if (flags & KM_NOFS) | ||
42 | lflags &= ~__GFP_FS; | ||
43 | } | ||
44 | 38 | ||
45 | /* | 39 | /* |
46 | * Default page/slab allocator behavior is to retry for ever | 40 | * Default page/slab allocator behavior is to retry for ever |
@@ -59,6 +53,7 @@ kmem_flags_convert(xfs_km_flags_t flags) | |||
59 | } | 53 | } |
60 | 54 | ||
61 | extern void *kmem_alloc(size_t, xfs_km_flags_t); | 55 | extern void *kmem_alloc(size_t, xfs_km_flags_t); |
56 | extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags); | ||
62 | extern void *kmem_alloc_large(size_t size, xfs_km_flags_t); | 57 | extern void *kmem_alloc_large(size_t size, xfs_km_flags_t); |
63 | extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t); | 58 | extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t); |
64 | static inline void kmem_free(const void *ptr) | 59 | static inline void kmem_free(const void *ptr) |
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 372ad55631fc..533b04aaf6f6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -2205,7 +2205,7 @@ xfs_defer_agfl_block( | |||
2205 | ASSERT(xfs_bmap_free_item_zone != NULL); | 2205 | ASSERT(xfs_bmap_free_item_zone != NULL); |
2206 | ASSERT(oinfo != NULL); | 2206 | ASSERT(oinfo != NULL); |
2207 | 2207 | ||
2208 | new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); | 2208 | new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); |
2209 | new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); | 2209 | new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); |
2210 | new->xefi_blockcount = 1; | 2210 | new->xefi_blockcount = 1; |
2211 | new->xefi_oinfo = *oinfo; | 2211 | new->xefi_oinfo = *oinfo; |
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index d6ed5d2c07c2..58fa85cec325 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h | |||
@@ -81,10 +81,9 @@ typedef struct xfs_alloc_arg { | |||
81 | /* | 81 | /* |
82 | * Defines for datatype | 82 | * Defines for datatype |
83 | */ | 83 | */ |
84 | #define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ | 84 | #define XFS_ALLOC_INITIAL_USER_DATA (1 << 0)/* special case start of file */ |
85 | #define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ | 85 | #define XFS_ALLOC_USERDATA_ZERO (1 << 1)/* zero extent on allocation */ |
86 | #define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ | 86 | #define XFS_ALLOC_NOBUSY (1 << 2)/* Busy extents not allowed */ |
87 | #define XFS_ALLOC_NOBUSY (1 << 3)/* Busy extents not allowed */ | ||
88 | 87 | ||
89 | static inline bool | 88 | static inline bool |
90 | xfs_alloc_is_userdata(int datatype) | 89 | xfs_alloc_is_userdata(int datatype) |
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index d48fcf11cc35..510ca6974604 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c | |||
@@ -97,7 +97,10 @@ xfs_inode_hasattr( | |||
97 | * Overall external interface routines. | 97 | * Overall external interface routines. |
98 | *========================================================================*/ | 98 | *========================================================================*/ |
99 | 99 | ||
100 | /* Retrieve an extended attribute and its value. Must have ilock. */ | 100 | /* |
101 | * Retrieve an extended attribute and its value. Must have ilock. | ||
102 | * Returns 0 on successful retrieval, otherwise an error. | ||
103 | */ | ||
101 | int | 104 | int |
102 | xfs_attr_get_ilocked( | 105 | xfs_attr_get_ilocked( |
103 | struct xfs_inode *ip, | 106 | struct xfs_inode *ip, |
@@ -115,12 +118,28 @@ xfs_attr_get_ilocked( | |||
115 | return xfs_attr_node_get(args); | 118 | return xfs_attr_node_get(args); |
116 | } | 119 | } |
117 | 120 | ||
118 | /* Retrieve an extended attribute by name, and its value. */ | 121 | /* |
122 | * Retrieve an extended attribute by name, and its value if requested. | ||
123 | * | ||
124 | * If ATTR_KERNOVAL is set in @flags, then the caller does not want the value, | ||
125 | * just an indication whether the attribute exists and the size of the value if | ||
126 | * it exists. The size is returned in @valuelenp. | ||
127 | * | ||
128 | * If the attribute is found, but exceeds the size limit set by the caller in | ||
129 | * @valuelenp, return -ERANGE with the size of the attribute that was found in | ||
130 | * @valuelenp. | ||
131 | * | ||
132 | * If ATTR_ALLOC is set in @flags, allocate the buffer for the value after | ||
133 | * existence of the attribute has been determined. On success, return that | ||
134 | * buffer to the caller and leave them to free it. On failure, free any | ||
135 | * allocated buffer and ensure the buffer pointer returned to the caller is | ||
136 | * null. | ||
137 | */ | ||
119 | int | 138 | int |
120 | xfs_attr_get( | 139 | xfs_attr_get( |
121 | struct xfs_inode *ip, | 140 | struct xfs_inode *ip, |
122 | const unsigned char *name, | 141 | const unsigned char *name, |
123 | unsigned char *value, | 142 | unsigned char **value, |
124 | int *valuelenp, | 143 | int *valuelenp, |
125 | int flags) | 144 | int flags) |
126 | { | 145 | { |
@@ -128,6 +147,8 @@ xfs_attr_get( | |||
128 | uint lock_mode; | 147 | uint lock_mode; |
129 | int error; | 148 | int error; |
130 | 149 | ||
150 | ASSERT((flags & (ATTR_ALLOC | ATTR_KERNOVAL)) || *value); | ||
151 | |||
131 | XFS_STATS_INC(ip->i_mount, xs_attr_get); | 152 | XFS_STATS_INC(ip->i_mount, xs_attr_get); |
132 | 153 | ||
133 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 154 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
@@ -137,17 +158,29 @@ xfs_attr_get( | |||
137 | if (error) | 158 | if (error) |
138 | return error; | 159 | return error; |
139 | 160 | ||
140 | args.value = value; | ||
141 | args.valuelen = *valuelenp; | ||
142 | /* Entirely possible to look up a name which doesn't exist */ | 161 | /* Entirely possible to look up a name which doesn't exist */ |
143 | args.op_flags = XFS_DA_OP_OKNOENT; | 162 | args.op_flags = XFS_DA_OP_OKNOENT; |
163 | if (flags & ATTR_ALLOC) | ||
164 | args.op_flags |= XFS_DA_OP_ALLOCVAL; | ||
165 | else | ||
166 | args.value = *value; | ||
167 | args.valuelen = *valuelenp; | ||
144 | 168 | ||
145 | lock_mode = xfs_ilock_attr_map_shared(ip); | 169 | lock_mode = xfs_ilock_attr_map_shared(ip); |
146 | error = xfs_attr_get_ilocked(ip, &args); | 170 | error = xfs_attr_get_ilocked(ip, &args); |
147 | xfs_iunlock(ip, lock_mode); | 171 | xfs_iunlock(ip, lock_mode); |
148 | |||
149 | *valuelenp = args.valuelen; | 172 | *valuelenp = args.valuelen; |
150 | return error == -EEXIST ? 0 : error; | 173 | |
174 | /* on error, we have to clean up allocated value buffers */ | ||
175 | if (error) { | ||
176 | if (flags & ATTR_ALLOC) { | ||
177 | kmem_free(args.value); | ||
178 | *value = NULL; | ||
179 | } | ||
180 | return error; | ||
181 | } | ||
182 | *value = args.value; | ||
183 | return 0; | ||
151 | } | 184 | } |
152 | 185 | ||
153 | /* | 186 | /* |
@@ -768,6 +801,8 @@ xfs_attr_leaf_removename( | |||
768 | * | 801 | * |
769 | * This leaf block cannot have a "remote" value, we only call this routine | 802 | * This leaf block cannot have a "remote" value, we only call this routine |
770 | * if bmap_one_block() says there is only one block (ie: no remote blks). | 803 | * if bmap_one_block() says there is only one block (ie: no remote blks). |
804 | * | ||
805 | * Returns 0 on successful retrieval, otherwise an error. | ||
771 | */ | 806 | */ |
772 | STATIC int | 807 | STATIC int |
773 | xfs_attr_leaf_get(xfs_da_args_t *args) | 808 | xfs_attr_leaf_get(xfs_da_args_t *args) |
@@ -789,9 +824,6 @@ xfs_attr_leaf_get(xfs_da_args_t *args) | |||
789 | } | 824 | } |
790 | error = xfs_attr3_leaf_getvalue(bp, args); | 825 | error = xfs_attr3_leaf_getvalue(bp, args); |
791 | xfs_trans_brelse(args->trans, bp); | 826 | xfs_trans_brelse(args->trans, bp); |
792 | if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { | ||
793 | error = xfs_attr_rmtval_get(args); | ||
794 | } | ||
795 | return error; | 827 | return error; |
796 | } | 828 | } |
797 | 829 | ||
@@ -1268,11 +1300,13 @@ xfs_attr_refillstate(xfs_da_state_t *state) | |||
1268 | } | 1300 | } |
1269 | 1301 | ||
1270 | /* | 1302 | /* |
1271 | * Look up a filename in a node attribute list. | 1303 | * Retrieve the attribute data from a node attribute list. |
1272 | * | 1304 | * |
1273 | * This routine gets called for any attribute fork that has more than one | 1305 | * This routine gets called for any attribute fork that has more than one |
1274 | * block, ie: both true Btree attr lists and for single-leaf-blocks with | 1306 | * block, ie: both true Btree attr lists and for single-leaf-blocks with |
1275 | * "remote" values taking up more blocks. | 1307 | * "remote" values taking up more blocks. |
1308 | * | ||
1309 | * Returns 0 on successful retrieval, otherwise an error. | ||
1276 | */ | 1310 | */ |
1277 | STATIC int | 1311 | STATIC int |
1278 | xfs_attr_node_get(xfs_da_args_t *args) | 1312 | xfs_attr_node_get(xfs_da_args_t *args) |
@@ -1294,24 +1328,21 @@ xfs_attr_node_get(xfs_da_args_t *args) | |||
1294 | error = xfs_da3_node_lookup_int(state, &retval); | 1328 | error = xfs_da3_node_lookup_int(state, &retval); |
1295 | if (error) { | 1329 | if (error) { |
1296 | retval = error; | 1330 | retval = error; |
1297 | } else if (retval == -EEXIST) { | 1331 | goto out_release; |
1298 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1299 | ASSERT(blk->bp != NULL); | ||
1300 | ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1301 | |||
1302 | /* | ||
1303 | * Get the value, local or "remote" | ||
1304 | */ | ||
1305 | retval = xfs_attr3_leaf_getvalue(blk->bp, args); | ||
1306 | if (!retval && (args->rmtblkno > 0) | ||
1307 | && !(args->flags & ATTR_KERNOVAL)) { | ||
1308 | retval = xfs_attr_rmtval_get(args); | ||
1309 | } | ||
1310 | } | 1332 | } |
1333 | if (retval != -EEXIST) | ||
1334 | goto out_release; | ||
1335 | |||
1336 | /* | ||
1337 | * Get the value, local or "remote" | ||
1338 | */ | ||
1339 | blk = &state->path.blk[state->path.active - 1]; | ||
1340 | retval = xfs_attr3_leaf_getvalue(blk->bp, args); | ||
1311 | 1341 | ||
1312 | /* | 1342 | /* |
1313 | * If not in a transaction, we have to release all the buffers. | 1343 | * If not in a transaction, we have to release all the buffers. |
1314 | */ | 1344 | */ |
1345 | out_release: | ||
1315 | for (i = 0; i < state->path.active; i++) { | 1346 | for (i = 0; i < state->path.active; i++) { |
1316 | xfs_trans_brelse(args->trans, state->path.blk[i].bp); | 1347 | xfs_trans_brelse(args->trans, state->path.blk[i].bp); |
1317 | state->path.blk[i].bp = NULL; | 1348 | state->path.blk[i].bp = NULL; |
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index ff28ebf3b635..94badfa1743e 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h | |||
@@ -37,6 +37,7 @@ struct xfs_attr_list_context; | |||
37 | #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ | 37 | #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ |
38 | 38 | ||
39 | #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ | 39 | #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ |
40 | #define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */ | ||
40 | 41 | ||
41 | #define XFS_ATTR_FLAGS \ | 42 | #define XFS_ATTR_FLAGS \ |
42 | { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ | 43 | { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ |
@@ -47,7 +48,8 @@ struct xfs_attr_list_context; | |||
47 | { ATTR_REPLACE, "REPLACE" }, \ | 48 | { ATTR_REPLACE, "REPLACE" }, \ |
48 | { ATTR_KERNOTIME, "KERNOTIME" }, \ | 49 | { ATTR_KERNOTIME, "KERNOTIME" }, \ |
49 | { ATTR_KERNOVAL, "KERNOVAL" }, \ | 50 | { ATTR_KERNOVAL, "KERNOVAL" }, \ |
50 | { ATTR_INCOMPLETE, "INCOMPLETE" } | 51 | { ATTR_INCOMPLETE, "INCOMPLETE" }, \ |
52 | { ATTR_ALLOC, "ALLOC" } | ||
51 | 53 | ||
52 | /* | 54 | /* |
53 | * The maximum size (into the kernel or returned from the kernel) of an | 55 | * The maximum size (into the kernel or returned from the kernel) of an |
@@ -143,7 +145,7 @@ int xfs_attr_list_int(struct xfs_attr_list_context *); | |||
143 | int xfs_inode_hasattr(struct xfs_inode *ip); | 145 | int xfs_inode_hasattr(struct xfs_inode *ip); |
144 | int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args); | 146 | int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args); |
145 | int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, | 147 | int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, |
146 | unsigned char *value, int *valuelenp, int flags); | 148 | unsigned char **value, int *valuelenp, int flags); |
147 | int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, | 149 | int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, |
148 | unsigned char *value, int valuelen, int flags); | 150 | unsigned char *value, int valuelen, int flags); |
149 | int xfs_attr_set_args(struct xfs_da_args *args); | 151 | int xfs_attr_set_args(struct xfs_da_args *args); |
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 70eb941d02e4..b9f019603d0b 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c | |||
@@ -393,6 +393,50 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags) | |||
393 | return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); | 393 | return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); |
394 | } | 394 | } |
395 | 395 | ||
396 | static int | ||
397 | xfs_attr_copy_value( | ||
398 | struct xfs_da_args *args, | ||
399 | unsigned char *value, | ||
400 | int valuelen) | ||
401 | { | ||
402 | /* | ||
403 | * No copy if all we have to do is get the length | ||
404 | */ | ||
405 | if (args->flags & ATTR_KERNOVAL) { | ||
406 | args->valuelen = valuelen; | ||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * No copy if the length of the existing buffer is too small | ||
412 | */ | ||
413 | if (args->valuelen < valuelen) { | ||
414 | args->valuelen = valuelen; | ||
415 | return -ERANGE; | ||
416 | } | ||
417 | |||
418 | if (args->op_flags & XFS_DA_OP_ALLOCVAL) { | ||
419 | args->value = kmem_alloc_large(valuelen, 0); | ||
420 | if (!args->value) | ||
421 | return -ENOMEM; | ||
422 | } | ||
423 | args->valuelen = valuelen; | ||
424 | |||
425 | /* remote block xattr requires IO for copy-in */ | ||
426 | if (args->rmtblkno) | ||
427 | return xfs_attr_rmtval_get(args); | ||
428 | |||
429 | /* | ||
430 | * This is to prevent a GCC warning because the remote xattr case | ||
431 | * doesn't have a value to pass in. In that case, we never reach here, | ||
432 | * but GCC can't work that out and so throws a "passing NULL to | ||
433 | * memcpy" warning. | ||
434 | */ | ||
435 | if (!value) | ||
436 | return -EINVAL; | ||
437 | memcpy(args->value, value, valuelen); | ||
438 | return 0; | ||
439 | } | ||
396 | 440 | ||
397 | /*======================================================================== | 441 | /*======================================================================== |
398 | * External routines when attribute fork size < XFS_LITINO(mp). | 442 | * External routines when attribute fork size < XFS_LITINO(mp). |
@@ -720,15 +764,19 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) | |||
720 | } | 764 | } |
721 | 765 | ||
722 | /* | 766 | /* |
723 | * Look up a name in a shortform attribute list structure. | 767 | * Retrieve the attribute value and length. |
768 | * | ||
769 | * If ATTR_KERNOVAL is specified, only the length needs to be returned. | ||
770 | * Unlike a lookup, we only return an error if the attribute does not | ||
771 | * exist or we can't retrieve the value. | ||
724 | */ | 772 | */ |
725 | /*ARGSUSED*/ | ||
726 | int | 773 | int |
727 | xfs_attr_shortform_getvalue(xfs_da_args_t *args) | 774 | xfs_attr_shortform_getvalue( |
775 | struct xfs_da_args *args) | ||
728 | { | 776 | { |
729 | xfs_attr_shortform_t *sf; | 777 | struct xfs_attr_shortform *sf; |
730 | xfs_attr_sf_entry_t *sfe; | 778 | struct xfs_attr_sf_entry *sfe; |
731 | int i; | 779 | int i; |
732 | 780 | ||
733 | ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); | 781 | ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); |
734 | sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; | 782 | sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; |
@@ -741,18 +789,8 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) | |||
741 | continue; | 789 | continue; |
742 | if (!xfs_attr_namesp_match(args->flags, sfe->flags)) | 790 | if (!xfs_attr_namesp_match(args->flags, sfe->flags)) |
743 | continue; | 791 | continue; |
744 | if (args->flags & ATTR_KERNOVAL) { | 792 | return xfs_attr_copy_value(args, &sfe->nameval[args->namelen], |
745 | args->valuelen = sfe->valuelen; | 793 | sfe->valuelen); |
746 | return -EEXIST; | ||
747 | } | ||
748 | if (args->valuelen < sfe->valuelen) { | ||
749 | args->valuelen = sfe->valuelen; | ||
750 | return -ERANGE; | ||
751 | } | ||
752 | args->valuelen = sfe->valuelen; | ||
753 | memcpy(args->value, &sfe->nameval[args->namelen], | ||
754 | args->valuelen); | ||
755 | return -EEXIST; | ||
756 | } | 794 | } |
757 | return -ENOATTR; | 795 | return -ENOATTR; |
758 | } | 796 | } |
@@ -782,7 +820,7 @@ xfs_attr_shortform_to_leaf( | |||
782 | ifp = dp->i_afp; | 820 | ifp = dp->i_afp; |
783 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | 821 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; |
784 | size = be16_to_cpu(sf->hdr.totsize); | 822 | size = be16_to_cpu(sf->hdr.totsize); |
785 | tmpbuffer = kmem_alloc(size, KM_SLEEP); | 823 | tmpbuffer = kmem_alloc(size, 0); |
786 | ASSERT(tmpbuffer != NULL); | 824 | ASSERT(tmpbuffer != NULL); |
787 | memcpy(tmpbuffer, ifp->if_u1.if_data, size); | 825 | memcpy(tmpbuffer, ifp->if_u1.if_data, size); |
788 | sf = (xfs_attr_shortform_t *)tmpbuffer; | 826 | sf = (xfs_attr_shortform_t *)tmpbuffer; |
@@ -985,7 +1023,7 @@ xfs_attr3_leaf_to_shortform( | |||
985 | 1023 | ||
986 | trace_xfs_attr_leaf_to_sf(args); | 1024 | trace_xfs_attr_leaf_to_sf(args); |
987 | 1025 | ||
988 | tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); | 1026 | tmpbuffer = kmem_alloc(args->geo->blksize, 0); |
989 | if (!tmpbuffer) | 1027 | if (!tmpbuffer) |
990 | return -ENOMEM; | 1028 | return -ENOMEM; |
991 | 1029 | ||
@@ -1448,7 +1486,7 @@ xfs_attr3_leaf_compact( | |||
1448 | 1486 | ||
1449 | trace_xfs_attr_leaf_compact(args); | 1487 | trace_xfs_attr_leaf_compact(args); |
1450 | 1488 | ||
1451 | tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); | 1489 | tmpbuffer = kmem_alloc(args->geo->blksize, 0); |
1452 | memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); | 1490 | memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); |
1453 | memset(bp->b_addr, 0, args->geo->blksize); | 1491 | memset(bp->b_addr, 0, args->geo->blksize); |
1454 | leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; | 1492 | leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; |
@@ -2167,7 +2205,7 @@ xfs_attr3_leaf_unbalance( | |||
2167 | struct xfs_attr_leafblock *tmp_leaf; | 2205 | struct xfs_attr_leafblock *tmp_leaf; |
2168 | struct xfs_attr3_icleaf_hdr tmphdr; | 2206 | struct xfs_attr3_icleaf_hdr tmphdr; |
2169 | 2207 | ||
2170 | tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); | 2208 | tmp_leaf = kmem_zalloc(state->args->geo->blksize, 0); |
2171 | 2209 | ||
2172 | /* | 2210 | /* |
2173 | * Copy the header into the temp leaf so that all the stuff | 2211 | * Copy the header into the temp leaf so that all the stuff |
@@ -2350,6 +2388,10 @@ xfs_attr3_leaf_lookup_int( | |||
2350 | /* | 2388 | /* |
2351 | * Get the value associated with an attribute name from a leaf attribute | 2389 | * Get the value associated with an attribute name from a leaf attribute |
2352 | * list structure. | 2390 | * list structure. |
2391 | * | ||
2392 | * If ATTR_KERNOVAL is specified, only the length needs to be returned. | ||
2393 | * Unlike a lookup, we only return an error if the attribute does not | ||
2394 | * exist or we can't retrieve the value. | ||
2353 | */ | 2395 | */ |
2354 | int | 2396 | int |
2355 | xfs_attr3_leaf_getvalue( | 2397 | xfs_attr3_leaf_getvalue( |
@@ -2361,7 +2403,6 @@ xfs_attr3_leaf_getvalue( | |||
2361 | struct xfs_attr_leaf_entry *entry; | 2403 | struct xfs_attr_leaf_entry *entry; |
2362 | struct xfs_attr_leaf_name_local *name_loc; | 2404 | struct xfs_attr_leaf_name_local *name_loc; |
2363 | struct xfs_attr_leaf_name_remote *name_rmt; | 2405 | struct xfs_attr_leaf_name_remote *name_rmt; |
2364 | int valuelen; | ||
2365 | 2406 | ||
2366 | leaf = bp->b_addr; | 2407 | leaf = bp->b_addr; |
2367 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); | 2408 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
@@ -2373,36 +2414,19 @@ xfs_attr3_leaf_getvalue( | |||
2373 | name_loc = xfs_attr3_leaf_name_local(leaf, args->index); | 2414 | name_loc = xfs_attr3_leaf_name_local(leaf, args->index); |
2374 | ASSERT(name_loc->namelen == args->namelen); | 2415 | ASSERT(name_loc->namelen == args->namelen); |
2375 | ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); | 2416 | ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); |
2376 | valuelen = be16_to_cpu(name_loc->valuelen); | 2417 | return xfs_attr_copy_value(args, |
2377 | if (args->flags & ATTR_KERNOVAL) { | 2418 | &name_loc->nameval[args->namelen], |
2378 | args->valuelen = valuelen; | 2419 | be16_to_cpu(name_loc->valuelen)); |
2379 | return 0; | 2420 | } |
2380 | } | 2421 | |
2381 | if (args->valuelen < valuelen) { | 2422 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); |
2382 | args->valuelen = valuelen; | 2423 | ASSERT(name_rmt->namelen == args->namelen); |
2383 | return -ERANGE; | 2424 | ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); |
2384 | } | 2425 | args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); |
2385 | args->valuelen = valuelen; | 2426 | args->rmtblkno = be32_to_cpu(name_rmt->valueblk); |
2386 | memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); | 2427 | args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, |
2387 | } else { | 2428 | args->rmtvaluelen); |
2388 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); | 2429 | return xfs_attr_copy_value(args, NULL, args->rmtvaluelen); |
2389 | ASSERT(name_rmt->namelen == args->namelen); | ||
2390 | ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); | ||
2391 | args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); | ||
2392 | args->rmtblkno = be32_to_cpu(name_rmt->valueblk); | ||
2393 | args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, | ||
2394 | args->rmtvaluelen); | ||
2395 | if (args->flags & ATTR_KERNOVAL) { | ||
2396 | args->valuelen = args->rmtvaluelen; | ||
2397 | return 0; | ||
2398 | } | ||
2399 | if (args->valuelen < args->rmtvaluelen) { | ||
2400 | args->valuelen = args->rmtvaluelen; | ||
2401 | return -ERANGE; | ||
2402 | } | ||
2403 | args->valuelen = args->rmtvaluelen; | ||
2404 | } | ||
2405 | return 0; | ||
2406 | } | 2430 | } |
2407 | 2431 | ||
2408 | /*======================================================================== | 2432 | /*======================================================================== |
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 4eb30d357045..3e39b7d40f25 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c | |||
@@ -358,6 +358,8 @@ xfs_attr_rmtval_copyin( | |||
358 | /* | 358 | /* |
359 | * Read the value associated with an attribute from the out-of-line buffer | 359 | * Read the value associated with an attribute from the out-of-line buffer |
360 | * that we stored it in. | 360 | * that we stored it in. |
361 | * | ||
362 | * Returns 0 on successful retrieval, otherwise an error. | ||
361 | */ | 363 | */ |
362 | int | 364 | int |
363 | xfs_attr_rmtval_get( | 365 | xfs_attr_rmtval_get( |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 07aad70f3931..054b4ce30033 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -553,7 +553,7 @@ __xfs_bmap_add_free( | |||
553 | #endif | 553 | #endif |
554 | ASSERT(xfs_bmap_free_item_zone != NULL); | 554 | ASSERT(xfs_bmap_free_item_zone != NULL); |
555 | 555 | ||
556 | new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); | 556 | new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); |
557 | new->xefi_startblock = bno; | 557 | new->xefi_startblock = bno; |
558 | new->xefi_blockcount = (xfs_extlen_t)len; | 558 | new->xefi_blockcount = (xfs_extlen_t)len; |
559 | if (oinfo) | 559 | if (oinfo) |
@@ -1099,7 +1099,7 @@ xfs_bmap_add_attrfork( | |||
1099 | if (error) | 1099 | if (error) |
1100 | goto trans_cancel; | 1100 | goto trans_cancel; |
1101 | ASSERT(ip->i_afp == NULL); | 1101 | ASSERT(ip->i_afp == NULL); |
1102 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); | 1102 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, 0); |
1103 | ip->i_afp->if_flags = XFS_IFEXTENTS; | 1103 | ip->i_afp->if_flags = XFS_IFEXTENTS; |
1104 | logflags = 0; | 1104 | logflags = 0; |
1105 | switch (ip->i_d.di_format) { | 1105 | switch (ip->i_d.di_format) { |
@@ -1985,11 +1985,8 @@ xfs_bmap_add_extent_delay_real( | |||
1985 | } | 1985 | } |
1986 | 1986 | ||
1987 | /* add reverse mapping unless caller opted out */ | 1987 | /* add reverse mapping unless caller opted out */ |
1988 | if (!(bma->flags & XFS_BMAPI_NORMAP)) { | 1988 | if (!(bma->flags & XFS_BMAPI_NORMAP)) |
1989 | error = xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); | 1989 | xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); |
1990 | if (error) | ||
1991 | goto done; | ||
1992 | } | ||
1993 | 1990 | ||
1994 | /* convert to a btree if necessary */ | 1991 | /* convert to a btree if necessary */ |
1995 | if (xfs_bmap_needs_btree(bma->ip, whichfork)) { | 1992 | if (xfs_bmap_needs_btree(bma->ip, whichfork)) { |
@@ -2471,9 +2468,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2471 | } | 2468 | } |
2472 | 2469 | ||
2473 | /* update reverse mappings */ | 2470 | /* update reverse mappings */ |
2474 | error = xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); | 2471 | xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); |
2475 | if (error) | ||
2476 | goto done; | ||
2477 | 2472 | ||
2478 | /* convert to a btree if necessary */ | 2473 | /* convert to a btree if necessary */ |
2479 | if (xfs_bmap_needs_btree(ip, whichfork)) { | 2474 | if (xfs_bmap_needs_btree(ip, whichfork)) { |
@@ -2832,11 +2827,8 @@ xfs_bmap_add_extent_hole_real( | |||
2832 | } | 2827 | } |
2833 | 2828 | ||
2834 | /* add reverse mapping unless caller opted out */ | 2829 | /* add reverse mapping unless caller opted out */ |
2835 | if (!(flags & XFS_BMAPI_NORMAP)) { | 2830 | if (!(flags & XFS_BMAPI_NORMAP)) |
2836 | error = xfs_rmap_map_extent(tp, ip, whichfork, new); | 2831 | xfs_rmap_map_extent(tp, ip, whichfork, new); |
2837 | if (error) | ||
2838 | goto done; | ||
2839 | } | ||
2840 | 2832 | ||
2841 | /* convert to a btree if necessary */ | 2833 | /* convert to a btree if necessary */ |
2842 | if (xfs_bmap_needs_btree(ip, whichfork)) { | 2834 | if (xfs_bmap_needs_btree(ip, whichfork)) { |
@@ -4050,12 +4042,8 @@ xfs_bmapi_allocate( | |||
4050 | */ | 4042 | */ |
4051 | if (!(bma->flags & XFS_BMAPI_METADATA)) { | 4043 | if (!(bma->flags & XFS_BMAPI_METADATA)) { |
4052 | bma->datatype = XFS_ALLOC_NOBUSY; | 4044 | bma->datatype = XFS_ALLOC_NOBUSY; |
4053 | if (whichfork == XFS_DATA_FORK) { | 4045 | if (whichfork == XFS_DATA_FORK && bma->offset == 0) |
4054 | if (bma->offset == 0) | 4046 | bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; |
4055 | bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; | ||
4056 | else | ||
4057 | bma->datatype |= XFS_ALLOC_USERDATA; | ||
4058 | } | ||
4059 | if (bma->flags & XFS_BMAPI_ZERO) | 4047 | if (bma->flags & XFS_BMAPI_ZERO) |
4060 | bma->datatype |= XFS_ALLOC_USERDATA_ZERO; | 4048 | bma->datatype |= XFS_ALLOC_USERDATA_ZERO; |
4061 | } | 4049 | } |
@@ -4401,12 +4389,9 @@ xfs_bmapi_write( | |||
4401 | * If this is a CoW allocation, record the data in | 4389 | * If this is a CoW allocation, record the data in |
4402 | * the refcount btree for orphan recovery. | 4390 | * the refcount btree for orphan recovery. |
4403 | */ | 4391 | */ |
4404 | if (whichfork == XFS_COW_FORK) { | 4392 | if (whichfork == XFS_COW_FORK) |
4405 | error = xfs_refcount_alloc_cow_extent(tp, | 4393 | xfs_refcount_alloc_cow_extent(tp, bma.blkno, |
4406 | bma.blkno, bma.length); | 4394 | bma.length); |
4407 | if (error) | ||
4408 | goto error0; | ||
4409 | } | ||
4410 | } | 4395 | } |
4411 | 4396 | ||
4412 | /* Deal with the allocated space we found. */ | 4397 | /* Deal with the allocated space we found. */ |
@@ -4530,7 +4515,7 @@ xfs_bmapi_convert_delalloc( | |||
4530 | if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) | 4515 | if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) |
4531 | goto out_finish; | 4516 | goto out_finish; |
4532 | error = -EFSCORRUPTED; | 4517 | error = -EFSCORRUPTED; |
4533 | if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip))) | 4518 | if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) |
4534 | goto out_finish; | 4519 | goto out_finish; |
4535 | 4520 | ||
4536 | XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); | 4521 | XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); |
@@ -4540,12 +4525,8 @@ xfs_bmapi_convert_delalloc( | |||
4540 | *imap = bma.got; | 4525 | *imap = bma.got; |
4541 | *seq = READ_ONCE(ifp->if_seq); | 4526 | *seq = READ_ONCE(ifp->if_seq); |
4542 | 4527 | ||
4543 | if (whichfork == XFS_COW_FORK) { | 4528 | if (whichfork == XFS_COW_FORK) |
4544 | error = xfs_refcount_alloc_cow_extent(tp, bma.blkno, | 4529 | xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length); |
4545 | bma.length); | ||
4546 | if (error) | ||
4547 | goto out_finish; | ||
4548 | } | ||
4549 | 4530 | ||
4550 | error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, | 4531 | error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, |
4551 | whichfork); | 4532 | whichfork); |
@@ -5149,18 +5130,14 @@ xfs_bmap_del_extent_real( | |||
5149 | } | 5130 | } |
5150 | 5131 | ||
5151 | /* remove reverse mapping */ | 5132 | /* remove reverse mapping */ |
5152 | error = xfs_rmap_unmap_extent(tp, ip, whichfork, del); | 5133 | xfs_rmap_unmap_extent(tp, ip, whichfork, del); |
5153 | if (error) | ||
5154 | goto done; | ||
5155 | 5134 | ||
5156 | /* | 5135 | /* |
5157 | * If we need to, add to list of extents to delete. | 5136 | * If we need to, add to list of extents to delete. |
5158 | */ | 5137 | */ |
5159 | if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { | 5138 | if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { |
5160 | if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { | 5139 | if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { |
5161 | error = xfs_refcount_decrease_extent(tp, del); | 5140 | xfs_refcount_decrease_extent(tp, del); |
5162 | if (error) | ||
5163 | goto done; | ||
5164 | } else { | 5141 | } else { |
5165 | __xfs_bmap_add_free(tp, del->br_startblock, | 5142 | __xfs_bmap_add_free(tp, del->br_startblock, |
5166 | del->br_blockcount, NULL, | 5143 | del->br_blockcount, NULL, |
@@ -5651,12 +5628,11 @@ done: | |||
5651 | &new); | 5628 | &new); |
5652 | 5629 | ||
5653 | /* update reverse mapping. rmap functions merge the rmaps for us */ | 5630 | /* update reverse mapping. rmap functions merge the rmaps for us */ |
5654 | error = xfs_rmap_unmap_extent(tp, ip, whichfork, got); | 5631 | xfs_rmap_unmap_extent(tp, ip, whichfork, got); |
5655 | if (error) | ||
5656 | return error; | ||
5657 | memcpy(&new, got, sizeof(new)); | 5632 | memcpy(&new, got, sizeof(new)); |
5658 | new.br_startoff = left->br_startoff + left->br_blockcount; | 5633 | new.br_startoff = left->br_startoff + left->br_blockcount; |
5659 | return xfs_rmap_map_extent(tp, ip, whichfork, &new); | 5634 | xfs_rmap_map_extent(tp, ip, whichfork, &new); |
5635 | return 0; | ||
5660 | } | 5636 | } |
5661 | 5637 | ||
5662 | static int | 5638 | static int |
@@ -5695,10 +5671,9 @@ xfs_bmap_shift_update_extent( | |||
5695 | got); | 5671 | got); |
5696 | 5672 | ||
5697 | /* update reverse mapping */ | 5673 | /* update reverse mapping */ |
5698 | error = xfs_rmap_unmap_extent(tp, ip, whichfork, &prev); | 5674 | xfs_rmap_unmap_extent(tp, ip, whichfork, &prev); |
5699 | if (error) | 5675 | xfs_rmap_map_extent(tp, ip, whichfork, got); |
5700 | return error; | 5676 | return 0; |
5701 | return xfs_rmap_map_extent(tp, ip, whichfork, got); | ||
5702 | } | 5677 | } |
5703 | 5678 | ||
5704 | int | 5679 | int |
@@ -6094,7 +6069,7 @@ __xfs_bmap_add( | |||
6094 | bmap->br_blockcount, | 6069 | bmap->br_blockcount, |
6095 | bmap->br_state); | 6070 | bmap->br_state); |
6096 | 6071 | ||
6097 | bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS); | 6072 | bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS); |
6098 | INIT_LIST_HEAD(&bi->bi_list); | 6073 | INIT_LIST_HEAD(&bi->bi_list); |
6099 | bi->bi_type = type; | 6074 | bi->bi_type = type; |
6100 | bi->bi_owner = ip; | 6075 | bi->bi_owner = ip; |
@@ -6106,29 +6081,29 @@ __xfs_bmap_add( | |||
6106 | } | 6081 | } |
6107 | 6082 | ||
6108 | /* Map an extent into a file. */ | 6083 | /* Map an extent into a file. */ |
6109 | int | 6084 | void |
6110 | xfs_bmap_map_extent( | 6085 | xfs_bmap_map_extent( |
6111 | struct xfs_trans *tp, | 6086 | struct xfs_trans *tp, |
6112 | struct xfs_inode *ip, | 6087 | struct xfs_inode *ip, |
6113 | struct xfs_bmbt_irec *PREV) | 6088 | struct xfs_bmbt_irec *PREV) |
6114 | { | 6089 | { |
6115 | if (!xfs_bmap_is_update_needed(PREV)) | 6090 | if (!xfs_bmap_is_update_needed(PREV)) |
6116 | return 0; | 6091 | return; |
6117 | 6092 | ||
6118 | return __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV); | 6093 | __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV); |
6119 | } | 6094 | } |
6120 | 6095 | ||
6121 | /* Unmap an extent out of a file. */ | 6096 | /* Unmap an extent out of a file. */ |
6122 | int | 6097 | void |
6123 | xfs_bmap_unmap_extent( | 6098 | xfs_bmap_unmap_extent( |
6124 | struct xfs_trans *tp, | 6099 | struct xfs_trans *tp, |
6125 | struct xfs_inode *ip, | 6100 | struct xfs_inode *ip, |
6126 | struct xfs_bmbt_irec *PREV) | 6101 | struct xfs_bmbt_irec *PREV) |
6127 | { | 6102 | { |
6128 | if (!xfs_bmap_is_update_needed(PREV)) | 6103 | if (!xfs_bmap_is_update_needed(PREV)) |
6129 | return 0; | 6104 | return; |
6130 | 6105 | ||
6131 | return __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV); | 6106 | __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV); |
6132 | } | 6107 | } |
6133 | 6108 | ||
6134 | /* | 6109 | /* |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 8f597f9abdbe..5bb446d80542 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -171,6 +171,13 @@ static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) | |||
171 | !isnullstartblock(irec->br_startblock); | 171 | !isnullstartblock(irec->br_startblock); |
172 | } | 172 | } |
173 | 173 | ||
174 | /* | ||
175 | * Check the mapping for obviously garbage allocations that could trash the | ||
176 | * filesystem immediately. | ||
177 | */ | ||
178 | #define xfs_valid_startblock(ip, startblock) \ | ||
179 | ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip)) | ||
180 | |||
174 | void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, | 181 | void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, |
175 | xfs_filblks_t len); | 182 | xfs_filblks_t len); |
176 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); | 183 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); |
@@ -254,9 +261,9 @@ int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip, | |||
254 | enum xfs_bmap_intent_type type, int whichfork, | 261 | enum xfs_bmap_intent_type type, int whichfork, |
255 | xfs_fileoff_t startoff, xfs_fsblock_t startblock, | 262 | xfs_fileoff_t startoff, xfs_fsblock_t startblock, |
256 | xfs_filblks_t *blockcount, xfs_exntst_t state); | 263 | xfs_filblks_t *blockcount, xfs_exntst_t state); |
257 | int xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, | 264 | void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, |
258 | struct xfs_bmbt_irec *imap); | 265 | struct xfs_bmbt_irec *imap); |
259 | int xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, | 266 | void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, |
260 | struct xfs_bmbt_irec *imap); | 267 | struct xfs_bmbt_irec *imap); |
261 | 268 | ||
262 | static inline int xfs_bmap_fork_to_state(int whichfork) | 269 | static inline int xfs_bmap_fork_to_state(int whichfork) |
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index fbb18ba5d905..ffe608d2a2d9 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c | |||
@@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys( | |||
400 | union xfs_btree_key *k1, | 400 | union xfs_btree_key *k1, |
401 | union xfs_btree_key *k2) | 401 | union xfs_btree_key *k2) |
402 | { | 402 | { |
403 | return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) - | 403 | uint64_t a = be64_to_cpu(k1->bmbt.br_startoff); |
404 | be64_to_cpu(k2->bmbt.br_startoff); | 404 | uint64_t b = be64_to_cpu(k2->bmbt.br_startoff); |
405 | |||
406 | /* | ||
407 | * Note: This routine previously cast a and b to int64 and subtracted | ||
408 | * them to generate a result. This led to problems if b was the | ||
409 | * "maximum" key value (all ones) being signed incorrectly, hence this | ||
410 | * somewhat less efficient version. | ||
411 | */ | ||
412 | if (a > b) | ||
413 | return 1; | ||
414 | if (b > a) | ||
415 | return -1; | ||
416 | return 0; | ||
405 | } | 417 | } |
406 | 418 | ||
407 | static xfs_failaddr_t | 419 | static xfs_failaddr_t |
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index f1048efa4268..71de937f9e64 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -4466,8 +4466,6 @@ xfs_btree_lblock_verify( | |||
4466 | * btree block | 4466 | * btree block |
4467 | * | 4467 | * |
4468 | * @bp: buffer containing the btree block | 4468 | * @bp: buffer containing the btree block |
4469 | * @max_recs: pointer to the m_*_mxr max records field in the xfs mount | ||
4470 | * @pag_max_level: pointer to the per-ag max level field | ||
4471 | */ | 4469 | */ |
4472 | xfs_failaddr_t | 4470 | xfs_failaddr_t |
4473 | xfs_btree_sblock_v5hdr_verify( | 4471 | xfs_btree_sblock_v5hdr_verify( |
@@ -4600,7 +4598,7 @@ xfs_btree_simple_query_range( | |||
4600 | 4598 | ||
4601 | /* Callback */ | 4599 | /* Callback */ |
4602 | error = fn(cur, recp, priv); | 4600 | error = fn(cur, recp, priv); |
4603 | if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT) | 4601 | if (error) |
4604 | break; | 4602 | break; |
4605 | 4603 | ||
4606 | advloop: | 4604 | advloop: |
@@ -4702,8 +4700,7 @@ pop_up: | |||
4702 | */ | 4700 | */ |
4703 | if (ldiff >= 0 && hdiff >= 0) { | 4701 | if (ldiff >= 0 && hdiff >= 0) { |
4704 | error = fn(cur, recp, priv); | 4702 | error = fn(cur, recp, priv); |
4705 | if (error < 0 || | 4703 | if (error) |
4706 | error == XFS_BTREE_QUERY_RANGE_ABORT) | ||
4707 | break; | 4704 | break; |
4708 | } else if (hdiff < 0) { | 4705 | } else if (hdiff < 0) { |
4709 | /* Record is larger than high key; pop. */ | 4706 | /* Record is larger than high key; pop. */ |
@@ -4774,8 +4771,7 @@ out: | |||
4774 | * Query a btree for all records overlapping a given interval of keys. The | 4771 | * Query a btree for all records overlapping a given interval of keys. The |
4775 | * supplied function will be called with each record found; return one of the | 4772 | * supplied function will be called with each record found; return one of the |
4776 | * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error | 4773 | * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error |
4777 | * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a | 4774 | * code. This function returns -ECANCELED, zero, or a negative error code. |
4778 | * negative error code. | ||
4779 | */ | 4775 | */ |
4780 | int | 4776 | int |
4781 | xfs_btree_query_range( | 4777 | xfs_btree_query_range( |
@@ -4891,7 +4887,7 @@ xfs_btree_has_record_helper( | |||
4891 | union xfs_btree_rec *rec, | 4887 | union xfs_btree_rec *rec, |
4892 | void *priv) | 4888 | void *priv) |
4893 | { | 4889 | { |
4894 | return XFS_BTREE_QUERY_RANGE_ABORT; | 4890 | return -ECANCELED; |
4895 | } | 4891 | } |
4896 | 4892 | ||
4897 | /* Is there a record covering a given range of keys? */ | 4893 | /* Is there a record covering a given range of keys? */ |
@@ -4906,7 +4902,7 @@ xfs_btree_has_record( | |||
4906 | 4902 | ||
4907 | error = xfs_btree_query_range(cur, low, high, | 4903 | error = xfs_btree_query_range(cur, low, high, |
4908 | &xfs_btree_has_record_helper, NULL); | 4904 | &xfs_btree_has_record_helper, NULL); |
4909 | if (error == XFS_BTREE_QUERY_RANGE_ABORT) { | 4905 | if (error == -ECANCELED) { |
4910 | *exists = true; | 4906 | *exists = true; |
4911 | return 0; | 4907 | return 0; |
4912 | } | 4908 | } |
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index fa3cd8ab9aba..ced1e65d1483 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h | |||
@@ -464,9 +464,13 @@ xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, | |||
464 | uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); | 464 | uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); |
465 | unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); | 465 | unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); |
466 | 466 | ||
467 | /* return codes */ | 467 | /* |
468 | #define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */ | 468 | * Return codes for the query range iterator function are 0 to continue |
469 | #define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */ | 469 | * iterating, and non-zero to stop iterating. Any non-zero value will be |
470 | * passed up to the _query_range caller. The special value -ECANCELED can be | ||
471 | * used to stop iteration, because _query_range never generates that error | ||
472 | * code on its own. | ||
473 | */ | ||
470 | typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, | 474 | typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, |
471 | union xfs_btree_rec *rec, void *priv); | 475 | union xfs_btree_rec *rec, void *priv); |
472 | 476 | ||
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 0bf56e94bfe9..4fd1223c1bd5 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c | |||
@@ -2098,7 +2098,7 @@ xfs_da_grow_inode_int( | |||
2098 | * If we didn't get it and the block might work if fragmented, | 2098 | * If we didn't get it and the block might work if fragmented, |
2099 | * try without the CONTIG flag. Loop until we get it all. | 2099 | * try without the CONTIG flag. Loop until we get it all. |
2100 | */ | 2100 | */ |
2101 | mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); | 2101 | mapp = kmem_alloc(sizeof(*mapp) * count, 0); |
2102 | for (b = *bno, mapi = 0; b < *bno + count; ) { | 2102 | for (b = *bno, mapi = 0; b < *bno + count; ) { |
2103 | nmap = min(XFS_BMAP_MAX_NMAP, count); | 2103 | nmap = min(XFS_BMAP_MAX_NMAP, count); |
2104 | c = (int)(*bno + count - b); | 2104 | c = (int)(*bno + count - b); |
@@ -2480,7 +2480,7 @@ xfs_buf_map_from_irec( | |||
2480 | 2480 | ||
2481 | if (nirecs > 1) { | 2481 | if (nirecs > 1) { |
2482 | map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), | 2482 | map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), |
2483 | KM_SLEEP | KM_NOFS); | 2483 | KM_NOFS); |
2484 | if (!map) | 2484 | if (!map) |
2485 | return -ENOMEM; | 2485 | return -ENOMEM; |
2486 | *mapp = map; | 2486 | *mapp = map; |
@@ -2539,7 +2539,7 @@ xfs_dabuf_map( | |||
2539 | */ | 2539 | */ |
2540 | if (nfsb != 1) | 2540 | if (nfsb != 1) |
2541 | irecs = kmem_zalloc(sizeof(irec) * nfsb, | 2541 | irecs = kmem_zalloc(sizeof(irec) * nfsb, |
2542 | KM_SLEEP | KM_NOFS); | 2542 | KM_NOFS); |
2543 | 2543 | ||
2544 | nirecs = nfsb; | 2544 | nirecs = nfsb; |
2545 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, | 2545 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, |
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 84dd865b6c3d..ae0bbd20d9ca 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h | |||
@@ -81,13 +81,15 @@ typedef struct xfs_da_args { | |||
81 | #define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */ | 81 | #define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */ |
82 | #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ | 82 | #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ |
83 | #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ | 83 | #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ |
84 | #define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */ | ||
84 | 85 | ||
85 | #define XFS_DA_OP_FLAGS \ | 86 | #define XFS_DA_OP_FLAGS \ |
86 | { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ | 87 | { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ |
87 | { XFS_DA_OP_RENAME, "RENAME" }, \ | 88 | { XFS_DA_OP_RENAME, "RENAME" }, \ |
88 | { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ | 89 | { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ |
89 | { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ | 90 | { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ |
90 | { XFS_DA_OP_CILOOKUP, "CILOOKUP" } | 91 | { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ |
92 | { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" } | ||
91 | 93 | ||
92 | /* | 94 | /* |
93 | * Storage for holding state during Btree searches and split/join ops. | 95 | * Storage for holding state during Btree searches and split/join ops. |
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index eb2be2a6a25a..22557527cfdb 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c | |||
@@ -517,7 +517,7 @@ xfs_defer_add( | |||
517 | } | 517 | } |
518 | if (!dfp) { | 518 | if (!dfp) { |
519 | dfp = kmem_alloc(sizeof(struct xfs_defer_pending), | 519 | dfp = kmem_alloc(sizeof(struct xfs_defer_pending), |
520 | KM_SLEEP | KM_NOFS); | 520 | KM_NOFS); |
521 | dfp->dfp_type = type; | 521 | dfp->dfp_type = type; |
522 | dfp->dfp_intent = NULL; | 522 | dfp->dfp_intent = NULL; |
523 | dfp->dfp_done = NULL; | 523 | dfp->dfp_done = NULL; |
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 67840723edbb..867c5dee0751 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c | |||
@@ -110,9 +110,9 @@ xfs_da_mount( | |||
110 | 110 | ||
111 | nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; | 111 | nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; |
112 | mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), | 112 | mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), |
113 | KM_SLEEP | KM_MAYFAIL); | 113 | KM_MAYFAIL); |
114 | mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), | 114 | mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), |
115 | KM_SLEEP | KM_MAYFAIL); | 115 | KM_MAYFAIL); |
116 | if (!mp->m_dir_geo || !mp->m_attr_geo) { | 116 | if (!mp->m_dir_geo || !mp->m_attr_geo) { |
117 | kmem_free(mp->m_dir_geo); | 117 | kmem_free(mp->m_dir_geo); |
118 | kmem_free(mp->m_attr_geo); | 118 | kmem_free(mp->m_attr_geo); |
@@ -217,7 +217,7 @@ xfs_dir_init( | |||
217 | if (error) | 217 | if (error) |
218 | return error; | 218 | return error; |
219 | 219 | ||
220 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | 220 | args = kmem_zalloc(sizeof(*args), KM_NOFS); |
221 | if (!args) | 221 | if (!args) |
222 | return -ENOMEM; | 222 | return -ENOMEM; |
223 | 223 | ||
@@ -254,7 +254,7 @@ xfs_dir_createname( | |||
254 | XFS_STATS_INC(dp->i_mount, xs_dir_create); | 254 | XFS_STATS_INC(dp->i_mount, xs_dir_create); |
255 | } | 255 | } |
256 | 256 | ||
257 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | 257 | args = kmem_zalloc(sizeof(*args), KM_NOFS); |
258 | if (!args) | 258 | if (!args) |
259 | return -ENOMEM; | 259 | return -ENOMEM; |
260 | 260 | ||
@@ -353,7 +353,7 @@ xfs_dir_lookup( | |||
353 | * lockdep Doing this avoids having to add a bunch of lockdep class | 353 | * lockdep Doing this avoids having to add a bunch of lockdep class |
354 | * annotations into the reclaim path for the ilock. | 354 | * annotations into the reclaim path for the ilock. |
355 | */ | 355 | */ |
356 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | 356 | args = kmem_zalloc(sizeof(*args), KM_NOFS); |
357 | args->geo = dp->i_mount->m_dir_geo; | 357 | args->geo = dp->i_mount->m_dir_geo; |
358 | args->name = name->name; | 358 | args->name = name->name; |
359 | args->namelen = name->len; | 359 | args->namelen = name->len; |
@@ -422,7 +422,7 @@ xfs_dir_removename( | |||
422 | ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); | 422 | ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); |
423 | XFS_STATS_INC(dp->i_mount, xs_dir_remove); | 423 | XFS_STATS_INC(dp->i_mount, xs_dir_remove); |
424 | 424 | ||
425 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | 425 | args = kmem_zalloc(sizeof(*args), KM_NOFS); |
426 | if (!args) | 426 | if (!args) |
427 | return -ENOMEM; | 427 | return -ENOMEM; |
428 | 428 | ||
@@ -483,7 +483,7 @@ xfs_dir_replace( | |||
483 | if (rval) | 483 | if (rval) |
484 | return rval; | 484 | return rval; |
485 | 485 | ||
486 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | 486 | args = kmem_zalloc(sizeof(*args), KM_NOFS); |
487 | if (!args) | 487 | if (!args) |
488 | return -ENOMEM; | 488 | return -ENOMEM; |
489 | 489 | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index a6fb0cc2085e..9595ced393dc 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c | |||
@@ -1092,7 +1092,7 @@ xfs_dir2_sf_to_block( | |||
1092 | * Copy the directory into a temporary buffer. | 1092 | * Copy the directory into a temporary buffer. |
1093 | * Then pitch the incore inode data so we can make extents. | 1093 | * Then pitch the incore inode data so we can make extents. |
1094 | */ | 1094 | */ |
1095 | sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); | 1095 | sfp = kmem_alloc(ifp->if_bytes, 0); |
1096 | memcpy(sfp, oldsfp, ifp->if_bytes); | 1096 | memcpy(sfp, oldsfp, ifp->if_bytes); |
1097 | 1097 | ||
1098 | xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); | 1098 | xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); |
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 1fc44efc344d..705c4f562758 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c | |||
@@ -32,8 +32,6 @@ static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, | |||
32 | static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, | 32 | static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, |
33 | int index, xfs_da_state_blk_t *dblk, | 33 | int index, xfs_da_state_blk_t *dblk, |
34 | int *rval); | 34 | int *rval); |
35 | static int xfs_dir2_node_addname_int(xfs_da_args_t *args, | ||
36 | xfs_da_state_blk_t *fblk); | ||
37 | 35 | ||
38 | /* | 36 | /* |
39 | * Check internal consistency of a leafn block. | 37 | * Check internal consistency of a leafn block. |
@@ -1611,113 +1609,152 @@ xfs_dir2_leafn_unbalance( | |||
1611 | } | 1609 | } |
1612 | 1610 | ||
1613 | /* | 1611 | /* |
1614 | * Top-level node form directory addname routine. | 1612 | * Add a new data block to the directory at the free space index that the caller |
1613 | * has specified. | ||
1615 | */ | 1614 | */ |
1616 | int /* error */ | 1615 | static int |
1617 | xfs_dir2_node_addname( | 1616 | xfs_dir2_node_add_datablk( |
1618 | xfs_da_args_t *args) /* operation arguments */ | 1617 | struct xfs_da_args *args, |
1618 | struct xfs_da_state_blk *fblk, | ||
1619 | xfs_dir2_db_t *dbno, | ||
1620 | struct xfs_buf **dbpp, | ||
1621 | struct xfs_buf **fbpp, | ||
1622 | int *findex) | ||
1619 | { | 1623 | { |
1620 | xfs_da_state_blk_t *blk; /* leaf block for insert */ | 1624 | struct xfs_inode *dp = args->dp; |
1621 | int error; /* error return value */ | 1625 | struct xfs_trans *tp = args->trans; |
1622 | int rval; /* sub-return value */ | 1626 | struct xfs_mount *mp = dp->i_mount; |
1623 | xfs_da_state_t *state; /* btree cursor */ | 1627 | struct xfs_dir3_icfree_hdr freehdr; |
1628 | struct xfs_dir2_data_free *bf; | ||
1629 | struct xfs_dir2_data_hdr *hdr; | ||
1630 | struct xfs_dir2_free *free = NULL; | ||
1631 | xfs_dir2_db_t fbno; | ||
1632 | struct xfs_buf *fbp; | ||
1633 | struct xfs_buf *dbp; | ||
1634 | __be16 *bests = NULL; | ||
1635 | int error; | ||
1624 | 1636 | ||
1625 | trace_xfs_dir2_node_addname(args); | 1637 | /* Not allowed to allocate, return failure. */ |
1638 | if (args->total == 0) | ||
1639 | return -ENOSPC; | ||
1640 | |||
1641 | /* Allocate and initialize the new data block. */ | ||
1642 | error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, dbno); | ||
1643 | if (error) | ||
1644 | return error; | ||
1645 | error = xfs_dir3_data_init(args, *dbno, &dbp); | ||
1646 | if (error) | ||
1647 | return error; | ||
1626 | 1648 | ||
1627 | /* | 1649 | /* |
1628 | * Allocate and initialize the state (btree cursor). | 1650 | * Get the freespace block corresponding to the data block |
1629 | */ | 1651 | * that was just allocated. |
1630 | state = xfs_da_state_alloc(); | ||
1631 | state->args = args; | ||
1632 | state->mp = args->dp->i_mount; | ||
1633 | /* | ||
1634 | * Look up the name. We're not supposed to find it, but | ||
1635 | * this gives us the insertion point. | ||
1636 | */ | 1652 | */ |
1637 | error = xfs_da3_node_lookup_int(state, &rval); | 1653 | fbno = dp->d_ops->db_to_fdb(args->geo, *dbno); |
1654 | error = xfs_dir2_free_try_read(tp, dp, | ||
1655 | xfs_dir2_db_to_da(args->geo, fbno), &fbp); | ||
1638 | if (error) | 1656 | if (error) |
1639 | rval = error; | 1657 | return error; |
1640 | if (rval != -ENOENT) { | 1658 | |
1641 | goto done; | ||
1642 | } | ||
1643 | /* | 1659 | /* |
1644 | * Add the data entry to a data block. | 1660 | * If there wasn't a freespace block, the read will |
1645 | * Extravalid is set to a freeblock found by lookup. | 1661 | * return a NULL fbp. Allocate and initialize a new one. |
1646 | */ | 1662 | */ |
1647 | rval = xfs_dir2_node_addname_int(args, | 1663 | if (!fbp) { |
1648 | state->extravalid ? &state->extrablk : NULL); | 1664 | error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fbno); |
1649 | if (rval) { | 1665 | if (error) |
1650 | goto done; | 1666 | return error; |
1667 | |||
1668 | if (dp->d_ops->db_to_fdb(args->geo, *dbno) != fbno) { | ||
1669 | xfs_alert(mp, | ||
1670 | "%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld", | ||
1671 | __func__, (unsigned long long)dp->i_ino, | ||
1672 | (long long)dp->d_ops->db_to_fdb(args->geo, *dbno), | ||
1673 | (long long)*dbno, (long long)fbno); | ||
1674 | if (fblk) { | ||
1675 | xfs_alert(mp, | ||
1676 | " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", | ||
1677 | fblk, (unsigned long long)fblk->blkno, | ||
1678 | fblk->index, fblk->magic); | ||
1679 | } else { | ||
1680 | xfs_alert(mp, " ... fblk is NULL"); | ||
1681 | } | ||
1682 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); | ||
1683 | return -EFSCORRUPTED; | ||
1684 | } | ||
1685 | |||
1686 | /* Get a buffer for the new block. */ | ||
1687 | error = xfs_dir3_free_get_buf(args, fbno, &fbp); | ||
1688 | if (error) | ||
1689 | return error; | ||
1690 | free = fbp->b_addr; | ||
1691 | bests = dp->d_ops->free_bests_p(free); | ||
1692 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1693 | |||
1694 | /* Remember the first slot as our empty slot. */ | ||
1695 | freehdr.firstdb = (fbno - xfs_dir2_byte_to_db(args->geo, | ||
1696 | XFS_DIR2_FREE_OFFSET)) * | ||
1697 | dp->d_ops->free_max_bests(args->geo); | ||
1698 | } else { | ||
1699 | free = fbp->b_addr; | ||
1700 | bests = dp->d_ops->free_bests_p(free); | ||
1701 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1651 | } | 1702 | } |
1652 | blk = &state->path.blk[state->path.active - 1]; | 1703 | |
1653 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | 1704 | /* Set the freespace block index from the data block number. */ |
1705 | *findex = dp->d_ops->db_to_fdindex(args->geo, *dbno); | ||
1706 | |||
1707 | /* Extend the freespace table if the new data block is off the end. */ | ||
1708 | if (*findex >= freehdr.nvalid) { | ||
1709 | ASSERT(*findex < dp->d_ops->free_max_bests(args->geo)); | ||
1710 | freehdr.nvalid = *findex + 1; | ||
1711 | bests[*findex] = cpu_to_be16(NULLDATAOFF); | ||
1712 | } | ||
1713 | |||
1654 | /* | 1714 | /* |
1655 | * Add the new leaf entry. | 1715 | * If this entry was for an empty data block (this should always be |
1716 | * true) then update the header. | ||
1656 | */ | 1717 | */ |
1657 | rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); | 1718 | if (bests[*findex] == cpu_to_be16(NULLDATAOFF)) { |
1658 | if (rval == 0) { | 1719 | freehdr.nused++; |
1659 | /* | 1720 | dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); |
1660 | * It worked, fix the hash values up the btree. | 1721 | xfs_dir2_free_log_header(args, fbp); |
1661 | */ | ||
1662 | if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) | ||
1663 | xfs_da3_fixhashpath(state, &state->path); | ||
1664 | } else { | ||
1665 | /* | ||
1666 | * It didn't work, we need to split the leaf block. | ||
1667 | */ | ||
1668 | if (args->total == 0) { | ||
1669 | ASSERT(rval == -ENOSPC); | ||
1670 | goto done; | ||
1671 | } | ||
1672 | /* | ||
1673 | * Split the leaf block and insert the new entry. | ||
1674 | */ | ||
1675 | rval = xfs_da3_split(state); | ||
1676 | } | 1722 | } |
1677 | done: | 1723 | |
1678 | xfs_da_state_free(state); | 1724 | /* Update the freespace value for the new block in the table. */ |
1679 | return rval; | 1725 | hdr = dbp->b_addr; |
1726 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1727 | bests[*findex] = bf[0].length; | ||
1728 | |||
1729 | *dbpp = dbp; | ||
1730 | *fbpp = fbp; | ||
1731 | return 0; | ||
1680 | } | 1732 | } |
1681 | 1733 | ||
1682 | /* | 1734 | static int |
1683 | * Add the data entry for a node-format directory name addition. | 1735 | xfs_dir2_node_find_freeblk( |
1684 | * The leaf entry is added in xfs_dir2_leafn_add. | 1736 | struct xfs_da_args *args, |
1685 | * We may enter with a freespace block that the lookup found. | 1737 | struct xfs_da_state_blk *fblk, |
1686 | */ | 1738 | xfs_dir2_db_t *dbnop, |
1687 | static int /* error */ | 1739 | struct xfs_buf **fbpp, |
1688 | xfs_dir2_node_addname_int( | 1740 | int *findexp, |
1689 | xfs_da_args_t *args, /* operation arguments */ | 1741 | int length) |
1690 | xfs_da_state_blk_t *fblk) /* optional freespace block */ | ||
1691 | { | 1742 | { |
1692 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
1693 | xfs_dir2_db_t dbno; /* data block number */ | ||
1694 | struct xfs_buf *dbp; /* data block buffer */ | ||
1695 | xfs_dir2_data_entry_t *dep; /* data entry pointer */ | ||
1696 | xfs_inode_t *dp; /* incore directory inode */ | ||
1697 | xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ | ||
1698 | int error; /* error return value */ | ||
1699 | xfs_dir2_db_t fbno; /* freespace block number */ | ||
1700 | struct xfs_buf *fbp; /* freespace buffer */ | ||
1701 | int findex; /* freespace entry index */ | ||
1702 | xfs_dir2_free_t *free=NULL; /* freespace block structure */ | ||
1703 | xfs_dir2_db_t ifbno; /* initial freespace block no */ | ||
1704 | xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ | ||
1705 | int length; /* length of the new entry */ | ||
1706 | int logfree; /* need to log free entry */ | ||
1707 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1708 | int needlog; /* need to log data header */ | ||
1709 | int needscan; /* need to rescan data frees */ | ||
1710 | __be16 *tagp; /* data entry tag pointer */ | ||
1711 | xfs_trans_t *tp; /* transaction pointer */ | ||
1712 | __be16 *bests; | ||
1713 | struct xfs_dir3_icfree_hdr freehdr; | 1743 | struct xfs_dir3_icfree_hdr freehdr; |
1714 | struct xfs_dir2_data_free *bf; | 1744 | struct xfs_dir2_free *free = NULL; |
1715 | xfs_dir2_data_aoff_t aoff; | 1745 | struct xfs_inode *dp = args->dp; |
1746 | struct xfs_trans *tp = args->trans; | ||
1747 | struct xfs_buf *fbp = NULL; | ||
1748 | xfs_dir2_db_t firstfbno; | ||
1749 | xfs_dir2_db_t lastfbno; | ||
1750 | xfs_dir2_db_t ifbno = -1; | ||
1751 | xfs_dir2_db_t dbno = -1; | ||
1752 | xfs_dir2_db_t fbno; | ||
1753 | xfs_fileoff_t fo; | ||
1754 | __be16 *bests = NULL; | ||
1755 | int findex = 0; | ||
1756 | int error; | ||
1716 | 1757 | ||
1717 | dp = args->dp; | ||
1718 | mp = dp->i_mount; | ||
1719 | tp = args->trans; | ||
1720 | length = dp->d_ops->data_entsize(args->namelen); | ||
1721 | /* | 1758 | /* |
1722 | * If we came in with a freespace block that means that lookup | 1759 | * If we came in with a freespace block that means that lookup |
1723 | * found an entry with our hash value. This is the freespace | 1760 | * found an entry with our hash value. This is the freespace |
@@ -1725,288 +1762,157 @@ xfs_dir2_node_addname_int( | |||
1725 | */ | 1762 | */ |
1726 | if (fblk) { | 1763 | if (fblk) { |
1727 | fbp = fblk->bp; | 1764 | fbp = fblk->bp; |
1728 | /* | ||
1729 | * Remember initial freespace block number. | ||
1730 | */ | ||
1731 | ifbno = fblk->blkno; | ||
1732 | free = fbp->b_addr; | 1765 | free = fbp->b_addr; |
1733 | findex = fblk->index; | 1766 | findex = fblk->index; |
1734 | bests = dp->d_ops->free_bests_p(free); | ||
1735 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1736 | |||
1737 | /* | ||
1738 | * This means the free entry showed that the data block had | ||
1739 | * space for our entry, so we remembered it. | ||
1740 | * Use that data block. | ||
1741 | */ | ||
1742 | if (findex >= 0) { | 1767 | if (findex >= 0) { |
1768 | /* caller already found the freespace for us. */ | ||
1769 | bests = dp->d_ops->free_bests_p(free); | ||
1770 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1771 | |||
1743 | ASSERT(findex < freehdr.nvalid); | 1772 | ASSERT(findex < freehdr.nvalid); |
1744 | ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); | 1773 | ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); |
1745 | ASSERT(be16_to_cpu(bests[findex]) >= length); | 1774 | ASSERT(be16_to_cpu(bests[findex]) >= length); |
1746 | dbno = freehdr.firstdb + findex; | 1775 | dbno = freehdr.firstdb + findex; |
1747 | } else { | 1776 | goto found_block; |
1748 | /* | ||
1749 | * The data block looked at didn't have enough room. | ||
1750 | * We'll start at the beginning of the freespace entries. | ||
1751 | */ | ||
1752 | dbno = -1; | ||
1753 | findex = 0; | ||
1754 | } | 1777 | } |
1755 | } else { | 1778 | |
1756 | /* | 1779 | /* |
1757 | * Didn't come in with a freespace block, so no data block. | 1780 | * The data block looked at didn't have enough room. |
1781 | * We'll start at the beginning of the freespace entries. | ||
1758 | */ | 1782 | */ |
1759 | ifbno = dbno = -1; | 1783 | ifbno = fblk->blkno; |
1784 | xfs_trans_brelse(tp, fbp); | ||
1760 | fbp = NULL; | 1785 | fbp = NULL; |
1761 | findex = 0; | 1786 | fblk->bp = NULL; |
1762 | } | 1787 | } |
1763 | 1788 | ||
1764 | /* | 1789 | /* |
1765 | * If we don't have a data block yet, we're going to scan the | 1790 | * If we don't have a data block yet, we're going to scan the freespace |
1766 | * freespace blocks looking for one. Figure out what the | 1791 | * data for a data block with enough free space in it. |
1767 | * highest freespace block number is. | ||
1768 | */ | ||
1769 | if (dbno == -1) { | ||
1770 | xfs_fileoff_t fo; /* freespace block number */ | ||
1771 | |||
1772 | if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) | ||
1773 | return error; | ||
1774 | lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); | ||
1775 | fbno = ifbno; | ||
1776 | } | ||
1777 | /* | ||
1778 | * While we haven't identified a data block, search the freeblock | ||
1779 | * data for a good data block. If we find a null freeblock entry, | ||
1780 | * indicating a hole in the data blocks, remember that. | ||
1781 | */ | 1792 | */ |
1782 | while (dbno == -1) { | 1793 | error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK); |
1783 | /* | 1794 | if (error) |
1784 | * If we don't have a freeblock in hand, get the next one. | 1795 | return error; |
1785 | */ | 1796 | lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); |
1786 | if (fbp == NULL) { | 1797 | firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET); |
1787 | /* | ||
1788 | * Happens the first time through unless lookup gave | ||
1789 | * us a freespace block to start with. | ||
1790 | */ | ||
1791 | if (++fbno == 0) | ||
1792 | fbno = xfs_dir2_byte_to_db(args->geo, | ||
1793 | XFS_DIR2_FREE_OFFSET); | ||
1794 | /* | ||
1795 | * If it's ifbno we already looked at it. | ||
1796 | */ | ||
1797 | if (fbno == ifbno) | ||
1798 | fbno++; | ||
1799 | /* | ||
1800 | * If it's off the end we're done. | ||
1801 | */ | ||
1802 | if (fbno >= lastfbno) | ||
1803 | break; | ||
1804 | /* | ||
1805 | * Read the block. There can be holes in the | ||
1806 | * freespace blocks, so this might not succeed. | ||
1807 | * This should be really rare, so there's no reason | ||
1808 | * to avoid it. | ||
1809 | */ | ||
1810 | error = xfs_dir2_free_try_read(tp, dp, | ||
1811 | xfs_dir2_db_to_da(args->geo, fbno), | ||
1812 | &fbp); | ||
1813 | if (error) | ||
1814 | return error; | ||
1815 | if (!fbp) | ||
1816 | continue; | ||
1817 | free = fbp->b_addr; | ||
1818 | findex = 0; | ||
1819 | } | ||
1820 | /* | ||
1821 | * Look at the current free entry. Is it good enough? | ||
1822 | * | ||
1823 | * The bests initialisation should be where the bufer is read in | ||
1824 | * the above branch. But gcc is too stupid to realise that bests | ||
1825 | * and the freehdr are actually initialised if they are placed | ||
1826 | * there, so we have to do it here to avoid warnings. Blech. | ||
1827 | */ | ||
1828 | bests = dp->d_ops->free_bests_p(free); | ||
1829 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1830 | if (be16_to_cpu(bests[findex]) != NULLDATAOFF && | ||
1831 | be16_to_cpu(bests[findex]) >= length) | ||
1832 | dbno = freehdr.firstdb + findex; | ||
1833 | else { | ||
1834 | /* | ||
1835 | * Are we done with the freeblock? | ||
1836 | */ | ||
1837 | if (++findex == freehdr.nvalid) { | ||
1838 | /* | ||
1839 | * Drop the block. | ||
1840 | */ | ||
1841 | xfs_trans_brelse(tp, fbp); | ||
1842 | fbp = NULL; | ||
1843 | if (fblk && fblk->bp) | ||
1844 | fblk->bp = NULL; | ||
1845 | } | ||
1846 | } | ||
1847 | } | ||
1848 | /* | ||
1849 | * If we don't have a data block, we need to allocate one and make | ||
1850 | * the freespace entries refer to it. | ||
1851 | */ | ||
1852 | if (unlikely(dbno == -1)) { | ||
1853 | /* | ||
1854 | * Not allowed to allocate, return failure. | ||
1855 | */ | ||
1856 | if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) | ||
1857 | return -ENOSPC; | ||
1858 | |||
1859 | /* | ||
1860 | * Allocate and initialize the new data block. | ||
1861 | */ | ||
1862 | if (unlikely((error = xfs_dir2_grow_inode(args, | ||
1863 | XFS_DIR2_DATA_SPACE, | ||
1864 | &dbno)) || | ||
1865 | (error = xfs_dir3_data_init(args, dbno, &dbp)))) | ||
1866 | return error; | ||
1867 | 1798 | ||
1868 | /* | 1799 | for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) { |
1869 | * If (somehow) we have a freespace block, get rid of it. | 1800 | /* If it's ifbno we already looked at it. */ |
1870 | */ | 1801 | if (fbno == ifbno) |
1871 | if (fbp) | 1802 | continue; |
1872 | xfs_trans_brelse(tp, fbp); | ||
1873 | if (fblk && fblk->bp) | ||
1874 | fblk->bp = NULL; | ||
1875 | 1803 | ||
1876 | /* | 1804 | /* |
1877 | * Get the freespace block corresponding to the data block | 1805 | * Read the block. There can be holes in the freespace blocks, |
1878 | * that was just allocated. | 1806 | * so this might not succeed. This should be really rare, so |
1807 | * there's no reason to avoid it. | ||
1879 | */ | 1808 | */ |
1880 | fbno = dp->d_ops->db_to_fdb(args->geo, dbno); | ||
1881 | error = xfs_dir2_free_try_read(tp, dp, | 1809 | error = xfs_dir2_free_try_read(tp, dp, |
1882 | xfs_dir2_db_to_da(args->geo, fbno), | 1810 | xfs_dir2_db_to_da(args->geo, fbno), |
1883 | &fbp); | 1811 | &fbp); |
1884 | if (error) | 1812 | if (error) |
1885 | return error; | 1813 | return error; |
1814 | if (!fbp) | ||
1815 | continue; | ||
1886 | 1816 | ||
1887 | /* | 1817 | free = fbp->b_addr; |
1888 | * If there wasn't a freespace block, the read will | 1818 | bests = dp->d_ops->free_bests_p(free); |
1889 | * return a NULL fbp. Allocate and initialize a new one. | 1819 | dp->d_ops->free_hdr_from_disk(&freehdr, free); |
1890 | */ | ||
1891 | if (!fbp) { | ||
1892 | error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, | ||
1893 | &fbno); | ||
1894 | if (error) | ||
1895 | return error; | ||
1896 | 1820 | ||
1897 | if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { | 1821 | /* Scan the free entry array for a large enough free space. */ |
1898 | xfs_alert(mp, | 1822 | for (findex = freehdr.nvalid - 1; findex >= 0; findex--) { |
1899 | "%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d", | 1823 | if (be16_to_cpu(bests[findex]) != NULLDATAOFF && |
1900 | __func__, (unsigned long long)dp->i_ino, | 1824 | be16_to_cpu(bests[findex]) >= length) { |
1901 | (long long)dp->d_ops->db_to_fdb( | 1825 | dbno = freehdr.firstdb + findex; |
1902 | args->geo, dbno), | 1826 | goto found_block; |
1903 | (long long)dbno, (long long)fbno, | ||
1904 | (unsigned long long)ifbno, lastfbno); | ||
1905 | if (fblk) { | ||
1906 | xfs_alert(mp, | ||
1907 | " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", | ||
1908 | fblk, | ||
1909 | (unsigned long long)fblk->blkno, | ||
1910 | fblk->index, | ||
1911 | fblk->magic); | ||
1912 | } else { | ||
1913 | xfs_alert(mp, " ... fblk is NULL"); | ||
1914 | } | ||
1915 | XFS_ERROR_REPORT("xfs_dir2_node_addname_int", | ||
1916 | XFS_ERRLEVEL_LOW, mp); | ||
1917 | return -EFSCORRUPTED; | ||
1918 | } | 1827 | } |
1919 | |||
1920 | /* | ||
1921 | * Get a buffer for the new block. | ||
1922 | */ | ||
1923 | error = xfs_dir3_free_get_buf(args, fbno, &fbp); | ||
1924 | if (error) | ||
1925 | return error; | ||
1926 | free = fbp->b_addr; | ||
1927 | bests = dp->d_ops->free_bests_p(free); | ||
1928 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1929 | |||
1930 | /* | ||
1931 | * Remember the first slot as our empty slot. | ||
1932 | */ | ||
1933 | freehdr.firstdb = | ||
1934 | (fbno - xfs_dir2_byte_to_db(args->geo, | ||
1935 | XFS_DIR2_FREE_OFFSET)) * | ||
1936 | dp->d_ops->free_max_bests(args->geo); | ||
1937 | } else { | ||
1938 | free = fbp->b_addr; | ||
1939 | bests = dp->d_ops->free_bests_p(free); | ||
1940 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1941 | } | 1828 | } |
1942 | 1829 | ||
1943 | /* | 1830 | /* Didn't find free space, go on to next free block */ |
1944 | * Set the freespace block index from the data block number. | 1831 | xfs_trans_brelse(tp, fbp); |
1945 | */ | ||
1946 | findex = dp->d_ops->db_to_fdindex(args->geo, dbno); | ||
1947 | /* | ||
1948 | * If it's after the end of the current entries in the | ||
1949 | * freespace block, extend that table. | ||
1950 | */ | ||
1951 | if (findex >= freehdr.nvalid) { | ||
1952 | ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); | ||
1953 | freehdr.nvalid = findex + 1; | ||
1954 | /* | ||
1955 | * Tag new entry so nused will go up. | ||
1956 | */ | ||
1957 | bests[findex] = cpu_to_be16(NULLDATAOFF); | ||
1958 | } | ||
1959 | /* | ||
1960 | * If this entry was for an empty data block | ||
1961 | * (this should always be true) then update the header. | ||
1962 | */ | ||
1963 | if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { | ||
1964 | freehdr.nused++; | ||
1965 | dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); | ||
1966 | xfs_dir2_free_log_header(args, fbp); | ||
1967 | } | ||
1968 | /* | ||
1969 | * Update the real value in the table. | ||
1970 | * We haven't allocated the data entry yet so this will | ||
1971 | * change again. | ||
1972 | */ | ||
1973 | hdr = dbp->b_addr; | ||
1974 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1975 | bests[findex] = bf[0].length; | ||
1976 | logfree = 1; | ||
1977 | } | 1832 | } |
1833 | |||
1834 | found_block: | ||
1835 | *dbnop = dbno; | ||
1836 | *fbpp = fbp; | ||
1837 | *findexp = findex; | ||
1838 | return 0; | ||
1839 | } | ||
1840 | |||
1841 | |||
1842 | /* | ||
1843 | * Add the data entry for a node-format directory name addition. | ||
1844 | * The leaf entry is added in xfs_dir2_leafn_add. | ||
1845 | * We may enter with a freespace block that the lookup found. | ||
1846 | */ | ||
1847 | static int | ||
1848 | xfs_dir2_node_addname_int( | ||
1849 | struct xfs_da_args *args, /* operation arguments */ | ||
1850 | struct xfs_da_state_blk *fblk) /* optional freespace block */ | ||
1851 | { | ||
1852 | struct xfs_dir2_data_unused *dup; /* data unused entry pointer */ | ||
1853 | struct xfs_dir2_data_entry *dep; /* data entry pointer */ | ||
1854 | struct xfs_dir2_data_hdr *hdr; /* data block header */ | ||
1855 | struct xfs_dir2_data_free *bf; | ||
1856 | struct xfs_dir2_free *free = NULL; /* freespace block structure */ | ||
1857 | struct xfs_trans *tp = args->trans; | ||
1858 | struct xfs_inode *dp = args->dp; | ||
1859 | struct xfs_buf *dbp; /* data block buffer */ | ||
1860 | struct xfs_buf *fbp; /* freespace buffer */ | ||
1861 | xfs_dir2_data_aoff_t aoff; | ||
1862 | xfs_dir2_db_t dbno; /* data block number */ | ||
1863 | int error; /* error return value */ | ||
1864 | int findex; /* freespace entry index */ | ||
1865 | int length; /* length of the new entry */ | ||
1866 | int logfree = 0; /* need to log free entry */ | ||
1867 | int needlog = 0; /* need to log data header */ | ||
1868 | int needscan = 0; /* need to rescan data frees */ | ||
1869 | __be16 *tagp; /* data entry tag pointer */ | ||
1870 | __be16 *bests; | ||
1871 | |||
1872 | length = dp->d_ops->data_entsize(args->namelen); | ||
1873 | error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &findex, | ||
1874 | length); | ||
1875 | if (error) | ||
1876 | return error; | ||
1877 | |||
1978 | /* | 1878 | /* |
1979 | * We had a data block so we don't have to make a new one. | 1879 | * Now we know if we must allocate blocks, so if we are checking whether |
1880 | * we can insert without allocation then we can return now. | ||
1980 | */ | 1881 | */ |
1981 | else { | 1882 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) { |
1982 | /* | 1883 | if (dbno == -1) |
1983 | * If just checking, we succeeded. | 1884 | return -ENOSPC; |
1984 | */ | 1885 | return 0; |
1985 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) | 1886 | } |
1986 | return 0; | ||
1987 | 1887 | ||
1988 | /* | 1888 | /* |
1989 | * Read the data block in. | 1889 | * If we don't have a data block, we need to allocate one and make |
1990 | */ | 1890 | * the freespace entries refer to it. |
1891 | */ | ||
1892 | if (dbno == -1) { | ||
1893 | /* we're going to have to log the free block index later */ | ||
1894 | logfree = 1; | ||
1895 | error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp, | ||
1896 | &findex); | ||
1897 | } else { | ||
1898 | /* Read the data block in. */ | ||
1991 | error = xfs_dir3_data_read(tp, dp, | 1899 | error = xfs_dir3_data_read(tp, dp, |
1992 | xfs_dir2_db_to_da(args->geo, dbno), | 1900 | xfs_dir2_db_to_da(args->geo, dbno), |
1993 | -1, &dbp); | 1901 | -1, &dbp); |
1994 | if (error) | ||
1995 | return error; | ||
1996 | hdr = dbp->b_addr; | ||
1997 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1998 | logfree = 0; | ||
1999 | } | 1902 | } |
1903 | if (error) | ||
1904 | return error; | ||
1905 | |||
1906 | /* setup for data block up now */ | ||
1907 | hdr = dbp->b_addr; | ||
1908 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
2000 | ASSERT(be16_to_cpu(bf[0].length) >= length); | 1909 | ASSERT(be16_to_cpu(bf[0].length) >= length); |
2001 | /* | 1910 | |
2002 | * Point to the existing unused space. | 1911 | /* Point to the existing unused space. */ |
2003 | */ | ||
2004 | dup = (xfs_dir2_data_unused_t *) | 1912 | dup = (xfs_dir2_data_unused_t *) |
2005 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | 1913 | ((char *)hdr + be16_to_cpu(bf[0].offset)); |
2006 | needscan = needlog = 0; | 1914 | |
2007 | /* | 1915 | /* Mark the first part of the unused space, inuse for us. */ |
2008 | * Mark the first part of the unused space, inuse for us. | ||
2009 | */ | ||
2010 | aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); | 1916 | aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); |
2011 | error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length, | 1917 | error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length, |
2012 | &needlog, &needscan); | 1918 | &needlog, &needscan); |
@@ -2014,9 +1920,8 @@ xfs_dir2_node_addname_int( | |||
2014 | xfs_trans_brelse(tp, dbp); | 1920 | xfs_trans_brelse(tp, dbp); |
2015 | return error; | 1921 | return error; |
2016 | } | 1922 | } |
2017 | /* | 1923 | |
2018 | * Fill in the new entry and log it. | 1924 | /* Fill in the new entry and log it. */ |
2019 | */ | ||
2020 | dep = (xfs_dir2_data_entry_t *)dup; | 1925 | dep = (xfs_dir2_data_entry_t *)dup; |
2021 | dep->inumber = cpu_to_be64(args->inumber); | 1926 | dep->inumber = cpu_to_be64(args->inumber); |
2022 | dep->namelen = args->namelen; | 1927 | dep->namelen = args->namelen; |
@@ -2025,38 +1930,101 @@ xfs_dir2_node_addname_int( | |||
2025 | tagp = dp->d_ops->data_entry_tag_p(dep); | 1930 | tagp = dp->d_ops->data_entry_tag_p(dep); |
2026 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | 1931 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); |
2027 | xfs_dir2_data_log_entry(args, dbp, dep); | 1932 | xfs_dir2_data_log_entry(args, dbp, dep); |
2028 | /* | 1933 | |
2029 | * Rescan the block for bestfree if needed. | 1934 | /* Rescan the freespace and log the data block if needed. */ |
2030 | */ | ||
2031 | if (needscan) | 1935 | if (needscan) |
2032 | xfs_dir2_data_freescan(dp, hdr, &needlog); | 1936 | xfs_dir2_data_freescan(dp, hdr, &needlog); |
2033 | /* | ||
2034 | * Log the data block header if needed. | ||
2035 | */ | ||
2036 | if (needlog) | 1937 | if (needlog) |
2037 | xfs_dir2_data_log_header(args, dbp); | 1938 | xfs_dir2_data_log_header(args, dbp); |
2038 | /* | 1939 | |
2039 | * If the freespace entry is now wrong, update it. | 1940 | /* If the freespace block entry is now wrong, update it. */ |
2040 | */ | 1941 | free = fbp->b_addr; |
2041 | bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ | 1942 | bests = dp->d_ops->free_bests_p(free); |
2042 | if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { | 1943 | if (bests[findex] != bf[0].length) { |
2043 | bests[findex] = bf[0].length; | 1944 | bests[findex] = bf[0].length; |
2044 | logfree = 1; | 1945 | logfree = 1; |
2045 | } | 1946 | } |
2046 | /* | 1947 | |
2047 | * Log the freespace entry if needed. | 1948 | /* Log the freespace entry if needed. */ |
2048 | */ | ||
2049 | if (logfree) | 1949 | if (logfree) |
2050 | xfs_dir2_free_log_bests(args, fbp, findex, findex); | 1950 | xfs_dir2_free_log_bests(args, fbp, findex, findex); |
2051 | /* | 1951 | |
2052 | * Return the data block and offset in args, then drop the data block. | 1952 | /* Return the data block and offset in args. */ |
2053 | */ | ||
2054 | args->blkno = (xfs_dablk_t)dbno; | 1953 | args->blkno = (xfs_dablk_t)dbno; |
2055 | args->index = be16_to_cpu(*tagp); | 1954 | args->index = be16_to_cpu(*tagp); |
2056 | return 0; | 1955 | return 0; |
2057 | } | 1956 | } |
2058 | 1957 | ||
2059 | /* | 1958 | /* |
1959 | * Top-level node form directory addname routine. | ||
1960 | */ | ||
1961 | int /* error */ | ||
1962 | xfs_dir2_node_addname( | ||
1963 | xfs_da_args_t *args) /* operation arguments */ | ||
1964 | { | ||
1965 | xfs_da_state_blk_t *blk; /* leaf block for insert */ | ||
1966 | int error; /* error return value */ | ||
1967 | int rval; /* sub-return value */ | ||
1968 | xfs_da_state_t *state; /* btree cursor */ | ||
1969 | |||
1970 | trace_xfs_dir2_node_addname(args); | ||
1971 | |||
1972 | /* | ||
1973 | * Allocate and initialize the state (btree cursor). | ||
1974 | */ | ||
1975 | state = xfs_da_state_alloc(); | ||
1976 | state->args = args; | ||
1977 | state->mp = args->dp->i_mount; | ||
1978 | /* | ||
1979 | * Look up the name. We're not supposed to find it, but | ||
1980 | * this gives us the insertion point. | ||
1981 | */ | ||
1982 | error = xfs_da3_node_lookup_int(state, &rval); | ||
1983 | if (error) | ||
1984 | rval = error; | ||
1985 | if (rval != -ENOENT) { | ||
1986 | goto done; | ||
1987 | } | ||
1988 | /* | ||
1989 | * Add the data entry to a data block. | ||
1990 | * Extravalid is set to a freeblock found by lookup. | ||
1991 | */ | ||
1992 | rval = xfs_dir2_node_addname_int(args, | ||
1993 | state->extravalid ? &state->extrablk : NULL); | ||
1994 | if (rval) { | ||
1995 | goto done; | ||
1996 | } | ||
1997 | blk = &state->path.blk[state->path.active - 1]; | ||
1998 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
1999 | /* | ||
2000 | * Add the new leaf entry. | ||
2001 | */ | ||
2002 | rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); | ||
2003 | if (rval == 0) { | ||
2004 | /* | ||
2005 | * It worked, fix the hash values up the btree. | ||
2006 | */ | ||
2007 | if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) | ||
2008 | xfs_da3_fixhashpath(state, &state->path); | ||
2009 | } else { | ||
2010 | /* | ||
2011 | * It didn't work, we need to split the leaf block. | ||
2012 | */ | ||
2013 | if (args->total == 0) { | ||
2014 | ASSERT(rval == -ENOSPC); | ||
2015 | goto done; | ||
2016 | } | ||
2017 | /* | ||
2018 | * Split the leaf block and insert the new entry. | ||
2019 | */ | ||
2020 | rval = xfs_da3_split(state); | ||
2021 | } | ||
2022 | done: | ||
2023 | xfs_da_state_free(state); | ||
2024 | return rval; | ||
2025 | } | ||
2026 | |||
2027 | /* | ||
2060 | * Lookup an entry in a node-format directory. | 2028 | * Lookup an entry in a node-format directory. |
2061 | * All the real work happens in xfs_da3_node_lookup_int. | 2029 | * All the real work happens in xfs_da3_node_lookup_int. |
2062 | * The only real output is the inode number of the entry. | 2030 | * The only real output is the inode number of the entry. |
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 033589257f54..85f14fc2a8da 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c | |||
@@ -164,7 +164,7 @@ xfs_dir2_block_to_sf( | |||
164 | * can free the block and copy the formatted data into the inode literal | 164 | * can free the block and copy the formatted data into the inode literal |
165 | * area. | 165 | * area. |
166 | */ | 166 | */ |
167 | dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); | 167 | dst = kmem_alloc(mp->m_sb.sb_inodesize, 0); |
168 | hdr = bp->b_addr; | 168 | hdr = bp->b_addr; |
169 | 169 | ||
170 | /* | 170 | /* |
@@ -436,7 +436,7 @@ xfs_dir2_sf_addname_hard( | |||
436 | 436 | ||
437 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 437 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; |
438 | old_isize = (int)dp->i_d.di_size; | 438 | old_isize = (int)dp->i_d.di_size; |
439 | buf = kmem_alloc(old_isize, KM_SLEEP); | 439 | buf = kmem_alloc(old_isize, 0); |
440 | oldsfp = (xfs_dir2_sf_hdr_t *)buf; | 440 | oldsfp = (xfs_dir2_sf_hdr_t *)buf; |
441 | memcpy(oldsfp, sfp, old_isize); | 441 | memcpy(oldsfp, sfp, old_isize); |
442 | /* | 442 | /* |
@@ -1096,7 +1096,7 @@ xfs_dir2_sf_toino4( | |||
1096 | * Don't want xfs_idata_realloc copying the data here. | 1096 | * Don't want xfs_idata_realloc copying the data here. |
1097 | */ | 1097 | */ |
1098 | oldsize = dp->i_df.if_bytes; | 1098 | oldsize = dp->i_df.if_bytes; |
1099 | buf = kmem_alloc(oldsize, KM_SLEEP); | 1099 | buf = kmem_alloc(oldsize, 0); |
1100 | oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 1100 | oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; |
1101 | ASSERT(oldsfp->i8count == 1); | 1101 | ASSERT(oldsfp->i8count == 1); |
1102 | memcpy(buf, oldsfp, oldsize); | 1102 | memcpy(buf, oldsfp, oldsize); |
@@ -1169,7 +1169,7 @@ xfs_dir2_sf_toino8( | |||
1169 | * Don't want xfs_idata_realloc copying the data here. | 1169 | * Don't want xfs_idata_realloc copying the data here. |
1170 | */ | 1170 | */ |
1171 | oldsize = dp->i_df.if_bytes; | 1171 | oldsize = dp->i_df.if_bytes; |
1172 | buf = kmem_alloc(oldsize, KM_SLEEP); | 1172 | buf = kmem_alloc(oldsize, 0); |
1173 | oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 1173 | oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; |
1174 | ASSERT(oldsfp->i8count == 0); | 1174 | ASSERT(oldsfp->i8count == 0); |
1175 | memcpy(buf, oldsfp, oldsize); | 1175 | memcpy(buf, oldsfp, oldsize); |
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 52d03a3a02a4..39dd2b908106 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h | |||
@@ -287,7 +287,7 @@ struct xfs_ag_geometry { | |||
287 | uint32_t ag_ifree; /* o: inodes free */ | 287 | uint32_t ag_ifree; /* o: inodes free */ |
288 | uint32_t ag_sick; /* o: sick things in ag */ | 288 | uint32_t ag_sick; /* o: sick things in ag */ |
289 | uint32_t ag_checked; /* o: checked metadata in ag */ | 289 | uint32_t ag_checked; /* o: checked metadata in ag */ |
290 | uint32_t ag_reserved32; /* o: zero */ | 290 | uint32_t ag_flags; /* i/o: flags for this ag */ |
291 | uint64_t ag_reserved[12];/* o: zero */ | 291 | uint64_t ag_reserved[12];/* o: zero */ |
292 | }; | 292 | }; |
293 | #define XFS_AG_GEOM_SICK_SB (1 << 0) /* superblock */ | 293 | #define XFS_AG_GEOM_SICK_SB (1 << 0) /* superblock */ |
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 04377ab75863..588d44613094 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c | |||
@@ -2787,8 +2787,13 @@ xfs_ialloc_setup_geometry( | |||
2787 | igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, | 2787 | igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, |
2788 | inodes); | 2788 | inodes); |
2789 | 2789 | ||
2790 | /* Set the maximum inode count for this filesystem. */ | 2790 | /* |
2791 | if (sbp->sb_imax_pct) { | 2791 | * Set the maximum inode count for this filesystem, being careful not |
2792 | * to use obviously garbage sb_inopblog/sb_inopblock values. Regular | ||
2793 | * users should never get here due to failing sb verification, but | ||
2794 | * certain users (xfs_db) need to be usable even with corrupt metadata. | ||
2795 | */ | ||
2796 | if (sbp->sb_imax_pct && igeo->ialloc_blks) { | ||
2792 | /* | 2797 | /* |
2793 | * Make sure the maximum inode count is a multiple | 2798 | * Make sure the maximum inode count is a multiple |
2794 | * of the units we allocate inodes in. | 2799 | * of the units we allocate inodes in. |
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 27aa3f2bc4bc..7bc87408f1a0 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c | |||
@@ -616,7 +616,7 @@ xfs_iext_realloc_root( | |||
616 | * sequence counter is seen before the modifications to the extent tree itself | 616 | * sequence counter is seen before the modifications to the extent tree itself |
617 | * take effect. | 617 | * take effect. |
618 | */ | 618 | */ |
619 | static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp, int state) | 619 | static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp) |
620 | { | 620 | { |
621 | WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1); | 621 | WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1); |
622 | } | 622 | } |
@@ -633,7 +633,7 @@ xfs_iext_insert( | |||
633 | struct xfs_iext_leaf *new = NULL; | 633 | struct xfs_iext_leaf *new = NULL; |
634 | int nr_entries, i; | 634 | int nr_entries, i; |
635 | 635 | ||
636 | xfs_iext_inc_seq(ifp, state); | 636 | xfs_iext_inc_seq(ifp); |
637 | 637 | ||
638 | if (ifp->if_height == 0) | 638 | if (ifp->if_height == 0) |
639 | xfs_iext_alloc_root(ifp, cur); | 639 | xfs_iext_alloc_root(ifp, cur); |
@@ -875,7 +875,7 @@ xfs_iext_remove( | |||
875 | ASSERT(ifp->if_u1.if_root != NULL); | 875 | ASSERT(ifp->if_u1.if_root != NULL); |
876 | ASSERT(xfs_iext_valid(ifp, cur)); | 876 | ASSERT(xfs_iext_valid(ifp, cur)); |
877 | 877 | ||
878 | xfs_iext_inc_seq(ifp, state); | 878 | xfs_iext_inc_seq(ifp); |
879 | 879 | ||
880 | nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; | 880 | nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; |
881 | for (i = cur->pos; i < nr_entries; i++) | 881 | for (i = cur->pos; i < nr_entries; i++) |
@@ -983,7 +983,7 @@ xfs_iext_update_extent( | |||
983 | { | 983 | { |
984 | struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); | 984 | struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); |
985 | 985 | ||
986 | xfs_iext_inc_seq(ifp, state); | 986 | xfs_iext_inc_seq(ifp); |
987 | 987 | ||
988 | if (cur->pos == 0) { | 988 | if (cur->pos == 0) { |
989 | struct xfs_bmbt_irec old; | 989 | struct xfs_bmbt_irec old; |
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index bf3e04018246..c643beeb5a24 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c | |||
@@ -94,7 +94,7 @@ xfs_iformat_fork( | |||
94 | return 0; | 94 | return 0; |
95 | 95 | ||
96 | ASSERT(ip->i_afp == NULL); | 96 | ASSERT(ip->i_afp == NULL); |
97 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); | 97 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS); |
98 | 98 | ||
99 | switch (dip->di_aformat) { | 99 | switch (dip->di_aformat) { |
100 | case XFS_DINODE_FMT_LOCAL: | 100 | case XFS_DINODE_FMT_LOCAL: |
@@ -147,7 +147,7 @@ xfs_init_local_fork( | |||
147 | 147 | ||
148 | if (size) { | 148 | if (size) { |
149 | real_size = roundup(mem_size, 4); | 149 | real_size = roundup(mem_size, 4); |
150 | ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); | 150 | ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS); |
151 | memcpy(ifp->if_u1.if_data, data, size); | 151 | memcpy(ifp->if_u1.if_data, data, size); |
152 | if (zero_terminate) | 152 | if (zero_terminate) |
153 | ifp->if_u1.if_data[size] = '\0'; | 153 | ifp->if_u1.if_data[size] = '\0'; |
@@ -302,7 +302,7 @@ xfs_iformat_btree( | |||
302 | } | 302 | } |
303 | 303 | ||
304 | ifp->if_broot_bytes = size; | 304 | ifp->if_broot_bytes = size; |
305 | ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); | 305 | ifp->if_broot = kmem_alloc(size, KM_NOFS); |
306 | ASSERT(ifp->if_broot != NULL); | 306 | ASSERT(ifp->if_broot != NULL); |
307 | /* | 307 | /* |
308 | * Copy and convert from the on-disk structure | 308 | * Copy and convert from the on-disk structure |
@@ -367,7 +367,7 @@ xfs_iroot_realloc( | |||
367 | */ | 367 | */ |
368 | if (ifp->if_broot_bytes == 0) { | 368 | if (ifp->if_broot_bytes == 0) { |
369 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); | 369 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); |
370 | ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); | 370 | ifp->if_broot = kmem_alloc(new_size, KM_NOFS); |
371 | ifp->if_broot_bytes = (int)new_size; | 371 | ifp->if_broot_bytes = (int)new_size; |
372 | return; | 372 | return; |
373 | } | 373 | } |
@@ -382,7 +382,7 @@ xfs_iroot_realloc( | |||
382 | new_max = cur_max + rec_diff; | 382 | new_max = cur_max + rec_diff; |
383 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); | 383 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); |
384 | ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, | 384 | ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, |
385 | KM_SLEEP | KM_NOFS); | 385 | KM_NOFS); |
386 | op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | 386 | op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, |
387 | ifp->if_broot_bytes); | 387 | ifp->if_broot_bytes); |
388 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | 388 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, |
@@ -408,7 +408,7 @@ xfs_iroot_realloc( | |||
408 | else | 408 | else |
409 | new_size = 0; | 409 | new_size = 0; |
410 | if (new_size > 0) { | 410 | if (new_size > 0) { |
411 | new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); | 411 | new_broot = kmem_alloc(new_size, KM_NOFS); |
412 | /* | 412 | /* |
413 | * First copy over the btree block header. | 413 | * First copy over the btree block header. |
414 | */ | 414 | */ |
@@ -492,7 +492,7 @@ xfs_idata_realloc( | |||
492 | * We enforce that here. | 492 | * We enforce that here. |
493 | */ | 493 | */ |
494 | ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data, | 494 | ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data, |
495 | roundup(new_size, 4), KM_SLEEP | KM_NOFS); | 495 | roundup(new_size, 4), KM_NOFS); |
496 | ifp->if_bytes = new_size; | 496 | ifp->if_bytes = new_size; |
497 | } | 497 | } |
498 | 498 | ||
@@ -683,7 +683,7 @@ xfs_ifork_init_cow( | |||
683 | return; | 683 | return; |
684 | 684 | ||
685 | ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone, | 685 | ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone, |
686 | KM_SLEEP | KM_NOFS); | 686 | KM_NOFS); |
687 | ip->i_cowfp->if_flags = XFS_IFEXTENTS; | 687 | ip->i_cowfp->if_flags = XFS_IFEXTENTS; |
688 | ip->i_cformat = XFS_DINODE_FMT_EXTENTS; | 688 | ip->i_cformat = XFS_DINODE_FMT_EXTENTS; |
689 | ip->i_cnextents = 0; | 689 | ip->i_cnextents = 0; |
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 51bb9bdb0e84..9a7fadb1361c 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c | |||
@@ -1174,7 +1174,7 @@ out_cur: | |||
1174 | /* | 1174 | /* |
1175 | * Record a refcount intent for later processing. | 1175 | * Record a refcount intent for later processing. |
1176 | */ | 1176 | */ |
1177 | static int | 1177 | static void |
1178 | __xfs_refcount_add( | 1178 | __xfs_refcount_add( |
1179 | struct xfs_trans *tp, | 1179 | struct xfs_trans *tp, |
1180 | enum xfs_refcount_intent_type type, | 1180 | enum xfs_refcount_intent_type type, |
@@ -1189,44 +1189,43 @@ __xfs_refcount_add( | |||
1189 | blockcount); | 1189 | blockcount); |
1190 | 1190 | ||
1191 | ri = kmem_alloc(sizeof(struct xfs_refcount_intent), | 1191 | ri = kmem_alloc(sizeof(struct xfs_refcount_intent), |
1192 | KM_SLEEP | KM_NOFS); | 1192 | KM_NOFS); |
1193 | INIT_LIST_HEAD(&ri->ri_list); | 1193 | INIT_LIST_HEAD(&ri->ri_list); |
1194 | ri->ri_type = type; | 1194 | ri->ri_type = type; |
1195 | ri->ri_startblock = startblock; | 1195 | ri->ri_startblock = startblock; |
1196 | ri->ri_blockcount = blockcount; | 1196 | ri->ri_blockcount = blockcount; |
1197 | 1197 | ||
1198 | xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); | 1198 | xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); |
1199 | return 0; | ||
1200 | } | 1199 | } |
1201 | 1200 | ||
1202 | /* | 1201 | /* |
1203 | * Increase the reference count of the blocks backing a file's extent. | 1202 | * Increase the reference count of the blocks backing a file's extent. |
1204 | */ | 1203 | */ |
1205 | int | 1204 | void |
1206 | xfs_refcount_increase_extent( | 1205 | xfs_refcount_increase_extent( |
1207 | struct xfs_trans *tp, | 1206 | struct xfs_trans *tp, |
1208 | struct xfs_bmbt_irec *PREV) | 1207 | struct xfs_bmbt_irec *PREV) |
1209 | { | 1208 | { |
1210 | if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) | 1209 | if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) |
1211 | return 0; | 1210 | return; |
1212 | 1211 | ||
1213 | return __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, | 1212 | __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, |
1214 | PREV->br_startblock, PREV->br_blockcount); | 1213 | PREV->br_blockcount); |
1215 | } | 1214 | } |
1216 | 1215 | ||
1217 | /* | 1216 | /* |
1218 | * Decrease the reference count of the blocks backing a file's extent. | 1217 | * Decrease the reference count of the blocks backing a file's extent. |
1219 | */ | 1218 | */ |
1220 | int | 1219 | void |
1221 | xfs_refcount_decrease_extent( | 1220 | xfs_refcount_decrease_extent( |
1222 | struct xfs_trans *tp, | 1221 | struct xfs_trans *tp, |
1223 | struct xfs_bmbt_irec *PREV) | 1222 | struct xfs_bmbt_irec *PREV) |
1224 | { | 1223 | { |
1225 | if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) | 1224 | if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) |
1226 | return 0; | 1225 | return; |
1227 | 1226 | ||
1228 | return __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, | 1227 | __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, |
1229 | PREV->br_startblock, PREV->br_blockcount); | 1228 | PREV->br_blockcount); |
1230 | } | 1229 | } |
1231 | 1230 | ||
1232 | /* | 1231 | /* |
@@ -1541,47 +1540,40 @@ __xfs_refcount_cow_free( | |||
1541 | } | 1540 | } |
1542 | 1541 | ||
1543 | /* Record a CoW staging extent in the refcount btree. */ | 1542 | /* Record a CoW staging extent in the refcount btree. */ |
1544 | int | 1543 | void |
1545 | xfs_refcount_alloc_cow_extent( | 1544 | xfs_refcount_alloc_cow_extent( |
1546 | struct xfs_trans *tp, | 1545 | struct xfs_trans *tp, |
1547 | xfs_fsblock_t fsb, | 1546 | xfs_fsblock_t fsb, |
1548 | xfs_extlen_t len) | 1547 | xfs_extlen_t len) |
1549 | { | 1548 | { |
1550 | struct xfs_mount *mp = tp->t_mountp; | 1549 | struct xfs_mount *mp = tp->t_mountp; |
1551 | int error; | ||
1552 | 1550 | ||
1553 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) | 1551 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) |
1554 | return 0; | 1552 | return; |
1555 | 1553 | ||
1556 | error = __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); | 1554 | __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); |
1557 | if (error) | ||
1558 | return error; | ||
1559 | 1555 | ||
1560 | /* Add rmap entry */ | 1556 | /* Add rmap entry */ |
1561 | return xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), | 1557 | xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), |
1562 | XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); | 1558 | XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); |
1563 | } | 1559 | } |
1564 | 1560 | ||
1565 | /* Forget a CoW staging event in the refcount btree. */ | 1561 | /* Forget a CoW staging event in the refcount btree. */ |
1566 | int | 1562 | void |
1567 | xfs_refcount_free_cow_extent( | 1563 | xfs_refcount_free_cow_extent( |
1568 | struct xfs_trans *tp, | 1564 | struct xfs_trans *tp, |
1569 | xfs_fsblock_t fsb, | 1565 | xfs_fsblock_t fsb, |
1570 | xfs_extlen_t len) | 1566 | xfs_extlen_t len) |
1571 | { | 1567 | { |
1572 | struct xfs_mount *mp = tp->t_mountp; | 1568 | struct xfs_mount *mp = tp->t_mountp; |
1573 | int error; | ||
1574 | 1569 | ||
1575 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) | 1570 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) |
1576 | return 0; | 1571 | return; |
1577 | 1572 | ||
1578 | /* Remove rmap entry */ | 1573 | /* Remove rmap entry */ |
1579 | error = xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), | 1574 | xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), |
1580 | XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); | 1575 | XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); |
1581 | if (error) | 1576 | __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); |
1582 | return error; | ||
1583 | |||
1584 | return __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); | ||
1585 | } | 1577 | } |
1586 | 1578 | ||
1587 | struct xfs_refcount_recovery { | 1579 | struct xfs_refcount_recovery { |
@@ -1602,7 +1594,7 @@ xfs_refcount_recover_extent( | |||
1602 | if (be32_to_cpu(rec->refc.rc_refcount) != 1) | 1594 | if (be32_to_cpu(rec->refc.rc_refcount) != 1) |
1603 | return -EFSCORRUPTED; | 1595 | return -EFSCORRUPTED; |
1604 | 1596 | ||
1605 | rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP); | 1597 | rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); |
1606 | xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); | 1598 | xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); |
1607 | list_add_tail(&rr->rr_list, debris); | 1599 | list_add_tail(&rr->rr_list, debris); |
1608 | 1600 | ||
@@ -1679,10 +1671,8 @@ xfs_refcount_recover_cow_leftovers( | |||
1679 | /* Free the orphan record */ | 1671 | /* Free the orphan record */ |
1680 | agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; | 1672 | agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; |
1681 | fsb = XFS_AGB_TO_FSB(mp, agno, agbno); | 1673 | fsb = XFS_AGB_TO_FSB(mp, agno, agbno); |
1682 | error = xfs_refcount_free_cow_extent(tp, fsb, | 1674 | xfs_refcount_free_cow_extent(tp, fsb, |
1683 | rr->rr_rrec.rc_blockcount); | 1675 | rr->rr_rrec.rc_blockcount); |
1684 | if (error) | ||
1685 | goto out_trans; | ||
1686 | 1676 | ||
1687 | /* Free the block. */ | 1677 | /* Free the block. */ |
1688 | xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); | 1678 | xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); |
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 1d9c518575e7..209795539c8d 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h | |||
@@ -29,9 +29,9 @@ struct xfs_refcount_intent { | |||
29 | xfs_extlen_t ri_blockcount; | 29 | xfs_extlen_t ri_blockcount; |
30 | }; | 30 | }; |
31 | 31 | ||
32 | extern int xfs_refcount_increase_extent(struct xfs_trans *tp, | 32 | void xfs_refcount_increase_extent(struct xfs_trans *tp, |
33 | struct xfs_bmbt_irec *irec); | 33 | struct xfs_bmbt_irec *irec); |
34 | extern int xfs_refcount_decrease_extent(struct xfs_trans *tp, | 34 | void xfs_refcount_decrease_extent(struct xfs_trans *tp, |
35 | struct xfs_bmbt_irec *irec); | 35 | struct xfs_bmbt_irec *irec); |
36 | 36 | ||
37 | extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, | 37 | extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, |
@@ -45,10 +45,10 @@ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, | |||
45 | xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, | 45 | xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, |
46 | xfs_extlen_t *flen, bool find_end_of_shared); | 46 | xfs_extlen_t *flen, bool find_end_of_shared); |
47 | 47 | ||
48 | extern int xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, | 48 | void xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, |
49 | xfs_fsblock_t fsb, xfs_extlen_t len); | 49 | xfs_extlen_t len); |
50 | extern int xfs_refcount_free_cow_extent(struct xfs_trans *tp, | 50 | void xfs_refcount_free_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, |
51 | xfs_fsblock_t fsb, xfs_extlen_t len); | 51 | xfs_extlen_t len); |
52 | extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, | 52 | extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, |
53 | xfs_agnumber_t agno); | 53 | xfs_agnumber_t agno); |
54 | 54 | ||
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index e6aeb390b2fb..38e9414878b3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c | |||
@@ -168,7 +168,6 @@ xfs_rmap_btrec_to_irec( | |||
168 | union xfs_btree_rec *rec, | 168 | union xfs_btree_rec *rec, |
169 | struct xfs_rmap_irec *irec) | 169 | struct xfs_rmap_irec *irec) |
170 | { | 170 | { |
171 | irec->rm_flags = 0; | ||
172 | irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock); | 171 | irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock); |
173 | irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount); | 172 | irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount); |
174 | irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner); | 173 | irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner); |
@@ -254,15 +253,15 @@ xfs_rmap_find_left_neighbor_helper( | |||
254 | rec->rm_flags); | 253 | rec->rm_flags); |
255 | 254 | ||
256 | if (rec->rm_owner != info->high.rm_owner) | 255 | if (rec->rm_owner != info->high.rm_owner) |
257 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 256 | return 0; |
258 | if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && | 257 | if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && |
259 | !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && | 258 | !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && |
260 | rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset) | 259 | rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset) |
261 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 260 | return 0; |
262 | 261 | ||
263 | *info->irec = *rec; | 262 | *info->irec = *rec; |
264 | *info->stat = 1; | 263 | *info->stat = 1; |
265 | return XFS_BTREE_QUERY_RANGE_ABORT; | 264 | return -ECANCELED; |
266 | } | 265 | } |
267 | 266 | ||
268 | /* | 267 | /* |
@@ -305,7 +304,7 @@ xfs_rmap_find_left_neighbor( | |||
305 | 304 | ||
306 | error = xfs_rmap_query_range(cur, &info.high, &info.high, | 305 | error = xfs_rmap_query_range(cur, &info.high, &info.high, |
307 | xfs_rmap_find_left_neighbor_helper, &info); | 306 | xfs_rmap_find_left_neighbor_helper, &info); |
308 | if (error == XFS_BTREE_QUERY_RANGE_ABORT) | 307 | if (error == -ECANCELED) |
309 | error = 0; | 308 | error = 0; |
310 | if (*stat) | 309 | if (*stat) |
311 | trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, | 310 | trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, |
@@ -330,16 +329,16 @@ xfs_rmap_lookup_le_range_helper( | |||
330 | rec->rm_flags); | 329 | rec->rm_flags); |
331 | 330 | ||
332 | if (rec->rm_owner != info->high.rm_owner) | 331 | if (rec->rm_owner != info->high.rm_owner) |
333 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 332 | return 0; |
334 | if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && | 333 | if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && |
335 | !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && | 334 | !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && |
336 | (rec->rm_offset > info->high.rm_offset || | 335 | (rec->rm_offset > info->high.rm_offset || |
337 | rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset)) | 336 | rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset)) |
338 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 337 | return 0; |
339 | 338 | ||
340 | *info->irec = *rec; | 339 | *info->irec = *rec; |
341 | *info->stat = 1; | 340 | *info->stat = 1; |
342 | return XFS_BTREE_QUERY_RANGE_ABORT; | 341 | return -ECANCELED; |
343 | } | 342 | } |
344 | 343 | ||
345 | /* | 344 | /* |
@@ -377,7 +376,7 @@ xfs_rmap_lookup_le_range( | |||
377 | cur->bc_private.a.agno, bno, 0, owner, offset, flags); | 376 | cur->bc_private.a.agno, bno, 0, owner, offset, flags); |
378 | error = xfs_rmap_query_range(cur, &info.high, &info.high, | 377 | error = xfs_rmap_query_range(cur, &info.high, &info.high, |
379 | xfs_rmap_lookup_le_range_helper, &info); | 378 | xfs_rmap_lookup_le_range_helper, &info); |
380 | if (error == XFS_BTREE_QUERY_RANGE_ABORT) | 379 | if (error == -ECANCELED) |
381 | error = 0; | 380 | error = 0; |
382 | if (*stat) | 381 | if (*stat) |
383 | trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, | 382 | trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, |
@@ -2268,7 +2267,7 @@ xfs_rmap_update_is_needed( | |||
2268 | * Record a rmap intent; the list is kept sorted first by AG and then by | 2267 | * Record a rmap intent; the list is kept sorted first by AG and then by |
2269 | * increasing age. | 2268 | * increasing age. |
2270 | */ | 2269 | */ |
2271 | static int | 2270 | static void |
2272 | __xfs_rmap_add( | 2271 | __xfs_rmap_add( |
2273 | struct xfs_trans *tp, | 2272 | struct xfs_trans *tp, |
2274 | enum xfs_rmap_intent_type type, | 2273 | enum xfs_rmap_intent_type type, |
@@ -2287,7 +2286,7 @@ __xfs_rmap_add( | |||
2287 | bmap->br_blockcount, | 2286 | bmap->br_blockcount, |
2288 | bmap->br_state); | 2287 | bmap->br_state); |
2289 | 2288 | ||
2290 | ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS); | 2289 | ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS); |
2291 | INIT_LIST_HEAD(&ri->ri_list); | 2290 | INIT_LIST_HEAD(&ri->ri_list); |
2292 | ri->ri_type = type; | 2291 | ri->ri_type = type; |
2293 | ri->ri_owner = owner; | 2292 | ri->ri_owner = owner; |
@@ -2295,11 +2294,10 @@ __xfs_rmap_add( | |||
2295 | ri->ri_bmap = *bmap; | 2294 | ri->ri_bmap = *bmap; |
2296 | 2295 | ||
2297 | xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); | 2296 | xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); |
2298 | return 0; | ||
2299 | } | 2297 | } |
2300 | 2298 | ||
2301 | /* Map an extent into a file. */ | 2299 | /* Map an extent into a file. */ |
2302 | int | 2300 | void |
2303 | xfs_rmap_map_extent( | 2301 | xfs_rmap_map_extent( |
2304 | struct xfs_trans *tp, | 2302 | struct xfs_trans *tp, |
2305 | struct xfs_inode *ip, | 2303 | struct xfs_inode *ip, |
@@ -2307,15 +2305,15 @@ xfs_rmap_map_extent( | |||
2307 | struct xfs_bmbt_irec *PREV) | 2305 | struct xfs_bmbt_irec *PREV) |
2308 | { | 2306 | { |
2309 | if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) | 2307 | if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) |
2310 | return 0; | 2308 | return; |
2311 | 2309 | ||
2312 | return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? | 2310 | __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? |
2313 | XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, | 2311 | XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, |
2314 | whichfork, PREV); | 2312 | whichfork, PREV); |
2315 | } | 2313 | } |
2316 | 2314 | ||
2317 | /* Unmap an extent out of a file. */ | 2315 | /* Unmap an extent out of a file. */ |
2318 | int | 2316 | void |
2319 | xfs_rmap_unmap_extent( | 2317 | xfs_rmap_unmap_extent( |
2320 | struct xfs_trans *tp, | 2318 | struct xfs_trans *tp, |
2321 | struct xfs_inode *ip, | 2319 | struct xfs_inode *ip, |
@@ -2323,9 +2321,9 @@ xfs_rmap_unmap_extent( | |||
2323 | struct xfs_bmbt_irec *PREV) | 2321 | struct xfs_bmbt_irec *PREV) |
2324 | { | 2322 | { |
2325 | if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) | 2323 | if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) |
2326 | return 0; | 2324 | return; |
2327 | 2325 | ||
2328 | return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? | 2326 | __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? |
2329 | XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, | 2327 | XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, |
2330 | whichfork, PREV); | 2328 | whichfork, PREV); |
2331 | } | 2329 | } |
@@ -2336,7 +2334,7 @@ xfs_rmap_unmap_extent( | |||
2336 | * Note that tp can be NULL here as no transaction is used for COW fork | 2334 | * Note that tp can be NULL here as no transaction is used for COW fork |
2337 | * unwritten conversion. | 2335 | * unwritten conversion. |
2338 | */ | 2336 | */ |
2339 | int | 2337 | void |
2340 | xfs_rmap_convert_extent( | 2338 | xfs_rmap_convert_extent( |
2341 | struct xfs_mount *mp, | 2339 | struct xfs_mount *mp, |
2342 | struct xfs_trans *tp, | 2340 | struct xfs_trans *tp, |
@@ -2345,15 +2343,15 @@ xfs_rmap_convert_extent( | |||
2345 | struct xfs_bmbt_irec *PREV) | 2343 | struct xfs_bmbt_irec *PREV) |
2346 | { | 2344 | { |
2347 | if (!xfs_rmap_update_is_needed(mp, whichfork)) | 2345 | if (!xfs_rmap_update_is_needed(mp, whichfork)) |
2348 | return 0; | 2346 | return; |
2349 | 2347 | ||
2350 | return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? | 2348 | __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? |
2351 | XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino, | 2349 | XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino, |
2352 | whichfork, PREV); | 2350 | whichfork, PREV); |
2353 | } | 2351 | } |
2354 | 2352 | ||
2355 | /* Schedule the creation of an rmap for non-file data. */ | 2353 | /* Schedule the creation of an rmap for non-file data. */ |
2356 | int | 2354 | void |
2357 | xfs_rmap_alloc_extent( | 2355 | xfs_rmap_alloc_extent( |
2358 | struct xfs_trans *tp, | 2356 | struct xfs_trans *tp, |
2359 | xfs_agnumber_t agno, | 2357 | xfs_agnumber_t agno, |
@@ -2364,18 +2362,18 @@ xfs_rmap_alloc_extent( | |||
2364 | struct xfs_bmbt_irec bmap; | 2362 | struct xfs_bmbt_irec bmap; |
2365 | 2363 | ||
2366 | if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) | 2364 | if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) |
2367 | return 0; | 2365 | return; |
2368 | 2366 | ||
2369 | bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); | 2367 | bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); |
2370 | bmap.br_blockcount = len; | 2368 | bmap.br_blockcount = len; |
2371 | bmap.br_startoff = 0; | 2369 | bmap.br_startoff = 0; |
2372 | bmap.br_state = XFS_EXT_NORM; | 2370 | bmap.br_state = XFS_EXT_NORM; |
2373 | 2371 | ||
2374 | return __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap); | 2372 | __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap); |
2375 | } | 2373 | } |
2376 | 2374 | ||
2377 | /* Schedule the deletion of an rmap for non-file data. */ | 2375 | /* Schedule the deletion of an rmap for non-file data. */ |
2378 | int | 2376 | void |
2379 | xfs_rmap_free_extent( | 2377 | xfs_rmap_free_extent( |
2380 | struct xfs_trans *tp, | 2378 | struct xfs_trans *tp, |
2381 | xfs_agnumber_t agno, | 2379 | xfs_agnumber_t agno, |
@@ -2386,14 +2384,14 @@ xfs_rmap_free_extent( | |||
2386 | struct xfs_bmbt_irec bmap; | 2384 | struct xfs_bmbt_irec bmap; |
2387 | 2385 | ||
2388 | if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) | 2386 | if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) |
2389 | return 0; | 2387 | return; |
2390 | 2388 | ||
2391 | bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); | 2389 | bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); |
2392 | bmap.br_blockcount = len; | 2390 | bmap.br_blockcount = len; |
2393 | bmap.br_startoff = 0; | 2391 | bmap.br_startoff = 0; |
2394 | bmap.br_state = XFS_EXT_NORM; | 2392 | bmap.br_state = XFS_EXT_NORM; |
2395 | 2393 | ||
2396 | return __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); | 2394 | __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); |
2397 | } | 2395 | } |
2398 | 2396 | ||
2399 | /* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */ | 2397 | /* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */ |
@@ -2511,7 +2509,7 @@ xfs_rmap_has_other_keys_helper( | |||
2511 | ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags) | 2509 | ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags) |
2512 | return 0; | 2510 | return 0; |
2513 | rks->has_rmap = true; | 2511 | rks->has_rmap = true; |
2514 | return XFS_BTREE_QUERY_RANGE_ABORT; | 2512 | return -ECANCELED; |
2515 | } | 2513 | } |
2516 | 2514 | ||
2517 | /* | 2515 | /* |
@@ -2540,8 +2538,11 @@ xfs_rmap_has_other_keys( | |||
2540 | 2538 | ||
2541 | error = xfs_rmap_query_range(cur, &low, &high, | 2539 | error = xfs_rmap_query_range(cur, &low, &high, |
2542 | xfs_rmap_has_other_keys_helper, &rks); | 2540 | xfs_rmap_has_other_keys_helper, &rks); |
2541 | if (error < 0) | ||
2542 | return error; | ||
2543 | |||
2543 | *has_rmap = rks.has_rmap; | 2544 | *has_rmap = rks.has_rmap; |
2544 | return error; | 2545 | return 0; |
2545 | } | 2546 | } |
2546 | 2547 | ||
2547 | const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { | 2548 | const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { |
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index e21ed0294e5c..abe633403fd1 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h | |||
@@ -68,6 +68,7 @@ xfs_rmap_irec_offset_unpack( | |||
68 | if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS)) | 68 | if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS)) |
69 | return -EFSCORRUPTED; | 69 | return -EFSCORRUPTED; |
70 | irec->rm_offset = XFS_RMAP_OFF(offset); | 70 | irec->rm_offset = XFS_RMAP_OFF(offset); |
71 | irec->rm_flags = 0; | ||
71 | if (offset & XFS_RMAP_OFF_ATTR_FORK) | 72 | if (offset & XFS_RMAP_OFF_ATTR_FORK) |
72 | irec->rm_flags |= XFS_RMAP_ATTR_FORK; | 73 | irec->rm_flags |= XFS_RMAP_ATTR_FORK; |
73 | if (offset & XFS_RMAP_OFF_BMBT_BLOCK) | 74 | if (offset & XFS_RMAP_OFF_BMBT_BLOCK) |
@@ -161,16 +162,16 @@ struct xfs_rmap_intent { | |||
161 | }; | 162 | }; |
162 | 163 | ||
163 | /* functions for updating the rmapbt based on bmbt map/unmap operations */ | 164 | /* functions for updating the rmapbt based on bmbt map/unmap operations */ |
164 | int xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, | 165 | void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, |
165 | int whichfork, struct xfs_bmbt_irec *imap); | 166 | int whichfork, struct xfs_bmbt_irec *imap); |
166 | int xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, | 167 | void xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, |
167 | int whichfork, struct xfs_bmbt_irec *imap); | 168 | int whichfork, struct xfs_bmbt_irec *imap); |
168 | int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp, | 169 | void xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp, |
169 | struct xfs_inode *ip, int whichfork, | 170 | struct xfs_inode *ip, int whichfork, |
170 | struct xfs_bmbt_irec *imap); | 171 | struct xfs_bmbt_irec *imap); |
171 | int xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno, | 172 | void xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno, |
172 | xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); | 173 | xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); |
173 | int xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno, | 174 | void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno, |
174 | xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); | 175 | xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); |
175 | 176 | ||
176 | void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, | 177 | void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, |
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index e0641b7337b3..c45acbd3add9 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h | |||
@@ -177,10 +177,4 @@ struct xfs_ino_geometry { | |||
177 | unsigned int agino_log; /* #bits for agino in inum */ | 177 | unsigned int agino_log; /* #bits for agino in inum */ |
178 | }; | 178 | }; |
179 | 179 | ||
180 | /* Keep iterating the data structure. */ | ||
181 | #define XFS_ITER_CONTINUE (0) | ||
182 | |||
183 | /* Stop iterating the data structure. */ | ||
184 | #define XFS_ITER_ABORT (1) | ||
185 | |||
186 | #endif /* __XFS_SHARED_H__ */ | 180 | #endif /* __XFS_SHARED_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 802b34cd10fe..300b3e91ca3a 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h | |||
@@ -169,6 +169,14 @@ typedef struct xfs_bmbt_irec | |||
169 | xfs_exntst_t br_state; /* extent state */ | 169 | xfs_exntst_t br_state; /* extent state */ |
170 | } xfs_bmbt_irec_t; | 170 | } xfs_bmbt_irec_t; |
171 | 171 | ||
172 | /* per-AG block reservation types */ | ||
173 | enum xfs_ag_resv_type { | ||
174 | XFS_AG_RESV_NONE = 0, | ||
175 | XFS_AG_RESV_AGFL, | ||
176 | XFS_AG_RESV_METADATA, | ||
177 | XFS_AG_RESV_RMAPBT, | ||
178 | }; | ||
179 | |||
172 | /* | 180 | /* |
173 | * Type verifier functions | 181 | * Type verifier functions |
174 | */ | 182 | */ |
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 16b09b941441..ba0f747c82e8 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c | |||
@@ -639,7 +639,7 @@ xchk_agfl_block( | |||
639 | xchk_agfl_block_xref(sc, agbno); | 639 | xchk_agfl_block_xref(sc, agbno); |
640 | 640 | ||
641 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | 641 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
642 | return XFS_ITER_ABORT; | 642 | return -ECANCELED; |
643 | 643 | ||
644 | return 0; | 644 | return 0; |
645 | } | 645 | } |
@@ -730,7 +730,7 @@ xchk_agfl( | |||
730 | /* Check the blocks in the AGFL. */ | 730 | /* Check the blocks in the AGFL. */ |
731 | error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), | 731 | error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), |
732 | sc->sa.agfl_bp, xchk_agfl_block, &sai); | 732 | sc->sa.agfl_bp, xchk_agfl_block, &sai); |
733 | if (error == XFS_ITER_ABORT) { | 733 | if (error == -ECANCELED) { |
734 | error = 0; | 734 | error = 0; |
735 | goto out_free; | 735 | goto out_free; |
736 | } | 736 | } |
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 1afc58bf71dd..0edc7f8eb96e 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c | |||
@@ -80,7 +80,7 @@ xchk_setup_xattr( | |||
80 | * without the inode lock held, which means we can sleep. | 80 | * without the inode lock held, which means we can sleep. |
81 | */ | 81 | */ |
82 | if (sc->flags & XCHK_TRY_HARDER) { | 82 | if (sc->flags & XCHK_TRY_HARDER) { |
83 | error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP); | 83 | error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, 0); |
84 | if (error) | 84 | if (error) |
85 | return error; | 85 | return error; |
86 | } | 86 | } |
@@ -163,8 +163,6 @@ xchk_xattr_listent( | |||
163 | args.valuelen = valuelen; | 163 | args.valuelen = valuelen; |
164 | 164 | ||
165 | error = xfs_attr_get_ilocked(context->dp, &args); | 165 | error = xfs_attr_get_ilocked(context->dp, &args); |
166 | if (error == -EEXIST) | ||
167 | error = 0; | ||
168 | if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, | 166 | if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, |
169 | &error)) | 167 | &error)) |
170 | goto fail_xref; | 168 | goto fail_xref; |
@@ -173,7 +171,7 @@ xchk_xattr_listent( | |||
173 | args.blkno); | 171 | args.blkno); |
174 | fail_xref: | 172 | fail_xref: |
175 | if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | 173 | if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
176 | context->seen_enough = XFS_ITER_ABORT; | 174 | context->seen_enough = 1; |
177 | return; | 175 | return; |
178 | } | 176 | } |
179 | 177 | ||
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 1bd29fdc2ab5..fa6ea6407992 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c | |||
@@ -75,6 +75,7 @@ struct xchk_bmap_info { | |||
75 | xfs_fileoff_t lastoff; | 75 | xfs_fileoff_t lastoff; |
76 | bool is_rt; | 76 | bool is_rt; |
77 | bool is_shared; | 77 | bool is_shared; |
78 | bool was_loaded; | ||
78 | int whichfork; | 79 | int whichfork; |
79 | }; | 80 | }; |
80 | 81 | ||
@@ -213,25 +214,20 @@ xchk_bmap_xref_rmap( | |||
213 | 214 | ||
214 | /* Cross-reference a single rtdev extent record. */ | 215 | /* Cross-reference a single rtdev extent record. */ |
215 | STATIC void | 216 | STATIC void |
216 | xchk_bmap_rt_extent_xref( | 217 | xchk_bmap_rt_iextent_xref( |
217 | struct xchk_bmap_info *info, | ||
218 | struct xfs_inode *ip, | 218 | struct xfs_inode *ip, |
219 | struct xfs_btree_cur *cur, | 219 | struct xchk_bmap_info *info, |
220 | struct xfs_bmbt_irec *irec) | 220 | struct xfs_bmbt_irec *irec) |
221 | { | 221 | { |
222 | if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | ||
223 | return; | ||
224 | |||
225 | xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, | 222 | xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, |
226 | irec->br_blockcount); | 223 | irec->br_blockcount); |
227 | } | 224 | } |
228 | 225 | ||
229 | /* Cross-reference a single datadev extent record. */ | 226 | /* Cross-reference a single datadev extent record. */ |
230 | STATIC void | 227 | STATIC void |
231 | xchk_bmap_extent_xref( | 228 | xchk_bmap_iextent_xref( |
232 | struct xchk_bmap_info *info, | ||
233 | struct xfs_inode *ip, | 229 | struct xfs_inode *ip, |
234 | struct xfs_btree_cur *cur, | 230 | struct xchk_bmap_info *info, |
235 | struct xfs_bmbt_irec *irec) | 231 | struct xfs_bmbt_irec *irec) |
236 | { | 232 | { |
237 | struct xfs_mount *mp = info->sc->mp; | 233 | struct xfs_mount *mp = info->sc->mp; |
@@ -240,9 +236,6 @@ xchk_bmap_extent_xref( | |||
240 | xfs_extlen_t len; | 236 | xfs_extlen_t len; |
241 | int error; | 237 | int error; |
242 | 238 | ||
243 | if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | ||
244 | return; | ||
245 | |||
246 | agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); | 239 | agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); |
247 | agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); | 240 | agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); |
248 | len = irec->br_blockcount; | 241 | len = irec->br_blockcount; |
@@ -300,20 +293,15 @@ xchk_bmap_dirattr_extent( | |||
300 | 293 | ||
301 | /* Scrub a single extent record. */ | 294 | /* Scrub a single extent record. */ |
302 | STATIC int | 295 | STATIC int |
303 | xchk_bmap_extent( | 296 | xchk_bmap_iextent( |
304 | struct xfs_inode *ip, | 297 | struct xfs_inode *ip, |
305 | struct xfs_btree_cur *cur, | ||
306 | struct xchk_bmap_info *info, | 298 | struct xchk_bmap_info *info, |
307 | struct xfs_bmbt_irec *irec) | 299 | struct xfs_bmbt_irec *irec) |
308 | { | 300 | { |
309 | struct xfs_mount *mp = info->sc->mp; | 301 | struct xfs_mount *mp = info->sc->mp; |
310 | struct xfs_buf *bp = NULL; | ||
311 | xfs_filblks_t end; | 302 | xfs_filblks_t end; |
312 | int error = 0; | 303 | int error = 0; |
313 | 304 | ||
314 | if (cur) | ||
315 | xfs_btree_get_block(cur, 0, &bp); | ||
316 | |||
317 | /* | 305 | /* |
318 | * Check for out-of-order extents. This record could have come | 306 | * Check for out-of-order extents. This record could have come |
319 | * from the incore list, for which there is no ordering check. | 307 | * from the incore list, for which there is no ordering check. |
@@ -364,10 +352,13 @@ xchk_bmap_extent( | |||
364 | xchk_fblock_set_corrupt(info->sc, info->whichfork, | 352 | xchk_fblock_set_corrupt(info->sc, info->whichfork, |
365 | irec->br_startoff); | 353 | irec->br_startoff); |
366 | 354 | ||
355 | if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | ||
356 | return 0; | ||
357 | |||
367 | if (info->is_rt) | 358 | if (info->is_rt) |
368 | xchk_bmap_rt_extent_xref(info, ip, cur, irec); | 359 | xchk_bmap_rt_iextent_xref(ip, info, irec); |
369 | else | 360 | else |
370 | xchk_bmap_extent_xref(info, ip, cur, irec); | 361 | xchk_bmap_iextent_xref(ip, info, irec); |
371 | 362 | ||
372 | info->lastoff = irec->br_startoff + irec->br_blockcount; | 363 | info->lastoff = irec->br_startoff + irec->br_blockcount; |
373 | return error; | 364 | return error; |
@@ -380,10 +371,13 @@ xchk_bmapbt_rec( | |||
380 | union xfs_btree_rec *rec) | 371 | union xfs_btree_rec *rec) |
381 | { | 372 | { |
382 | struct xfs_bmbt_irec irec; | 373 | struct xfs_bmbt_irec irec; |
374 | struct xfs_bmbt_irec iext_irec; | ||
375 | struct xfs_iext_cursor icur; | ||
383 | struct xchk_bmap_info *info = bs->private; | 376 | struct xchk_bmap_info *info = bs->private; |
384 | struct xfs_inode *ip = bs->cur->bc_private.b.ip; | 377 | struct xfs_inode *ip = bs->cur->bc_private.b.ip; |
385 | struct xfs_buf *bp = NULL; | 378 | struct xfs_buf *bp = NULL; |
386 | struct xfs_btree_block *block; | 379 | struct xfs_btree_block *block; |
380 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, info->whichfork); | ||
387 | uint64_t owner; | 381 | uint64_t owner; |
388 | int i; | 382 | int i; |
389 | 383 | ||
@@ -402,9 +396,26 @@ xchk_bmapbt_rec( | |||
402 | } | 396 | } |
403 | } | 397 | } |
404 | 398 | ||
405 | /* Set up the in-core record and scrub it. */ | 399 | /* |
400 | * Check that the incore extent tree contains an extent that matches | ||
401 | * this one exactly. We validate those cached bmaps later, so we don't | ||
402 | * need to check them here. If the incore extent tree was just loaded | ||
403 | * from disk by the scrubber, we assume that its contents match what's | ||
404 | * on disk (we still hold the ILOCK) and skip the equivalence check. | ||
405 | */ | ||
406 | if (!info->was_loaded) | ||
407 | return 0; | ||
408 | |||
406 | xfs_bmbt_disk_get_all(&rec->bmbt, &irec); | 409 | xfs_bmbt_disk_get_all(&rec->bmbt, &irec); |
407 | return xchk_bmap_extent(ip, bs->cur, info, &irec); | 410 | if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, |
411 | &iext_irec) || | ||
412 | irec.br_startoff != iext_irec.br_startoff || | ||
413 | irec.br_startblock != iext_irec.br_startblock || | ||
414 | irec.br_blockcount != iext_irec.br_blockcount || | ||
415 | irec.br_state != iext_irec.br_state) | ||
416 | xchk_fblock_set_corrupt(bs->sc, info->whichfork, | ||
417 | irec.br_startoff); | ||
418 | return 0; | ||
408 | } | 419 | } |
409 | 420 | ||
410 | /* Scan the btree records. */ | 421 | /* Scan the btree records. */ |
@@ -415,15 +426,26 @@ xchk_bmap_btree( | |||
415 | struct xchk_bmap_info *info) | 426 | struct xchk_bmap_info *info) |
416 | { | 427 | { |
417 | struct xfs_owner_info oinfo; | 428 | struct xfs_owner_info oinfo; |
429 | struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork); | ||
418 | struct xfs_mount *mp = sc->mp; | 430 | struct xfs_mount *mp = sc->mp; |
419 | struct xfs_inode *ip = sc->ip; | 431 | struct xfs_inode *ip = sc->ip; |
420 | struct xfs_btree_cur *cur; | 432 | struct xfs_btree_cur *cur; |
421 | int error; | 433 | int error; |
422 | 434 | ||
435 | /* Load the incore bmap cache if it's not loaded. */ | ||
436 | info->was_loaded = ifp->if_flags & XFS_IFEXTENTS; | ||
437 | if (!info->was_loaded) { | ||
438 | error = xfs_iread_extents(sc->tp, ip, whichfork); | ||
439 | if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) | ||
440 | goto out; | ||
441 | } | ||
442 | |||
443 | /* Check the btree structure. */ | ||
423 | cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); | 444 | cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); |
424 | xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); | 445 | xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); |
425 | error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); | 446 | error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); |
426 | xfs_btree_del_cursor(cur, error); | 447 | xfs_btree_del_cursor(cur, error); |
448 | out: | ||
427 | return error; | 449 | return error; |
428 | } | 450 | } |
429 | 451 | ||
@@ -500,7 +522,7 @@ xchk_bmap_check_rmap( | |||
500 | 522 | ||
501 | out: | 523 | out: |
502 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | 524 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
503 | return XFS_BTREE_QUERY_RANGE_ABORT; | 525 | return -ECANCELED; |
504 | return 0; | 526 | return 0; |
505 | } | 527 | } |
506 | 528 | ||
@@ -529,7 +551,7 @@ xchk_bmap_check_ag_rmaps( | |||
529 | sbcri.sc = sc; | 551 | sbcri.sc = sc; |
530 | sbcri.whichfork = whichfork; | 552 | sbcri.whichfork = whichfork; |
531 | error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); | 553 | error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); |
532 | if (error == XFS_BTREE_QUERY_RANGE_ABORT) | 554 | if (error == -ECANCELED) |
533 | error = 0; | 555 | error = 0; |
534 | 556 | ||
535 | xfs_btree_del_cursor(cur, error); | 557 | xfs_btree_del_cursor(cur, error); |
@@ -671,13 +693,6 @@ xchk_bmap( | |||
671 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | 693 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
672 | goto out; | 694 | goto out; |
673 | 695 | ||
674 | /* Now try to scrub the in-memory extent list. */ | ||
675 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
676 | error = xfs_iread_extents(sc->tp, ip, whichfork); | ||
677 | if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) | ||
678 | goto out; | ||
679 | } | ||
680 | |||
681 | /* Find the offset of the last extent in the mapping. */ | 696 | /* Find the offset of the last extent in the mapping. */ |
682 | error = xfs_bmap_last_offset(ip, &endoff, whichfork); | 697 | error = xfs_bmap_last_offset(ip, &endoff, whichfork); |
683 | if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) | 698 | if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) |
@@ -689,7 +704,7 @@ xchk_bmap( | |||
689 | for_each_xfs_iext(ifp, &icur, &irec) { | 704 | for_each_xfs_iext(ifp, &icur, &irec) { |
690 | if (xchk_should_terminate(sc, &error) || | 705 | if (xchk_should_terminate(sc, &error) || |
691 | (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) | 706 | (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) |
692 | break; | 707 | goto out; |
693 | if (isnullstartblock(irec.br_startblock)) | 708 | if (isnullstartblock(irec.br_startblock)) |
694 | continue; | 709 | continue; |
695 | if (irec.br_startoff >= endoff) { | 710 | if (irec.br_startoff >= endoff) { |
@@ -697,7 +712,7 @@ xchk_bmap( | |||
697 | irec.br_startoff); | 712 | irec.br_startoff); |
698 | goto out; | 713 | goto out; |
699 | } | 714 | } |
700 | error = xchk_bmap_extent(ip, NULL, &info, &irec); | 715 | error = xchk_bmap_iextent(ip, &info, &irec); |
701 | if (error) | 716 | if (error) |
702 | goto out; | 717 | goto out; |
703 | } | 718 | } |
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index fc3f510c9034..98f82d7c8b40 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c | |||
@@ -125,7 +125,7 @@ xchk_setup_fscounters( | |||
125 | struct xchk_fscounters *fsc; | 125 | struct xchk_fscounters *fsc; |
126 | int error; | 126 | int error; |
127 | 127 | ||
128 | sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), KM_SLEEP); | 128 | sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0); |
129 | if (!sc->buf) | 129 | if (!sc->buf) |
130 | return -ENOMEM; | 130 | return -ENOMEM; |
131 | fsc = sc->buf; | 131 | fsc = sc->buf; |
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 4cfeec57fb05..b70a88bc975e 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c | |||
@@ -351,7 +351,7 @@ xrep_init_btblock( | |||
351 | xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); | 351 | xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); |
352 | xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); | 352 | xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); |
353 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); | 353 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); |
354 | xfs_trans_log_buf(tp, bp, 0, bp->b_length); | 354 | xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1); |
355 | bp->b_ops = ops; | 355 | bp->b_ops = ops; |
356 | *bpp = bp; | 356 | *bpp = bp; |
357 | 357 | ||
@@ -664,7 +664,7 @@ xrep_findroot_agfl_walk( | |||
664 | { | 664 | { |
665 | xfs_agblock_t *agbno = priv; | 665 | xfs_agblock_t *agbno = priv; |
666 | 666 | ||
667 | return (*agbno == bno) ? XFS_ITER_ABORT : 0; | 667 | return (*agbno == bno) ? -ECANCELED : 0; |
668 | } | 668 | } |
669 | 669 | ||
670 | /* Does this block match the btree information passed in? */ | 670 | /* Does this block match the btree information passed in? */ |
@@ -694,7 +694,7 @@ xrep_findroot_block( | |||
694 | if (owner == XFS_RMAP_OWN_AG) { | 694 | if (owner == XFS_RMAP_OWN_AG) { |
695 | error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, | 695 | error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, |
696 | xrep_findroot_agfl_walk, &agbno); | 696 | xrep_findroot_agfl_walk, &agbno); |
697 | if (error == XFS_ITER_ABORT) | 697 | if (error == -ECANCELED) |
698 | return 0; | 698 | return 0; |
699 | if (error) | 699 | if (error) |
700 | return error; | 700 | return error; |
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index 99c0b1234c3c..5641ae512c9e 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c | |||
@@ -22,7 +22,7 @@ xchk_setup_symlink( | |||
22 | struct xfs_inode *ip) | 22 | struct xfs_inode *ip) |
23 | { | 23 | { |
24 | /* Allocate the buffer without the inode lock held. */ | 24 | /* Allocate the buffer without the inode lock held. */ |
25 | sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP); | 25 | sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, 0); |
26 | if (!sc->buf) | 26 | if (!sc->buf) |
27 | return -ENOMEM; | 27 | return -ENOMEM; |
28 | 28 | ||
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index cbda40d40326..96d7071cfa46 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -112,7 +112,7 @@ xfs_get_acl(struct inode *inode, int type) | |||
112 | { | 112 | { |
113 | struct xfs_inode *ip = XFS_I(inode); | 113 | struct xfs_inode *ip = XFS_I(inode); |
114 | struct posix_acl *acl = NULL; | 114 | struct posix_acl *acl = NULL; |
115 | struct xfs_acl *xfs_acl; | 115 | struct xfs_acl *xfs_acl = NULL; |
116 | unsigned char *ea_name; | 116 | unsigned char *ea_name; |
117 | int error; | 117 | int error; |
118 | int len; | 118 | int len; |
@@ -135,12 +135,8 @@ xfs_get_acl(struct inode *inode, int type) | |||
135 | * go out to the disk. | 135 | * go out to the disk. |
136 | */ | 136 | */ |
137 | len = XFS_ACL_MAX_SIZE(ip->i_mount); | 137 | len = XFS_ACL_MAX_SIZE(ip->i_mount); |
138 | xfs_acl = kmem_zalloc_large(len, KM_SLEEP); | 138 | error = xfs_attr_get(ip, ea_name, (unsigned char **)&xfs_acl, &len, |
139 | if (!xfs_acl) | 139 | ATTR_ALLOC | ATTR_ROOT); |
140 | return ERR_PTR(-ENOMEM); | ||
141 | |||
142 | error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, | ||
143 | &len, ATTR_ROOT); | ||
144 | if (error) { | 140 | if (error) { |
145 | /* | 141 | /* |
146 | * If the attribute doesn't exist make sure we have a negative | 142 | * If the attribute doesn't exist make sure we have a negative |
@@ -151,8 +147,8 @@ xfs_get_acl(struct inode *inode, int type) | |||
151 | } else { | 147 | } else { |
152 | acl = xfs_acl_from_disk(xfs_acl, len, | 148 | acl = xfs_acl_from_disk(xfs_acl, len, |
153 | XFS_ACL_MAX_ENTRIES(ip->i_mount)); | 149 | XFS_ACL_MAX_ENTRIES(ip->i_mount)); |
150 | kmem_free(xfs_acl); | ||
154 | } | 151 | } |
155 | kmem_free(xfs_acl); | ||
156 | return acl; | 152 | return acl; |
157 | } | 153 | } |
158 | 154 | ||
@@ -180,7 +176,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
180 | struct xfs_acl *xfs_acl; | 176 | struct xfs_acl *xfs_acl; |
181 | int len = XFS_ACL_MAX_SIZE(ip->i_mount); | 177 | int len = XFS_ACL_MAX_SIZE(ip->i_mount); |
182 | 178 | ||
183 | xfs_acl = kmem_zalloc_large(len, KM_SLEEP); | 179 | xfs_acl = kmem_zalloc_large(len, 0); |
184 | if (!xfs_acl) | 180 | if (!xfs_acl) |
185 | return -ENOMEM; | 181 | return -ENOMEM; |
186 | 182 | ||
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index dc93c51c17de..a640a285cc52 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c | |||
@@ -147,7 +147,7 @@ xfs_attr3_leaf_inactive( | |||
147 | * Allocate storage for a list of all the "remote" value extents. | 147 | * Allocate storage for a list of all the "remote" value extents. |
148 | */ | 148 | */ |
149 | size = count * sizeof(xfs_attr_inactive_list_t); | 149 | size = count * sizeof(xfs_attr_inactive_list_t); |
150 | list = kmem_alloc(size, KM_SLEEP); | 150 | list = kmem_alloc(size, 0); |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * Identify each of the "remote" value extents. | 153 | * Identify each of the "remote" value extents. |
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 58fc820a70c6..00758fdc2fec 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c | |||
@@ -109,7 +109,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) | |||
109 | * It didn't all fit, so we have to sort everything on hashval. | 109 | * It didn't all fit, so we have to sort everything on hashval. |
110 | */ | 110 | */ |
111 | sbsize = sf->hdr.count * sizeof(*sbuf); | 111 | sbsize = sf->hdr.count * sizeof(*sbuf); |
112 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); | 112 | sbp = sbuf = kmem_alloc(sbsize, KM_NOFS); |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * Scan the attribute list for the rest of the entries, storing | 115 | * Scan the attribute list for the rest of the entries, storing |
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9fa4a7ee8cfc..83d24e983d4c 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c | |||
@@ -141,7 +141,7 @@ xfs_bui_init( | |||
141 | { | 141 | { |
142 | struct xfs_bui_log_item *buip; | 142 | struct xfs_bui_log_item *buip; |
143 | 143 | ||
144 | buip = kmem_zone_zalloc(xfs_bui_zone, KM_SLEEP); | 144 | buip = kmem_zone_zalloc(xfs_bui_zone, 0); |
145 | 145 | ||
146 | xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops); | 146 | xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops); |
147 | buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS; | 147 | buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS; |
@@ -218,7 +218,7 @@ xfs_trans_get_bud( | |||
218 | { | 218 | { |
219 | struct xfs_bud_log_item *budp; | 219 | struct xfs_bud_log_item *budp; |
220 | 220 | ||
221 | budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); | 221 | budp = kmem_zone_zalloc(xfs_bud_zone, 0); |
222 | xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, | 222 | xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, |
223 | &xfs_bud_item_ops); | 223 | &xfs_bud_item_ops); |
224 | budp->bud_buip = buip; | 224 | budp->bud_buip = buip; |
@@ -542,9 +542,7 @@ xfs_bui_recover( | |||
542 | irec.br_blockcount = count; | 542 | irec.br_blockcount = count; |
543 | irec.br_startoff = bmap->me_startoff; | 543 | irec.br_startoff = bmap->me_startoff; |
544 | irec.br_state = state; | 544 | irec.br_state = state; |
545 | error = xfs_bmap_unmap_extent(tp, ip, &irec); | 545 | xfs_bmap_unmap_extent(tp, ip, &irec); |
546 | if (error) | ||
547 | goto err_inode; | ||
548 | } | 546 | } |
549 | 547 | ||
550 | set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); | 548 | set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 98c6a7a71427..0910cb75b65d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -39,9 +39,9 @@ | |||
39 | xfs_daddr_t | 39 | xfs_daddr_t |
40 | xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) | 40 | xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) |
41 | { | 41 | { |
42 | return (XFS_IS_REALTIME_INODE(ip) ? \ | 42 | if (XFS_IS_REALTIME_INODE(ip)) |
43 | (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ | 43 | return XFS_FSB_TO_BB(ip->i_mount, fsb); |
44 | XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); | 44 | return XFS_FSB_TO_DADDR(ip->i_mount, fsb); |
45 | } | 45 | } |
46 | 46 | ||
47 | /* | 47 | /* |
@@ -1532,24 +1532,16 @@ xfs_swap_extent_rmap( | |||
1532 | trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); | 1532 | trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); |
1533 | 1533 | ||
1534 | /* Remove the mapping from the donor file. */ | 1534 | /* Remove the mapping from the donor file. */ |
1535 | error = xfs_bmap_unmap_extent(tp, tip, &uirec); | 1535 | xfs_bmap_unmap_extent(tp, tip, &uirec); |
1536 | if (error) | ||
1537 | goto out; | ||
1538 | 1536 | ||
1539 | /* Remove the mapping from the source file. */ | 1537 | /* Remove the mapping from the source file. */ |
1540 | error = xfs_bmap_unmap_extent(tp, ip, &irec); | 1538 | xfs_bmap_unmap_extent(tp, ip, &irec); |
1541 | if (error) | ||
1542 | goto out; | ||
1543 | 1539 | ||
1544 | /* Map the donor file's blocks into the source file. */ | 1540 | /* Map the donor file's blocks into the source file. */ |
1545 | error = xfs_bmap_map_extent(tp, ip, &uirec); | 1541 | xfs_bmap_map_extent(tp, ip, &uirec); |
1546 | if (error) | ||
1547 | goto out; | ||
1548 | 1542 | ||
1549 | /* Map the source file's blocks into the donor file. */ | 1543 | /* Map the source file's blocks into the donor file. */ |
1550 | error = xfs_bmap_map_extent(tp, tip, &irec); | 1544 | xfs_bmap_map_extent(tp, tip, &irec); |
1551 | if (error) | ||
1552 | goto out; | ||
1553 | 1545 | ||
1554 | error = xfs_defer_finish(tpp); | 1546 | error = xfs_defer_finish(tpp); |
1555 | tp = *tpp; | 1547 | tp = *tpp; |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ca0849043f54..120ef99d09e8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -353,7 +353,8 @@ xfs_buf_allocate_memory( | |||
353 | */ | 353 | */ |
354 | size = BBTOB(bp->b_length); | 354 | size = BBTOB(bp->b_length); |
355 | if (size < PAGE_SIZE) { | 355 | if (size < PAGE_SIZE) { |
356 | bp->b_addr = kmem_alloc(size, KM_NOFS); | 356 | int align_mask = xfs_buftarg_dma_alignment(bp->b_target); |
357 | bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS); | ||
357 | if (!bp->b_addr) { | 358 | if (!bp->b_addr) { |
358 | /* low memory - use alloc_page loop instead */ | 359 | /* low memory - use alloc_page loop instead */ |
359 | goto use_alloc_page; | 360 | goto use_alloc_page; |
@@ -368,7 +369,7 @@ xfs_buf_allocate_memory( | |||
368 | } | 369 | } |
369 | bp->b_offset = offset_in_page(bp->b_addr); | 370 | bp->b_offset = offset_in_page(bp->b_addr); |
370 | bp->b_pages = bp->b_page_array; | 371 | bp->b_pages = bp->b_page_array; |
371 | bp->b_pages[0] = virt_to_page(bp->b_addr); | 372 | bp->b_pages[0] = kmem_to_page(bp->b_addr); |
372 | bp->b_page_count = 1; | 373 | bp->b_page_count = 1; |
373 | bp->b_flags |= _XBF_KMEM; | 374 | bp->b_flags |= _XBF_KMEM; |
374 | return 0; | 375 | return 0; |
@@ -1741,7 +1742,7 @@ xfs_alloc_buftarg( | |||
1741 | { | 1742 | { |
1742 | xfs_buftarg_t *btp; | 1743 | xfs_buftarg_t *btp; |
1743 | 1744 | ||
1744 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS); | 1745 | btp = kmem_zalloc(sizeof(*btp), KM_NOFS); |
1745 | 1746 | ||
1746 | btp->bt_mount = mp; | 1747 | btp->bt_mount = mp; |
1747 | btp->bt_dev = bdev->bd_dev; | 1748 | btp->bt_dev = bdev->bd_dev; |
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c6e57a3f409e..f6ce17d8d848 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -350,6 +350,12 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); | |||
350 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) | 350 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) |
351 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) | 351 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) |
352 | 352 | ||
353 | static inline int | ||
354 | xfs_buftarg_dma_alignment(struct xfs_buftarg *bt) | ||
355 | { | ||
356 | return queue_dma_alignment(bt->bt_bdev->bd_disk->queue); | ||
357 | } | ||
358 | |||
353 | int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); | 359 | int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); |
354 | bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); | 360 | bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); |
355 | bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); | 361 | bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 7dcaec54a20b..d74fbd1e9d3e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -702,7 +702,7 @@ xfs_buf_item_get_format( | |||
702 | } | 702 | } |
703 | 703 | ||
704 | bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), | 704 | bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), |
705 | KM_SLEEP); | 705 | 0); |
706 | if (!bip->bli_formats) | 706 | if (!bip->bli_formats) |
707 | return -ENOMEM; | 707 | return -ENOMEM; |
708 | return 0; | 708 | return 0; |
@@ -747,7 +747,7 @@ xfs_buf_item_init( | |||
747 | return 0; | 747 | return 0; |
748 | } | 748 | } |
749 | 749 | ||
750 | bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); | 750 | bip = kmem_zone_zalloc(xfs_buf_item_zone, 0); |
751 | xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); | 751 | xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); |
752 | bip->bli_buf = bp; | 752 | bip->bli_buf = bp; |
753 | 753 | ||
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index fb1ad4483081..aeb95e7391c1 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -440,7 +440,7 @@ xfs_dquot_alloc( | |||
440 | { | 440 | { |
441 | struct xfs_dquot *dqp; | 441 | struct xfs_dquot *dqp; |
442 | 442 | ||
443 | dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); | 443 | dqp = kmem_zone_zalloc(xfs_qm_dqzone, 0); |
444 | 444 | ||
445 | dqp->dq_flags = type; | 445 | dqp->dq_flags = type; |
446 | dqp->q_core.d_id = cpu_to_be32(id); | 446 | dqp->q_core.d_id = cpu_to_be32(id); |
@@ -1239,7 +1239,7 @@ xfs_qm_exit(void) | |||
1239 | /* | 1239 | /* |
1240 | * Iterate every dquot of a particular type. The caller must ensure that the | 1240 | * Iterate every dquot of a particular type. The caller must ensure that the |
1241 | * particular quota type is active. iter_fn can return negative error codes, | 1241 | * particular quota type is active. iter_fn can return negative error codes, |
1242 | * or XFS_ITER_ABORT to indicate that it wants to stop iterating. | 1242 | * or -ECANCELED to indicate that it wants to stop iterating. |
1243 | */ | 1243 | */ |
1244 | int | 1244 | int |
1245 | xfs_qm_dqiterate( | 1245 | xfs_qm_dqiterate( |
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 282ec5af293e..d60647d7197b 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
@@ -347,7 +347,7 @@ xfs_qm_qoff_logitem_init( | |||
347 | { | 347 | { |
348 | struct xfs_qoff_logitem *qf; | 348 | struct xfs_qoff_logitem *qf; |
349 | 349 | ||
350 | qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); | 350 | qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), 0); |
351 | 351 | ||
352 | xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? | 352 | xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? |
353 | &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); | 353 | &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 544c9482a0ef..849fd4476950 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -213,7 +213,7 @@ xfs_errortag_init( | |||
213 | struct xfs_mount *mp) | 213 | struct xfs_mount *mp) |
214 | { | 214 | { |
215 | mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, | 215 | mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, |
216 | KM_SLEEP | KM_MAYFAIL); | 216 | KM_MAYFAIL); |
217 | if (!mp->m_errortag) | 217 | if (!mp->m_errortag) |
218 | return -ENOMEM; | 218 | return -ENOMEM; |
219 | 219 | ||
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 0ed68379e551..2183d87be4cf 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c | |||
@@ -33,7 +33,7 @@ xfs_extent_busy_insert( | |||
33 | struct rb_node **rbp; | 33 | struct rb_node **rbp; |
34 | struct rb_node *parent = NULL; | 34 | struct rb_node *parent = NULL; |
35 | 35 | ||
36 | new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP); | 36 | new = kmem_zalloc(sizeof(struct xfs_extent_busy), 0); |
37 | new->agno = agno; | 37 | new->agno = agno; |
38 | new->bno = bno; | 38 | new->bno = bno; |
39 | new->length = len; | 39 | new->length = len; |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 86f6512d6864..e44efc41a041 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -163,9 +163,9 @@ xfs_efi_init( | |||
163 | if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { | 163 | if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { |
164 | size = (uint)(sizeof(xfs_efi_log_item_t) + | 164 | size = (uint)(sizeof(xfs_efi_log_item_t) + |
165 | ((nextents - 1) * sizeof(xfs_extent_t))); | 165 | ((nextents - 1) * sizeof(xfs_extent_t))); |
166 | efip = kmem_zalloc(size, KM_SLEEP); | 166 | efip = kmem_zalloc(size, 0); |
167 | } else { | 167 | } else { |
168 | efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP); | 168 | efip = kmem_zone_zalloc(xfs_efi_zone, 0); |
169 | } | 169 | } |
170 | 170 | ||
171 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); | 171 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); |
@@ -333,9 +333,9 @@ xfs_trans_get_efd( | |||
333 | if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { | 333 | if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { |
334 | efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + | 334 | efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + |
335 | (nextents - 1) * sizeof(struct xfs_extent), | 335 | (nextents - 1) * sizeof(struct xfs_extent), |
336 | KM_SLEEP); | 336 | 0); |
337 | } else { | 337 | } else { |
338 | efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); | 338 | efdp = kmem_zone_zalloc(xfs_efd_zone, 0); |
339 | } | 339 | } |
340 | 340 | ||
341 | xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD, | 341 | xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD, |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 28101bbc0b78..d952d5962e93 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/falloc.h> | 28 | #include <linux/falloc.h> |
29 | #include <linux/backing-dev.h> | 29 | #include <linux/backing-dev.h> |
30 | #include <linux/mman.h> | 30 | #include <linux/mman.h> |
31 | #include <linux/fadvise.h> | ||
31 | 32 | ||
32 | static const struct vm_operations_struct xfs_file_vm_ops; | 33 | static const struct vm_operations_struct xfs_file_vm_ops; |
33 | 34 | ||
@@ -933,6 +934,30 @@ out_unlock: | |||
933 | return error; | 934 | return error; |
934 | } | 935 | } |
935 | 936 | ||
937 | STATIC int | ||
938 | xfs_file_fadvise( | ||
939 | struct file *file, | ||
940 | loff_t start, | ||
941 | loff_t end, | ||
942 | int advice) | ||
943 | { | ||
944 | struct xfs_inode *ip = XFS_I(file_inode(file)); | ||
945 | int ret; | ||
946 | int lockflags = 0; | ||
947 | |||
948 | /* | ||
949 | * Operations creating pages in page cache need protection from hole | ||
950 | * punching and similar ops | ||
951 | */ | ||
952 | if (advice == POSIX_FADV_WILLNEED) { | ||
953 | lockflags = XFS_IOLOCK_SHARED; | ||
954 | xfs_ilock(ip, lockflags); | ||
955 | } | ||
956 | ret = generic_fadvise(file, start, end, advice); | ||
957 | if (lockflags) | ||
958 | xfs_iunlock(ip, lockflags); | ||
959 | return ret; | ||
960 | } | ||
936 | 961 | ||
937 | STATIC loff_t | 962 | STATIC loff_t |
938 | xfs_file_remap_range( | 963 | xfs_file_remap_range( |
@@ -1232,6 +1257,7 @@ const struct file_operations xfs_file_operations = { | |||
1232 | .fsync = xfs_file_fsync, | 1257 | .fsync = xfs_file_fsync, |
1233 | .get_unmapped_area = thp_get_unmapped_area, | 1258 | .get_unmapped_area = thp_get_unmapped_area, |
1234 | .fallocate = xfs_file_fallocate, | 1259 | .fallocate = xfs_file_fallocate, |
1260 | .fadvise = xfs_file_fadvise, | ||
1235 | .remap_file_range = xfs_file_remap_range, | 1261 | .remap_file_range = xfs_file_remap_range, |
1236 | }; | 1262 | }; |
1237 | 1263 | ||
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 5a8f9641562a..d082143feb5a 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c | |||
@@ -250,7 +250,7 @@ xfs_getfsmap_helper( | |||
250 | rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); | 250 | rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); |
251 | if (info->next_daddr < rec_daddr) | 251 | if (info->next_daddr < rec_daddr) |
252 | info->next_daddr = rec_daddr; | 252 | info->next_daddr = rec_daddr; |
253 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 253 | return 0; |
254 | } | 254 | } |
255 | 255 | ||
256 | /* Are we just counting mappings? */ | 256 | /* Are we just counting mappings? */ |
@@ -259,14 +259,14 @@ xfs_getfsmap_helper( | |||
259 | info->head->fmh_entries++; | 259 | info->head->fmh_entries++; |
260 | 260 | ||
261 | if (info->last) | 261 | if (info->last) |
262 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 262 | return 0; |
263 | 263 | ||
264 | info->head->fmh_entries++; | 264 | info->head->fmh_entries++; |
265 | 265 | ||
266 | rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); | 266 | rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); |
267 | if (info->next_daddr < rec_daddr) | 267 | if (info->next_daddr < rec_daddr) |
268 | info->next_daddr = rec_daddr; | 268 | info->next_daddr = rec_daddr; |
269 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 269 | return 0; |
270 | } | 270 | } |
271 | 271 | ||
272 | /* | 272 | /* |
@@ -276,7 +276,7 @@ xfs_getfsmap_helper( | |||
276 | */ | 276 | */ |
277 | if (rec_daddr > info->next_daddr) { | 277 | if (rec_daddr > info->next_daddr) { |
278 | if (info->head->fmh_entries >= info->head->fmh_count) | 278 | if (info->head->fmh_entries >= info->head->fmh_count) |
279 | return XFS_BTREE_QUERY_RANGE_ABORT; | 279 | return -ECANCELED; |
280 | 280 | ||
281 | fmr.fmr_device = info->dev; | 281 | fmr.fmr_device = info->dev; |
282 | fmr.fmr_physical = info->next_daddr; | 282 | fmr.fmr_physical = info->next_daddr; |
@@ -295,7 +295,7 @@ xfs_getfsmap_helper( | |||
295 | 295 | ||
296 | /* Fill out the extent we found */ | 296 | /* Fill out the extent we found */ |
297 | if (info->head->fmh_entries >= info->head->fmh_count) | 297 | if (info->head->fmh_entries >= info->head->fmh_count) |
298 | return XFS_BTREE_QUERY_RANGE_ABORT; | 298 | return -ECANCELED; |
299 | 299 | ||
300 | trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec); | 300 | trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec); |
301 | 301 | ||
@@ -328,7 +328,7 @@ out: | |||
328 | rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); | 328 | rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); |
329 | if (info->next_daddr < rec_daddr) | 329 | if (info->next_daddr < rec_daddr) |
330 | info->next_daddr = rec_daddr; | 330 | info->next_daddr = rec_daddr; |
331 | return XFS_BTREE_QUERY_RANGE_CONTINUE; | 331 | return 0; |
332 | } | 332 | } |
333 | 333 | ||
334 | /* Transform a rmapbt irec into a fsmap */ | 334 | /* Transform a rmapbt irec into a fsmap */ |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0b0fd10a36d4..944add5ff8e0 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -40,7 +40,7 @@ xfs_inode_alloc( | |||
40 | * KM_MAYFAIL and return NULL here on ENOMEM. Set the | 40 | * KM_MAYFAIL and return NULL here on ENOMEM. Set the |
41 | * code up to do this anyway. | 41 | * code up to do this anyway. |
42 | */ | 42 | */ |
43 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | 43 | ip = kmem_zone_alloc(xfs_inode_zone, 0); |
44 | if (!ip) | 44 | if (!ip) |
45 | return NULL; | 45 | return NULL; |
46 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | 46 | if (inode_init_always(mp->m_super, VFS_I(ip))) { |
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d99a0a3e5f40..3ebd1b7f49d8 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c | |||
@@ -89,7 +89,7 @@ xfs_icreate_log( | |||
89 | { | 89 | { |
90 | struct xfs_icreate_item *icp; | 90 | struct xfs_icreate_item *icp; |
91 | 91 | ||
92 | icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP); | 92 | icp = kmem_zone_zalloc(xfs_icreate_zone, 0); |
93 | 93 | ||
94 | xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, | 94 | xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, |
95 | &xfs_icreate_item_ops); | 95 | &xfs_icreate_item_ops); |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6467d5e1df2d..18f4b262e61c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2018,7 +2018,7 @@ xfs_iunlink_add_backref( | |||
2018 | if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK)) | 2018 | if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK)) |
2019 | return 0; | 2019 | return 0; |
2020 | 2020 | ||
2021 | iu = kmem_zalloc(sizeof(*iu), KM_SLEEP | KM_NOFS); | 2021 | iu = kmem_zalloc(sizeof(*iu), KM_NOFS); |
2022 | iu->iu_agino = prev_agino; | 2022 | iu->iu_agino = prev_agino; |
2023 | iu->iu_next_unlinked = this_agino; | 2023 | iu->iu_next_unlinked = this_agino; |
2024 | 2024 | ||
@@ -3282,7 +3282,8 @@ xfs_rename( | |||
3282 | spaceres); | 3282 | spaceres); |
3283 | 3283 | ||
3284 | /* | 3284 | /* |
3285 | * Set up the target. | 3285 | * Check for expected errors before we dirty the transaction |
3286 | * so we can return an error without a transaction abort. | ||
3286 | */ | 3287 | */ |
3287 | if (target_ip == NULL) { | 3288 | if (target_ip == NULL) { |
3288 | /* | 3289 | /* |
@@ -3294,6 +3295,46 @@ xfs_rename( | |||
3294 | if (error) | 3295 | if (error) |
3295 | goto out_trans_cancel; | 3296 | goto out_trans_cancel; |
3296 | } | 3297 | } |
3298 | } else { | ||
3299 | /* | ||
3300 | * If target exists and it's a directory, check whether | ||
3301 | * it can be destroyed. | ||
3302 | */ | ||
3303 | if (S_ISDIR(VFS_I(target_ip)->i_mode) && | ||
3304 | (!xfs_dir_isempty(target_ip) || | ||
3305 | (VFS_I(target_ip)->i_nlink > 2))) { | ||
3306 | error = -EEXIST; | ||
3307 | goto out_trans_cancel; | ||
3308 | } | ||
3309 | } | ||
3310 | |||
3311 | /* | ||
3312 | * Directory entry creation below may acquire the AGF. Remove | ||
3313 | * the whiteout from the unlinked list first to preserve correct | ||
3314 | * AGI/AGF locking order. This dirties the transaction so failures | ||
3315 | * after this point will abort and log recovery will clean up the | ||
3316 | * mess. | ||
3317 | * | ||
3318 | * For whiteouts, we need to bump the link count on the whiteout | ||
3319 | * inode. After this point, we have a real link, clear the tmpfile | ||
3320 | * state flag from the inode so it doesn't accidentally get misused | ||
3321 | * in future. | ||
3322 | */ | ||
3323 | if (wip) { | ||
3324 | ASSERT(VFS_I(wip)->i_nlink == 0); | ||
3325 | error = xfs_iunlink_remove(tp, wip); | ||
3326 | if (error) | ||
3327 | goto out_trans_cancel; | ||
3328 | |||
3329 | xfs_bumplink(tp, wip); | ||
3330 | xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); | ||
3331 | VFS_I(wip)->i_state &= ~I_LINKABLE; | ||
3332 | } | ||
3333 | |||
3334 | /* | ||
3335 | * Set up the target. | ||
3336 | */ | ||
3337 | if (target_ip == NULL) { | ||
3297 | /* | 3338 | /* |
3298 | * If target does not exist and the rename crosses | 3339 | * If target does not exist and the rename crosses |
3299 | * directories, adjust the target directory link count | 3340 | * directories, adjust the target directory link count |
@@ -3312,22 +3353,6 @@ xfs_rename( | |||
3312 | } | 3353 | } |
3313 | } else { /* target_ip != NULL */ | 3354 | } else { /* target_ip != NULL */ |
3314 | /* | 3355 | /* |
3315 | * If target exists and it's a directory, check that both | ||
3316 | * target and source are directories and that target can be | ||
3317 | * destroyed, or that neither is a directory. | ||
3318 | */ | ||
3319 | if (S_ISDIR(VFS_I(target_ip)->i_mode)) { | ||
3320 | /* | ||
3321 | * Make sure target dir is empty. | ||
3322 | */ | ||
3323 | if (!(xfs_dir_isempty(target_ip)) || | ||
3324 | (VFS_I(target_ip)->i_nlink > 2)) { | ||
3325 | error = -EEXIST; | ||
3326 | goto out_trans_cancel; | ||
3327 | } | ||
3328 | } | ||
3329 | |||
3330 | /* | ||
3331 | * Link the source inode under the target name. | 3356 | * Link the source inode under the target name. |
3332 | * If the source inode is a directory and we are moving | 3357 | * If the source inode is a directory and we are moving |
3333 | * it across directories, its ".." entry will be | 3358 | * it across directories, its ".." entry will be |
@@ -3417,30 +3442,6 @@ xfs_rename( | |||
3417 | if (error) | 3442 | if (error) |
3418 | goto out_trans_cancel; | 3443 | goto out_trans_cancel; |
3419 | 3444 | ||
3420 | /* | ||
3421 | * For whiteouts, we need to bump the link count on the whiteout inode. | ||
3422 | * This means that failures all the way up to this point leave the inode | ||
3423 | * on the unlinked list and so cleanup is a simple matter of dropping | ||
3424 | * the remaining reference to it. If we fail here after bumping the link | ||
3425 | * count, we're shutting down the filesystem so we'll never see the | ||
3426 | * intermediate state on disk. | ||
3427 | */ | ||
3428 | if (wip) { | ||
3429 | ASSERT(VFS_I(wip)->i_nlink == 0); | ||
3430 | xfs_bumplink(tp, wip); | ||
3431 | error = xfs_iunlink_remove(tp, wip); | ||
3432 | if (error) | ||
3433 | goto out_trans_cancel; | ||
3434 | xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); | ||
3435 | |||
3436 | /* | ||
3437 | * Now we have a real link, clear the "I'm a tmpfile" state | ||
3438 | * flag from the inode so it doesn't accidentally get misused in | ||
3439 | * future. | ||
3440 | */ | ||
3441 | VFS_I(wip)->i_state &= ~I_LINKABLE; | ||
3442 | } | ||
3443 | |||
3444 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 3445 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
3445 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); | 3446 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); |
3446 | if (new_parent) | 3447 | if (new_parent) |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c9a502eed204..bb8f076805b9 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -651,7 +651,7 @@ xfs_inode_item_init( | |||
651 | struct xfs_inode_log_item *iip; | 651 | struct xfs_inode_log_item *iip; |
652 | 652 | ||
653 | ASSERT(ip->i_itemp == NULL); | 653 | ASSERT(ip->i_itemp == NULL); |
654 | iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); | 654 | iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, 0); |
655 | 655 | ||
656 | iip->ili_inode = ip; | 656 | iip->ili_inode = ip; |
657 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, | 657 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index affa557c2337..d58f0d6a699e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -396,7 +396,7 @@ xfs_attrlist_by_handle( | |||
396 | if (IS_ERR(dentry)) | 396 | if (IS_ERR(dentry)) |
397 | return PTR_ERR(dentry); | 397 | return PTR_ERR(dentry); |
398 | 398 | ||
399 | kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP); | 399 | kbuf = kmem_zalloc_large(al_hreq.buflen, 0); |
400 | if (!kbuf) | 400 | if (!kbuf) |
401 | goto out_dput; | 401 | goto out_dput; |
402 | 402 | ||
@@ -434,11 +434,11 @@ xfs_attrmulti_attr_get( | |||
434 | 434 | ||
435 | if (*len > XFS_XATTR_SIZE_MAX) | 435 | if (*len > XFS_XATTR_SIZE_MAX) |
436 | return -EINVAL; | 436 | return -EINVAL; |
437 | kbuf = kmem_zalloc_large(*len, KM_SLEEP); | 437 | kbuf = kmem_zalloc_large(*len, 0); |
438 | if (!kbuf) | 438 | if (!kbuf) |
439 | return -ENOMEM; | 439 | return -ENOMEM; |
440 | 440 | ||
441 | error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); | 441 | error = xfs_attr_get(XFS_I(inode), name, &kbuf, (int *)len, flags); |
442 | if (error) | 442 | if (error) |
443 | goto out_kfree; | 443 | goto out_kfree; |
444 | 444 | ||
@@ -831,7 +831,7 @@ xfs_bulkstat_fmt( | |||
831 | /* | 831 | /* |
832 | * Check the incoming bulk request @hdr from userspace and initialize the | 832 | * Check the incoming bulk request @hdr from userspace and initialize the |
833 | * internal @breq bulk request appropriately. Returns 0 if the bulk request | 833 | * internal @breq bulk request appropriately. Returns 0 if the bulk request |
834 | * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual | 834 | * should proceed; -ECANCELED if there's nothing to do; or the usual |
835 | * negative error code. | 835 | * negative error code. |
836 | */ | 836 | */ |
837 | static int | 837 | static int |
@@ -889,13 +889,13 @@ xfs_bulk_ireq_setup( | |||
889 | 889 | ||
890 | /* Asking for an inode past the end of the AG? We're done! */ | 890 | /* Asking for an inode past the end of the AG? We're done! */ |
891 | if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) | 891 | if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) |
892 | return XFS_ITER_ABORT; | 892 | return -ECANCELED; |
893 | } else if (hdr->agno) | 893 | } else if (hdr->agno) |
894 | return -EINVAL; | 894 | return -EINVAL; |
895 | 895 | ||
896 | /* Asking for an inode past the end of the FS? We're done! */ | 896 | /* Asking for an inode past the end of the FS? We're done! */ |
897 | if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) | 897 | if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) |
898 | return XFS_ITER_ABORT; | 898 | return -ECANCELED; |
899 | 899 | ||
900 | return 0; | 900 | return 0; |
901 | } | 901 | } |
@@ -936,7 +936,7 @@ xfs_ioc_bulkstat( | |||
936 | return -EFAULT; | 936 | return -EFAULT; |
937 | 937 | ||
938 | error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); | 938 | error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); |
939 | if (error == XFS_ITER_ABORT) | 939 | if (error == -ECANCELED) |
940 | goto out_teardown; | 940 | goto out_teardown; |
941 | if (error < 0) | 941 | if (error < 0) |
942 | return error; | 942 | return error; |
@@ -986,7 +986,7 @@ xfs_ioc_inumbers( | |||
986 | return -EFAULT; | 986 | return -EFAULT; |
987 | 987 | ||
988 | error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); | 988 | error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); |
989 | if (error == XFS_ITER_ABORT) | 989 | if (error == -ECANCELED) |
990 | goto out_teardown; | 990 | goto out_teardown; |
991 | if (error < 0) | 991 | if (error < 0) |
992 | return error; | 992 | return error; |
@@ -1038,6 +1038,10 @@ xfs_ioc_ag_geometry( | |||
1038 | 1038 | ||
1039 | if (copy_from_user(&ageo, arg, sizeof(ageo))) | 1039 | if (copy_from_user(&ageo, arg, sizeof(ageo))) |
1040 | return -EFAULT; | 1040 | return -EFAULT; |
1041 | if (ageo.ag_flags) | ||
1042 | return -EINVAL; | ||
1043 | if (memchr_inv(&ageo.ag_reserved, 0, sizeof(ageo.ag_reserved))) | ||
1044 | return -EINVAL; | ||
1041 | 1045 | ||
1042 | error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo); | 1046 | error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo); |
1043 | if (error) | 1047 | if (error) |
@@ -1309,8 +1313,7 @@ xfs_ioctl_setattr_dax_invalidate( | |||
1309 | if (fa->fsx_xflags & FS_XFLAG_DAX) { | 1313 | if (fa->fsx_xflags & FS_XFLAG_DAX) { |
1310 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) | 1314 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) |
1311 | return -EINVAL; | 1315 | return -EINVAL; |
1312 | if (S_ISREG(inode->i_mode) && | 1316 | if (!bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), |
1313 | !bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), | ||
1314 | sb->s_blocksize)) | 1317 | sb->s_blocksize)) |
1315 | return -EINVAL; | 1318 | return -EINVAL; |
1316 | } | 1319 | } |
@@ -1881,7 +1884,7 @@ xfs_ioc_getfsmap( | |||
1881 | info.mp = ip->i_mount; | 1884 | info.mp = ip->i_mount; |
1882 | info.data = arg; | 1885 | info.data = arg; |
1883 | error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); | 1886 | error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); |
1884 | if (error == XFS_BTREE_QUERY_RANGE_ABORT) { | 1887 | if (error == -ECANCELED) { |
1885 | error = 0; | 1888 | error = 0; |
1886 | aborted = true; | 1889 | aborted = true; |
1887 | } else if (error) | 1890 | } else if (error) |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 7bd7534f5051..1e08bf79b478 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -381,7 +381,7 @@ xfs_compat_attrlist_by_handle( | |||
381 | return PTR_ERR(dentry); | 381 | return PTR_ERR(dentry); |
382 | 382 | ||
383 | error = -ENOMEM; | 383 | error = -ENOMEM; |
384 | kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP); | 384 | kbuf = kmem_zalloc_large(al_hreq.buflen, 0); |
385 | if (!kbuf) | 385 | if (!kbuf) |
386 | goto out_dput; | 386 | goto out_dput; |
387 | 387 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3a4310d7cb59..f780e223b118 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -58,7 +58,7 @@ xfs_bmbt_to_iomap( | |||
58 | { | 58 | { |
59 | struct xfs_mount *mp = ip->i_mount; | 59 | struct xfs_mount *mp = ip->i_mount; |
60 | 60 | ||
61 | if (unlikely(!imap->br_startblock && !XFS_IS_REALTIME_INODE(ip))) | 61 | if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock))) |
62 | return xfs_alert_fsblock_zero(ip, imap); | 62 | return xfs_alert_fsblock_zero(ip, imap); |
63 | 63 | ||
64 | if (imap->br_startblock == HOLESTARTBLOCK) { | 64 | if (imap->br_startblock == HOLESTARTBLOCK) { |
@@ -297,7 +297,7 @@ xfs_iomap_write_direct( | |||
297 | goto out_unlock; | 297 | goto out_unlock; |
298 | } | 298 | } |
299 | 299 | ||
300 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) | 300 | if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock))) |
301 | error = xfs_alert_fsblock_zero(ip, imap); | 301 | error = xfs_alert_fsblock_zero(ip, imap); |
302 | 302 | ||
303 | out_unlock: | 303 | out_unlock: |
@@ -814,7 +814,7 @@ xfs_iomap_write_unwritten( | |||
814 | if (error) | 814 | if (error) |
815 | return error; | 815 | return error; |
816 | 816 | ||
817 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) | 817 | if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock))) |
818 | return xfs_alert_fsblock_zero(ip, &imap); | 818 | return xfs_alert_fsblock_zero(ip, &imap); |
819 | 819 | ||
820 | if ((numblks_fsb = imap.br_blockcount) == 0) { | 820 | if ((numblks_fsb = imap.br_blockcount) == 0) { |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f5c955d35be4..884950adbd16 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -137,7 +137,7 @@ xfs_bulkstat_one_int( | |||
137 | xfs_irele(ip); | 137 | xfs_irele(ip); |
138 | 138 | ||
139 | error = bc->formatter(bc->breq, buf); | 139 | error = bc->formatter(bc->breq, buf); |
140 | if (error == XFS_IBULK_ABORT) | 140 | if (error == -ECANCELED) |
141 | goto out_advance; | 141 | goto out_advance; |
142 | if (error) | 142 | if (error) |
143 | goto out; | 143 | goto out; |
@@ -169,7 +169,7 @@ xfs_bulkstat_one( | |||
169 | ASSERT(breq->icount == 1); | 169 | ASSERT(breq->icount == 1); |
170 | 170 | ||
171 | bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), | 171 | bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), |
172 | KM_SLEEP | KM_MAYFAIL); | 172 | KM_MAYFAIL); |
173 | if (!bc.buf) | 173 | if (!bc.buf) |
174 | return -ENOMEM; | 174 | return -ENOMEM; |
175 | 175 | ||
@@ -181,7 +181,7 @@ xfs_bulkstat_one( | |||
181 | * If we reported one inode to userspace then we abort because we hit | 181 | * If we reported one inode to userspace then we abort because we hit |
182 | * the end of the buffer. Don't leak that back to userspace. | 182 | * the end of the buffer. Don't leak that back to userspace. |
183 | */ | 183 | */ |
184 | if (error == XFS_IWALK_ABORT) | 184 | if (error == -ECANCELED) |
185 | error = 0; | 185 | error = 0; |
186 | 186 | ||
187 | return error; | 187 | return error; |
@@ -243,7 +243,7 @@ xfs_bulkstat( | |||
243 | return 0; | 243 | return 0; |
244 | 244 | ||
245 | bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), | 245 | bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), |
246 | KM_SLEEP | KM_MAYFAIL); | 246 | KM_MAYFAIL); |
247 | if (!bc.buf) | 247 | if (!bc.buf) |
248 | return -ENOMEM; | 248 | return -ENOMEM; |
249 | 249 | ||
@@ -342,7 +342,7 @@ xfs_inumbers_walk( | |||
342 | int error; | 342 | int error; |
343 | 343 | ||
344 | error = ic->formatter(ic->breq, &inogrp); | 344 | error = ic->formatter(ic->breq, &inogrp); |
345 | if (error && error != XFS_IBULK_ABORT) | 345 | if (error && error != -ECANCELED) |
346 | return error; | 346 | return error; |
347 | 347 | ||
348 | ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + | 348 | ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + |
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index e90c1fc5b981..96a1e2a9be3f 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h | |||
@@ -18,9 +18,6 @@ struct xfs_ibulk { | |||
18 | /* Only iterate within the same AG as startino */ | 18 | /* Only iterate within the same AG as startino */ |
19 | #define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG) | 19 | #define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG) |
20 | 20 | ||
21 | /* Return value that means we want to abort the walk. */ | ||
22 | #define XFS_IBULK_ABORT (XFS_IWALK_ABORT) | ||
23 | |||
24 | /* | 21 | /* |
25 | * Advance the user buffer pointer by one record of the given size. If the | 22 | * Advance the user buffer pointer by one record of the given size. If the |
26 | * buffer is now full, return the appropriate error code. | 23 | * buffer is now full, return the appropriate error code. |
@@ -34,13 +31,21 @@ xfs_ibulk_advance( | |||
34 | 31 | ||
35 | breq->ubuffer = b + bytes; | 32 | breq->ubuffer = b + bytes; |
36 | breq->ocount++; | 33 | breq->ocount++; |
37 | return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0; | 34 | return breq->ocount == breq->icount ? -ECANCELED : 0; |
38 | } | 35 | } |
39 | 36 | ||
40 | /* | 37 | /* |
41 | * Return stat information in bulk (by-inode) for the filesystem. | 38 | * Return stat information in bulk (by-inode) for the filesystem. |
42 | */ | 39 | */ |
43 | 40 | ||
41 | /* | ||
42 | * Return codes for the formatter function are 0 to continue iterating, and | ||
43 | * non-zero to stop iterating. Any non-zero value will be passed up to the | ||
44 | * bulkstat/inumbers caller. The special value -ECANCELED can be used to stop | ||
45 | * iteration, as neither bulkstat nor inumbers will ever generate that error | ||
46 | * code on their own. | ||
47 | */ | ||
48 | |||
44 | typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, | 49 | typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, |
45 | const struct xfs_bulkstat *bstat); | 50 | const struct xfs_bulkstat *bstat); |
46 | 51 | ||
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 8c7d727149ea..aa375cf53021 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c | |||
@@ -31,7 +31,7 @@ | |||
31 | * inode it finds, it calls a walk function with the relevant inode number and | 31 | * inode it finds, it calls a walk function with the relevant inode number and |
32 | * a pointer to caller-provided data. The walk function can return the usual | 32 | * a pointer to caller-provided data. The walk function can return the usual |
33 | * negative error code to stop the iteration; 0 to continue the iteration; or | 33 | * negative error code to stop the iteration; 0 to continue the iteration; or |
34 | * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the | 34 | * -ECANCELED to stop the iteration. This return value is returned to the |
35 | * caller. | 35 | * caller. |
36 | * | 36 | * |
37 | * Internally, we allow the walk function to do anything, which means that we | 37 | * Internally, we allow the walk function to do anything, which means that we |
@@ -616,7 +616,7 @@ xfs_iwalk_threaded( | |||
616 | if (xfs_pwork_ctl_want_abort(&pctl)) | 616 | if (xfs_pwork_ctl_want_abort(&pctl)) |
617 | break; | 617 | break; |
618 | 618 | ||
619 | iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP); | 619 | iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), 0); |
620 | iwag->mp = mp; | 620 | iwag->mp = mp; |
621 | iwag->iwalk_fn = iwalk_fn; | 621 | iwag->iwalk_fn = iwalk_fn; |
622 | iwag->data = data; | 622 | iwag->data = data; |
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h index 6c960e10ed4d..37a795f03267 100644 --- a/fs/xfs/xfs_iwalk.h +++ b/fs/xfs/xfs_iwalk.h | |||
@@ -6,12 +6,17 @@ | |||
6 | #ifndef __XFS_IWALK_H__ | 6 | #ifndef __XFS_IWALK_H__ |
7 | #define __XFS_IWALK_H__ | 7 | #define __XFS_IWALK_H__ |
8 | 8 | ||
9 | /* | ||
10 | * Return codes for the inode/inobt walk function are 0 to continue iterating, | ||
11 | * and non-zero to stop iterating. Any non-zero value will be passed up to the | ||
12 | * iwalk or inobt_walk caller. The special value -ECANCELED can be used to | ||
13 | * stop iteration, as neither iwalk nor inobt_walk will ever generate that | ||
14 | * error code on their own. | ||
15 | */ | ||
16 | |||
9 | /* Walk all inodes in the filesystem starting from @startino. */ | 17 | /* Walk all inodes in the filesystem starting from @startino. */ |
10 | typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, | 18 | typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, |
11 | xfs_ino_t ino, void *data); | 19 | xfs_ino_t ino, void *data); |
12 | /* Return values for xfs_iwalk_fn. */ | ||
13 | #define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE) | ||
14 | #define XFS_IWALK_ABORT (XFS_ITER_ABORT) | ||
15 | 20 | ||
16 | int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, | 21 | int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, |
17 | unsigned int flags, xfs_iwalk_fn iwalk_fn, | 22 | unsigned int flags, xfs_iwalk_fn iwalk_fn, |
@@ -30,8 +35,6 @@ typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, | |||
30 | xfs_agnumber_t agno, | 35 | xfs_agnumber_t agno, |
31 | const struct xfs_inobt_rec_incore *irec, | 36 | const struct xfs_inobt_rec_incore *irec, |
32 | void *data); | 37 | void *data); |
33 | /* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */ | ||
34 | #define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT) | ||
35 | 38 | ||
36 | int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp, | 39 | int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp, |
37 | xfs_ino_t startino, unsigned int flags, | 40 | xfs_ino_t startino, unsigned int flags, |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 7fc3c1ad36bc..a2beee9f74da 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -214,15 +214,42 @@ xlog_grant_head_wake( | |||
214 | { | 214 | { |
215 | struct xlog_ticket *tic; | 215 | struct xlog_ticket *tic; |
216 | int need_bytes; | 216 | int need_bytes; |
217 | bool woken_task = false; | ||
217 | 218 | ||
218 | list_for_each_entry(tic, &head->waiters, t_queue) { | 219 | list_for_each_entry(tic, &head->waiters, t_queue) { |
220 | |||
221 | /* | ||
222 | * There is a chance that the size of the CIL checkpoints in | ||
223 | * progress at the last AIL push target calculation resulted in | ||
224 | * limiting the target to the log head (l_last_sync_lsn) at the | ||
225 | * time. This may not reflect where the log head is now as the | ||
226 | * CIL checkpoints may have completed. | ||
227 | * | ||
228 | * Hence when we are woken here, it may be that the head of the | ||
229 | * log that has moved rather than the tail. As the tail didn't | ||
230 | * move, there still won't be space available for the | ||
231 | * reservation we require. However, if the AIL has already | ||
232 | * pushed to the target defined by the old log head location, we | ||
233 | * will hang here waiting for something else to update the AIL | ||
234 | * push target. | ||
235 | * | ||
236 | * Therefore, if there isn't space to wake the first waiter on | ||
237 | * the grant head, we need to push the AIL again to ensure the | ||
238 | * target reflects both the current log tail and log head | ||
239 | * position before we wait for the tail to move again. | ||
240 | */ | ||
241 | |||
219 | need_bytes = xlog_ticket_reservation(log, head, tic); | 242 | need_bytes = xlog_ticket_reservation(log, head, tic); |
220 | if (*free_bytes < need_bytes) | 243 | if (*free_bytes < need_bytes) { |
244 | if (!woken_task) | ||
245 | xlog_grant_push_ail(log, need_bytes); | ||
221 | return false; | 246 | return false; |
247 | } | ||
222 | 248 | ||
223 | *free_bytes -= need_bytes; | 249 | *free_bytes -= need_bytes; |
224 | trace_xfs_log_grant_wake_up(log, tic); | 250 | trace_xfs_log_grant_wake_up(log, tic); |
225 | wake_up_process(tic->t_task); | 251 | wake_up_process(tic->t_task); |
252 | woken_task = true; | ||
226 | } | 253 | } |
227 | 254 | ||
228 | return true; | 255 | return true; |
@@ -428,8 +455,7 @@ xfs_log_reserve( | |||
428 | XFS_STATS_INC(mp, xs_try_logspace); | 455 | XFS_STATS_INC(mp, xs_try_logspace); |
429 | 456 | ||
430 | ASSERT(*ticp == NULL); | 457 | ASSERT(*ticp == NULL); |
431 | tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, | 458 | tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 0); |
432 | KM_SLEEP); | ||
433 | *ticp = tic; | 459 | *ticp = tic; |
434 | 460 | ||
435 | xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt | 461 | xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt |
@@ -1404,6 +1430,7 @@ xlog_alloc_log( | |||
1404 | */ | 1430 | */ |
1405 | ASSERT(log->l_iclog_size >= 4096); | 1431 | ASSERT(log->l_iclog_size >= 4096); |
1406 | for (i = 0; i < log->l_iclog_bufs; i++) { | 1432 | for (i = 0; i < log->l_iclog_bufs; i++) { |
1433 | int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp); | ||
1407 | size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * | 1434 | size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * |
1408 | sizeof(struct bio_vec); | 1435 | sizeof(struct bio_vec); |
1409 | 1436 | ||
@@ -1415,8 +1442,8 @@ xlog_alloc_log( | |||
1415 | iclog->ic_prev = prev_iclog; | 1442 | iclog->ic_prev = prev_iclog; |
1416 | prev_iclog = iclog; | 1443 | prev_iclog = iclog; |
1417 | 1444 | ||
1418 | iclog->ic_data = kmem_alloc_large(log->l_iclog_size, | 1445 | iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask, |
1419 | KM_MAYFAIL); | 1446 | KM_MAYFAIL); |
1420 | if (!iclog->ic_data) | 1447 | if (!iclog->ic_data) |
1421 | goto out_free_iclog; | 1448 | goto out_free_iclog; |
1422 | #ifdef DEBUG | 1449 | #ifdef DEBUG |
@@ -2496,21 +2523,35 @@ next_lv: | |||
2496 | ***************************************************************************** | 2523 | ***************************************************************************** |
2497 | */ | 2524 | */ |
2498 | 2525 | ||
2499 | /* Clean iclogs starting from the head. This ordering must be | 2526 | /* |
2500 | * maintained, so an iclog doesn't become ACTIVE beyond one that | 2527 | * An iclog has just finished IO completion processing, so we need to update |
2501 | * is SYNCING. This is also required to maintain the notion that we use | 2528 | * the iclog state and propagate that up into the overall log state. Hence we |
2502 | * a ordered wait queue to hold off would be writers to the log when every | 2529 | * prepare the iclog for cleaning, and then clean all the pending dirty iclogs |
2503 | * iclog is trying to sync to disk. | 2530 | * starting from the head, and then wake up any threads that are waiting for the |
2531 | * iclog to be marked clean. | ||
2532 | * | ||
2533 | * The ordering of marking iclogs ACTIVE must be maintained, so an iclog | ||
2534 | * doesn't become ACTIVE beyond one that is SYNCING. This is also required to | ||
2535 | * maintain the notion that we use a ordered wait queue to hold off would be | ||
2536 | * writers to the log when every iclog is trying to sync to disk. | ||
2537 | * | ||
2538 | * Caller must hold the icloglock before calling us. | ||
2504 | * | 2539 | * |
2505 | * State Change: DIRTY -> ACTIVE | 2540 | * State Change: !IOERROR -> DIRTY -> ACTIVE |
2506 | */ | 2541 | */ |
2507 | STATIC void | 2542 | STATIC void |
2508 | xlog_state_clean_log( | 2543 | xlog_state_clean_iclog( |
2509 | struct xlog *log) | 2544 | struct xlog *log, |
2545 | struct xlog_in_core *dirty_iclog) | ||
2510 | { | 2546 | { |
2511 | xlog_in_core_t *iclog; | 2547 | struct xlog_in_core *iclog; |
2512 | int changed = 0; | 2548 | int changed = 0; |
2513 | 2549 | ||
2550 | /* Prepare the completed iclog. */ | ||
2551 | if (!(dirty_iclog->ic_state & XLOG_STATE_IOERROR)) | ||
2552 | dirty_iclog->ic_state = XLOG_STATE_DIRTY; | ||
2553 | |||
2554 | /* Walk all the iclogs to update the ordered active state. */ | ||
2514 | iclog = log->l_iclog; | 2555 | iclog = log->l_iclog; |
2515 | do { | 2556 | do { |
2516 | if (iclog->ic_state == XLOG_STATE_DIRTY) { | 2557 | if (iclog->ic_state == XLOG_STATE_DIRTY) { |
@@ -2548,7 +2589,13 @@ xlog_state_clean_log( | |||
2548 | iclog = iclog->ic_next; | 2589 | iclog = iclog->ic_next; |
2549 | } while (iclog != log->l_iclog); | 2590 | } while (iclog != log->l_iclog); |
2550 | 2591 | ||
2551 | /* log is locked when we are called */ | 2592 | |
2593 | /* | ||
2594 | * Wake up threads waiting in xfs_log_force() for the dirty iclog | ||
2595 | * to be cleaned. | ||
2596 | */ | ||
2597 | wake_up_all(&dirty_iclog->ic_force_wait); | ||
2598 | |||
2552 | /* | 2599 | /* |
2553 | * Change state for the dummy log recording. | 2600 | * Change state for the dummy log recording. |
2554 | * We usually go to NEED. But we go to NEED2 if the changed indicates | 2601 | * We usually go to NEED. But we go to NEED2 if the changed indicates |
@@ -2582,7 +2629,7 @@ xlog_state_clean_log( | |||
2582 | ASSERT(0); | 2629 | ASSERT(0); |
2583 | } | 2630 | } |
2584 | } | 2631 | } |
2585 | } /* xlog_state_clean_log */ | 2632 | } |
2586 | 2633 | ||
2587 | STATIC xfs_lsn_t | 2634 | STATIC xfs_lsn_t |
2588 | xlog_get_lowest_lsn( | 2635 | xlog_get_lowest_lsn( |
@@ -2603,30 +2650,205 @@ xlog_get_lowest_lsn( | |||
2603 | return lowest_lsn; | 2650 | return lowest_lsn; |
2604 | } | 2651 | } |
2605 | 2652 | ||
2653 | /* | ||
2654 | * Completion of a iclog IO does not imply that a transaction has completed, as | ||
2655 | * transactions can be large enough to span many iclogs. We cannot change the | ||
2656 | * tail of the log half way through a transaction as this may be the only | ||
2657 | * transaction in the log and moving the tail to point to the middle of it | ||
2658 | * will prevent recovery from finding the start of the transaction. Hence we | ||
2659 | * should only update the last_sync_lsn if this iclog contains transaction | ||
2660 | * completion callbacks on it. | ||
2661 | * | ||
2662 | * We have to do this before we drop the icloglock to ensure we are the only one | ||
2663 | * that can update it. | ||
2664 | * | ||
2665 | * If we are moving the last_sync_lsn forwards, we also need to ensure we kick | ||
2666 | * the reservation grant head pushing. This is due to the fact that the push | ||
2667 | * target is bound by the current last_sync_lsn value. Hence if we have a large | ||
2668 | * amount of log space bound up in this committing transaction then the | ||
2669 | * last_sync_lsn value may be the limiting factor preventing tail pushing from | ||
2670 | * freeing space in the log. Hence once we've updated the last_sync_lsn we | ||
2671 | * should push the AIL to ensure the push target (and hence the grant head) is | ||
2672 | * no longer bound by the old log head location and can move forwards and make | ||
2673 | * progress again. | ||
2674 | */ | ||
2675 | static void | ||
2676 | xlog_state_set_callback( | ||
2677 | struct xlog *log, | ||
2678 | struct xlog_in_core *iclog, | ||
2679 | xfs_lsn_t header_lsn) | ||
2680 | { | ||
2681 | iclog->ic_state = XLOG_STATE_CALLBACK; | ||
2682 | |||
2683 | ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), | ||
2684 | header_lsn) <= 0); | ||
2685 | |||
2686 | if (list_empty_careful(&iclog->ic_callbacks)) | ||
2687 | return; | ||
2688 | |||
2689 | atomic64_set(&log->l_last_sync_lsn, header_lsn); | ||
2690 | xlog_grant_push_ail(log, 0); | ||
2691 | } | ||
2692 | |||
2693 | /* | ||
2694 | * Return true if we need to stop processing, false to continue to the next | ||
2695 | * iclog. The caller will need to run callbacks if the iclog is returned in the | ||
2696 | * XLOG_STATE_CALLBACK state. | ||
2697 | */ | ||
2698 | static bool | ||
2699 | xlog_state_iodone_process_iclog( | ||
2700 | struct xlog *log, | ||
2701 | struct xlog_in_core *iclog, | ||
2702 | struct xlog_in_core *completed_iclog, | ||
2703 | bool *ioerror) | ||
2704 | { | ||
2705 | xfs_lsn_t lowest_lsn; | ||
2706 | xfs_lsn_t header_lsn; | ||
2707 | |||
2708 | /* Skip all iclogs in the ACTIVE & DIRTY states */ | ||
2709 | if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) | ||
2710 | return false; | ||
2711 | |||
2712 | /* | ||
2713 | * Between marking a filesystem SHUTDOWN and stopping the log, we do | ||
2714 | * flush all iclogs to disk (if there wasn't a log I/O error). So, we do | ||
2715 | * want things to go smoothly in case of just a SHUTDOWN w/o a | ||
2716 | * LOG_IO_ERROR. | ||
2717 | */ | ||
2718 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
2719 | *ioerror = true; | ||
2720 | return false; | ||
2721 | } | ||
2722 | |||
2723 | /* | ||
2724 | * Can only perform callbacks in order. Since this iclog is not in the | ||
2725 | * DONE_SYNC/ DO_CALLBACK state, we skip the rest and just try to clean | ||
2726 | * up. If we set our iclog to DO_CALLBACK, we will not process it when | ||
2727 | * we retry since a previous iclog is in the CALLBACK and the state | ||
2728 | * cannot change since we are holding the l_icloglock. | ||
2729 | */ | ||
2730 | if (!(iclog->ic_state & | ||
2731 | (XLOG_STATE_DONE_SYNC | XLOG_STATE_DO_CALLBACK))) { | ||
2732 | if (completed_iclog && | ||
2733 | (completed_iclog->ic_state == XLOG_STATE_DONE_SYNC)) { | ||
2734 | completed_iclog->ic_state = XLOG_STATE_DO_CALLBACK; | ||
2735 | } | ||
2736 | return true; | ||
2737 | } | ||
2738 | |||
2739 | /* | ||
2740 | * We now have an iclog that is in either the DO_CALLBACK or DONE_SYNC | ||
2741 | * states. The other states (WANT_SYNC, SYNCING, or CALLBACK were caught | ||
2742 | * by the above if and are going to clean (i.e. we aren't doing their | ||
2743 | * callbacks) see the above if. | ||
2744 | * | ||
2745 | * We will do one more check here to see if we have chased our tail | ||
2746 | * around. If this is not the lowest lsn iclog, then we will leave it | ||
2747 | * for another completion to process. | ||
2748 | */ | ||
2749 | header_lsn = be64_to_cpu(iclog->ic_header.h_lsn); | ||
2750 | lowest_lsn = xlog_get_lowest_lsn(log); | ||
2751 | if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0) | ||
2752 | return false; | ||
2753 | |||
2754 | xlog_state_set_callback(log, iclog, header_lsn); | ||
2755 | return false; | ||
2756 | |||
2757 | } | ||
2758 | |||
2759 | /* | ||
2760 | * Keep processing entries in the iclog callback list until we come around and | ||
2761 | * it is empty. We need to atomically see that the list is empty and change the | ||
2762 | * state to DIRTY so that we don't miss any more callbacks being added. | ||
2763 | * | ||
2764 | * This function is called with the icloglock held and returns with it held. We | ||
2765 | * drop it while running callbacks, however, as holding it over thousands of | ||
2766 | * callbacks is unnecessary and causes excessive contention if we do. | ||
2767 | */ | ||
2768 | static void | ||
2769 | xlog_state_do_iclog_callbacks( | ||
2770 | struct xlog *log, | ||
2771 | struct xlog_in_core *iclog, | ||
2772 | bool aborted) | ||
2773 | { | ||
2774 | spin_unlock(&log->l_icloglock); | ||
2775 | spin_lock(&iclog->ic_callback_lock); | ||
2776 | while (!list_empty(&iclog->ic_callbacks)) { | ||
2777 | LIST_HEAD(tmp); | ||
2778 | |||
2779 | list_splice_init(&iclog->ic_callbacks, &tmp); | ||
2780 | |||
2781 | spin_unlock(&iclog->ic_callback_lock); | ||
2782 | xlog_cil_process_committed(&tmp, aborted); | ||
2783 | spin_lock(&iclog->ic_callback_lock); | ||
2784 | } | ||
2785 | |||
2786 | /* | ||
2787 | * Pick up the icloglock while still holding the callback lock so we | ||
2788 | * serialise against anyone trying to add more callbacks to this iclog | ||
2789 | * now we've finished processing. | ||
2790 | */ | ||
2791 | spin_lock(&log->l_icloglock); | ||
2792 | spin_unlock(&iclog->ic_callback_lock); | ||
2793 | } | ||
2794 | |||
2795 | #ifdef DEBUG | ||
2796 | /* | ||
2797 | * Make one last gasp attempt to see if iclogs are being left in limbo. If the | ||
2798 | * above loop finds an iclog earlier than the current iclog and in one of the | ||
2799 | * syncing states, the current iclog is put into DO_CALLBACK and the callbacks | ||
2800 | * are deferred to the completion of the earlier iclog. Walk the iclogs in order | ||
2801 | * and make sure that no iclog is in DO_CALLBACK unless an earlier iclog is in | ||
2802 | * one of the syncing states. | ||
2803 | * | ||
2804 | * Note that SYNCING|IOERROR is a valid state so we cannot just check for | ||
2805 | * ic_state == SYNCING. | ||
2806 | */ | ||
2807 | static void | ||
2808 | xlog_state_callback_check_state( | ||
2809 | struct xlog *log) | ||
2810 | { | ||
2811 | struct xlog_in_core *first_iclog = log->l_iclog; | ||
2812 | struct xlog_in_core *iclog = first_iclog; | ||
2813 | |||
2814 | do { | ||
2815 | ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK); | ||
2816 | /* | ||
2817 | * Terminate the loop if iclogs are found in states | ||
2818 | * which will cause other threads to clean up iclogs. | ||
2819 | * | ||
2820 | * SYNCING - i/o completion will go through logs | ||
2821 | * DONE_SYNC - interrupt thread should be waiting for | ||
2822 | * l_icloglock | ||
2823 | * IOERROR - give up hope all ye who enter here | ||
2824 | */ | ||
2825 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC || | ||
2826 | iclog->ic_state & XLOG_STATE_SYNCING || | ||
2827 | iclog->ic_state == XLOG_STATE_DONE_SYNC || | ||
2828 | iclog->ic_state == XLOG_STATE_IOERROR ) | ||
2829 | break; | ||
2830 | iclog = iclog->ic_next; | ||
2831 | } while (first_iclog != iclog); | ||
2832 | } | ||
2833 | #else | ||
2834 | #define xlog_state_callback_check_state(l) ((void)0) | ||
2835 | #endif | ||
2836 | |||
2606 | STATIC void | 2837 | STATIC void |
2607 | xlog_state_do_callback( | 2838 | xlog_state_do_callback( |
2608 | struct xlog *log, | 2839 | struct xlog *log, |
2609 | bool aborted, | 2840 | bool aborted, |
2610 | struct xlog_in_core *ciclog) | 2841 | struct xlog_in_core *ciclog) |
2611 | { | 2842 | { |
2612 | xlog_in_core_t *iclog; | 2843 | struct xlog_in_core *iclog; |
2613 | xlog_in_core_t *first_iclog; /* used to know when we've | 2844 | struct xlog_in_core *first_iclog; |
2614 | * processed all iclogs once */ | 2845 | bool did_callbacks = false; |
2615 | int flushcnt = 0; | 2846 | bool cycled_icloglock; |
2616 | xfs_lsn_t lowest_lsn; | 2847 | bool ioerror; |
2617 | int ioerrors; /* counter: iclogs with errors */ | 2848 | int flushcnt = 0; |
2618 | int loopdidcallbacks; /* flag: inner loop did callbacks*/ | 2849 | int repeats = 0; |
2619 | int funcdidcallbacks; /* flag: function did callbacks */ | ||
2620 | int repeats; /* for issuing console warnings if | ||
2621 | * looping too many times */ | ||
2622 | int wake = 0; | ||
2623 | 2850 | ||
2624 | spin_lock(&log->l_icloglock); | 2851 | spin_lock(&log->l_icloglock); |
2625 | first_iclog = iclog = log->l_iclog; | ||
2626 | ioerrors = 0; | ||
2627 | funcdidcallbacks = 0; | ||
2628 | repeats = 0; | ||
2629 | |||
2630 | do { | 2852 | do { |
2631 | /* | 2853 | /* |
2632 | * Scan all iclogs starting with the one pointed to by the | 2854 | * Scan all iclogs starting with the one pointed to by the |
@@ -2638,137 +2860,34 @@ xlog_state_do_callback( | |||
2638 | */ | 2860 | */ |
2639 | first_iclog = log->l_iclog; | 2861 | first_iclog = log->l_iclog; |
2640 | iclog = log->l_iclog; | 2862 | iclog = log->l_iclog; |
2641 | loopdidcallbacks = 0; | 2863 | cycled_icloglock = false; |
2864 | ioerror = false; | ||
2642 | repeats++; | 2865 | repeats++; |
2643 | 2866 | ||
2644 | do { | 2867 | do { |
2868 | if (xlog_state_iodone_process_iclog(log, iclog, | ||
2869 | ciclog, &ioerror)) | ||
2870 | break; | ||
2645 | 2871 | ||
2646 | /* skip all iclogs in the ACTIVE & DIRTY states */ | 2872 | if (!(iclog->ic_state & |
2647 | if (iclog->ic_state & | 2873 | (XLOG_STATE_CALLBACK | XLOG_STATE_IOERROR))) { |
2648 | (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY)) { | ||
2649 | iclog = iclog->ic_next; | 2874 | iclog = iclog->ic_next; |
2650 | continue; | 2875 | continue; |
2651 | } | 2876 | } |
2652 | 2877 | ||
2653 | /* | 2878 | /* |
2654 | * Between marking a filesystem SHUTDOWN and stopping | 2879 | * Running callbacks will drop the icloglock which means |
2655 | * the log, we do flush all iclogs to disk (if there | 2880 | * we'll have to run at least one more complete loop. |
2656 | * wasn't a log I/O error). So, we do want things to | ||
2657 | * go smoothly in case of just a SHUTDOWN w/o a | ||
2658 | * LOG_IO_ERROR. | ||
2659 | */ | ||
2660 | if (!(iclog->ic_state & XLOG_STATE_IOERROR)) { | ||
2661 | /* | ||
2662 | * Can only perform callbacks in order. Since | ||
2663 | * this iclog is not in the DONE_SYNC/ | ||
2664 | * DO_CALLBACK state, we skip the rest and | ||
2665 | * just try to clean up. If we set our iclog | ||
2666 | * to DO_CALLBACK, we will not process it when | ||
2667 | * we retry since a previous iclog is in the | ||
2668 | * CALLBACK and the state cannot change since | ||
2669 | * we are holding the l_icloglock. | ||
2670 | */ | ||
2671 | if (!(iclog->ic_state & | ||
2672 | (XLOG_STATE_DONE_SYNC | | ||
2673 | XLOG_STATE_DO_CALLBACK))) { | ||
2674 | if (ciclog && (ciclog->ic_state == | ||
2675 | XLOG_STATE_DONE_SYNC)) { | ||
2676 | ciclog->ic_state = XLOG_STATE_DO_CALLBACK; | ||
2677 | } | ||
2678 | break; | ||
2679 | } | ||
2680 | /* | ||
2681 | * We now have an iclog that is in either the | ||
2682 | * DO_CALLBACK or DONE_SYNC states. The other | ||
2683 | * states (WANT_SYNC, SYNCING, or CALLBACK were | ||
2684 | * caught by the above if and are going to | ||
2685 | * clean (i.e. we aren't doing their callbacks) | ||
2686 | * see the above if. | ||
2687 | */ | ||
2688 | |||
2689 | /* | ||
2690 | * We will do one more check here to see if we | ||
2691 | * have chased our tail around. | ||
2692 | */ | ||
2693 | |||
2694 | lowest_lsn = xlog_get_lowest_lsn(log); | ||
2695 | if (lowest_lsn && | ||
2696 | XFS_LSN_CMP(lowest_lsn, | ||
2697 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { | ||
2698 | iclog = iclog->ic_next; | ||
2699 | continue; /* Leave this iclog for | ||
2700 | * another thread */ | ||
2701 | } | ||
2702 | |||
2703 | iclog->ic_state = XLOG_STATE_CALLBACK; | ||
2704 | |||
2705 | |||
2706 | /* | ||
2707 | * Completion of a iclog IO does not imply that | ||
2708 | * a transaction has completed, as transactions | ||
2709 | * can be large enough to span many iclogs. We | ||
2710 | * cannot change the tail of the log half way | ||
2711 | * through a transaction as this may be the only | ||
2712 | * transaction in the log and moving th etail to | ||
2713 | * point to the middle of it will prevent | ||
2714 | * recovery from finding the start of the | ||
2715 | * transaction. Hence we should only update the | ||
2716 | * last_sync_lsn if this iclog contains | ||
2717 | * transaction completion callbacks on it. | ||
2718 | * | ||
2719 | * We have to do this before we drop the | ||
2720 | * icloglock to ensure we are the only one that | ||
2721 | * can update it. | ||
2722 | */ | ||
2723 | ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), | ||
2724 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); | ||
2725 | if (!list_empty_careful(&iclog->ic_callbacks)) | ||
2726 | atomic64_set(&log->l_last_sync_lsn, | ||
2727 | be64_to_cpu(iclog->ic_header.h_lsn)); | ||
2728 | |||
2729 | } else | ||
2730 | ioerrors++; | ||
2731 | |||
2732 | spin_unlock(&log->l_icloglock); | ||
2733 | |||
2734 | /* | ||
2735 | * Keep processing entries in the callback list until | ||
2736 | * we come around and it is empty. We need to | ||
2737 | * atomically see that the list is empty and change the | ||
2738 | * state to DIRTY so that we don't miss any more | ||
2739 | * callbacks being added. | ||
2740 | */ | ||
2741 | spin_lock(&iclog->ic_callback_lock); | ||
2742 | while (!list_empty(&iclog->ic_callbacks)) { | ||
2743 | LIST_HEAD(tmp); | ||
2744 | |||
2745 | list_splice_init(&iclog->ic_callbacks, &tmp); | ||
2746 | |||
2747 | spin_unlock(&iclog->ic_callback_lock); | ||
2748 | xlog_cil_process_committed(&tmp, aborted); | ||
2749 | spin_lock(&iclog->ic_callback_lock); | ||
2750 | } | ||
2751 | |||
2752 | loopdidcallbacks++; | ||
2753 | funcdidcallbacks++; | ||
2754 | |||
2755 | spin_lock(&log->l_icloglock); | ||
2756 | spin_unlock(&iclog->ic_callback_lock); | ||
2757 | if (!(iclog->ic_state & XLOG_STATE_IOERROR)) | ||
2758 | iclog->ic_state = XLOG_STATE_DIRTY; | ||
2759 | |||
2760 | /* | ||
2761 | * Transition from DIRTY to ACTIVE if applicable. | ||
2762 | * NOP if STATE_IOERROR. | ||
2763 | */ | 2881 | */ |
2764 | xlog_state_clean_log(log); | 2882 | cycled_icloglock = true; |
2765 | 2883 | xlog_state_do_iclog_callbacks(log, iclog, aborted); | |
2766 | /* wake up threads waiting in xfs_log_force() */ | ||
2767 | wake_up_all(&iclog->ic_force_wait); | ||
2768 | 2884 | ||
2885 | xlog_state_clean_iclog(log, iclog); | ||
2769 | iclog = iclog->ic_next; | 2886 | iclog = iclog->ic_next; |
2770 | } while (first_iclog != iclog); | 2887 | } while (first_iclog != iclog); |
2771 | 2888 | ||
2889 | did_callbacks |= cycled_icloglock; | ||
2890 | |||
2772 | if (repeats > 5000) { | 2891 | if (repeats > 5000) { |
2773 | flushcnt += repeats; | 2892 | flushcnt += repeats; |
2774 | repeats = 0; | 2893 | repeats = 0; |
@@ -2776,50 +2895,15 @@ xlog_state_do_callback( | |||
2776 | "%s: possible infinite loop (%d iterations)", | 2895 | "%s: possible infinite loop (%d iterations)", |
2777 | __func__, flushcnt); | 2896 | __func__, flushcnt); |
2778 | } | 2897 | } |
2779 | } while (!ioerrors && loopdidcallbacks); | 2898 | } while (!ioerror && cycled_icloglock); |
2780 | 2899 | ||
2781 | #ifdef DEBUG | 2900 | if (did_callbacks) |
2782 | /* | 2901 | xlog_state_callback_check_state(log); |
2783 | * Make one last gasp attempt to see if iclogs are being left in limbo. | ||
2784 | * If the above loop finds an iclog earlier than the current iclog and | ||
2785 | * in one of the syncing states, the current iclog is put into | ||
2786 | * DO_CALLBACK and the callbacks are deferred to the completion of the | ||
2787 | * earlier iclog. Walk the iclogs in order and make sure that no iclog | ||
2788 | * is in DO_CALLBACK unless an earlier iclog is in one of the syncing | ||
2789 | * states. | ||
2790 | * | ||
2791 | * Note that SYNCING|IOABORT is a valid state so we cannot just check | ||
2792 | * for ic_state == SYNCING. | ||
2793 | */ | ||
2794 | if (funcdidcallbacks) { | ||
2795 | first_iclog = iclog = log->l_iclog; | ||
2796 | do { | ||
2797 | ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK); | ||
2798 | /* | ||
2799 | * Terminate the loop if iclogs are found in states | ||
2800 | * which will cause other threads to clean up iclogs. | ||
2801 | * | ||
2802 | * SYNCING - i/o completion will go through logs | ||
2803 | * DONE_SYNC - interrupt thread should be waiting for | ||
2804 | * l_icloglock | ||
2805 | * IOERROR - give up hope all ye who enter here | ||
2806 | */ | ||
2807 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC || | ||
2808 | iclog->ic_state & XLOG_STATE_SYNCING || | ||
2809 | iclog->ic_state == XLOG_STATE_DONE_SYNC || | ||
2810 | iclog->ic_state == XLOG_STATE_IOERROR ) | ||
2811 | break; | ||
2812 | iclog = iclog->ic_next; | ||
2813 | } while (first_iclog != iclog); | ||
2814 | } | ||
2815 | #endif | ||
2816 | 2902 | ||
2817 | if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) | 2903 | if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) |
2818 | wake = 1; | ||
2819 | spin_unlock(&log->l_icloglock); | ||
2820 | |||
2821 | if (wake) | ||
2822 | wake_up_all(&log->l_flush_wait); | 2904 | wake_up_all(&log->l_flush_wait); |
2905 | |||
2906 | spin_unlock(&log->l_icloglock); | ||
2823 | } | 2907 | } |
2824 | 2908 | ||
2825 | 2909 | ||
@@ -3919,7 +4003,9 @@ xfs_log_force_umount( | |||
3919 | * item committed callback functions will do this again under lock to | 4003 | * item committed callback functions will do this again under lock to |
3920 | * avoid races. | 4004 | * avoid races. |
3921 | */ | 4005 | */ |
4006 | spin_lock(&log->l_cilp->xc_push_lock); | ||
3922 | wake_up_all(&log->l_cilp->xc_commit_wait); | 4007 | wake_up_all(&log->l_cilp->xc_commit_wait); |
4008 | spin_unlock(&log->l_cilp->xc_push_lock); | ||
3923 | xlog_state_do_callback(log, true, NULL); | 4009 | xlog_state_do_callback(log, true, NULL); |
3924 | 4010 | ||
3925 | #ifdef XFSERRORDEBUG | 4011 | #ifdef XFSERRORDEBUG |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index fa5602d0fd7f..ef652abd112c 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -38,7 +38,7 @@ xlog_cil_ticket_alloc( | |||
38 | struct xlog_ticket *tic; | 38 | struct xlog_ticket *tic; |
39 | 39 | ||
40 | tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, | 40 | tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, |
41 | KM_SLEEP|KM_NOFS); | 41 | KM_NOFS); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * set the current reservation to zero so we know to steal the basic | 44 | * set the current reservation to zero so we know to steal the basic |
@@ -186,7 +186,7 @@ xlog_cil_alloc_shadow_bufs( | |||
186 | */ | 186 | */ |
187 | kmem_free(lip->li_lv_shadow); | 187 | kmem_free(lip->li_lv_shadow); |
188 | 188 | ||
189 | lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS); | 189 | lv = kmem_alloc_large(buf_size, KM_NOFS); |
190 | memset(lv, 0, xlog_cil_iovec_space(niovecs)); | 190 | memset(lv, 0, xlog_cil_iovec_space(niovecs)); |
191 | 191 | ||
192 | lv->lv_item = lip; | 192 | lv->lv_item = lip; |
@@ -660,7 +660,7 @@ xlog_cil_push( | |||
660 | if (!cil) | 660 | if (!cil) |
661 | return 0; | 661 | return 0; |
662 | 662 | ||
663 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 663 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS); |
664 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 664 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
665 | 665 | ||
666 | down_write(&cil->xc_ctx_lock); | 666 | down_write(&cil->xc_ctx_lock); |
@@ -1179,11 +1179,11 @@ xlog_cil_init( | |||
1179 | struct xfs_cil *cil; | 1179 | struct xfs_cil *cil; |
1180 | struct xfs_cil_ctx *ctx; | 1180 | struct xfs_cil_ctx *ctx; |
1181 | 1181 | ||
1182 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | 1182 | cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL); |
1183 | if (!cil) | 1183 | if (!cil) |
1184 | return -ENOMEM; | 1184 | return -ENOMEM; |
1185 | 1185 | ||
1186 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | 1186 | ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL); |
1187 | if (!ctx) { | 1187 | if (!ctx) { |
1188 | kmem_free(cil); | 1188 | kmem_free(cil); |
1189 | return -ENOMEM; | 1189 | return -ENOMEM; |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 13d1d3e95b88..508319039dce 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -97,6 +97,8 @@ xlog_alloc_buffer( | |||
97 | struct xlog *log, | 97 | struct xlog *log, |
98 | int nbblks) | 98 | int nbblks) |
99 | { | 99 | { |
100 | int align_mask = xfs_buftarg_dma_alignment(log->l_targ); | ||
101 | |||
100 | /* | 102 | /* |
101 | * Pass log block 0 since we don't have an addr yet, buffer will be | 103 | * Pass log block 0 since we don't have an addr yet, buffer will be |
102 | * verified on read. | 104 | * verified on read. |
@@ -125,7 +127,7 @@ xlog_alloc_buffer( | |||
125 | if (nbblks > 1 && log->l_sectBBsize > 1) | 127 | if (nbblks > 1 && log->l_sectBBsize > 1) |
126 | nbblks += log->l_sectBBsize; | 128 | nbblks += log->l_sectBBsize; |
127 | nbblks = round_up(nbblks, log->l_sectBBsize); | 129 | nbblks = round_up(nbblks, log->l_sectBBsize); |
128 | return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL); | 130 | return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL); |
129 | } | 131 | } |
130 | 132 | ||
131 | /* | 133 | /* |
@@ -1960,7 +1962,7 @@ xlog_recover_buffer_pass1( | |||
1960 | } | 1962 | } |
1961 | } | 1963 | } |
1962 | 1964 | ||
1963 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); | 1965 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0); |
1964 | bcp->bc_blkno = buf_f->blf_blkno; | 1966 | bcp->bc_blkno = buf_f->blf_blkno; |
1965 | bcp->bc_len = buf_f->blf_len; | 1967 | bcp->bc_len = buf_f->blf_len; |
1966 | bcp->bc_refcount = 1; | 1968 | bcp->bc_refcount = 1; |
@@ -2930,7 +2932,7 @@ xlog_recover_inode_pass2( | |||
2930 | if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { | 2932 | if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { |
2931 | in_f = item->ri_buf[0].i_addr; | 2933 | in_f = item->ri_buf[0].i_addr; |
2932 | } else { | 2934 | } else { |
2933 | in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP); | 2935 | in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0); |
2934 | need_free = 1; | 2936 | need_free = 1; |
2935 | error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); | 2937 | error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); |
2936 | if (error) | 2938 | if (error) |
@@ -4161,7 +4163,7 @@ xlog_recover_add_item( | |||
4161 | { | 4163 | { |
4162 | xlog_recover_item_t *item; | 4164 | xlog_recover_item_t *item; |
4163 | 4165 | ||
4164 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); | 4166 | item = kmem_zalloc(sizeof(xlog_recover_item_t), 0); |
4165 | INIT_LIST_HEAD(&item->ri_list); | 4167 | INIT_LIST_HEAD(&item->ri_list); |
4166 | list_add_tail(&item->ri_list, head); | 4168 | list_add_tail(&item->ri_list, head); |
4167 | } | 4169 | } |
@@ -4201,7 +4203,7 @@ xlog_recover_add_to_cont_trans( | |||
4201 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | 4203 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; |
4202 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | 4204 | old_len = item->ri_buf[item->ri_cnt-1].i_len; |
4203 | 4205 | ||
4204 | ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP); | 4206 | ptr = kmem_realloc(old_ptr, len + old_len, 0); |
4205 | memcpy(&ptr[old_len], dp, len); | 4207 | memcpy(&ptr[old_len], dp, len); |
4206 | item->ri_buf[item->ri_cnt-1].i_len += len; | 4208 | item->ri_buf[item->ri_cnt-1].i_len += len; |
4207 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | 4209 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; |
@@ -4261,7 +4263,7 @@ xlog_recover_add_to_trans( | |||
4261 | return 0; | 4263 | return 0; |
4262 | } | 4264 | } |
4263 | 4265 | ||
4264 | ptr = kmem_alloc(len, KM_SLEEP); | 4266 | ptr = kmem_alloc(len, 0); |
4265 | memcpy(ptr, dp, len); | 4267 | memcpy(ptr, dp, len); |
4266 | in_f = (struct xfs_inode_log_format *)ptr; | 4268 | in_f = (struct xfs_inode_log_format *)ptr; |
4267 | 4269 | ||
@@ -4289,7 +4291,7 @@ xlog_recover_add_to_trans( | |||
4289 | item->ri_total = in_f->ilf_size; | 4291 | item->ri_total = in_f->ilf_size; |
4290 | item->ri_buf = | 4292 | item->ri_buf = |
4291 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), | 4293 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), |
4292 | KM_SLEEP); | 4294 | 0); |
4293 | } | 4295 | } |
4294 | ASSERT(item->ri_total > item->ri_cnt); | 4296 | ASSERT(item->ri_total > item->ri_cnt); |
4295 | /* Description region is ri_buf[0] */ | 4297 | /* Description region is ri_buf[0] */ |
@@ -4423,7 +4425,7 @@ xlog_recover_ophdr_to_trans( | |||
4423 | * This is a new transaction so allocate a new recovery container to | 4425 | * This is a new transaction so allocate a new recovery container to |
4424 | * hold the recovery ops that will follow. | 4426 | * hold the recovery ops that will follow. |
4425 | */ | 4427 | */ |
4426 | trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP); | 4428 | trans = kmem_zalloc(sizeof(struct xlog_recover), 0); |
4427 | trans->r_log_tid = tid; | 4429 | trans->r_log_tid = tid; |
4428 | trans->r_lsn = be64_to_cpu(rhead->h_lsn); | 4430 | trans->r_lsn = be64_to_cpu(rhead->h_lsn); |
4429 | INIT_LIST_HEAD(&trans->r_itemq); | 4431 | INIT_LIST_HEAD(&trans->r_itemq); |
@@ -5022,16 +5024,27 @@ xlog_recover_process_one_iunlink( | |||
5022 | } | 5024 | } |
5023 | 5025 | ||
5024 | /* | 5026 | /* |
5025 | * xlog_iunlink_recover | 5027 | * Recover AGI unlinked lists |
5028 | * | ||
5029 | * This is called during recovery to process any inodes which we unlinked but | ||
5030 | * not freed when the system crashed. These inodes will be on the lists in the | ||
5031 | * AGI blocks. What we do here is scan all the AGIs and fully truncate and free | ||
5032 | * any inodes found on the lists. Each inode is removed from the lists when it | ||
5033 | * has been fully truncated and is freed. The freeing of the inode and its | ||
5034 | * removal from the list must be atomic. | ||
5035 | * | ||
5036 | * If everything we touch in the agi processing loop is already in memory, this | ||
5037 | * loop can hold the cpu for a long time. It runs without lock contention, | ||
5038 | * memory allocation contention, the need wait for IO, etc, and so will run | ||
5039 | * until we either run out of inodes to process, run low on memory or we run out | ||
5040 | * of log space. | ||
5026 | * | 5041 | * |
5027 | * This is called during recovery to process any inodes which | 5042 | * This behaviour is bad for latency on single CPU and non-preemptible kernels, |
5028 | * we unlinked but not freed when the system crashed. These | 5043 | * and can prevent other filesytem work (such as CIL pushes) from running. This |
5029 | * inodes will be on the lists in the AGI blocks. What we do | 5044 | * can lead to deadlocks if the recovery process runs out of log reservation |
5030 | * here is scan all the AGIs and fully truncate and free any | 5045 | * space. Hence we need to yield the CPU when there is other kernel work |
5031 | * inodes found on the lists. Each inode is removed from the | 5046 | * scheduled on this CPU to ensure other scheduled work can run without undue |
5032 | * lists when it has been fully truncated and is freed. The | 5047 | * latency. |
5033 | * freeing of the inode and its removal from the list must be | ||
5034 | * atomic. | ||
5035 | */ | 5048 | */ |
5036 | STATIC void | 5049 | STATIC void |
5037 | xlog_recover_process_iunlinks( | 5050 | xlog_recover_process_iunlinks( |
@@ -5078,6 +5091,7 @@ xlog_recover_process_iunlinks( | |||
5078 | while (agino != NULLAGINO) { | 5091 | while (agino != NULLAGINO) { |
5079 | agino = xlog_recover_process_one_iunlink(mp, | 5092 | agino = xlog_recover_process_one_iunlink(mp, |
5080 | agno, agino, bucket); | 5093 | agno, agino, bucket); |
5094 | cond_resched(); | ||
5081 | } | 5095 | } |
5082 | } | 5096 | } |
5083 | xfs_buf_rele(agibp); | 5097 | xfs_buf_rele(agibp); |
@@ -5527,7 +5541,7 @@ xlog_do_log_recovery( | |||
5527 | */ | 5541 | */ |
5528 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * | 5542 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
5529 | sizeof(struct list_head), | 5543 | sizeof(struct list_head), |
5530 | KM_SLEEP); | 5544 | 0); |
5531 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 5545 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
5532 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); | 5546 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); |
5533 | 5547 | ||
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 322da6909290..ba5b6f3b2b88 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -82,7 +82,7 @@ xfs_uuid_mount( | |||
82 | if (hole < 0) { | 82 | if (hole < 0) { |
83 | xfs_uuid_table = kmem_realloc(xfs_uuid_table, | 83 | xfs_uuid_table = kmem_realloc(xfs_uuid_table, |
84 | (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), | 84 | (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), |
85 | KM_SLEEP); | 85 | 0); |
86 | hole = xfs_uuid_table_size++; | 86 | hole = xfs_uuid_table_size++; |
87 | } | 87 | } |
88 | xfs_uuid_table[hole] = *uuid; | 88 | xfs_uuid_table[hole] = *uuid; |
@@ -214,7 +214,7 @@ xfs_initialize_perag( | |||
214 | 214 | ||
215 | spin_lock(&mp->m_perag_lock); | 215 | spin_lock(&mp->m_perag_lock); |
216 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { | 216 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { |
217 | BUG(); | 217 | WARN_ON_ONCE(1); |
218 | spin_unlock(&mp->m_perag_lock); | 218 | spin_unlock(&mp->m_perag_lock); |
219 | radix_tree_preload_end(); | 219 | radix_tree_preload_end(); |
220 | error = -EEXIST; | 220 | error = -EEXIST; |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4adb6837439a..fdb60e09a9c5 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -327,13 +327,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
327 | } | 327 | } |
328 | 328 | ||
329 | /* per-AG block reservation data structures*/ | 329 | /* per-AG block reservation data structures*/ |
330 | enum xfs_ag_resv_type { | ||
331 | XFS_AG_RESV_NONE = 0, | ||
332 | XFS_AG_RESV_AGFL, | ||
333 | XFS_AG_RESV_METADATA, | ||
334 | XFS_AG_RESV_RMAPBT, | ||
335 | }; | ||
336 | |||
337 | struct xfs_ag_resv { | 330 | struct xfs_ag_resv { |
338 | /* number of blocks originally reserved here */ | 331 | /* number of blocks originally reserved here */ |
339 | xfs_extlen_t ar_orig_reserved; | 332 | xfs_extlen_t ar_orig_reserved; |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 74738813f60d..a06661dac5be 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -333,12 +333,12 @@ xfs_mru_cache_create( | |||
333 | if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) | 333 | if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) |
334 | return -EINVAL; | 334 | return -EINVAL; |
335 | 335 | ||
336 | if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP))) | 336 | if (!(mru = kmem_zalloc(sizeof(*mru), 0))) |
337 | return -ENOMEM; | 337 | return -ENOMEM; |
338 | 338 | ||
339 | /* An extra list is needed to avoid reaping up to a grp_time early. */ | 339 | /* An extra list is needed to avoid reaping up to a grp_time early. */ |
340 | mru->grp_count = grp_count + 1; | 340 | mru->grp_count = grp_count + 1; |
341 | mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); | 341 | mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), 0); |
342 | 342 | ||
343 | if (!mru->lists) { | 343 | if (!mru->lists) { |
344 | err = -ENOMEM; | 344 | err = -ENOMEM; |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 5e7a37f0cf84..ecd8ce152ab1 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -642,7 +642,7 @@ xfs_qm_init_quotainfo( | |||
642 | 642 | ||
643 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 643 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
644 | 644 | ||
645 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); | 645 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), 0); |
646 | 646 | ||
647 | error = list_lru_init(&qinf->qi_lru); | 647 | error = list_lru_init(&qinf->qi_lru); |
648 | if (error) | 648 | if (error) |
@@ -978,7 +978,7 @@ xfs_qm_reset_dqcounts_buf( | |||
978 | if (qip->i_d.di_nblocks == 0) | 978 | if (qip->i_d.di_nblocks == 0) |
979 | return 0; | 979 | return 0; |
980 | 980 | ||
981 | map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); | 981 | map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), 0); |
982 | 982 | ||
983 | lblkno = 0; | 983 | lblkno = 0; |
984 | maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); | 984 | maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); |
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index d8288aa0670a..2328268e6245 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c | |||
@@ -144,9 +144,9 @@ xfs_cui_init( | |||
144 | ASSERT(nextents > 0); | 144 | ASSERT(nextents > 0); |
145 | if (nextents > XFS_CUI_MAX_FAST_EXTENTS) | 145 | if (nextents > XFS_CUI_MAX_FAST_EXTENTS) |
146 | cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), | 146 | cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), |
147 | KM_SLEEP); | 147 | 0); |
148 | else | 148 | else |
149 | cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP); | 149 | cuip = kmem_zone_zalloc(xfs_cui_zone, 0); |
150 | 150 | ||
151 | xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); | 151 | xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); |
152 | cuip->cui_format.cui_nextents = nextents; | 152 | cuip->cui_format.cui_nextents = nextents; |
@@ -223,7 +223,7 @@ xfs_trans_get_cud( | |||
223 | { | 223 | { |
224 | struct xfs_cud_log_item *cudp; | 224 | struct xfs_cud_log_item *cudp; |
225 | 225 | ||
226 | cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); | 226 | cudp = kmem_zone_zalloc(xfs_cud_zone, 0); |
227 | xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, | 227 | xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, |
228 | &xfs_cud_item_ops); | 228 | &xfs_cud_item_ops); |
229 | cudp->cud_cuip = cuip; | 229 | cudp->cud_cuip = cuip; |
@@ -555,26 +555,24 @@ xfs_cui_recover( | |||
555 | irec.br_blockcount = new_len; | 555 | irec.br_blockcount = new_len; |
556 | switch (type) { | 556 | switch (type) { |
557 | case XFS_REFCOUNT_INCREASE: | 557 | case XFS_REFCOUNT_INCREASE: |
558 | error = xfs_refcount_increase_extent(tp, &irec); | 558 | xfs_refcount_increase_extent(tp, &irec); |
559 | break; | 559 | break; |
560 | case XFS_REFCOUNT_DECREASE: | 560 | case XFS_REFCOUNT_DECREASE: |
561 | error = xfs_refcount_decrease_extent(tp, &irec); | 561 | xfs_refcount_decrease_extent(tp, &irec); |
562 | break; | 562 | break; |
563 | case XFS_REFCOUNT_ALLOC_COW: | 563 | case XFS_REFCOUNT_ALLOC_COW: |
564 | error = xfs_refcount_alloc_cow_extent(tp, | 564 | xfs_refcount_alloc_cow_extent(tp, |
565 | irec.br_startblock, | 565 | irec.br_startblock, |
566 | irec.br_blockcount); | 566 | irec.br_blockcount); |
567 | break; | 567 | break; |
568 | case XFS_REFCOUNT_FREE_COW: | 568 | case XFS_REFCOUNT_FREE_COW: |
569 | error = xfs_refcount_free_cow_extent(tp, | 569 | xfs_refcount_free_cow_extent(tp, |
570 | irec.br_startblock, | 570 | irec.br_startblock, |
571 | irec.br_blockcount); | 571 | irec.br_blockcount); |
572 | break; | 572 | break; |
573 | default: | 573 | default: |
574 | ASSERT(0); | 574 | ASSERT(0); |
575 | } | 575 | } |
576 | if (error) | ||
577 | goto abort_error; | ||
578 | requeue_only = true; | 576 | requeue_only = true; |
579 | } | 577 | } |
580 | } | 578 | } |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index edbe37b7f636..0f08153b4994 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -495,10 +495,8 @@ xfs_reflink_cancel_cow_blocks( | |||
495 | ASSERT((*tpp)->t_firstblock == NULLFSBLOCK); | 495 | ASSERT((*tpp)->t_firstblock == NULLFSBLOCK); |
496 | 496 | ||
497 | /* Free the CoW orphan record. */ | 497 | /* Free the CoW orphan record. */ |
498 | error = xfs_refcount_free_cow_extent(*tpp, | 498 | xfs_refcount_free_cow_extent(*tpp, del.br_startblock, |
499 | del.br_startblock, del.br_blockcount); | 499 | del.br_blockcount); |
500 | if (error) | ||
501 | break; | ||
502 | 500 | ||
503 | xfs_bmap_add_free(*tpp, del.br_startblock, | 501 | xfs_bmap_add_free(*tpp, del.br_startblock, |
504 | del.br_blockcount, NULL); | 502 | del.br_blockcount, NULL); |
@@ -675,15 +673,10 @@ xfs_reflink_end_cow_extent( | |||
675 | trace_xfs_reflink_cow_remap(ip, &del); | 673 | trace_xfs_reflink_cow_remap(ip, &del); |
676 | 674 | ||
677 | /* Free the CoW orphan record. */ | 675 | /* Free the CoW orphan record. */ |
678 | error = xfs_refcount_free_cow_extent(tp, del.br_startblock, | 676 | xfs_refcount_free_cow_extent(tp, del.br_startblock, del.br_blockcount); |
679 | del.br_blockcount); | ||
680 | if (error) | ||
681 | goto out_cancel; | ||
682 | 677 | ||
683 | /* Map the new blocks into the data fork. */ | 678 | /* Map the new blocks into the data fork. */ |
684 | error = xfs_bmap_map_extent(tp, ip, &del); | 679 | xfs_bmap_map_extent(tp, ip, &del); |
685 | if (error) | ||
686 | goto out_cancel; | ||
687 | 680 | ||
688 | /* Charge this new data fork mapping to the on-disk quota. */ | 681 | /* Charge this new data fork mapping to the on-disk quota. */ |
689 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, | 682 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, |
@@ -1070,14 +1063,10 @@ xfs_reflink_remap_extent( | |||
1070 | uirec.br_blockcount, uirec.br_startblock); | 1063 | uirec.br_blockcount, uirec.br_startblock); |
1071 | 1064 | ||
1072 | /* Update the refcount tree */ | 1065 | /* Update the refcount tree */ |
1073 | error = xfs_refcount_increase_extent(tp, &uirec); | 1066 | xfs_refcount_increase_extent(tp, &uirec); |
1074 | if (error) | ||
1075 | goto out_cancel; | ||
1076 | 1067 | ||
1077 | /* Map the new blocks into the data fork. */ | 1068 | /* Map the new blocks into the data fork. */ |
1078 | error = xfs_bmap_map_extent(tp, ip, &uirec); | 1069 | xfs_bmap_map_extent(tp, ip, &uirec); |
1079 | if (error) | ||
1080 | goto out_cancel; | ||
1081 | 1070 | ||
1082 | /* Update quota accounting. */ | 1071 | /* Update quota accounting. */ |
1083 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, | 1072 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, |
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 77ed557b6127..8939e0ea09cd 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c | |||
@@ -142,9 +142,9 @@ xfs_rui_init( | |||
142 | 142 | ||
143 | ASSERT(nextents > 0); | 143 | ASSERT(nextents > 0); |
144 | if (nextents > XFS_RUI_MAX_FAST_EXTENTS) | 144 | if (nextents > XFS_RUI_MAX_FAST_EXTENTS) |
145 | ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP); | 145 | ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0); |
146 | else | 146 | else |
147 | ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP); | 147 | ruip = kmem_zone_zalloc(xfs_rui_zone, 0); |
148 | 148 | ||
149 | xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); | 149 | xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); |
150 | ruip->rui_format.rui_nextents = nextents; | 150 | ruip->rui_format.rui_nextents = nextents; |
@@ -244,7 +244,7 @@ xfs_trans_get_rud( | |||
244 | { | 244 | { |
245 | struct xfs_rud_log_item *rudp; | 245 | struct xfs_rud_log_item *rudp; |
246 | 246 | ||
247 | rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); | 247 | rudp = kmem_zone_zalloc(xfs_rud_zone, 0); |
248 | xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, | 248 | xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, |
249 | &xfs_rud_item_ops); | 249 | &xfs_rud_item_ops); |
250 | rudp->rud_ruip = ruip; | 250 | rudp->rud_ruip = ruip; |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 5fa4db3c3e32..4a48a8c75b4f 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -865,7 +865,7 @@ xfs_alloc_rsum_cache( | |||
865 | * lower bound on the minimum level with any free extents. We can | 865 | * lower bound on the minimum level with any free extents. We can |
866 | * continue without the cache if it couldn't be allocated. | 866 | * continue without the cache if it couldn't be allocated. |
867 | */ | 867 | */ |
868 | mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, KM_SLEEP); | 868 | mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, 0); |
869 | if (!mp->m_rsum_cache) | 869 | if (!mp->m_rsum_cache) |
870 | xfs_warn(mp, "could not allocate realtime summary cache"); | 870 | xfs_warn(mp, "could not allocate realtime summary cache"); |
871 | } | 871 | } |
@@ -963,7 +963,7 @@ xfs_growfs_rt( | |||
963 | /* | 963 | /* |
964 | * Allocate a new (fake) mount/sb. | 964 | * Allocate a new (fake) mount/sb. |
965 | */ | 965 | */ |
966 | nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP); | 966 | nmp = kmem_alloc(sizeof(*nmp), 0); |
967 | /* | 967 | /* |
968 | * Loop over the bitmap blocks. | 968 | * Loop over the bitmap blocks. |
969 | * We will do everything one bitmap block at a time. | 969 | * We will do everything one bitmap block at a time. |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f9450235533c..391b4748cae3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -818,7 +818,8 @@ xfs_init_mount_workqueues( | |||
818 | goto out_destroy_buf; | 818 | goto out_destroy_buf; |
819 | 819 | ||
820 | mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", | 820 | mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", |
821 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); | 821 | WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, |
822 | 0, mp->m_fsname); | ||
822 | if (!mp->m_cil_workqueue) | 823 | if (!mp->m_cil_workqueue) |
823 | goto out_destroy_unwritten; | 824 | goto out_destroy_unwritten; |
824 | 825 | ||
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 8094b1920eef..eaae275ed430 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -23,6 +23,7 @@ struct xlog; | |||
23 | struct xlog_ticket; | 23 | struct xlog_ticket; |
24 | struct xlog_recover; | 24 | struct xlog_recover; |
25 | struct xlog_recover_item; | 25 | struct xlog_recover_item; |
26 | struct xlog_rec_header; | ||
26 | struct xfs_buf_log_format; | 27 | struct xfs_buf_log_format; |
27 | struct xfs_inode_log_format; | 28 | struct xfs_inode_log_format; |
28 | struct xfs_bmbt_irec; | 29 | struct xfs_bmbt_irec; |
@@ -30,6 +31,10 @@ struct xfs_btree_cur; | |||
30 | struct xfs_refcount_irec; | 31 | struct xfs_refcount_irec; |
31 | struct xfs_fsmap; | 32 | struct xfs_fsmap; |
32 | struct xfs_rmap_irec; | 33 | struct xfs_rmap_irec; |
34 | struct xfs_icreate_log; | ||
35 | struct xfs_owner_info; | ||
36 | struct xfs_trans_res; | ||
37 | struct xfs_inobt_rec_incore; | ||
33 | 38 | ||
34 | DECLARE_EVENT_CLASS(xfs_attr_list_class, | 39 | DECLARE_EVENT_CLASS(xfs_attr_list_class, |
35 | TP_PROTO(struct xfs_attr_list_context *ctx), | 40 | TP_PROTO(struct xfs_attr_list_context *ctx), |
@@ -3575,6 +3580,35 @@ TRACE_EVENT(xfs_pwork_init, | |||
3575 | __entry->nr_threads, __entry->pid) | 3580 | __entry->nr_threads, __entry->pid) |
3576 | ) | 3581 | ) |
3577 | 3582 | ||
3583 | DECLARE_EVENT_CLASS(xfs_kmem_class, | ||
3584 | TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), | ||
3585 | TP_ARGS(size, flags, caller_ip), | ||
3586 | TP_STRUCT__entry( | ||
3587 | __field(ssize_t, size) | ||
3588 | __field(int, flags) | ||
3589 | __field(unsigned long, caller_ip) | ||
3590 | ), | ||
3591 | TP_fast_assign( | ||
3592 | __entry->size = size; | ||
3593 | __entry->flags = flags; | ||
3594 | __entry->caller_ip = caller_ip; | ||
3595 | ), | ||
3596 | TP_printk("size %zd flags 0x%x caller %pS", | ||
3597 | __entry->size, | ||
3598 | __entry->flags, | ||
3599 | (char *)__entry->caller_ip) | ||
3600 | ) | ||
3601 | |||
3602 | #define DEFINE_KMEM_EVENT(name) \ | ||
3603 | DEFINE_EVENT(xfs_kmem_class, name, \ | ||
3604 | TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \ | ||
3605 | TP_ARGS(size, flags, caller_ip)) | ||
3606 | DEFINE_KMEM_EVENT(kmem_alloc); | ||
3607 | DEFINE_KMEM_EVENT(kmem_alloc_io); | ||
3608 | DEFINE_KMEM_EVENT(kmem_alloc_large); | ||
3609 | DEFINE_KMEM_EVENT(kmem_realloc); | ||
3610 | DEFINE_KMEM_EVENT(kmem_zone_alloc); | ||
3611 | |||
3578 | #endif /* _TRACE_XFS_H */ | 3612 | #endif /* _TRACE_XFS_H */ |
3579 | 3613 | ||
3580 | #undef TRACE_INCLUDE_PATH | 3614 | #undef TRACE_INCLUDE_PATH |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index d42a68d8313b..f4795fdb7389 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -90,7 +90,7 @@ xfs_trans_dup( | |||
90 | 90 | ||
91 | trace_xfs_trans_dup(tp, _RET_IP_); | 91 | trace_xfs_trans_dup(tp, _RET_IP_); |
92 | 92 | ||
93 | ntp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); | 93 | ntp = kmem_zone_zalloc(xfs_trans_zone, 0); |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * Initialize the new transaction structure. | 96 | * Initialize the new transaction structure. |
@@ -263,7 +263,7 @@ xfs_trans_alloc( | |||
263 | * GFP_NOFS allocation context so that we avoid lockdep false positives | 263 | * GFP_NOFS allocation context so that we avoid lockdep false positives |
264 | * by doing GFP_KERNEL allocations inside sb_start_intwrite(). | 264 | * by doing GFP_KERNEL allocations inside sb_start_intwrite(). |
265 | */ | 265 | */ |
266 | tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); | 266 | tp = kmem_zone_zalloc(xfs_trans_zone, 0); |
267 | if (!(flags & XFS_TRANS_NO_WRITECOUNT)) | 267 | if (!(flags & XFS_TRANS_NO_WRITECOUNT)) |
268 | sb_start_intwrite(mp->m_super); | 268 | sb_start_intwrite(mp->m_super); |
269 | 269 | ||
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 1027c9ca6eb8..16457465833b 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -863,7 +863,7 @@ STATIC void | |||
863 | xfs_trans_alloc_dqinfo( | 863 | xfs_trans_alloc_dqinfo( |
864 | xfs_trans_t *tp) | 864 | xfs_trans_t *tp) |
865 | { | 865 | { |
866 | tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP); | 866 | tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, 0); |
867 | } | 867 | } |
868 | 868 | ||
869 | void | 869 | void |
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 3123b5aaad2a..cb895b1df5e4 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c | |||
@@ -30,7 +30,7 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, | |||
30 | value = NULL; | 30 | value = NULL; |
31 | } | 31 | } |
32 | 32 | ||
33 | error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); | 33 | error = xfs_attr_get(ip, name, (unsigned char **)&value, &asize, xflags); |
34 | if (error) | 34 | if (error) |
35 | return error; | 35 | return error; |
36 | return asize; | 36 | return asize; |
diff --git a/include/linux/fs.h b/include/linux/fs.h index ae6648145d18..ffe35d97afcb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -3543,6 +3543,8 @@ extern void inode_nohighmem(struct inode *inode); | |||
3543 | /* mm/fadvise.c */ | 3543 | /* mm/fadvise.c */ |
3544 | extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, | 3544 | extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, |
3545 | int advice); | 3545 | int advice); |
3546 | extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, | ||
3547 | int advice); | ||
3546 | 3548 | ||
3547 | #if defined(CONFIG_IO_URING) | 3549 | #if defined(CONFIG_IO_URING) |
3548 | extern struct sock *io_uring_get_socket(struct file *file); | 3550 | extern struct sock *io_uring_get_socket(struct file *file); |
diff --git a/mm/fadvise.c b/mm/fadvise.c index 467bcd032037..4f17c83db575 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c | |||
@@ -27,8 +27,7 @@ | |||
27 | * deactivate the pages and clear PG_Referenced. | 27 | * deactivate the pages and clear PG_Referenced. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | static int generic_fadvise(struct file *file, loff_t offset, loff_t len, | 30 | int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) |
31 | int advice) | ||
32 | { | 31 | { |
33 | struct inode *inode; | 32 | struct inode *inode; |
34 | struct address_space *mapping; | 33 | struct address_space *mapping; |
@@ -178,6 +177,7 @@ static int generic_fadvise(struct file *file, loff_t offset, loff_t len, | |||
178 | } | 177 | } |
179 | return 0; | 178 | return 0; |
180 | } | 179 | } |
180 | EXPORT_SYMBOL(generic_fadvise); | ||
181 | 181 | ||
182 | int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) | 182 | int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) |
183 | { | 183 | { |
diff --git a/mm/madvise.c b/mm/madvise.c index 968df3aa069f..bac973b9f2cc 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/userfaultfd_k.h> | 14 | #include <linux/userfaultfd_k.h> |
15 | #include <linux/hugetlb.h> | 15 | #include <linux/hugetlb.h> |
16 | #include <linux/falloc.h> | 16 | #include <linux/falloc.h> |
17 | #include <linux/fadvise.h> | ||
17 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
18 | #include <linux/ksm.h> | 19 | #include <linux/ksm.h> |
19 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
@@ -275,6 +276,7 @@ static long madvise_willneed(struct vm_area_struct *vma, | |||
275 | unsigned long start, unsigned long end) | 276 | unsigned long start, unsigned long end) |
276 | { | 277 | { |
277 | struct file *file = vma->vm_file; | 278 | struct file *file = vma->vm_file; |
279 | loff_t offset; | ||
278 | 280 | ||
279 | *prev = vma; | 281 | *prev = vma; |
280 | #ifdef CONFIG_SWAP | 282 | #ifdef CONFIG_SWAP |
@@ -298,12 +300,20 @@ static long madvise_willneed(struct vm_area_struct *vma, | |||
298 | return 0; | 300 | return 0; |
299 | } | 301 | } |
300 | 302 | ||
301 | start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 303 | /* |
302 | if (end > vma->vm_end) | 304 | * Filesystem's fadvise may need to take various locks. We need to |
303 | end = vma->vm_end; | 305 | * explicitly grab a reference because the vma (and hence the |
304 | end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 306 | * vma's reference to the file) can go away as soon as we drop |
305 | 307 | * mmap_sem. | |
306 | force_page_cache_readahead(file->f_mapping, file, start, end - start); | 308 | */ |
309 | *prev = NULL; /* tell sys_madvise we drop mmap_sem */ | ||
310 | get_file(file); | ||
311 | up_read(&current->mm->mmap_sem); | ||
312 | offset = (loff_t)(start - vma->vm_start) | ||
313 | + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); | ||
314 | vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); | ||
315 | fput(file); | ||
316 | down_read(&current->mm->mmap_sem); | ||
307 | return 0; | 317 | return 0; |
308 | } | 318 | } |
309 | 319 | ||