 fs/xfs/xfs_iget.c   |  49
 fs/xfs/xfs_inode.c  | 268
 fs/xfs/xfs_inode.h  |  16
 fs/xfs/xfs_vfsops.c |   5
 4 files changed, 156 insertions(+), 182 deletions(-)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8e09b71f4104..e657c5128460 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
 	xfs_inode_t	*ip;
 	xfs_inode_t	*iq;
 	int		error;
-	xfs_icluster_t	*icl, *new_icl = NULL;
 	unsigned long	first_index, mask;
 	xfs_perag_t	*pag;
 	xfs_agino_t	agino;
@@ -229,11 +228,9 @@ finish_inode:
 	}
 
 	/*
-	 * This is a bit messy - we preallocate everything we _might_
-	 * need before we pick up the ici lock. That way we don't have to
-	 * juggle locks and go all the way back to the start.
+	 * Preload the radix tree so we can insert safely under the
+	 * write spinlock.
 	 */
-	new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
 	if (radix_tree_preload(GFP_KERNEL)) {
 		xfs_idestroy(ip);
 		delay(1);
@@ -242,17 +239,6 @@ finish_inode:
 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
 	first_index = agino & mask;
 	write_lock(&pag->pag_ici_lock);
-
-	/*
-	 * Find the cluster if it exists
-	 */
-	icl = NULL;
-	if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
-			first_index, 1)) {
-		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
-			icl = iq->i_cluster;
-	}
-
 	/*
 	 * insert the new inode
 	 */
@@ -267,30 +253,13 @@ finish_inode:
 	}
 
 	/*
-	 * These values _must_ be set before releasing ihlock!
+	 * These values _must_ be set before releasing the radix tree lock!
 	 */
 	ip->i_udquot = ip->i_gdquot = NULL;
 	xfs_iflags_set(ip, XFS_INEW);
 
-	ASSERT(ip->i_cluster == NULL);
-
-	if (!icl) {
-		spin_lock_init(&new_icl->icl_lock);
-		INIT_HLIST_HEAD(&new_icl->icl_inodes);
-		icl = new_icl;
-		new_icl = NULL;
-	} else {
-		ASSERT(!hlist_empty(&icl->icl_inodes));
-	}
-	spin_lock(&icl->icl_lock);
-	hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
-	ip->i_cluster = icl;
-	spin_unlock(&icl->icl_lock);
-
 	write_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
-	if (new_icl)
-		kmem_zone_free(xfs_icluster_zone, new_icl);
 
 	/*
 	 * Link ip to its mount and thread it on the mount's inode list.
@@ -529,18 +498,6 @@ xfs_iextract(
 	xfs_put_perag(mp, pag);
 
 	/*
-	 * Remove from cluster list
-	 */
-	mp = ip->i_mount;
-	spin_lock(&ip->i_cluster->icl_lock);
-	hlist_del(&ip->i_cnode);
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/* was last inode in cluster? */
-	if (hlist_empty(&ip->i_cluster->icl_inodes))
-		kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
-
-	/*
 	 * Remove from mount's inode list.
 	 */
 	XFS_MOUNT_ILOCK(mp);
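
The xfs_iget.c hunks above lean on the kernel's radix tree preload protocol: node
memory is reserved with radix_tree_preload() while sleeping is still allowed, the
insert then runs under the pag_ici_lock write lock where it cannot fail for lack
of memory, and radix_tree_preload_end() drops the preemption disable afterwards.
A minimal sketch of that pattern follows; the function and parameter names
(cache_insert, root, lock) are illustrative placeholders, not part of the patch:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

/*
 * Sketch only: preload outside the lock, insert under it.
 * "root", "lock", "index" and "item" are placeholders, not XFS names.
 */
static int cache_insert(struct radix_tree_root *root, rwlock_t *lock,
			unsigned long index, void *item)
{
	int error;

	/* Reserve radix tree nodes while we may still sleep. */
	error = radix_tree_preload(GFP_KERNEL);
	if (error)
		return error;

	/* The insert cannot fail with -ENOMEM now. */
	write_lock(lock);
	error = radix_tree_insert(root, index, item);
	write_unlock(lock);

	/* Re-enable the preemption that the preload disabled. */
	radix_tree_preload_end();
	return error;
}

As the hunk shows, xfs_iget_core() handles a preload failure by destroying the
new inode and retrying after delay(1) rather than propagating the error.
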
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3c3e9e3c1da8..040c0e41729b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
 
 /*
  * Used in xfs_itruncate().  This is the maximum number of extents
@@ -2994,6 +2993,153 @@ xfs_iflush_fork(
 	return 0;
 }
 
+STATIC int
+xfs_iflush_cluster(
+	xfs_inode_t	*ip,
+	xfs_buf_t	*bp)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_perag_t		*pag = xfs_get_perag(mp, ip->i_ino);
+	unsigned long		first_index, mask;
+	int			ilist_size;
+	xfs_inode_t		**ilist;
+	xfs_inode_t		*iq;
+	xfs_inode_log_item_t	*iip;
+	int			nr_found;
+	int			clcount = 0;
+	int			bufwasdelwri;
+	int			i;
+
+	ASSERT(pag->pagi_inodeok);
+	ASSERT(pag->pag_ici_init);
+
+	ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+	ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+	if (!ilist)
+		return 0;
+
+	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+	read_lock(&pag->pag_ici_lock);
+	/* really need a gang lookup range call here */
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+					first_index,
+					XFS_INODE_CLUSTER_SIZE(mp));
+	if (nr_found == 0)
+		goto out_free;
+
+	for (i = 0; i < nr_found; i++) {
+		iq = ilist[i];
+		if (iq == ip)
+			continue;
+		/* if the inode lies outside this cluster, we're done. */
+		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+			break;
+		/*
+		 * Do an un-protected check to see if the inode is dirty and
+		 * is a candidate for flushing.  These checks will be repeated
+		 * later after the appropriate locks are acquired.
+		 */
+		iip = iq->i_itemp;
+		if ((iq->i_update_core == 0) &&
+		    ((iip == NULL) ||
+		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+		    xfs_ipincount(iq) == 0) {
+			continue;
+		}
+
+		/*
+		 * Try to get locks.  If any are unavailable or it is pinned,
+		 * then this inode cannot be flushed and is skipped.
+		 */
+
+		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+			continue;
+		if (!xfs_iflock_nowait(iq)) {
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+		if (xfs_ipincount(iq)) {
+			xfs_ifunlock(iq);
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+		/*
+		 * arriving here means that this inode can be flushed.  First
+		 * re-check that it's dirty before flushing.
+		 */
+		iip = iq->i_itemp;
+		if ((iq->i_update_core != 0) || ((iip != NULL) &&
+		    (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+			int	error;
+			error = xfs_iflush_int(iq, bp);
+			if (error) {
+				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				goto cluster_corrupt_out;
+			}
+			clcount++;
+		} else {
+			xfs_ifunlock(iq);
+		}
+		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+	}
+
+	if (clcount) {
+		XFS_STATS_INC(xs_icluster_flushcnt);
+		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+	}
+
+out_free:
+	read_unlock(&pag->pag_ici_lock);
+	kmem_free(ilist, ilist_size);
+	return 0;
+
+
+cluster_corrupt_out:
+	/*
+	 * Corruption detected in the clustering loop.  Invalidate the
+	 * inode buffer and shut down the filesystem.
+	 */
+	read_unlock(&pag->pag_ici_lock);
+	/*
+	 * Clean up the buffer.  If it was B_DELWRI, just release it --
+	 * brelse can handle it with no problems.  If not, shut down the
+	 * filesystem before releasing the buffer.
+	 */
+	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+	if (bufwasdelwri)
+		xfs_buf_relse(bp);
+
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+	if (!bufwasdelwri) {
+		/*
+		 * Just like incore_relse: if we have b_iodone functions,
+		 * mark the buffer as an error and call them.  Otherwise
+		 * mark it as stale and brelse.
+		 */
+		if (XFS_BUF_IODONE_FUNC(bp)) {
+			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+			XFS_BUF_UNDONE(bp);
+			XFS_BUF_STALE(bp);
+			XFS_BUF_SHUT(bp);
+			XFS_BUF_ERROR(bp,EIO);
+			xfs_biodone(bp);
+		} else {
+			XFS_BUF_STALE(bp);
+			xfs_buf_relse(bp);
+		}
+	}
+
+	/*
+	 * Unlocks the flush lock
+	 */
+	xfs_iflush_abort(iq);
+	kmem_free(ilist, ilist_size);
+	return XFS_ERROR(EFSCORRUPTED);
+}
+
 /*
  * xfs_iflush() will write a modified inode's changes out to the
  * inode's on disk home.  The caller must have the inode lock held
@@ -3013,13 +3159,8 @@ xfs_iflush(
 	xfs_dinode_t	*dip;
 	xfs_mount_t	*mp;
 	int		error;
-	/* REFERENCED */
-	xfs_inode_t	*iq;
-	int		clcount;	/* count of inodes clustered */
-	int		bufwasdelwri;
-	struct hlist_node *entry;
-	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
 	int		noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
+	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
 
 	XFS_STATS_INC(xs_iflush_count);
 
@@ -3138,9 +3279,8 @@ xfs_iflush(
 	 * First flush out the inode that xfs_iflush was called with.
 	 */
 	error = xfs_iflush_int(ip, bp);
-	if (error) {
+	if (error)
 		goto corrupt_out;
-	}
 
 	/*
 	 * If the buffer is pinned then push on the log now so we won't
@@ -3153,70 +3293,9 @@ xfs_iflush(
 	 * inode clustering:
 	 * see if other inodes can be gathered into this write
 	 */
-	spin_lock(&ip->i_cluster->icl_lock);
-	ip->i_cluster->icl_buf = bp;
-
-	clcount = 0;
-	hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
-		if (iq == ip)
-			continue;
-
-		/*
-		 * Do an un-protected check to see if the inode is dirty and
-		 * is a candidate for flushing.  These checks will be repeated
-		 * later after the appropriate locks are acquired.
-		 */
-		iip = iq->i_itemp;
-		if ((iq->i_update_core == 0) &&
-		    ((iip == NULL) ||
-		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-		    xfs_ipincount(iq) == 0) {
-			continue;
-		}
-
-		/*
-		 * Try to get locks.  If any are unavailable,
-		 * then this inode cannot be flushed and is skipped.
-		 */
-
-		/* get inode locks (just i_lock) */
-		if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
-			/* get inode flush lock */
-			if (xfs_iflock_nowait(iq)) {
-				/* check if pinned */
-				if (xfs_ipincount(iq) == 0) {
-					/* arriving here means that
-					 * this inode can be flushed.
-					 * first re-check that it's
-					 * dirty
-					 */
-					iip = iq->i_itemp;
-					if ((iq->i_update_core != 0)||
-					    ((iip != NULL) &&
-					     (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
-						clcount++;
-						error = xfs_iflush_int(iq, bp);
-						if (error) {
-							xfs_iunlock(iq,
-								    XFS_ILOCK_SHARED);
-							goto cluster_corrupt_out;
-						}
-					} else {
-						xfs_ifunlock(iq);
-					}
-				} else {
-					xfs_ifunlock(iq);
-				}
-			}
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
-		}
-	}
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	if (clcount) {
-		XFS_STATS_INC(xs_icluster_flushcnt);
-		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
-	}
+	error = xfs_iflush_cluster(ip, bp);
+	if (error)
+		goto cluster_corrupt_out;
 
 	if (flags & INT_DELWRI) {
 		xfs_bdwrite(mp, bp);
@@ -3230,52 +3309,11 @@ xfs_iflush(
 corrupt_out:
 	xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-	xfs_iflush_abort(ip);
-	/*
-	 * Unlocks the flush lock
-	 */
-	return XFS_ERROR(EFSCORRUPTED);
-
 cluster_corrupt_out:
-	/* Corruption detected in the clustering loop.  Invalidate the
-	 * inode buffer and shut down the filesystem.
-	 */
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/*
-	 * Clean up the buffer.  If it was B_DELWRI, just release it --
-	 * brelse can handle it with no problems.  If not, shut down the
-	 * filesystem before releasing the buffer.
-	 */
-	if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
-		xfs_buf_relse(bp);
-	}
-
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
-	if(!bufwasdelwri) {
-		/*
-		 * Just like incore_relse: if we have b_iodone functions,
-		 * mark the buffer as an error and call them.  Otherwise
-		 * mark it as stale and brelse.
-		 */
-		if (XFS_BUF_IODONE_FUNC(bp)) {
-			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-			XFS_BUF_UNDONE(bp);
-			XFS_BUF_STALE(bp);
-			XFS_BUF_SHUT(bp);
-			XFS_BUF_ERROR(bp,EIO);
-			xfs_biodone(bp);
-		} else {
-			XFS_BUF_STALE(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
-	xfs_iflush_abort(iq);
 	/*
 	 * Unlocks the flush lock
 	 */
+	xfs_iflush_abort(ip);
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
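
The new xfs_iflush_cluster() replaces the per-cluster hlist walk with a gang
lookup in the per-AG inode radix tree.  The key trick is the index mask:
XFS_INODE_CLUSTER_SIZE(mp) >> sb_inodelog is the number of inodes per cluster
buffer, so clearing the low bits of the AG-relative inode number yields the
index of the first inode in the cluster.  For example, with a hypothetical
8192-byte cluster and 256-byte inodes (sb_inodelog = 8) there are 32 inodes per
cluster, mask = ~31, and AG inode 100 maps to first_index 96.  A hedged sketch
of the gather shape follows; the item type, lock, and try_flush() stub are
illustrative stand-ins, not XFS API — only the masking and the gang lookup
mirror the patch:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/* Illustrative cached-object type; XFS uses xfs_inode_t here. */
struct cached_inode {
	unsigned long	agino;		/* index in the per-AG tree */
	/* ... */
};

#define INODES_PER_CLUSTER	32	/* hypothetical: 8192 >> 8 */

static bool try_flush(struct cached_inode *ci)
{
	/* stand-in for the ilock/iflock/pin-count trylock cascade */
	return true;
}

static void flush_cluster_sketch(struct radix_tree_root *cache,
				 rwlock_t *lock, unsigned long agino)
{
	struct cached_inode *batch[INODES_PER_CLUSTER];
	unsigned long mask = ~((unsigned long)INODES_PER_CLUSTER - 1);
	unsigned long first = agino & mask;	/* e.g. agino 100 -> 96 */
	int nr_found, i;

	read_lock(lock);
	/*
	 * The gang lookup returns up to INODES_PER_CLUSTER entries with
	 * index >= first; the tree may hand back inodes beyond the end
	 * of this cluster, so each hit re-checks the mask and the loop
	 * stops at the first outsider.
	 */
	nr_found = radix_tree_gang_lookup(cache, (void **)batch,
					  first, INODES_PER_CLUSTER);
	for (i = 0; i < nr_found; i++) {
		if ((batch[i]->agino & mask) != first)
			break;			/* outside the cluster */
		/*
		 * Trylock-only: under the read lock we must not sleep,
		 * so a busy or pinned inode is skipped, not waited on.
		 */
		if (!try_flush(batch[i]))
			continue;
	}
	read_unlock(lock);
}

This is also why the patch moves the dirty-inode gathering out of xfs_iflush()
into its own function: the lookup needs only the read side of pag_ici_lock, and
every per-inode lock is taken with a _nowait variant so the flush never blocks
while holding it.
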
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c3bfffca9214..93c37697a72c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
 } dm_attrs_t;
 
 /*
- * This is the xfs inode cluster structure.  This structure is used by
- * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
- * the same time.
- */
-typedef struct xfs_icluster {
-	struct hlist_head	icl_inodes;	/* list of inodes on cluster */
-	xfs_daddr_t		icl_blkno;	/* starting block number of
-						 * the cluster */
-	struct xfs_buf		*icl_buf;	/* the inode buffer */
-	spinlock_t		icl_lock;	/* inode list lock */
-} xfs_icluster_t;
-
-/*
  * This is the xfs in-core inode structure.
  * Most of the on-disk inode is embedded in the i_d field.
  *
@@ -248,8 +235,6 @@ typedef struct xfs_inode {
 	unsigned int		i_delayed_blks;	/* count of delay alloc blks */
 
 	xfs_icdinode_t		i_d;		/* most of ondisk inode */
-	xfs_icluster_t		*i_cluster;	/* cluster list header */
-	struct hlist_node	i_cnode;	/* cluster link node */
 
 	xfs_fsize_t		i_size;		/* in-memory size */
 	xfs_fsize_t		i_new_size;	/* size when write completes */
@@ -594,7 +579,6 @@ void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define	xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
 
-extern struct kmem_zone	*xfs_icluster_zone;
 extern struct kmem_zone	*xfs_ifork_zone;
 extern struct kmem_zone	*xfs_inode_zone;
 extern struct kmem_zone	*xfs_ili_zone;
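
Beyond the simpler locking, dropping the two list fields from struct xfs_inode
is a small per-inode memory win.  A back-of-the-envelope illustration — sizes
assume a 64-bit kernel; the struct below exists only to make the arithmetic
concrete and is not part of the patch:

#include <linux/list.h>

/* Illustration only: the footprint the removed fields carried. */
struct removed_cluster_fields {
	void			*i_cluster;	/* one pointer:  8 bytes */
	struct hlist_node	i_cnode;	/* two pointers: 16 bytes */
};

/*
 * sizeof(struct removed_cluster_fields) == 24 on a 64-bit kernel,
 * saved for every cached inode — plus one xfs_icluster allocation
 * per in-memory cluster, which also goes away.
 */
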
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 79bdfb3d6081..3ec27bf8531c 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -112,9 +112,6 @@ xfs_init(void)
 	xfs_ili_zone =
 		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
 					KM_ZONE_SPREAD, NULL);
-	xfs_icluster_zone =
-		kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
-					KM_ZONE_SPREAD, NULL);
 
 	/*
 	 * Allocate global trace buffers.
@@ -152,7 +149,6 @@ xfs_cleanup(void)
 	extern kmem_zone_t	*xfs_inode_zone;
 	extern kmem_zone_t	*xfs_efd_zone;
 	extern kmem_zone_t	*xfs_efi_zone;
-	extern kmem_zone_t	*xfs_icluster_zone;
 
 	xfs_cleanup_procfs();
 	xfs_sysctl_unregister();
@@ -187,7 +183,6 @@ xfs_cleanup(void)
 	kmem_zone_destroy(xfs_efi_zone);
 	kmem_zone_destroy(xfs_ifork_zone);
 	kmem_zone_destroy(xfs_ili_zone);
-	kmem_zone_destroy(xfs_icluster_zone);
 }
 
 /*