author:    David Chinner <dgc@sgi.com>  2008-03-05 21:43:49 -0500
committer: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>  2008-04-17 21:37:41 -0400
commit:    bad5584332e888ac40ca13584e8c114149ddb01e
tree:      f66ff83686cd28967d8b9fc0a8b8d6411a081071
parent:    a3f74ffb6d1448d9a8f482e593b80ec15f1695d4
[XFS] Remove the xfs_icluster structure
Remove the xfs_icluster structure and replace it with a radix tree
lookup. We don't need to keep a list of inodes in each cluster around
anymore, as we can look them up quickly when we need to. The only time
we need to do this now is during inode writeback.

Factor the inode cluster writeback code out of xfs_iflush and convert
it to use radix_tree_gang_lookup() instead of walking a list of inodes
built when we first read in the inodes.

This removes 3 pointers from each xfs_inode structure and the
xfs_icluster structure per inode cluster. Hence we reduce the cache
footprint of the xfs_inodes by between 5-10%, depending on cluster
sparseness.

To be truly efficient we need a radix_tree_gang_lookup_range() call to
stop searching once we are past the end of the cluster, instead of
trying to find a full cluster's worth of inodes.

Before (ia64):
$ cat /sys/slab/xfs_inode/object_size
536

After:
$ cat /sys/slab/xfs_inode/object_size
512

SGI-PV: 977460
SGI-Modid: xfs-linux-melb:xfs-kern:30502a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
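The masking and early-termination logic at the heart of the new
xfs_iflush_cluster() can be seen in isolation in the following
userspace sketch. It is not the kernel code: the cluster size,
inode size, and inode numbers below are illustrative assumptions,
and the sorted array stands in for a radix_tree_gang_lookup() result.

	/*
	 * Userspace sketch of the two ideas in this patch: masking an
	 * inode number down to the first inode of its cluster, and
	 * scanning a gang-lookup result until we walk past the end of
	 * the cluster. All sizes are assumptions for illustration.
	 */
	#include <stdio.h>

	#define CLUSTER_SIZE	8192UL	/* assumed inode cluster size, bytes */
	#define INODELOG	8UL	/* assumed log2(inode size): 256-byte inodes */

	int main(void)
	{
		/* stand-in for radix_tree_gang_lookup() results, sorted by inode number */
		unsigned long found[] = { 1216, 1217, 1230, 1247, 1248, 1300 };
		unsigned long nr_found = sizeof(found) / sizeof(found[0]);

		/* inodes per cluster = cluster bytes >> log2(inode bytes) = 32 */
		unsigned long mask = ~((CLUSTER_SIZE >> INODELOG) - 1);
		unsigned long agino = 1234;		/* example inode being flushed */
		unsigned long first_index = agino & mask;	/* 1234 & ~31 == 1216 */

		for (unsigned long i = 0; i < nr_found; i++) {
			/* an inode outside the cluster ends the scan, as in the patch */
			if ((found[i] & mask) != first_index)
				break;
			printf("inode %lu is in the cluster starting at %lu\n",
			       found[i], first_index);
		}
		return 0;
	}

Running this prints inodes 1216, 1217, 1230 and 1247 and stops at
1248, the first inode past the cluster; that early break is exactly
what a radix_tree_gang_lookup_range() call would provide directly.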
Diffstat (limited to 'fs/xfs/xfs_inode.c')
 fs/xfs/xfs_inode.c | 268 +++++++++++++++++++++++++++++-------------------
 1 file changed, 153 insertions(+), 115 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3c3e9e3c1da8..040c0e41729b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
 
 /*
  * Used in xfs_itruncate().  This is the maximum number of extents
@@ -2994,6 +2993,153 @@ xfs_iflush_fork(
 	return 0;
 }
 
+STATIC int
+xfs_iflush_cluster(
+	xfs_inode_t	*ip,
+	xfs_buf_t	*bp)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_perag_t		*pag = xfs_get_perag(mp, ip->i_ino);
+	unsigned long		first_index, mask;
+	int			ilist_size;
+	xfs_inode_t		**ilist;
+	xfs_inode_t		*iq;
+	xfs_inode_log_item_t	*iip;
+	int			nr_found;
+	int			clcount = 0;
+	int			bufwasdelwri;
+	int			i;
+
+	ASSERT(pag->pagi_inodeok);
+	ASSERT(pag->pag_ici_init);
+
+	ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+	ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+	if (!ilist)
+		return 0;
+
+	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+	read_lock(&pag->pag_ici_lock);
+	/* really need a gang lookup range call here */
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+					first_index,
+					XFS_INODE_CLUSTER_SIZE(mp));
+	if (nr_found == 0)
+		goto out_free;
+
+	for (i = 0; i < nr_found; i++) {
+		iq = ilist[i];
+		if (iq == ip)
+			continue;
+		/* if the inode lies outside this cluster, we're done. */
+		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+			break;
+		/*
+		 * Do an un-protected check to see if the inode is dirty and
+		 * is a candidate for flushing.  These checks will be repeated
+		 * later after the appropriate locks are acquired.
+		 */
+		iip = iq->i_itemp;
+		if ((iq->i_update_core == 0) &&
+		    ((iip == NULL) ||
+		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+		    xfs_ipincount(iq) == 0) {
+			continue;
+		}
+
+		/*
+		 * Try to get locks.  If any are unavailable or it is pinned,
+		 * then this inode cannot be flushed and is skipped.
+		 */
+
+		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+			continue;
+		if (!xfs_iflock_nowait(iq)) {
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+		if (xfs_ipincount(iq)) {
+			xfs_ifunlock(iq);
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+		/*
+		 * arriving here means that this inode can be flushed.  First
+		 * re-check that it's dirty before flushing.
+		 */
+		iip = iq->i_itemp;
+		if ((iq->i_update_core != 0) || ((iip != NULL) &&
+		    (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+			int	error;
+			error = xfs_iflush_int(iq, bp);
+			if (error) {
+				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				goto cluster_corrupt_out;
+			}
+			clcount++;
+		} else {
+			xfs_ifunlock(iq);
+		}
+		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+	}
+
+	if (clcount) {
+		XFS_STATS_INC(xs_icluster_flushcnt);
+		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+	}
+
+out_free:
+	read_unlock(&pag->pag_ici_lock);
+	kmem_free(ilist, ilist_size);
+	return 0;
+
+
+cluster_corrupt_out:
+	/*
+	 * Corruption detected in the clustering loop.  Invalidate the
+	 * inode buffer and shut down the filesystem.
+	 */
+	read_unlock(&pag->pag_ici_lock);
+	/*
+	 * Clean up the buffer.  If it was B_DELWRI, just release it --
+	 * brelse can handle it with no problems.  If not, shut down the
+	 * filesystem before releasing the buffer.
+	 */
+	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+	if (bufwasdelwri)
+		xfs_buf_relse(bp);
+
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+	if (!bufwasdelwri) {
+		/*
+		 * Just like incore_relse: if we have b_iodone functions,
+		 * mark the buffer as an error and call them.  Otherwise
+		 * mark it as stale and brelse.
+		 */
+		if (XFS_BUF_IODONE_FUNC(bp)) {
+			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+			XFS_BUF_UNDONE(bp);
+			XFS_BUF_STALE(bp);
+			XFS_BUF_SHUT(bp);
+			XFS_BUF_ERROR(bp,EIO);
+			xfs_biodone(bp);
+		} else {
+			XFS_BUF_STALE(bp);
+			xfs_buf_relse(bp);
+		}
+	}
+
+	/*
+	 * Unlocks the flush lock
+	 */
+	xfs_iflush_abort(iq);
+	kmem_free(ilist, ilist_size);
+	return XFS_ERROR(EFSCORRUPTED);
+}
+
 /*
  * xfs_iflush() will write a modified inode's changes out to the
  * inode's on disk home.  The caller must have the inode lock held
@@ -3013,13 +3159,8 @@ xfs_iflush(
 	xfs_dinode_t		*dip;
 	xfs_mount_t		*mp;
 	int			error;
-	/* REFERENCED */
-	xfs_inode_t		*iq;
-	int			clcount;	/* count of inodes clustered */
-	int			bufwasdelwri;
-	struct hlist_node	*entry;
-	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
 	int			noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
+	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
 
 	XFS_STATS_INC(xs_iflush_count);
 
@@ -3138,9 +3279,8 @@ xfs_iflush(
3138 * First flush out the inode that xfs_iflush was called with. 3279 * First flush out the inode that xfs_iflush was called with.
3139 */ 3280 */
3140 error = xfs_iflush_int(ip, bp); 3281 error = xfs_iflush_int(ip, bp);
3141 if (error) { 3282 if (error)
3142 goto corrupt_out; 3283 goto corrupt_out;
3143 }
3144 3284
3145 /* 3285 /*
3146 * If the buffer is pinned then push on the log now so we won't 3286 * If the buffer is pinned then push on the log now so we won't
@@ -3153,70 +3293,9 @@ xfs_iflush(
3153 * inode clustering: 3293 * inode clustering:
3154 * see if other inodes can be gathered into this write 3294 * see if other inodes can be gathered into this write
3155 */ 3295 */
3156 spin_lock(&ip->i_cluster->icl_lock); 3296 error = xfs_iflush_cluster(ip, bp);
3157 ip->i_cluster->icl_buf = bp; 3297 if (error)
3158 3298 goto cluster_corrupt_out;
3159 clcount = 0;
3160 hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
3161 if (iq == ip)
3162 continue;
3163
3164 /*
3165 * Do an un-protected check to see if the inode is dirty and
3166 * is a candidate for flushing. These checks will be repeated
3167 * later after the appropriate locks are acquired.
3168 */
3169 iip = iq->i_itemp;
3170 if ((iq->i_update_core == 0) &&
3171 ((iip == NULL) ||
3172 !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
3173 xfs_ipincount(iq) == 0) {
3174 continue;
3175 }
3176
3177 /*
3178 * Try to get locks. If any are unavailable,
3179 * then this inode cannot be flushed and is skipped.
3180 */
3181
3182 /* get inode locks (just i_lock) */
3183 if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
3184 /* get inode flush lock */
3185 if (xfs_iflock_nowait(iq)) {
3186 /* check if pinned */
3187 if (xfs_ipincount(iq) == 0) {
3188 /* arriving here means that
3189 * this inode can be flushed.
3190 * first re-check that it's
3191 * dirty
3192 */
3193 iip = iq->i_itemp;
3194 if ((iq->i_update_core != 0)||
3195 ((iip != NULL) &&
3196 (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
3197 clcount++;
3198 error = xfs_iflush_int(iq, bp);
3199 if (error) {
3200 xfs_iunlock(iq,
3201 XFS_ILOCK_SHARED);
3202 goto cluster_corrupt_out;
3203 }
3204 } else {
3205 xfs_ifunlock(iq);
3206 }
3207 } else {
3208 xfs_ifunlock(iq);
3209 }
3210 }
3211 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3212 }
3213 }
3214 spin_unlock(&ip->i_cluster->icl_lock);
3215
3216 if (clcount) {
3217 XFS_STATS_INC(xs_icluster_flushcnt);
3218 XFS_STATS_ADD(xs_icluster_flushinode, clcount);
3219 }
3220 3299
3221 if (flags & INT_DELWRI) { 3300 if (flags & INT_DELWRI) {
3222 xfs_bdwrite(mp, bp); 3301 xfs_bdwrite(mp, bp);
@@ -3230,52 +3309,11 @@ xfs_iflush(
 corrupt_out:
 	xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-	xfs_iflush_abort(ip);
-	/*
-	 * Unlocks the flush lock
-	 */
-	return XFS_ERROR(EFSCORRUPTED);
-
 cluster_corrupt_out:
-	/* Corruption detected in the clustering loop.  Invalidate the
-	 * inode buffer and shut down the filesystem.
-	 */
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/*
-	 * Clean up the buffer.  If it was B_DELWRI, just release it --
-	 * brelse can handle it with no problems.  If not, shut down the
-	 * filesystem before releasing the buffer.
-	 */
-	if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
-		xfs_buf_relse(bp);
-	}
-
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
-	if(!bufwasdelwri) {
-		/*
-		 * Just like incore_relse: if we have b_iodone functions,
-		 * mark the buffer as an error and call them.  Otherwise
-		 * mark it as stale and brelse.
-		 */
-		if (XFS_BUF_IODONE_FUNC(bp)) {
-			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-			XFS_BUF_UNDONE(bp);
-			XFS_BUF_STALE(bp);
-			XFS_BUF_SHUT(bp);
-			XFS_BUF_ERROR(bp,EIO);
-			xfs_biodone(bp);
-		} else {
-			XFS_BUF_STALE(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
-	xfs_iflush_abort(iq);
 	/*
 	 * Unlocks the flush lock
 	 */
+	xfs_iflush_abort(ip);
 	return XFS_ERROR(EFSCORRUPTED);
 }
 