diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 268 |
1 files changed, 153 insertions, 115 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3c3e9e3c1da8..040c0e41729b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -55,7 +55,6 @@ | |||
55 | 55 | ||
56 | kmem_zone_t *xfs_ifork_zone; | 56 | kmem_zone_t *xfs_ifork_zone; |
57 | kmem_zone_t *xfs_inode_zone; | 57 | kmem_zone_t *xfs_inode_zone; |
58 | kmem_zone_t *xfs_icluster_zone; | ||
59 | 58 | ||
60 | /* | 59 | /* |
61 | * Used in xfs_itruncate(). This is the maximum number of extents | 60 | * Used in xfs_itruncate(). This is the maximum number of extents |
@@ -2994,6 +2993,153 @@ xfs_iflush_fork( | |||
2994 | return 0; | 2993 | return 0; |
2995 | } | 2994 | } |
2996 | 2995 | ||
2996 | STATIC int | ||
2997 | xfs_iflush_cluster( | ||
2998 | xfs_inode_t *ip, | ||
2999 | xfs_buf_t *bp) | ||
3000 | { | ||
3001 | xfs_mount_t *mp = ip->i_mount; | ||
3002 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
3003 | unsigned long first_index, mask; | ||
3004 | int ilist_size; | ||
3005 | xfs_inode_t **ilist; | ||
3006 | xfs_inode_t *iq; | ||
3007 | xfs_inode_log_item_t *iip; | ||
3008 | int nr_found; | ||
3009 | int clcount = 0; | ||
3010 | int bufwasdelwri; | ||
3011 | int i; | ||
3012 | |||
3013 | ASSERT(pag->pagi_inodeok); | ||
3014 | ASSERT(pag->pag_ici_init); | ||
3015 | |||
3016 | ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); | ||
3017 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL); | ||
3018 | if (!ilist) | ||
3019 | return 0; | ||
3020 | |||
3021 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | ||
3022 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | ||
3023 | read_lock(&pag->pag_ici_lock); | ||
3024 | /* really need a gang lookup range call here */ | ||
3025 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | ||
3026 | first_index, | ||
3027 | XFS_INODE_CLUSTER_SIZE(mp)); | ||
3028 | if (nr_found == 0) | ||
3029 | goto out_free; | ||
3030 | |||
3031 | for (i = 0; i < nr_found; i++) { | ||
3032 | iq = ilist[i]; | ||
3033 | if (iq == ip) | ||
3034 | continue; | ||
3035 | /* if the inode lies outside this cluster, we're done. */ | ||
3036 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | ||
3037 | break; | ||
3038 | /* | ||
3039 | * Do an un-protected check to see if the inode is dirty and | ||
3040 | * is a candidate for flushing. These checks will be repeated | ||
3041 | * later after the appropriate locks are acquired. | ||
3042 | */ | ||
3043 | iip = iq->i_itemp; | ||
3044 | if ((iq->i_update_core == 0) && | ||
3045 | ((iip == NULL) || | ||
3046 | !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
3047 | xfs_ipincount(iq) == 0) { | ||
3048 | continue; | ||
3049 | } | ||
3050 | |||
3051 | /* | ||
3052 | * Try to get locks. If any are unavailable or it is pinned, | ||
3053 | * then this inode cannot be flushed and is skipped. | ||
3054 | */ | ||
3055 | |||
3056 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) | ||
3057 | continue; | ||
3058 | if (!xfs_iflock_nowait(iq)) { | ||
3059 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3060 | continue; | ||
3061 | } | ||
3062 | if (xfs_ipincount(iq)) { | ||
3063 | xfs_ifunlock(iq); | ||
3064 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3065 | continue; | ||
3066 | } | ||
3067 | |||
3068 | /* | ||
3069 | * arriving here means that this inode can be flushed. First | ||
3070 | * re-check that it's dirty before flushing. | ||
3071 | */ | ||
3072 | iip = iq->i_itemp; | ||
3073 | if ((iq->i_update_core != 0) || ((iip != NULL) && | ||
3074 | (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3075 | int error; | ||
3076 | error = xfs_iflush_int(iq, bp); | ||
3077 | if (error) { | ||
3078 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3079 | goto cluster_corrupt_out; | ||
3080 | } | ||
3081 | clcount++; | ||
3082 | } else { | ||
3083 | xfs_ifunlock(iq); | ||
3084 | } | ||
3085 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3086 | } | ||
3087 | |||
3088 | if (clcount) { | ||
3089 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
3090 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
3091 | } | ||
3092 | |||
3093 | out_free: | ||
3094 | read_unlock(&pag->pag_ici_lock); | ||
3095 | kmem_free(ilist, ilist_size); | ||
3096 | return 0; | ||
3097 | |||
3098 | |||
3099 | cluster_corrupt_out: | ||
3100 | /* | ||
3101 | * Corruption detected in the clustering loop. Invalidate the | ||
3102 | * inode buffer and shut down the filesystem. | ||
3103 | */ | ||
3104 | read_unlock(&pag->pag_ici_lock); | ||
3105 | /* | ||
3106 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
3107 | * brelse can handle it with no problems. If not, shut down the | ||
3108 | * filesystem before releasing the buffer. | ||
3109 | */ | ||
3110 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | ||
3111 | if (bufwasdelwri) | ||
3112 | xfs_buf_relse(bp); | ||
3113 | |||
3114 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
3115 | |||
3116 | if (!bufwasdelwri) { | ||
3117 | /* | ||
3118 | * Just like incore_relse: if we have b_iodone functions, | ||
3119 | * mark the buffer as an error and call them. Otherwise | ||
3120 | * mark it as stale and brelse. | ||
3121 | */ | ||
3122 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
3123 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
3124 | XFS_BUF_UNDONE(bp); | ||
3125 | XFS_BUF_STALE(bp); | ||
3126 | XFS_BUF_SHUT(bp); | ||
3127 | XFS_BUF_ERROR(bp,EIO); | ||
3128 | xfs_biodone(bp); | ||
3129 | } else { | ||
3130 | XFS_BUF_STALE(bp); | ||
3131 | xfs_buf_relse(bp); | ||
3132 | } | ||
3133 | } | ||
3134 | |||
3135 | /* | ||
3136 | * Unlocks the flush lock | ||
3137 | */ | ||
3138 | xfs_iflush_abort(iq); | ||
3139 | kmem_free(ilist, ilist_size); | ||
3140 | return XFS_ERROR(EFSCORRUPTED); | ||
3141 | } | ||
3142 | |||
2997 | /* | 3143 | /* |
2998 | * xfs_iflush() will write a modified inode's changes out to the | 3144 | * xfs_iflush() will write a modified inode's changes out to the |
2999 | * inode's on disk home. The caller must have the inode lock held | 3145 | * inode's on disk home. The caller must have the inode lock held |
@@ -3013,13 +3159,8 @@ xfs_iflush( | |||
3013 | xfs_dinode_t *dip; | 3159 | xfs_dinode_t *dip; |
3014 | xfs_mount_t *mp; | 3160 | xfs_mount_t *mp; |
3015 | int error; | 3161 | int error; |
3016 | /* REFERENCED */ | ||
3017 | xfs_inode_t *iq; | ||
3018 | int clcount; /* count of inodes clustered */ | ||
3019 | int bufwasdelwri; | ||
3020 | struct hlist_node *entry; | ||
3021 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | ||
3022 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); | 3162 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); |
3163 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | ||
3023 | 3164 | ||
3024 | XFS_STATS_INC(xs_iflush_count); | 3165 | XFS_STATS_INC(xs_iflush_count); |
3025 | 3166 | ||
@@ -3138,9 +3279,8 @@ xfs_iflush( | |||
3138 | * First flush out the inode that xfs_iflush was called with. | 3279 | * First flush out the inode that xfs_iflush was called with. |
3139 | */ | 3280 | */ |
3140 | error = xfs_iflush_int(ip, bp); | 3281 | error = xfs_iflush_int(ip, bp); |
3141 | if (error) { | 3282 | if (error) |
3142 | goto corrupt_out; | 3283 | goto corrupt_out; |
3143 | } | ||
3144 | 3284 | ||
3145 | /* | 3285 | /* |
3146 | * If the buffer is pinned then push on the log now so we won't | 3286 | * If the buffer is pinned then push on the log now so we won't |
@@ -3153,70 +3293,9 @@ xfs_iflush( | |||
3153 | * inode clustering: | 3293 | * inode clustering: |
3154 | * see if other inodes can be gathered into this write | 3294 | * see if other inodes can be gathered into this write |
3155 | */ | 3295 | */ |
3156 | spin_lock(&ip->i_cluster->icl_lock); | 3296 | error = xfs_iflush_cluster(ip, bp); |
3157 | ip->i_cluster->icl_buf = bp; | 3297 | if (error) |
3158 | 3298 | goto cluster_corrupt_out; | |
3159 | clcount = 0; | ||
3160 | hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { | ||
3161 | if (iq == ip) | ||
3162 | continue; | ||
3163 | |||
3164 | /* | ||
3165 | * Do an un-protected check to see if the inode is dirty and | ||
3166 | * is a candidate for flushing. These checks will be repeated | ||
3167 | * later after the appropriate locks are acquired. | ||
3168 | */ | ||
3169 | iip = iq->i_itemp; | ||
3170 | if ((iq->i_update_core == 0) && | ||
3171 | ((iip == NULL) || | ||
3172 | !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
3173 | xfs_ipincount(iq) == 0) { | ||
3174 | continue; | ||
3175 | } | ||
3176 | |||
3177 | /* | ||
3178 | * Try to get locks. If any are unavailable, | ||
3179 | * then this inode cannot be flushed and is skipped. | ||
3180 | */ | ||
3181 | |||
3182 | /* get inode locks (just i_lock) */ | ||
3183 | if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { | ||
3184 | /* get inode flush lock */ | ||
3185 | if (xfs_iflock_nowait(iq)) { | ||
3186 | /* check if pinned */ | ||
3187 | if (xfs_ipincount(iq) == 0) { | ||
3188 | /* arriving here means that | ||
3189 | * this inode can be flushed. | ||
3190 | * first re-check that it's | ||
3191 | * dirty | ||
3192 | */ | ||
3193 | iip = iq->i_itemp; | ||
3194 | if ((iq->i_update_core != 0)|| | ||
3195 | ((iip != NULL) && | ||
3196 | (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3197 | clcount++; | ||
3198 | error = xfs_iflush_int(iq, bp); | ||
3199 | if (error) { | ||
3200 | xfs_iunlock(iq, | ||
3201 | XFS_ILOCK_SHARED); | ||
3202 | goto cluster_corrupt_out; | ||
3203 | } | ||
3204 | } else { | ||
3205 | xfs_ifunlock(iq); | ||
3206 | } | ||
3207 | } else { | ||
3208 | xfs_ifunlock(iq); | ||
3209 | } | ||
3210 | } | ||
3211 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3212 | } | ||
3213 | } | ||
3214 | spin_unlock(&ip->i_cluster->icl_lock); | ||
3215 | |||
3216 | if (clcount) { | ||
3217 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
3218 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
3219 | } | ||
3220 | 3299 | ||
3221 | if (flags & INT_DELWRI) { | 3300 | if (flags & INT_DELWRI) { |
3222 | xfs_bdwrite(mp, bp); | 3301 | xfs_bdwrite(mp, bp); |
@@ -3230,52 +3309,11 @@ xfs_iflush( | |||
3230 | corrupt_out: | 3309 | corrupt_out: |
3231 | xfs_buf_relse(bp); | 3310 | xfs_buf_relse(bp); |
3232 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 3311 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
3233 | xfs_iflush_abort(ip); | ||
3234 | /* | ||
3235 | * Unlocks the flush lock | ||
3236 | */ | ||
3237 | return XFS_ERROR(EFSCORRUPTED); | ||
3238 | |||
3239 | cluster_corrupt_out: | 3312 | cluster_corrupt_out: |
3240 | /* Corruption detected in the clustering loop. Invalidate the | ||
3241 | * inode buffer and shut down the filesystem. | ||
3242 | */ | ||
3243 | spin_unlock(&ip->i_cluster->icl_lock); | ||
3244 | |||
3245 | /* | ||
3246 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
3247 | * brelse can handle it with no problems. If not, shut down the | ||
3248 | * filesystem before releasing the buffer. | ||
3249 | */ | ||
3250 | if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { | ||
3251 | xfs_buf_relse(bp); | ||
3252 | } | ||
3253 | |||
3254 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
3255 | |||
3256 | if(!bufwasdelwri) { | ||
3257 | /* | ||
3258 | * Just like incore_relse: if we have b_iodone functions, | ||
3259 | * mark the buffer as an error and call them. Otherwise | ||
3260 | * mark it as stale and brelse. | ||
3261 | */ | ||
3262 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
3263 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
3264 | XFS_BUF_UNDONE(bp); | ||
3265 | XFS_BUF_STALE(bp); | ||
3266 | XFS_BUF_SHUT(bp); | ||
3267 | XFS_BUF_ERROR(bp,EIO); | ||
3268 | xfs_biodone(bp); | ||
3269 | } else { | ||
3270 | XFS_BUF_STALE(bp); | ||
3271 | xfs_buf_relse(bp); | ||
3272 | } | ||
3273 | } | ||
3274 | |||
3275 | xfs_iflush_abort(iq); | ||
3276 | /* | 3313 | /* |
3277 | * Unlocks the flush lock | 3314 | * Unlocks the flush lock |
3278 | */ | 3315 | */ |
3316 | xfs_iflush_abort(ip); | ||
3279 | return XFS_ERROR(EFSCORRUPTED); | 3317 | return XFS_ERROR(EFSCORRUPTED); |
3280 | } | 3318 | } |
3281 | 3319 | ||