Diffstat (limited to 'fs')
 fs/compat_ioctl.c            |   22
 fs/ext3/namei.c              |   73
 fs/ext4/namei.c              |   73
 fs/nfs/super.c               |    2
 fs/xfs/linux-2.6/xfs_aops.c  |    1
 fs/xfs/linux-2.6/xfs_super.c |    4
 fs/xfs/xfs_buf_item.h        |    5
 fs/xfs/xfs_filestream.c      |    3
 fs/xfs/xfs_log_recover.c     |   51
 fs/xfs/xfs_mru_cache.c       |   72
 fs/xfs/xfs_mru_cache.h       |    6
 fs/xfs/xfs_trans_buf.c       |    1
 fs/xfs/xfs_vnodeops.c        |   20
 13 files changed, 250 insertions(+), 83 deletions(-)
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a6c9078af124..5a5b7116cefb 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2311,8 +2311,10 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 	struct iwreq __user *iwr_u;
 	struct iw_point __user *iwp;
 	struct compat_iw_point __user *iwp_u;
-	compat_caddr_t pointer;
+	compat_caddr_t pointer_u;
+	void __user *pointer;
 	__u16 length, flags;
+	int ret;
 
 	iwr_u = compat_ptr(arg);
 	iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data;
@@ -2330,17 +2332,29 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 			   sizeof(iwr->ifr_ifrn.ifrn_name)))
 		return -EFAULT;
 
-	if (__get_user(pointer, &iwp_u->pointer) ||
+	if (__get_user(pointer_u, &iwp_u->pointer) ||
 	    __get_user(length, &iwp_u->length) ||
 	    __get_user(flags, &iwp_u->flags))
 		return -EFAULT;
 
-	if (__put_user(compat_ptr(pointer), &iwp->pointer) ||
+	if (__put_user(compat_ptr(pointer_u), &iwp->pointer) ||
 	    __put_user(length, &iwp->length) ||
 	    __put_user(flags, &iwp->flags))
 		return -EFAULT;
 
-	return sys_ioctl(fd, cmd, (unsigned long) iwr);
+	ret = sys_ioctl(fd, cmd, (unsigned long) iwr);
+
+	if (__get_user(pointer, &iwp->pointer) ||
+	    __get_user(length, &iwp->length) ||
+	    __get_user(flags, &iwp->flags))
+		return -EFAULT;
+
+	if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) ||
+	    __put_user(length, &iwp_u->length) ||
+	    __put_user(flags, &iwp_u->flags))
+		return -EFAULT;
+
+	return ret;
 }
 
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
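
The hunk above fixes a one-way translation: the 32-bit wrapper copied iw_point into the native layout before calling the ioctl, but never copied the possibly driver-updated length, flags and payload pointer back out, so 32-bit callers of requests like SIOCGIWESSID saw stale values. A minimal user-space sketch of the two layouts and the copy-back step follows; the struct members mirror the kernel's wireless types, but iw_point_native and iw_point_to_compat() are hypothetical names, not kernel API.

	#include <stdint.h>

	typedef uint32_t compat_caddr_t;	/* 32-bit user-space pointer */

	struct compat_iw_point {		/* layout seen by 32-bit tasks */
		compat_caddr_t	pointer;
		uint16_t	length;
		uint16_t	flags;
	};

	struct iw_point_native {		/* native 64-bit layout */
		void		*pointer;
		uint16_t	length;
		uint16_t	flags;
	};

	/* The step the patch adds: after the ioctl returns, propagate any
	 * fields the driver rewrote back into the compat layout. */
	static void iw_point_to_compat(const struct iw_point_native *n,
				       struct compat_iw_point *c)
	{
		c->pointer = (compat_caddr_t)(uintptr_t)n->pointer;
		c->length  = n->length;
		c->flags   = n->flags;
	}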
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b8177..c1fa1908dba0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
 	u32 hash;
-	u32 offs;
+	u16 offs;
+	u16 size;
 };
 
 #ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 
 	entries = (struct dx_entry *) (((char *)&root->info) +
 				       root->info.info_length);
-	assert(dx_get_limit(entries) == dx_root_limit(dir,
-						      root->info.info_length));
+
+	if (dx_get_limit(entries) != dx_root_limit(dir,
+						   root->info.info_length)) {
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "dx entry: limit != root limit");
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+
 	dxtrace (printk("Look up %x", hash));
 	while (1)
 	{
 		count = dx_get_count(entries);
-		assert (count && count <= dx_get_limit(entries));
+		if (!count || count > dx_get_limit(entries)) {
+			ext3_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: no count or count > limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
+
 		p = entries + 1;
 		q = entries + count - 1;
 		while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
-		assert (dx_get_limit(entries) == dx_node_limit (dir));
+		if (dx_get_limit(entries) != dx_node_limit (dir)) {
+			ext3_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: limit != node limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
 		frame++;
+		frame->bh = NULL;
 	}
 fail2:
 	while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
 		frame--;
 	}
 fail:
+	if (*err == ERR_BAD_DX_DIR)
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "Corrupt dir inode %ld, running e2fsck is "
+			     "recommended.", dir->i_ino);
 	return NULL;
 }
 
@@ -671,6 +698,10 @@ errout:
  * Directory block splitting, compacting
  */
 
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 			struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 			ext3fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u32) ((char *) de - base);
+			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
 		}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 	return count;
 }
 
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
 	struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
 }
 
 #ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext3_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 	return (struct ext3_dir_entry_2 *) (to - rec_len);
 }
 
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
 	struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 	return prev;
 }
 
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 			struct buffer_head **bh,struct dx_frame *frame,
 			struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	u32 hash2;
 	struct dx_map_entry *map;
 	char *data1 = (*bh)->b_data, *data2;
-	unsigned split;
+	unsigned split, move, size, i;
 	struct ext3_dir_entry_2 *de = NULL, *de2;
 	int err = 0;
 
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
 			     blocksize, hinfo, map);
 	map -= count;
-	split = count/2; // need to adjust to actual middle
 	dx_sort_map (map, count);
+	/* Split the existing block in the middle, size-wise */
+	size = 0;
+	move = 0;
+	for (i = count-1; i >= 0; i--) {
+		/* is more than half of this entry in 2nd half of the block? */
+		if (size + map[i].size/2 > blocksize/2)
+			break;
+		size += map[i].size;
+		move++;
+	}
+	/* map index at which we will split */
+	split = count - move;
 	hash2 = map[split].hash;
 	continued = hash2 == map[split - 1].hash;
 	dxtrace(printk("Split block %i at %x, %i/%i\n",
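
The key change in do_split() above replaces the naive split = count/2 (half the entries, regardless of their sizes) with a byte-based split: walk the hash-sorted map from the top, accumulating rec_lens, until the second half would claim more than half the block. Roughly restated below as a stand-alone sketch; the names are hypothetical and an int loop index stands in for the kernel's unsigned one.

	struct map_entry {
		unsigned int	hash;
		unsigned short	offs;
		unsigned short	size;	/* on-disk rec_len of the entry */
	};

	/* Return the map index at which to split so both halves hold
	 * roughly blocksize/2 bytes, mirroring the loop in do_split(). */
	static unsigned int pick_split(const struct map_entry *map,
				       unsigned int count, unsigned int blocksize)
	{
		unsigned int size = 0, move = 0;
		int i;

		for (i = count - 1; i >= 0; i--) {
			/* would more than half of this entry land in the
			 * second half of the block? then stop here */
			if (size + map[i].size / 2 > blocksize / 2)
				break;
			size += map[i].size;
			move++;
		}
		return count - move;
	}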
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af78..5fdb862e71c4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
 	u32 hash;
-	u32 offs;
+	u16 offs;
+	u16 size;
 };
 
 #ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 
 	entries = (struct dx_entry *) (((char *)&root->info) +
 				       root->info.info_length);
-	assert(dx_get_limit(entries) == dx_root_limit(dir,
-						      root->info.info_length));
+
+	if (dx_get_limit(entries) != dx_root_limit(dir,
+						   root->info.info_length)) {
+		ext4_warning(dir->i_sb, __FUNCTION__,
+			     "dx entry: limit != root limit");
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+
 	dxtrace (printk("Look up %x", hash));
 	while (1)
 	{
 		count = dx_get_count(entries);
-		assert (count && count <= dx_get_limit(entries));
+		if (!count || count > dx_get_limit(entries)) {
+			ext4_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: no count or count > limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
+
 		p = entries + 1;
 		q = entries + count - 1;
 		while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 		if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
-		assert (dx_get_limit(entries) == dx_node_limit (dir));
+		if (dx_get_limit(entries) != dx_node_limit (dir)) {
+			ext4_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: limit != node limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
 		frame++;
+		frame->bh = NULL;
 	}
 fail2:
 	while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
 		frame--;
 	}
 fail:
+	if (*err == ERR_BAD_DX_DIR)
+		ext4_warning(dir->i_sb, __FUNCTION__,
+			     "Corrupt dir inode %ld, running e2fsck is "
+			     "recommended.", dir->i_ino);
 	return NULL;
 }
 
@@ -671,6 +698,10 @@ errout:
  * Directory block splitting, compacting
  */
 
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u32) ((char *) de - base);
+			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
 		}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 	return count;
 }
 
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
 	struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
 }
 
 #ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext4_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 	return (struct ext4_dir_entry_2 *) (to - rec_len);
 }
 
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
 	struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 	return prev;
 }
 
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 			struct buffer_head **bh,struct dx_frame *frame,
 			struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	u32 hash2;
 	struct dx_map_entry *map;
 	char *data1 = (*bh)->b_data, *data2;
-	unsigned split;
+	unsigned split, move, size, i;
 	struct ext4_dir_entry_2 *de = NULL, *de2;
 	int err = 0;
 
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
 			     blocksize, hinfo, map);
 	map -= count;
-	split = count/2; // need to adjust to actual middle
 	dx_sort_map (map, count);
+	/* Split the existing block in the middle, size-wise */
+	size = 0;
+	move = 0;
+	for (i = count-1; i >= 0; i--) {
+		/* is more than half of this entry in 2nd half of the block? */
+		if (size + map[i].size/2 > blocksize/2)
+			break;
+		size += map[i].size;
+		move++;
+	}
+	/* map index at which we will split */
+	split = count - move;
 	hash2 = map[split].hash;
 	continued = hash2 == map[split - 1].hash;
 	dxtrace(printk("Split block %i at %x, %i/%i\n",
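
The ext4 hunks are an exact mirror of the ext3 ones. The other pattern both files repeat is worth isolating: every assert() on on-disk htree metadata becomes a warning plus ERR_BAD_DX_DIR, so a corrupted directory now fails the lookup instead of oopsing the kernel. A reduced sketch of that shape follows, with hypothetical names standing in for ext4_warning()/brelse().

	#include <stdio.h>

	#define ERR_BAD_DIR	(-75)	/* stand-in for ERR_BAD_DX_DIR */

	static void warn(const char *func, const char *msg)
	{
		fprintf(stderr, "warning: %s: %s\n", func, msg);
	}

	/* Before: assert(limit == expected); -- corruption took the box
	 * down. After: report, hand the caller an error code, unwind. */
	static int check_dx_limit(unsigned int limit, unsigned int expected,
				  int *err)
	{
		if (limit != expected) {
			warn(__func__, "dx entry: limit != expected limit");
			*err = ERR_BAD_DIR;
			return -1;	/* caller does brelse() and goto fail */
		}
		return 0;
	}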
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8ed593766f16..b878528b64c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
 	unregister_shrinker(&acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
-	nfs_unregister_sysctl();
 #endif
+	nfs_unregister_sysctl();
 	unregister_filesystem(&nfs_fs_type);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d9c40fe64195..5f152f60d74d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
 		ip->i_d.di_size = isize;
 		ip->i_update_core = 1;
 		ip->i_update_size = 1;
+		mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4528f9a3f304..491d1f4f202d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
 
 	if (vp) {
 		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-		if (sync)
+		if (sync) {
+			filemap_fdatawait(inode->i_mapping);
 			flags |= FLUSH_SYNC;
+		}
 		error = bhv_vop_iflush(vp, flags);
 		if (error == EAGAIN)
 			error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
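
Taken together with the xfs_aops.c hunk, this enforces an ordering for sync inode writeback: wait for in-flight data I/O first, because I/O completion can itself dirty the inode (xfs_setfilesize() updates di_size), and only then flush the inode. A shape sketch under that assumption; both helpers are hypothetical stand-ins, not XFS API.

	struct inode_stub;				/* opaque for the sketch */

	extern void wait_on_dirty_data(struct inode_stub *ip);	/* hypothetical */
	extern int flush_inode_sync(struct inode_stub *ip);	/* hypothetical */

	int sync_write_inode(struct inode_stub *ip)
	{
		/* 1. drain in-flight data I/O, so completions that dirty the
		 *    inode core (e.g. the on-disk size update) land first */
		wait_on_dirty_data(ip);

		/* 2. only then push the inode metadata out synchronously */
		return flush_inode_sync(ip);
	}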
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index d7e136143066..fa25b7dcc6c3 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
 #define	XFS_BLI_UDQUOT_BUF	0x4
 #define	XFS_BLI_PDQUOT_BUF	0x8
 #define	XFS_BLI_GDQUOT_BUF	0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define	XFS_BLI_INODE_NEW_BUF	0x20
 
 #define	XFS_BLI_CHUNK		128
 #define	XFS_BLI_SHIFT		7
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index ce2278611bb7..16f8e175167d 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -467,8 +467,7 @@ void
 xfs_filestream_flush(
 	xfs_mount_t	*mp)
 {
-	/* point in time flush, so keep the reaper running */
-	xfs_mru_cache_flush(mp->m_filestream, 1);
+	xfs_mru_cache_flush(mp->m_filestream);
 }
 
 /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ae6e8e5f3db..dacb19739cc2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+	xfs_mount_t		*mp,
 	xlog_recover_item_t	*item,
 	xfs_buf_t		*bp,
 	xfs_buf_log_format_t	*buf_f)
@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
 	unsigned int		*data_map = NULL;
 	unsigned int		map_size = 0;
 	int			error;
+	int			stale_buf = 1;
+
+	/*
+	 * Scan through the on-disk inode buffer and attempt to
+	 * determine if it has been written to since it was logged.
+	 *
+	 * - If any of the magic numbers are incorrect then the buffer is stale
+	 * - If any of the modes are non-zero then the buffer is not stale
+	 * - If all of the modes are zero and at least one of the generation
+	 *   counts is non-zero then the buffer is stale
+	 *
+	 * If the end result is a stale buffer then the log buffer is replayed
+	 * otherwise it is skipped.
+	 *
+	 * This heuristic is not perfect. It can be improved by scanning the
+	 * entire inode chunk for evidence that any of the inode clusters have
+	 * been updated. To fix this problem completely we will need a major
+	 * architectural change to the logging system.
+	 */
+	if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
+		xfs_dinode_t	*dip;
+		int		inodes_per_buf;
+		int		mode_count = 0;
+		int		gen_count = 0;
+
+		stale_buf = 0;
+		inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
+		for (i = 0; i < inodes_per_buf; i++) {
+			dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+				i * mp->m_sb.sb_inodesize);
+			if (be16_to_cpu(dip->di_core.di_magic) !=
+					XFS_DINODE_MAGIC) {
+				stale_buf = 1;
+				break;
+			}
+			if (be16_to_cpu(dip->di_core.di_mode))
+				mode_count++;
+			if (be16_to_cpu(dip->di_core.di_gen))
+				gen_count++;
+		}
+
+		if (!mode_count && gen_count)
+			stale_buf = 1;
+	}
 
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:
@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
 					       -1, 0, XFS_QMOPT_DOWARN,
 					       "dquot_buf_recover");
 		}
-		if (!error)
+		if (!error && stale_buf)
 			memcpy(xfs_buf_offset(bp,
 				(uint)bit << XFS_BLI_SHIFT),	/* dest */
 				item->ri_buf[i].i_addr,		/* source */
@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
 	if (log->l_quotaoffs_flag & type)
 		return;
 
-	xlog_recover_do_reg_buffer(item, bp, buf_f);
+	xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*
@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
 	    (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
-		xlog_recover_do_reg_buffer(item, bp, buf_f);
+		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 	}
 	if (error)
 		return XFS_ERROR(error);
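
The heuristic the long comment describes is easier to read outside the replay plumbing. Restated below as a hypothetical user-space function over a simplified dinode core (only the three fields the check reads), returning non-zero when the logged buffer should be replayed; this is an illustration, not the kernel code.

	#include <stdint.h>

	struct dinode_core_stub {
		uint16_t	magic;	/* XFS_DINODE_MAGIC when valid */
		uint16_t	mode;	/* non-zero once the inode is in use */
		uint16_t	gen;	/* bumped when the inode slot is reused */
	};

	#define DINODE_MAGIC	0x494e	/* "IN" */

	static int inode_buf_is_stale(const struct dinode_core_stub *di, int n)
	{
		int i, mode_count = 0, gen_count = 0;

		for (i = 0; i < n; i++) {
			if (di[i].magic != DINODE_MAGIC)
				return 1;	/* bad magic: never written, replay */
			if (di[i].mode)
				mode_count++;
			if (di[i].gen)
				gen_count++;
		}
		if (mode_count)
			return 0;	/* in-use inodes hit disk: don't clobber */
		return gen_count ? 1 : 0;	/* freed-and-reused chunk: replay */
	}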
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 7deb9e3cbbd3..e0b358c1c533 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
 	 */
 	if (!_xfs_mru_cache_migrate(mru, now)) {
 		mru->time_zero = now;
-		if (!mru->next_reap)
-			mru->next_reap = mru->grp_count * mru->grp_time;
+		if (!mru->queued) {
+			mru->queued = 1;
+			queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+					   mru->grp_count * mru->grp_time);
+		}
 	} else {
 		grp = (now - mru->time_zero) / mru->grp_time;
 		grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
 	struct work_struct	*work)
 {
 	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
-	unsigned long		now;
+	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return;
 
 	mutex_spinlock(&mru->lock);
-	now = jiffies;
-	if (mru->reap_all ||
-	    (mru->next_reap && time_after(now, mru->next_reap))) {
-		if (mru->reap_all)
-			now += mru->grp_count * mru->grp_time * 2;
-		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
-		_xfs_mru_cache_clear_reap_list(mru);
+	next = _xfs_mru_cache_migrate(mru, jiffies);
+	_xfs_mru_cache_clear_reap_list(mru);
+
+	mru->queued = next;
+	if ((mru->queued > 0)) {
+		now = jiffies;
+		if (next <= now)
+			next = 0;
+		else
+			next -= now;
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
 	}
 
-	/*
-	 * the process that triggered the reap_all is responsible
-	 * for restating the periodic reap if it is required.
-	 */
-	if (!mru->reap_all)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-	mru->reap_all = 0;
 	mutex_spinunlock(&mru->lock, 0);
 }
 
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
 
 	/* An extra list is needed to avoid reaping up to a grp_time early. */
 	mru->grp_count = grp_count + 1;
-	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+	mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
 
 	if (!mru->lists) {
 		err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
 	mru->grp_time  = grp_time;
 	mru->free_func = free_func;
 
-	/* start up the reaper event */
-	mru->next_reap = 0;
-	mru->reap_all = 0;
-	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
 	*mrup = mru;
 
 exit:
@@ -394,35 +389,25 @@ exit:
  * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
  * free functions as they're deleted. When this function returns, the caller is
  * guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
 */
 void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru,
-	int			restart)
+	xfs_mru_cache_t		*mru)
 {
 	if (!mru || !mru->lists)
 		return;
 
-	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
 	mutex_spinlock(&mru->lock);
-	mru->reap_all = 1;
-	mutex_spinunlock(&mru->lock, 0);
+	if (mru->queued) {
+		mutex_spinunlock(&mru->lock, 0);
+		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+		mutex_spinlock(&mru->lock);
+	}
 
-	queue_work(xfs_mru_reap_wq, &mru->work.work);
-	flush_workqueue(xfs_mru_reap_wq);
+	_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+	_xfs_mru_cache_clear_reap_list(mru);
 
-	mutex_spinlock(&mru->lock);
-	WARN_ON_ONCE(mru->reap_all != 0);
-	mru->reap_all = 0;
-	if (restart)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
 	mutex_spinunlock(&mru->lock, 0);
 }
 
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
 	if (!mru || !mru->lists)
 		return;
 
-	/* we don't want the reaper to restart here */
-	xfs_mru_cache_flush(mru, 0);
+	xfs_mru_cache_flush(mru);
 
 	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
 	kmem_free(mru, sizeof(*mru));
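
The net effect on the reaper is a switch from an always-armed periodic timer (plus the reap_all/next_reap handshake) to a self-rearming delayed work item that only stays queued while the cache holds entries. The discipline, reduced to a compilable sketch with hypothetical stub helpers in place of queue_delayed_work() and _xfs_mru_cache_migrate():

	struct mru_stub {
		unsigned long	queued;	/* reap work currently pending */
		unsigned long	window;	/* grp_count * grp_time */
	};

	/* hypothetical stand-ins for the workqueue and migrate calls */
	static void schedule_reap(struct mru_stub *mru, unsigned long delay)
	{ (void)mru; (void)delay; }
	static unsigned long migrate(struct mru_stub *mru, unsigned long now)
	{ (void)mru; return now; }

	/* Insert side: kick the reaper only if it is idle. */
	static void on_insert(struct mru_stub *mru)
	{
		if (!mru->queued) {
			mru->queued = 1;
			schedule_reap(mru, mru->window);
		}
	}

	/* Reaper side: expire what's due, then re-arm only if work remains;
	 * migrate() is assumed to return 0 once the cache is empty. */
	static void on_reap(struct mru_stub *mru, unsigned long now)
	{
		unsigned long next = migrate(mru, now);

		mru->queued = next;
		if (mru->queued)
			schedule_reap(mru, next > now ? next - now : 0);
	}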
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 624fd10ee8e5..dd58ea1bbebe 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
 	unsigned int		grp_time;  /* Time period spanned by grps.  */
 	unsigned int		lru_grp;   /* Group containing time zero.   */
 	unsigned long		time_zero; /* Time first element was added. */
-	unsigned long		next_reap; /* Time that the reaper should
-					      next do something. */
-	unsigned int		reap_all;  /* if set, reap all lists */
 	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
 	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
 } xfs_mru_cache_t;
 
 int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 			 unsigned int grp_count,
 			 xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 			 void *value);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..95fff6872a2f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+	bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
 }
 
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1a5ad8cd97b0..603459229904 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);
 
+	if (flag & FSYNC_DATA)
+		filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+
 	/*
 	 * We always need to make sure that the required inode state
 	 * is safe on disk.  The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
 			sync_lsn = log->l_last_sync_lsn;
 			GRANT_UNLOCK(log, s);
 
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
-				return 0;
+			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
+				if (flags & FLUSH_SYNC)
+					log_flags |= XFS_LOG_SYNC;
+				error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+				if (error)
+					return error;
+			}
 
-			if (flags & FLUSH_SYNC)
-				log_flags |= XFS_LOG_SYNC;
-			return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+			if (ip->i_update_core == 0)
+				return 0;
 		}
 	}
 
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
 	if (flags & FLUSH_INODE) {
 		int	flush_flags;
 
-		if (xfs_ipincount(ip))
-			return EAGAIN;
-
 		if (flags & FLUSH_SYNC) {
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 			xfs_iflock(ip);
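
The rewritten LSN check inverts the early return: instead of bailing out when the inode's last logged LSN is already covered by the last log sync, xfs_inode_flush() now forces the log up to ili_last_lsn when it is not covered, and only afterwards decides whether the inode core itself still needs writeback. A stand-alone sketch of that control flow, with hypothetical names in place of xfs_log_force() and the inode writeback path:

	extern int force_log_to(unsigned long long lsn, int sync);	/* hypothetical */
	extern int write_inode_core(void *ip, int sync);		/* hypothetical */

	static int flush_logged_inode(void *ip, unsigned long long last_lsn,
				      unsigned long long sync_lsn,
				      int core_dirty, int sync)
	{
		int error;

		/* logged changes not yet on disk? force them out first */
		if (last_lsn > sync_lsn) {
			error = force_log_to(last_lsn, sync);
			if (error)
				return error;
		}

		/* nothing left in the inode core: the log force sufficed */
		if (!core_dirty)
			return 0;

		return write_inode_core(ip, sync);
	}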