aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext3/namei.c73
-rw-r--r--fs/ext4/namei.c73
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/ocfs2/aops.c33
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/localalloc.h2
-rw-r--r--fs/ocfs2/suballoc.c29
-rw-r--r--fs/ocfs2/suballoc.h11
-rw-r--r--fs/ocfs2/vote.c4
-rw-r--r--fs/signalfd.c190
12 files changed, 217 insertions, 211 deletions
diff --git a/fs/exec.c b/fs/exec.c
index c21a8cc06277..073b0b8c6d05 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,7 +50,6 @@
50#include <linux/tsacct_kern.h> 50#include <linux/tsacct_kern.h>
51#include <linux/cn_proc.h> 51#include <linux/cn_proc.h>
52#include <linux/audit.h> 52#include <linux/audit.h>
53#include <linux/signalfd.h>
54 53
55#include <asm/uaccess.h> 54#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 55#include <asm/mmu_context.h>
@@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk)
784 * and we can just re-use it all. 783 * and we can just re-use it all.
785 */ 784 */
786 if (atomic_read(&oldsighand->count) <= 1) { 785 if (atomic_read(&oldsighand->count) <= 1) {
787 signalfd_detach(tsk);
788 exit_itimers(sig); 786 exit_itimers(sig);
789 return 0; 787 return 0;
790 } 788 }
@@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk)
923 sig->flags = 0; 921 sig->flags = 0;
924 922
925no_thread_group: 923no_thread_group:
926 signalfd_detach(tsk);
927 exit_itimers(sig); 924 exit_itimers(sig);
928 if (leader) 925 if (leader)
929 release_task(leader); 926 release_task(leader);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b8177..c1fa1908dba0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
140struct dx_map_entry 140struct dx_map_entry
141{ 141{
142 u32 hash; 142 u32 hash;
143 u32 offs; 143 u16 offs;
144 u16 size;
144}; 145};
145 146
146#ifdef CONFIG_EXT3_INDEX 147#ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
379 380
380 entries = (struct dx_entry *) (((char *)&root->info) + 381 entries = (struct dx_entry *) (((char *)&root->info) +
381 root->info.info_length); 382 root->info.info_length);
382 assert(dx_get_limit(entries) == dx_root_limit(dir, 383
383 root->info.info_length)); 384 if (dx_get_limit(entries) != dx_root_limit(dir,
385 root->info.info_length)) {
386 ext3_warning(dir->i_sb, __FUNCTION__,
387 "dx entry: limit != root limit");
388 brelse(bh);
389 *err = ERR_BAD_DX_DIR;
390 goto fail;
391 }
392
384 dxtrace (printk("Look up %x", hash)); 393 dxtrace (printk("Look up %x", hash));
385 while (1) 394 while (1)
386 { 395 {
387 count = dx_get_count(entries); 396 count = dx_get_count(entries);
388 assert (count && count <= dx_get_limit(entries)); 397 if (!count || count > dx_get_limit(entries)) {
398 ext3_warning(dir->i_sb, __FUNCTION__,
399 "dx entry: no count or count > limit");
400 brelse(bh);
401 *err = ERR_BAD_DX_DIR;
402 goto fail2;
403 }
404
389 p = entries + 1; 405 p = entries + 1;
390 q = entries + count - 1; 406 q = entries + count - 1;
391 while (p <= q) 407 while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
423 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) 439 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
424 goto fail2; 440 goto fail2;
425 at = entries = ((struct dx_node *) bh->b_data)->entries; 441 at = entries = ((struct dx_node *) bh->b_data)->entries;
426 assert (dx_get_limit(entries) == dx_node_limit (dir)); 442 if (dx_get_limit(entries) != dx_node_limit (dir)) {
443 ext3_warning(dir->i_sb, __FUNCTION__,
444 "dx entry: limit != node limit");
445 brelse(bh);
446 *err = ERR_BAD_DX_DIR;
447 goto fail2;
448 }
427 frame++; 449 frame++;
450 frame->bh = NULL;
428 } 451 }
429fail2: 452fail2:
430 while (frame >= frame_in) { 453 while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
432 frame--; 455 frame--;
433 } 456 }
434fail: 457fail:
458 if (*err == ERR_BAD_DX_DIR)
459 ext3_warning(dir->i_sb, __FUNCTION__,
460 "Corrupt dir inode %ld, running e2fsck is "
461 "recommended.", dir->i_ino);
435 return NULL; 462 return NULL;
436} 463}
437 464
@@ -671,6 +698,10 @@ errout:
671 * Directory block splitting, compacting 698 * Directory block splitting, compacting
672 */ 699 */
673 700
701/*
702 * Create map of hash values, offsets, and sizes, stored at end of block.
703 * Returns number of entries mapped.
704 */
674static int dx_make_map (struct ext3_dir_entry_2 *de, int size, 705static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
675 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) 706 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
676{ 707{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
684 ext3fs_dirhash(de->name, de->name_len, &h); 715 ext3fs_dirhash(de->name, de->name_len, &h);
685 map_tail--; 716 map_tail--;
686 map_tail->hash = h.hash; 717 map_tail->hash = h.hash;
687 map_tail->offs = (u32) ((char *) de - base); 718 map_tail->offs = (u16) ((char *) de - base);
719 map_tail->size = le16_to_cpu(de->rec_len);
688 count++; 720 count++;
689 cond_resched(); 721 cond_resched();
690 } 722 }
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
694 return count; 726 return count;
695} 727}
696 728
729/* Sort map by hash value */
697static void dx_sort_map (struct dx_map_entry *map, unsigned count) 730static void dx_sort_map (struct dx_map_entry *map, unsigned count)
698{ 731{
699 struct dx_map_entry *p, *q, *top = map + count - 1; 732 struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
1091} 1124}
1092 1125
1093#ifdef CONFIG_EXT3_INDEX 1126#ifdef CONFIG_EXT3_INDEX
1127/*
1128 * Move count entries from end of map between two memory locations.
1129 * Returns pointer to last entry moved.
1130 */
1094static struct ext3_dir_entry_2 * 1131static struct ext3_dir_entry_2 *
1095dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1132dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1096{ 1133{
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1109 return (struct ext3_dir_entry_2 *) (to - rec_len); 1146 return (struct ext3_dir_entry_2 *) (to - rec_len);
1110} 1147}
1111 1148
1149/*
1150 * Compact each dir entry in the range to the minimal rec_len.
1151 * Returns pointer to last entry in range.
1152 */
1112static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) 1153static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
1113{ 1154{
1114 struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; 1155 struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
1131 return prev; 1172 return prev;
1132} 1173}
1133 1174
1175/*
1176 * Split a full leaf block to make room for a new dir entry.
1177 * Allocate a new block, and move entries so that they are approx. equally full.
1178 * Returns pointer to de in block into which the new entry will be inserted.
1179 */
1134static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, 1180static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1135 struct buffer_head **bh,struct dx_frame *frame, 1181 struct buffer_head **bh,struct dx_frame *frame,
1136 struct dx_hash_info *hinfo, int *error) 1182 struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1142 u32 hash2; 1188 u32 hash2;
1143 struct dx_map_entry *map; 1189 struct dx_map_entry *map;
1144 char *data1 = (*bh)->b_data, *data2; 1190 char *data1 = (*bh)->b_data, *data2;
1145 unsigned split; 1191 unsigned split, move, size, i;
1146 struct ext3_dir_entry_2 *de = NULL, *de2; 1192 struct ext3_dir_entry_2 *de = NULL, *de2;
1147 int err = 0; 1193 int err = 0;
1148 1194
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1170 count = dx_make_map ((struct ext3_dir_entry_2 *) data1, 1216 count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
1171 blocksize, hinfo, map); 1217 blocksize, hinfo, map);
1172 map -= count; 1218 map -= count;
1173 split = count/2; // need to adjust to actual middle
1174 dx_sort_map (map, count); 1219 dx_sort_map (map, count);
1220 /* Split the existing block in the middle, size-wise */
1221 size = 0;
1222 move = 0;
1223 for (i = count-1; i >= 0; i--) {
1224 /* is more than half of this entry in 2nd half of the block? */
1225 if (size + map[i].size/2 > blocksize/2)
1226 break;
1227 size += map[i].size;
1228 move++;
1229 }
1230 /* map index at which we will split */
1231 split = count - move;
1175 hash2 = map[split].hash; 1232 hash2 = map[split].hash;
1176 continued = hash2 == map[split - 1].hash; 1233 continued = hash2 == map[split - 1].hash;
1177 dxtrace(printk("Split block %i at %x, %i/%i\n", 1234 dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af78..5fdb862e71c4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
140struct dx_map_entry 140struct dx_map_entry
141{ 141{
142 u32 hash; 142 u32 hash;
143 u32 offs; 143 u16 offs;
144 u16 size;
144}; 145};
145 146
146#ifdef CONFIG_EXT4_INDEX 147#ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
379 380
380 entries = (struct dx_entry *) (((char *)&root->info) + 381 entries = (struct dx_entry *) (((char *)&root->info) +
381 root->info.info_length); 382 root->info.info_length);
382 assert(dx_get_limit(entries) == dx_root_limit(dir, 383
383 root->info.info_length)); 384 if (dx_get_limit(entries) != dx_root_limit(dir,
385 root->info.info_length)) {
386 ext4_warning(dir->i_sb, __FUNCTION__,
387 "dx entry: limit != root limit");
388 brelse(bh);
389 *err = ERR_BAD_DX_DIR;
390 goto fail;
391 }
392
384 dxtrace (printk("Look up %x", hash)); 393 dxtrace (printk("Look up %x", hash));
385 while (1) 394 while (1)
386 { 395 {
387 count = dx_get_count(entries); 396 count = dx_get_count(entries);
388 assert (count && count <= dx_get_limit(entries)); 397 if (!count || count > dx_get_limit(entries)) {
398 ext4_warning(dir->i_sb, __FUNCTION__,
399 "dx entry: no count or count > limit");
400 brelse(bh);
401 *err = ERR_BAD_DX_DIR;
402 goto fail2;
403 }
404
389 p = entries + 1; 405 p = entries + 1;
390 q = entries + count - 1; 406 q = entries + count - 1;
391 while (p <= q) 407 while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
423 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) 439 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
424 goto fail2; 440 goto fail2;
425 at = entries = ((struct dx_node *) bh->b_data)->entries; 441 at = entries = ((struct dx_node *) bh->b_data)->entries;
426 assert (dx_get_limit(entries) == dx_node_limit (dir)); 442 if (dx_get_limit(entries) != dx_node_limit (dir)) {
443 ext4_warning(dir->i_sb, __FUNCTION__,
444 "dx entry: limit != node limit");
445 brelse(bh);
446 *err = ERR_BAD_DX_DIR;
447 goto fail2;
448 }
427 frame++; 449 frame++;
450 frame->bh = NULL;
428 } 451 }
429fail2: 452fail2:
430 while (frame >= frame_in) { 453 while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
432 frame--; 455 frame--;
433 } 456 }
434fail: 457fail:
458 if (*err == ERR_BAD_DX_DIR)
459 ext4_warning(dir->i_sb, __FUNCTION__,
460 "Corrupt dir inode %ld, running e2fsck is "
461 "recommended.", dir->i_ino);
435 return NULL; 462 return NULL;
436} 463}
437 464
@@ -671,6 +698,10 @@ errout:
671 * Directory block splitting, compacting 698 * Directory block splitting, compacting
672 */ 699 */
673 700
701/*
702 * Create map of hash values, offsets, and sizes, stored at end of block.
703 * Returns number of entries mapped.
704 */
674static int dx_make_map (struct ext4_dir_entry_2 *de, int size, 705static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
675 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) 706 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
676{ 707{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
684 ext4fs_dirhash(de->name, de->name_len, &h); 715 ext4fs_dirhash(de->name, de->name_len, &h);
685 map_tail--; 716 map_tail--;
686 map_tail->hash = h.hash; 717 map_tail->hash = h.hash;
687 map_tail->offs = (u32) ((char *) de - base); 718 map_tail->offs = (u16) ((char *) de - base);
719 map_tail->size = le16_to_cpu(de->rec_len);
688 count++; 720 count++;
689 cond_resched(); 721 cond_resched();
690 } 722 }
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
694 return count; 726 return count;
695} 727}
696 728
729/* Sort map by hash value */
697static void dx_sort_map (struct dx_map_entry *map, unsigned count) 730static void dx_sort_map (struct dx_map_entry *map, unsigned count)
698{ 731{
699 struct dx_map_entry *p, *q, *top = map + count - 1; 732 struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
1089} 1122}
1090 1123
1091#ifdef CONFIG_EXT4_INDEX 1124#ifdef CONFIG_EXT4_INDEX
1125/*
1126 * Move count entries from end of map between two memory locations.
1127 * Returns pointer to last entry moved.
1128 */
1092static struct ext4_dir_entry_2 * 1129static struct ext4_dir_entry_2 *
1093dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1130dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1094{ 1131{
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1107 return (struct ext4_dir_entry_2 *) (to - rec_len); 1144 return (struct ext4_dir_entry_2 *) (to - rec_len);
1108} 1145}
1109 1146
1147/*
1148 * Compact each dir entry in the range to the minimal rec_len.
1149 * Returns pointer to last entry in range.
1150 */
1110static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) 1151static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
1111{ 1152{
1112 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; 1153 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
1129 return prev; 1170 return prev;
1130} 1171}
1131 1172
1173/*
1174 * Split a full leaf block to make room for a new dir entry.
1175 * Allocate a new block, and move entries so that they are approx. equally full.
1176 * Returns pointer to de in block into which the new entry will be inserted.
1177 */
1132static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, 1178static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1133 struct buffer_head **bh,struct dx_frame *frame, 1179 struct buffer_head **bh,struct dx_frame *frame,
1134 struct dx_hash_info *hinfo, int *error) 1180 struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1140 u32 hash2; 1186 u32 hash2;
1141 struct dx_map_entry *map; 1187 struct dx_map_entry *map;
1142 char *data1 = (*bh)->b_data, *data2; 1188 char *data1 = (*bh)->b_data, *data2;
1143 unsigned split; 1189 unsigned split, move, size, i;
1144 struct ext4_dir_entry_2 *de = NULL, *de2; 1190 struct ext4_dir_entry_2 *de = NULL, *de2;
1145 int err = 0; 1191 int err = 0;
1146 1192
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1168 count = dx_make_map ((struct ext4_dir_entry_2 *) data1, 1214 count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
1169 blocksize, hinfo, map); 1215 blocksize, hinfo, map);
1170 map -= count; 1216 map -= count;
1171 split = count/2; // need to adjust to actual middle
1172 dx_sort_map (map, count); 1217 dx_sort_map (map, count);
1218 /* Split the existing block in the middle, size-wise */
1219 size = 0;
1220 move = 0;
1221 for (i = count-1; i >= 0; i--) {
1222 /* is more than half of this entry in 2nd half of the block? */
1223 if (size + map[i].size/2 > blocksize/2)
1224 break;
1225 size += map[i].size;
1226 move++;
1227 }
1228 /* map index at which we will split */
1229 split = count - move;
1173 hash2 = map[split].hash; 1230 hash2 = map[split].hash;
1174 continued = hash2 == map[split - 1].hash; 1231 continued = hash2 == map[split - 1].hash;
1175 dxtrace(printk("Split block %i at %x, %i/%i\n", 1232 dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8ed593766f16..b878528b64c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
345 unregister_shrinker(&acl_shrinker); 345 unregister_shrinker(&acl_shrinker);
346#ifdef CONFIG_NFS_V4 346#ifdef CONFIG_NFS_V4
347 unregister_filesystem(&nfs4_fs_type); 347 unregister_filesystem(&nfs4_fs_type);
348 nfs_unregister_sysctl();
349#endif 348#endif
349 nfs_unregister_sysctl();
350 unregister_filesystem(&nfs_fs_type); 350 unregister_filesystem(&nfs_fs_type);
351} 351}
352 352
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 50cd8a209012..f37f25c931f5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -930,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
930 loff_t user_pos, unsigned user_len) 930 loff_t user_pos, unsigned user_len)
931{ 931{
932 int i; 932 int i;
933 unsigned from, to; 933 unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
934 to = user_pos + user_len;
934 struct page *tmppage; 935 struct page *tmppage;
935 936
936 ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len); 937 ocfs2_zero_new_buffers(wc->w_target_page, from, to);
937
938 if (wc->w_large_pages) {
939 from = wc->w_target_from;
940 to = wc->w_target_to;
941 } else {
942 from = 0;
943 to = PAGE_CACHE_SIZE;
944 }
945 938
946 for(i = 0; i < wc->w_num_pages; i++) { 939 for(i = 0; i < wc->w_num_pages; i++) {
947 tmppage = wc->w_pages[i]; 940 tmppage = wc->w_pages[i];
@@ -991,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
991 map_from = cluster_start; 984 map_from = cluster_start;
992 map_to = cluster_end; 985 map_to = cluster_end;
993 } 986 }
994
995 wc->w_target_from = map_from;
996 wc->w_target_to = map_to;
997 } else { 987 } else {
998 /* 988 /*
999 * If we haven't allocated the new page yet, we 989 * If we haven't allocated the new page yet, we
@@ -1211,18 +1201,33 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1211 loff_t pos, unsigned len) 1201 loff_t pos, unsigned len)
1212{ 1202{
1213 int ret, i; 1203 int ret, i;
1204 loff_t cluster_off;
1205 unsigned int local_len = len;
1214 struct ocfs2_write_cluster_desc *desc; 1206 struct ocfs2_write_cluster_desc *desc;
1207 struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
1215 1208
1216 for (i = 0; i < wc->w_clen; i++) { 1209 for (i = 0; i < wc->w_clen; i++) {
1217 desc = &wc->w_desc[i]; 1210 desc = &wc->w_desc[i];
1218 1211
1212 /*
1213 * We have to make sure that the total write passed in
1214 * doesn't extend past a single cluster.
1215 */
1216 local_len = len;
1217 cluster_off = pos & (osb->s_clustersize - 1);
1218 if ((cluster_off + local_len) > osb->s_clustersize)
1219 local_len = osb->s_clustersize - cluster_off;
1220
1219 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1221 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1220 desc->c_unwritten, data_ac, meta_ac, 1222 desc->c_unwritten, data_ac, meta_ac,
1221 wc, desc->c_cpos, pos, len); 1223 wc, desc->c_cpos, pos, local_len);
1222 if (ret) { 1224 if (ret) {
1223 mlog_errno(ret); 1225 mlog_errno(ret);
1224 goto out; 1226 goto out;
1225 } 1227 }
1228
1229 len -= local_len;
1230 pos += local_len;
1226 } 1231 }
1227 1232
1228 ret = 0; 1233 ret = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7e34e66159c6..f3bc3658e7a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -491,8 +491,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
491 goto leave; 491 goto leave;
492 } 492 }
493 493
494 status = ocfs2_claim_clusters(osb, handle, data_ac, 1, 494 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
495 &bit_off, &num_bits); 495 clusters_to_add, &bit_off, &num_bits);
496 if (status < 0) { 496 if (status < 0) {
497 if (status != -ENOSPC) 497 if (status != -ENOSPC)
498 mlog_errno(status); 498 mlog_errno(status);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 545f7892cdf3..de984d272576 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -524,13 +524,12 @@ bail:
524int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 524int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
525 handle_t *handle, 525 handle_t *handle,
526 struct ocfs2_alloc_context *ac, 526 struct ocfs2_alloc_context *ac,
527 u32 min_bits, 527 u32 bits_wanted,
528 u32 *bit_off, 528 u32 *bit_off,
529 u32 *num_bits) 529 u32 *num_bits)
530{ 530{
531 int status, start; 531 int status, start;
532 struct inode *local_alloc_inode; 532 struct inode *local_alloc_inode;
533 u32 bits_wanted;
534 void *bitmap; 533 void *bitmap;
535 struct ocfs2_dinode *alloc; 534 struct ocfs2_dinode *alloc;
536 struct ocfs2_local_alloc *la; 535 struct ocfs2_local_alloc *la;
@@ -538,7 +537,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
538 mlog_entry_void(); 537 mlog_entry_void();
539 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 538 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
540 539
541 bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
542 local_alloc_inode = ac->ac_inode; 540 local_alloc_inode = ac->ac_inode;
543 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 541 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
544 la = OCFS2_LOCAL_ALLOC(alloc); 542 la = OCFS2_LOCAL_ALLOC(alloc);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 385a10152f9c..3f76631e110c 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -48,7 +48,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
48int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 48int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
49 handle_t *handle, 49 handle_t *handle,
50 struct ocfs2_alloc_context *ac, 50 struct ocfs2_alloc_context *ac,
51 u32 min_bits, 51 u32 bits_wanted,
52 u32 *bit_off, 52 u32 *bit_off,
53 u32 *num_bits); 53 u32 *num_bits);
54 54
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d9c5c9fcb30f..8f09f5235e3a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1486,21 +1486,21 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1486 * contig. allocation, set to '1' to indicate we can deal with extents 1486 * contig. allocation, set to '1' to indicate we can deal with extents
1487 * of any size. 1487 * of any size.
1488 */ 1488 */
1489int ocfs2_claim_clusters(struct ocfs2_super *osb, 1489int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1490 handle_t *handle, 1490 handle_t *handle,
1491 struct ocfs2_alloc_context *ac, 1491 struct ocfs2_alloc_context *ac,
1492 u32 min_clusters, 1492 u32 min_clusters,
1493 u32 *cluster_start, 1493 u32 max_clusters,
1494 u32 *num_clusters) 1494 u32 *cluster_start,
1495 u32 *num_clusters)
1495{ 1496{
1496 int status; 1497 int status;
1497 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1498 unsigned int bits_wanted = max_clusters;
1498 u64 bg_blkno = 0; 1499 u64 bg_blkno = 0;
1499 u16 bg_bit_off; 1500 u16 bg_bit_off;
1500 1501
1501 mlog_entry_void(); 1502 mlog_entry_void();
1502 1503
1503 BUG_ON(!ac);
1504 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1504 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1505 1505
1506 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 1506 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -1557,6 +1557,19 @@ bail:
1557 return status; 1557 return status;
1558} 1558}
1559 1559
1560int ocfs2_claim_clusters(struct ocfs2_super *osb,
1561 handle_t *handle,
1562 struct ocfs2_alloc_context *ac,
1563 u32 min_clusters,
1564 u32 *cluster_start,
1565 u32 *num_clusters)
1566{
1567 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1568
1569 return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
1570 bits_wanted, cluster_start, num_clusters);
1571}
1572
1560static inline int ocfs2_block_group_clear_bits(handle_t *handle, 1573static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1561 struct inode *alloc_inode, 1574 struct inode *alloc_inode,
1562 struct ocfs2_group_desc *bg, 1575 struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index f212dc01a84b..cafe93703095 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -85,6 +85,17 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
85 u32 min_clusters, 85 u32 min_clusters,
86 u32 *cluster_start, 86 u32 *cluster_start,
87 u32 *num_clusters); 87 u32 *num_clusters);
88/*
89 * Use this variant of ocfs2_claim_clusters to specify a maxiumum
90 * number of clusters smaller than the allocation reserved.
91 */
92int __ocfs2_claim_clusters(struct ocfs2_super *osb,
93 handle_t *handle,
94 struct ocfs2_alloc_context *ac,
95 u32 min_clusters,
96 u32 max_clusters,
97 u32 *cluster_start,
98 u32 *num_clusters);
88 99
89int ocfs2_free_suballoc_bits(handle_t *handle, 100int ocfs2_free_suballoc_bits(handle_t *handle,
90 struct inode *alloc_inode, 101 struct inode *alloc_inode,
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 66a13ee63d4c..c05358538f2b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -66,7 +66,7 @@ struct ocfs2_vote_msg
66{ 66{
67 struct ocfs2_msg_hdr v_hdr; 67 struct ocfs2_msg_hdr v_hdr;
68 __be32 v_reserved1; 68 __be32 v_reserved1;
69}; 69} __attribute__ ((packed));
70 70
71/* Responses are given these values to maintain backwards 71/* Responses are given these values to maintain backwards
72 * compatibility with older ocfs2 versions */ 72 * compatibility with older ocfs2 versions */
@@ -78,7 +78,7 @@ struct ocfs2_response_msg
78{ 78{
79 struct ocfs2_msg_hdr r_hdr; 79 struct ocfs2_msg_hdr r_hdr;
80 __be32 r_response; 80 __be32 r_response;
81}; 81} __attribute__ ((packed));
82 82
83struct ocfs2_vote_work { 83struct ocfs2_vote_work {
84 struct list_head w_list; 84 struct list_head w_list;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index a8e293d30034..aefb0be07942 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -11,8 +11,10 @@
11 * Now using anonymous inode source. 11 * Now using anonymous inode source.
12 * Thanks to Oleg Nesterov for useful code review and suggestions. 12 * Thanks to Oleg Nesterov for useful code review and suggestions.
13 * More comments and suggestions from Arnd Bergmann. 13 * More comments and suggestions from Arnd Bergmann.
14 * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> 14 * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
15 * Retrieve multiple signals with one read() call 15 * Retrieve multiple signals with one read() call
16 * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org>
17 * Attach to the sighand only during read() and poll().
16 */ 18 */
17 19
18#include <linux/file.h> 20#include <linux/file.h>
@@ -27,102 +29,12 @@
27#include <linux/signalfd.h> 29#include <linux/signalfd.h>
28 30
29struct signalfd_ctx { 31struct signalfd_ctx {
30 struct list_head lnk;
31 wait_queue_head_t wqh;
32 sigset_t sigmask; 32 sigset_t sigmask;
33 struct task_struct *tsk;
34}; 33};
35 34
36struct signalfd_lockctx {
37 struct task_struct *tsk;
38 unsigned long flags;
39};
40
41/*
42 * Tries to acquire the sighand lock. We do not increment the sighand
43 * use count, and we do not even pin the task struct, so we need to
44 * do it inside an RCU read lock, and we must be prepared for the
45 * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
46 * being detached. We return 0 if the sighand has been detached, or
47 * 1 if we were able to pin the sighand lock.
48 */
49static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
50{
51 struct sighand_struct *sighand = NULL;
52
53 rcu_read_lock();
54 lk->tsk = rcu_dereference(ctx->tsk);
55 if (likely(lk->tsk != NULL))
56 sighand = lock_task_sighand(lk->tsk, &lk->flags);
57 rcu_read_unlock();
58
59 if (!sighand)
60 return 0;
61
62 if (!ctx->tsk) {
63 unlock_task_sighand(lk->tsk, &lk->flags);
64 return 0;
65 }
66
67 if (lk->tsk->tgid == current->tgid)
68 lk->tsk = current;
69
70 return 1;
71}
72
73static void signalfd_unlock(struct signalfd_lockctx *lk)
74{
75 unlock_task_sighand(lk->tsk, &lk->flags);
76}
77
78/*
79 * This must be called with the sighand lock held.
80 */
81void signalfd_deliver(struct task_struct *tsk, int sig)
82{
83 struct sighand_struct *sighand = tsk->sighand;
84 struct signalfd_ctx *ctx, *tmp;
85
86 BUG_ON(!sig);
87 list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
88 /*
89 * We use a negative signal value as a way to broadcast that the
90 * sighand has been orphaned, so that we can notify all the
91 * listeners about this. Remember the ctx->sigmask is inverted,
92 * so if the user is interested in a signal, that corresponding
93 * bit will be zero.
94 */
95 if (sig < 0) {
96 if (ctx->tsk == tsk) {
97 ctx->tsk = NULL;
98 list_del_init(&ctx->lnk);
99 wake_up(&ctx->wqh);
100 }
101 } else {
102 if (!sigismember(&ctx->sigmask, sig))
103 wake_up(&ctx->wqh);
104 }
105 }
106}
107
108static void signalfd_cleanup(struct signalfd_ctx *ctx)
109{
110 struct signalfd_lockctx lk;
111
112 /*
113 * This is tricky. If the sighand is gone, we do not need to remove
114 * context from the list, the list itself won't be there anymore.
115 */
116 if (signalfd_lock(ctx, &lk)) {
117 list_del(&ctx->lnk);
118 signalfd_unlock(&lk);
119 }
120 kfree(ctx);
121}
122
123static int signalfd_release(struct inode *inode, struct file *file) 35static int signalfd_release(struct inode *inode, struct file *file)
124{ 36{
125 signalfd_cleanup(file->private_data); 37 kfree(file->private_data);
126 return 0; 38 return 0;
127} 39}
128 40
@@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
130{ 42{
131 struct signalfd_ctx *ctx = file->private_data; 43 struct signalfd_ctx *ctx = file->private_data;
132 unsigned int events = 0; 44 unsigned int events = 0;
133 struct signalfd_lockctx lk;
134 45
135 poll_wait(file, &ctx->wqh, wait); 46 poll_wait(file, &current->sighand->signalfd_wqh, wait);
136 47
137 /* 48 spin_lock_irq(&current->sighand->siglock);
138 * Let the caller get a POLLIN in this case, ala socket recv() when 49 if (next_signal(&current->pending, &ctx->sigmask) ||
139 * the peer disconnects. 50 next_signal(&current->signal->shared_pending,
140 */ 51 &ctx->sigmask))
141 if (signalfd_lock(ctx, &lk)) {
142 if ((lk.tsk == current &&
143 next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
144 next_signal(&lk.tsk->signal->shared_pending,
145 &ctx->sigmask) > 0)
146 events |= POLLIN;
147 signalfd_unlock(&lk);
148 } else
149 events |= POLLIN; 52 events |= POLLIN;
53 spin_unlock_irq(&current->sighand->siglock);
150 54
151 return events; 55 return events;
152} 56}
@@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
219 int nonblock) 123 int nonblock)
220{ 124{
221 ssize_t ret; 125 ssize_t ret;
222 struct signalfd_lockctx lk;
223 DECLARE_WAITQUEUE(wait, current); 126 DECLARE_WAITQUEUE(wait, current);
224 127
225 if (!signalfd_lock(ctx, &lk)) 128 spin_lock_irq(&current->sighand->siglock);
226 return 0; 129 ret = dequeue_signal(current, &ctx->sigmask, info);
227
228 ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
229 switch (ret) { 130 switch (ret) {
230 case 0: 131 case 0:
231 if (!nonblock) 132 if (!nonblock)
232 break; 133 break;
233 ret = -EAGAIN; 134 ret = -EAGAIN;
234 default: 135 default:
235 signalfd_unlock(&lk); 136 spin_unlock_irq(&current->sighand->siglock);
236 return ret; 137 return ret;
237 } 138 }
238 139
239 add_wait_queue(&ctx->wqh, &wait); 140 add_wait_queue(&current->sighand->signalfd_wqh, &wait);
240 for (;;) { 141 for (;;) {
241 set_current_state(TASK_INTERRUPTIBLE); 142 set_current_state(TASK_INTERRUPTIBLE);
242 ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); 143 ret = dequeue_signal(current, &ctx->sigmask, info);
243 signalfd_unlock(&lk);
244 if (ret != 0) 144 if (ret != 0)
245 break; 145 break;
246 if (signal_pending(current)) { 146 if (signal_pending(current)) {
247 ret = -ERESTARTSYS; 147 ret = -ERESTARTSYS;
248 break; 148 break;
249 } 149 }
150 spin_unlock_irq(&current->sighand->siglock);
250 schedule(); 151 schedule();
251 ret = signalfd_lock(ctx, &lk); 152 spin_lock_irq(&current->sighand->siglock);
252 if (unlikely(!ret)) {
253 /*
254 * Let the caller read zero byte, ala socket
255 * recv() when the peer disconnect. This test
256 * must be done before doing a dequeue_signal(),
257 * because if the sighand has been orphaned,
258 * the dequeue_signal() call is going to crash
259 * because ->sighand will be long gone.
260 */
261 break;
262 }
263 } 153 }
154 spin_unlock_irq(&current->sighand->siglock);
264 155
265 remove_wait_queue(&ctx->wqh, &wait); 156 remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
266 __set_current_state(TASK_RUNNING); 157 __set_current_state(TASK_RUNNING);
267 158
268 return ret; 159 return ret;
269} 160}
270 161
271/* 162/*
272 * Returns either the size of a "struct signalfd_siginfo", or zero if the 163 * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative
273 * sighand we are attached to, has been orphaned. The "count" parameter 164 * error code. The "count" parameter must be at least the size of a
274 * must be at least the size of a "struct signalfd_siginfo". 165 * "struct signalfd_siginfo".
275 */ 166 */
276static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, 167static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
277 loff_t *ppos) 168 loff_t *ppos)
@@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
287 return -EINVAL; 178 return -EINVAL;
288 179
289 siginfo = (struct signalfd_siginfo __user *) buf; 180 siginfo = (struct signalfd_siginfo __user *) buf;
290
291 do { 181 do {
292 ret = signalfd_dequeue(ctx, &info, nonblock); 182 ret = signalfd_dequeue(ctx, &info, nonblock);
293 if (unlikely(ret <= 0)) 183 if (unlikely(ret <= 0))
@@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
300 nonblock = 1; 190 nonblock = 1;
301 } while (--count); 191 } while (--count);
302 192
303 return total ? total : ret; 193 return total ? total: ret;
304} 194}
305 195
306static const struct file_operations signalfd_fops = { 196static const struct file_operations signalfd_fops = {
@@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = {
309 .read = signalfd_read, 199 .read = signalfd_read,
310}; 200};
311 201
312/*
313 * Create a file descriptor that is associated with our signal
314 * state. We can pass it around to others if we want to, but
315 * it will always be _our_ signal state.
316 */
317asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 202asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
318{ 203{
319 int error; 204 int error;
320 sigset_t sigmask; 205 sigset_t sigmask;
321 struct signalfd_ctx *ctx; 206 struct signalfd_ctx *ctx;
322 struct sighand_struct *sighand;
323 struct file *file; 207 struct file *file;
324 struct inode *inode; 208 struct inode *inode;
325 struct signalfd_lockctx lk;
326 209
327 if (sizemask != sizeof(sigset_t) || 210 if (sizemask != sizeof(sigset_t) ||
328 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 211 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
335 if (!ctx) 218 if (!ctx)
336 return -ENOMEM; 219 return -ENOMEM;
337 220
338 init_waitqueue_head(&ctx->wqh);
339 ctx->sigmask = sigmask; 221 ctx->sigmask = sigmask;
340 ctx->tsk = current->group_leader;
341
342 sighand = current->sighand;
343 /*
344 * Add this fd to the list of signal listeners.
345 */
346 spin_lock_irq(&sighand->siglock);
347 list_add_tail(&ctx->lnk, &sighand->signalfd_list);
348 spin_unlock_irq(&sighand->siglock);
349 222
350 /* 223 /*
351 * When we call this, the initialization must be complete, since 224 * When we call this, the initialization must be complete, since
@@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
364 fput(file); 237 fput(file);
365 return -EINVAL; 238 return -EINVAL;
366 } 239 }
367 /* 240 spin_lock_irq(&current->sighand->siglock);
368 * We need to be prepared of the fact that the sighand this fd 241 ctx->sigmask = sigmask;
369 * is attached to, has been detched. In that case signalfd_lock() 242 spin_unlock_irq(&current->sighand->siglock);
370 * will return 0, and we'll just skip setting the new mask. 243
371 */ 244 wake_up(&current->sighand->signalfd_wqh);
372 if (signalfd_lock(ctx, &lk)) {
373 ctx->sigmask = sigmask;
374 signalfd_unlock(&lk);
375 }
376 wake_up(&ctx->wqh);
377 fput(file); 245 fput(file);
378 } 246 }
379 247
380 return ufd; 248 return ufd;
381 249
382err_fdalloc: 250err_fdalloc:
383 signalfd_cleanup(ctx); 251 kfree(ctx);
384 return error; 252 return error;
385} 253}
386 254