12 files changed, 217 insertions, 211 deletions
diff --git a/fs/exec.c b/fs/exec.c
index c21a8cc06277..073b0b8c6d05 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,7 +50,6 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
-#include <linux/signalfd.h>
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk)
         * and we can just re-use it all.
         */
        if (atomic_read(&oldsighand->count) <= 1) {
-                signalfd_detach(tsk);
                exit_itimers(sig);
                return 0;
        }
@@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk)
        sig->flags = 0;
 no_thread_group:
-        signalfd_detach(tsk);
        exit_itimers(sig);
        if (leader)
                release_task(leader);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b8177..c1fa1908dba0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
        u32 hash;
-        u32 offs;
+        u16 offs;       /* dirent offset from the base used by dx_make_map() */
+        u16 size;       /* dirent rec_len, stored as le16_to_cpu() value */
 };
 #ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        entries = (struct dx_entry *) (((char *)&root->info) +
                                       root->info.info_length);
-        assert(dx_get_limit(entries) == dx_root_limit(dir,
-                                                      root->info.info_length));
+        if (dx_get_limit(entries) != dx_root_limit(dir,
+                                                   root->info.info_length)) {
+                ext3_warning(dir->i_sb, __FUNCTION__,
+                             "dx entry: limit != root limit");
+                brelse(bh);
+                *err = ERR_BAD_DX_DIR;
+                goto fail;
+        }
        dxtrace (printk("Look up %x", hash));
        while (1)
        {
                count = dx_get_count(entries);
-                assert (count && count <= dx_get_limit(entries));
+                if (!count || count > dx_get_limit(entries)) {
+                        ext3_warning(dir->i_sb, __FUNCTION__,
+                                     "dx entry: no count or count > limit");
+                        brelse(bh);
+                        *err = ERR_BAD_DX_DIR;
+                        goto fail2;
+                }
                p = entries + 1;
                q = entries + count - 1;
                while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
                if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
                        goto fail2;
                at = entries = ((struct dx_node *) bh->b_data)->entries;
-                assert (dx_get_limit(entries) == dx_node_limit (dir));
+                if (dx_get_limit(entries) != dx_node_limit (dir)) {
+                        ext3_warning(dir->i_sb, __FUNCTION__,
+                                     "dx entry: limit != node limit");
+                        brelse(bh);
+                        *err = ERR_BAD_DX_DIR;
+                        goto fail2;
+                }
                frame++;
+                frame->bh = NULL;
        }
 fail2:
        while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
                frame--;
        }
 fail:
+        if (*err == ERR_BAD_DX_DIR)
+                ext3_warning(dir->i_sb, __FUNCTION__,
+                             "Corrupt dir inode %ld, running e2fsck is "
+                             "recommended.", dir->i_ino);
        return NULL;
 }
@@ -671,6 +698,10 @@ errout:
 * Directory block splitting, compacting
 */
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
                        struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
                        ext3fs_dirhash(de->name, de->name_len, &h);
                        map_tail--;
                        map_tail->hash = h.hash;
-                        map_tail->offs = (u32) ((char *) de - base);
+                        map_tail->offs = (u16) ((char *) de - base);
+                        map_tail->size = le16_to_cpu(de->rec_len);
                        count++;
                        cond_resched();
                }
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
        return count;
 }
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
        struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
 }
 #ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext3_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
        return (struct ext3_dir_entry_2 *) (to - rec_len);
 }
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
        struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
        return prev;
 }
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                        struct buffer_head **bh,struct dx_frame *frame,
                        struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-        unsigned split;
+        unsigned split, move, size, i;
        struct ext3_dir_entry_2 *de = NULL, *de2;
        int     err = 0;
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
                             blocksize, hinfo, map);
        map -= count;
-        split = count/2; // need to adjust to actual middle
        dx_sort_map (map, count);
+        /*
+         * Split the existing block in the middle, size-wise: walk back from
+         * the end of the hash-sorted map, accumulating the sizes of the
+         * entries to move until about half a block has been collected.
+         */
+        size = 0;
+        move = 0;
+        /*
+         * i is unsigned, so an "i >= 0" test can never end the loop; run i
+         * from count down to 1 and index with i - 1 instead.
+         */
+        for (i = count; i > 0; i--) {
+                /* is more than half of this entry in 2nd half of the block? */
+                if (size + map[i - 1].size/2 > blocksize/2)
+                        break;
+                size += map[i - 1].size;
+                move++;
+        }
+        /*
+         * map index at which we will split. If every entry was collected
+         * (move == count) there is no size-based split point, so split by
+         * count rather than let split become 0 and index map[-1] below.
+         */
+        split = (move < count) ? count - move : count/2;
        hash2 = map[split].hash;
        continued = hash2 == map[split - 1].hash;
        dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af78..5fdb862e71c4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
        u32 hash;
-        u32 offs;
+        u16 offs;       /* dirent offset from the base used by dx_make_map() */
+        u16 size;       /* dirent rec_len, stored as le16_to_cpu() value */
 };
 #ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        entries = (struct dx_entry *) (((char *)&root->info) +
                                       root->info.info_length);
-        assert(dx_get_limit(entries) == dx_root_limit(dir,
-                                                      root->info.info_length));
+        if (dx_get_limit(entries) != dx_root_limit(dir,
+                                                   root->info.info_length)) {
+                ext4_warning(dir->i_sb, __FUNCTION__,
+                             "dx entry: limit != root limit");
+                brelse(bh);
+                *err = ERR_BAD_DX_DIR;
+                goto fail;
+        }
        dxtrace (printk("Look up %x", hash));
        while (1)
        {
                count = dx_get_count(entries);
-                assert (count && count <= dx_get_limit(entries));
+                if (!count || count > dx_get_limit(entries)) {
+                        ext4_warning(dir->i_sb, __FUNCTION__,
+                                     "dx entry: no count or count > limit");
+                        brelse(bh);
+                        *err = ERR_BAD_DX_DIR;
+                        goto fail2;
+                }
                p = entries + 1;
                q = entries + count - 1;
                while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
                if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
                        goto fail2;
                at = entries = ((struct dx_node *) bh->b_data)->entries;
-                assert (dx_get_limit(entries) == dx_node_limit (dir));
+                if (dx_get_limit(entries) != dx_node_limit (dir)) {
+                        ext4_warning(dir->i_sb, __FUNCTION__,
+                                     "dx entry: limit != node limit");
+                        brelse(bh);
+                        *err = ERR_BAD_DX_DIR;
+                        goto fail2;
+                }
                frame++;
+                frame->bh = NULL;
        }
 fail2:
        while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
                frame--;
        }
 fail:
+        if (*err == ERR_BAD_DX_DIR)
+                ext4_warning(dir->i_sb, __FUNCTION__,
+                             "Corrupt dir inode %ld, running e2fsck is "
+                             "recommended.", dir->i_ino);
        return NULL;
 }
@@ -671,6 +698,10 @@ errout:
 * Directory block splitting, compacting
 */
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
                        struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
                        ext4fs_dirhash(de->name, de->name_len, &h);
                        map_tail--;
                        map_tail->hash = h.hash;
-                        map_tail->offs = (u32) ((char *) de - base);
+                        map_tail->offs = (u16) ((char *) de - base);
+                        map_tail->size = le16_to_cpu(de->rec_len);
                        count++;
                        cond_resched();
                }
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
        return count;
 }
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
        struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
 }
 #ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext4_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
        return (struct ext4_dir_entry_2 *) (to - rec_len);
 }
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
        struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
        return prev;
 }
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                        struct buffer_head **bh,struct dx_frame *frame,
                        struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-        unsigned split;
+        unsigned split, move, size, i;
        struct ext4_dir_entry_2 *de = NULL, *de2;
        int     err = 0;
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
                             blocksize, hinfo, map);
        map -= count;
-        split = count/2; // need to adjust to actual middle
        dx_sort_map (map, count);
+        /*
+         * Split the existing block in the middle, size-wise: walk back from
+         * the end of the hash-sorted map, accumulating the sizes of the
+         * entries to move until about half a block has been collected.
+         */
+        size = 0;
+        move = 0;
+        /*
+         * i is unsigned, so an "i >= 0" test can never end the loop; run i
+         * from count down to 1 and index with i - 1 instead.
+         */
+        for (i = count; i > 0; i--) {
+                /* is more than half of this entry in 2nd half of the block? */
+                if (size + map[i - 1].size/2 > blocksize/2)
+                        break;
+                size += map[i - 1].size;
+                move++;
+        }
+        /*
+         * map index at which we will split. If every entry was collected
+         * (move == count) there is no size-based split point, so split by
+         * count rather than let split become 0 and index map[-1] below.
+         */
+        split = (move < count) ? count - move : count/2;
        hash2 = map[split].hash;
        continued = hash2 == map[split - 1].hash;
        dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8ed593766f16..b878528b64c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
        unregister_shrinker(&acl_shrinker);
 #ifdef CONFIG_NFS_V4
        unregister_filesystem(&nfs4_fs_type);
-        nfs_unregister_sysctl();
 #endif
+        nfs_unregister_sysctl();
        unregister_filesystem(&nfs_fs_type);
 }
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 50cd8a209012..f37f25c931f5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -930,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
                                loff_t user_pos, unsigned user_len)
 {
        int i;
-        unsigned from, to;
+        unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+                to = user_pos + user_len;
        struct page *tmppage;
-        ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
+        ocfs2_zero_new_buffers(wc->w_target_page, from, to);
-        if (wc->w_large_pages) {
-                from = wc->w_target_from;
-                to = wc->w_target_to;
-        } else {
-                from = 0;
-                to = PAGE_CACHE_SIZE;
-        }
        for(i = 0; i < wc->w_num_pages; i++) {
                tmppage = wc->w_pages[i];
@@ -991,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
                        map_from = cluster_start;
                        map_to = cluster_end;
                }
-                wc->w_target_from = map_from;
-                wc->w_target_to = map_to;
        } else {
                /*
                 * If we haven't allocated the new page yet, we
@@ -1211,18 +1201,33 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
                                       loff_t pos, unsigned len)
 {
        int ret, i;
+        loff_t cluster_off;
+        unsigned int local_len = len;
        struct ocfs2_write_cluster_desc *desc;
+        struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
        for (i = 0; i < wc->w_clen; i++) {
                desc = &wc->w_desc[i];
+                /*
+                 * We have to make sure that the total write passed in
+                 * doesn't extend past a single cluster.
+                 */
+                local_len = len;
+                cluster_off = pos & (osb->s_clustersize - 1);
+                if ((cluster_off + local_len) > osb->s_clustersize)
+                        local_len = osb->s_clustersize - cluster_off;
                ret = ocfs2_write_cluster(mapping, desc->c_phys,
                                          desc->c_unwritten, data_ac, meta_ac,
-                                          wc, desc->c_cpos, pos, len);
+                                          wc, desc->c_cpos, pos, local_len);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
+                len -= local_len;
+                pos += local_len;
        }
        ret = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7e34e66159c6..f3bc3658e7a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -491,8 +491,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
                goto leave;
        }
-        status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
+        status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
-                                      &bit_off, &num_bits);
+                                        clusters_to_add, &bit_off, &num_bits);
        if (status < 0) {
                if (status != -ENOSPC)
                        mlog_errno(status);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 545f7892cdf3..de984d272576 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -524,13 +524,12 @@ bail:
 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
                                 handle_t *handle,
                                 struct ocfs2_alloc_context *ac,
-                                 u32 min_bits,
+                                 u32 bits_wanted,
                                 u32 *bit_off,
                                 u32 *num_bits)
 {
        int status, start;
        struct inode *local_alloc_inode;
-        u32 bits_wanted;
        void *bitmap;
        struct ocfs2_dinode *alloc;
        struct ocfs2_local_alloc *la;
@@ -538,7 +537,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
        mlog_entry_void();
        BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
-        bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
        local_alloc_inode = ac->ac_inode;
        alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
        la = OCFS2_LOCAL_ALLOC(alloc);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 385a10152f9c..3f76631e110c 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -48,7 +48,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
                                 handle_t *handle,
                                 struct ocfs2_alloc_context *ac,
-                                 u32 min_bits,
+                                 u32 bits_wanted,
                                 u32 *bit_off,
                                 u32 *num_bits);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d9c5c9fcb30f..8f09f5235e3a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1486,21 +1486,21 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 * contig. allocation, set to '1' to indicate we can deal with extents
 * of any size.
 */
-int ocfs2_claim_clusters(struct ocfs2_super *osb,
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
-                         handle_t *handle,
+                           handle_t *handle,
-                         struct ocfs2_alloc_context *ac,
+                           struct ocfs2_alloc_context *ac,
-                         u32 min_clusters,
+                           u32 min_clusters,
-                         u32 *cluster_start,
+                           u32 max_clusters,
-                         u32 *num_clusters)
+                           u32 *cluster_start,
+                           u32 *num_clusters)
 {
        int status;
-        unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+        unsigned int bits_wanted = max_clusters;
        u64 bg_blkno = 0;
        u16 bg_bit_off;
        mlog_entry_void();
-        BUG_ON(!ac);
        BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
        BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -1557,6 +1557,19 @@ bail:
        return status;
 }
+/*
+ * Claim clusters against the reservation described by ac, passing the
+ * part of the reservation not yet handed out (ac_bits_wanted -
+ * ac_bits_given) as the maximum. Thin wrapper around
+ * __ocfs2_claim_clusters(); returns whatever that function returns.
+ */
+int ocfs2_claim_clusters(struct ocfs2_super *osb,
+                         handle_t *handle,
+                         struct ocfs2_alloc_context *ac,
+                         u32 min_clusters,
+                         u32 *cluster_start,
+                         u32 *num_clusters)
+{
+        /* remaining, not-yet-given portion of the reservation */
+        unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+        return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
+                                      bits_wanted, cluster_start, num_clusters);
+}
 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
                                               struct inode *alloc_inode,
                                               struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index f212dc01a84b..cafe93703095 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -85,6 +85,17 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
                         u32 min_clusters,
                         u32 *cluster_start,
                         u32 *num_clusters);
+/*
+ * Use this variant of ocfs2_claim_clusters to specify a maximum
+ * number of clusters smaller than the allocation reserved.
+ */
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+                           handle_t *handle,
+                           struct ocfs2_alloc_context *ac,
+                           u32 min_clusters,
+                           u32 max_clusters,
+                           u32 *cluster_start,
+                           u32 *num_clusters);
 int ocfs2_free_suballoc_bits(handle_t *handle,
                             struct inode *alloc_inode,
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 66a13ee63d4c..c05358538f2b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -66,7 +66,7 @@ struct ocfs2_vote_msg
 {
        struct ocfs2_msg_hdr v_hdr;
        __be32 v_reserved1;
-};
+} __attribute__ ((packed));
 /* Responses are given these values to maintain backwards
 * compatibility with older ocfs2 versions */
@@ -78,7 +78,7 @@ struct ocfs2_response_msg
 {
        struct ocfs2_msg_hdr r_hdr;
        __be32 r_response;
-};
+} __attribute__ ((packed));
 struct ocfs2_vote_work {
        struct list_head   w_list;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index a8e293d30034..aefb0be07942 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -11,8 +11,10 @@
 *      Now using anonymous inode source.
 *      Thanks to Oleg Nesterov for useful code review and suggestions.
 *      More comments and suggestions from Arnd Bergmann.
- * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
+ *  Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
 *      Retrieve multiple signals with one read() call
+ *  Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org>
+ *      Attach to the sighand only during read() and poll().
 */
 #include <linux/file.h>
@@ -27,102 +29,12 @@
 #include <linux/signalfd.h>
 struct signalfd_ctx {
-        struct list_head lnk;
-        wait_queue_head_t wqh;
        sigset_t sigmask;
-        struct task_struct *tsk;
 };
-struct signalfd_lockctx {
-        struct task_struct *tsk;
-        unsigned long flags;
-};
-/*
- * Tries to acquire the sighand lock. We do not increment the sighand
- * use count, and we do not even pin the task struct, so we need to
- * do it inside an RCU read lock, and we must be prepared for the
- * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
- * being detached. We return 0 if the sighand has been detached, or
- * 1 if we were able to pin the sighand lock.
- */
-static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
-{
-        struct sighand_struct *sighand = NULL;
-        rcu_read_lock();
-        lk->tsk = rcu_dereference(ctx->tsk);
-        if (likely(lk->tsk != NULL))
-                sighand = lock_task_sighand(lk->tsk, &lk->flags);
-        rcu_read_unlock();
-        if (!sighand)
-                return 0;
-        if (!ctx->tsk) {
-                unlock_task_sighand(lk->tsk, &lk->flags);
-                return 0;
-        }
-        if (lk->tsk->tgid == current->tgid)
-                lk->tsk = current;
-        return 1;
-}
-static void signalfd_unlock(struct signalfd_lockctx *lk)
-{
-        unlock_task_sighand(lk->tsk, &lk->flags);
-}
-/*
- * This must be called with the sighand lock held.
- */
-void signalfd_deliver(struct task_struct *tsk, int sig)
-{
-        struct sighand_struct *sighand = tsk->sighand;
-        struct signalfd_ctx *ctx, *tmp;
-        BUG_ON(!sig);
-        list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
-                /*
-                 * We use a negative signal value as a way to broadcast that the
-                 * sighand has been orphaned, so that we can notify all the
-                 * listeners about this. Remember the ctx->sigmask is inverted,
-                 * so if the user is interested in a signal, that corresponding
-                 * bit will be zero.
-                 */
-                if (sig < 0) {
-                        if (ctx->tsk == tsk) {
-                                ctx->tsk = NULL;
-                                list_del_init(&ctx->lnk);
-                                wake_up(&ctx->wqh);
-                        }
-                } else {
-                        if (!sigismember(&ctx->sigmask, sig))
-                                wake_up(&ctx->wqh);
-                }
-        }
-}
-static void signalfd_cleanup(struct signalfd_ctx *ctx)
-{
-        struct signalfd_lockctx lk;
-        /*
-         * This is tricky. If the sighand is gone, we do not need to remove
-         * context from the list, the list itself won't be there anymore.
-         */
-        if (signalfd_lock(ctx, &lk)) {
-                list_del(&ctx->lnk);
-                signalfd_unlock(&lk);
-        }
-        kfree(ctx);
-}
 static int signalfd_release(struct inode *inode, struct file *file)
 {
-        signalfd_cleanup(file->private_data);
+        kfree(file->private_data);
        return 0;
 }
@@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
 {
        struct signalfd_ctx *ctx = file->private_data;
        unsigned int events = 0;
-        struct signalfd_lockctx lk;
-        poll_wait(file, &ctx->wqh, wait);
+        poll_wait(file, &current->sighand->signalfd_wqh, wait);
-        /*
+        spin_lock_irq(&current->sighand->siglock);
-         * Let the caller get a POLLIN in this case, ala socket recv() when
+        if (next_signal(&current->pending, &ctx->sigmask) ||
-         * the peer disconnects.
+            next_signal(&current->signal->shared_pending,
-         */
+                        &ctx->sigmask))
-        if (signalfd_lock(ctx, &lk)) {
-                if ((lk.tsk == current &&
-                     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
-                    next_signal(&lk.tsk->signal->shared_pending,
-                                &ctx->sigmask) > 0)
-                        events |= POLLIN;
-                signalfd_unlock(&lk);
-        } else
                events |= POLLIN;
+        spin_unlock_irq(&current->sighand->siglock);
        return events;
 }
@@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
                                int nonblock)
 {
        ssize_t ret;
-        struct signalfd_lockctx lk;
        DECLARE_WAITQUEUE(wait, current);
-        if (!signalfd_lock(ctx, &lk))
+        spin_lock_irq(&current->sighand->siglock);
-                return 0;
+        ret = dequeue_signal(current, &ctx->sigmask, info);
-        ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
        switch (ret) {
        case 0:
                if (!nonblock)
                        break;
                ret = -EAGAIN;
        default:
-                signalfd_unlock(&lk);
+                spin_unlock_irq(&current->sighand->siglock);
                return ret;
        }
-        add_wait_queue(&ctx->wqh, &wait);
+        add_wait_queue(&current->sighand->signalfd_wqh, &wait);
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
-                ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
+                ret = dequeue_signal(current, &ctx->sigmask, info);
-                signalfd_unlock(&lk);
                if (ret != 0)
                        break;
                if (signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
+                spin_unlock_irq(&current->sighand->siglock);
                schedule();
-                ret = signalfd_lock(ctx, &lk);
+                spin_lock_irq(&current->sighand->siglock);
-                if (unlikely(!ret)) {
-                        /*
-                         * Let the caller read zero byte, ala socket
-                         * recv() when the peer disconnect. This test
-                         * must be done before doing a dequeue_signal(),
-                         * because if the sighand has been orphaned,
-                         * the dequeue_signal() call is going to crash
-                         * because ->sighand will be long gone.
-                         */
-                         break;
-                }
        }
+        spin_unlock_irq(&current->sighand->siglock);
-        remove_wait_queue(&ctx->wqh, &wait);
+        remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
        __set_current_state(TASK_RUNNING);
        return ret;
 }
 /*
- * Returns either the size of a "struct signalfd_siginfo", or zero if the
+ * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative
- * sighand we are attached to, has been orphaned. The "count" parameter
+ * error code. The "count" parameter must be at least the size of a
- * must be at least the size of a "struct signalfd_siginfo".
+ * "struct signalfd_siginfo".
 */
 static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
                             loff_t *ppos)
@@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
                return -EINVAL;
        siginfo = (struct signalfd_siginfo __user *) buf;
        do {
                ret = signalfd_dequeue(ctx, &info, nonblock);
                if (unlikely(ret <= 0))
@@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
                nonblock = 1;
        } while (--count);
-        return total ? total : ret;
+        return total ? total: ret;
 }
 static const struct file_operations signalfd_fops = {
@@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = {
        .read           = signalfd_read,
 };
-/*
- * Create a file descriptor that is associated with our signal
- * state. We can pass it around to others if we want to, but
- * it will always be _our_ signal state.
- */
 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
 {
        int error;
        sigset_t sigmask;
        struct signalfd_ctx *ctx;
-        struct sighand_struct *sighand;
        struct file *file;
        struct inode *inode;
-        struct signalfd_lockctx lk;
        if (sizemask != sizeof(sigset_t) ||
            copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
                if (!ctx)
                        return -ENOMEM;
-                init_waitqueue_head(&ctx->wqh);
                ctx->sigmask = sigmask;
-                ctx->tsk = current->group_leader;
-                sighand = current->sighand;
-                /*
-                 * Add this fd to the list of signal listeners.
-                 */
-                spin_lock_irq(&sighand->siglock);
-                list_add_tail(&ctx->lnk, &sighand->signalfd_list);
-                spin_unlock_irq(&sighand->siglock);
                /*
                 * When we call this, the initialization must be complete, since
@@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
                        fput(file);
                        return -EINVAL;
                }
-                /*
+                spin_lock_irq(&current->sighand->siglock);
-                 * We need to be prepared of the fact that the sighand this fd
+                ctx->sigmask = sigmask;
-                 * is attached to, has been detched. In that case signalfd_lock()
+                spin_unlock_irq(&current->sighand->siglock);
-                 * will return 0, and we'll just skip setting the new mask.
-                 */
+                wake_up(&current->sighand->signalfd_wqh);
-                if (signalfd_lock(ctx, &lk)) {
-                        ctx->sigmask = sigmask;
-                        signalfd_unlock(&lk);
-                }
-                wake_up(&ctx->wqh);
                fput(file);
        }
        return ufd;
 err_fdalloc:
-        signalfd_cleanup(ctx);
+        kfree(ctx);
        return error;
 }