Diffstat (limited to 'fs')
-rw-r--r--  fs/gfs2/sys.c                          |  20
-rw-r--r--  fs/libfs.c                             |   2
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c   |  13
-rw-r--r--  fs/notify/inotify/inotify_user.c       |   9
-rw-r--r--  fs/notify/notification.c               |  11
-rw-r--r--  fs/proc/base.c                         |  19
-rw-r--r--  fs/select.c                            |   1
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c            |  13
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h            |   1
-rw-r--r--  fs/xfs/xfs_iget.c                      | 113
10 files changed, 103 insertions, 99 deletions
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 23419dc3027b..a7cbfbd340c7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
 #define GDLM_ATTR(_name,_mode,_show,_store) \
 static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
 GDLM_ATTR(block, 0644, block_show, block_store);
 GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
 GDLM_ATTR(id, 0444, lkid_show, NULL);
 GDLM_ATTR(jid, 0444, jid_show, NULL);
 GDLM_ATTR(first, 0444, lkfirst_show, NULL);
 GDLM_ATTR(first_done, 0444, first_done_show, NULL);
-GDLM_ATTR(recover, 0200, NULL, recover_store);
+GDLM_ATTR(recover, 0600, NULL, recover_store);
 GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
 GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
 
 static struct attribute *lock_module_attrs[] = {
 	&gdlm_attr_proto_name.attr,
diff --git a/fs/libfs.c b/fs/libfs.c
index ddfa89948c3f..dcec3d3ea64f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
 		return PTR_ERR(s);
 
 	s->s_flags = MS_NOUSER;
-	s->s_maxbytes = ~0ULL;
+	s->s_maxbytes = MAX_LFS_FILESIZE;
 	s->s_blocksize = PAGE_SIZE;
 	s->s_blocksize_bits = PAGE_SHIFT;
 	s->s_magic = magic;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 47cd258fd24d..5dcbafe72d71 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	event_priv->wd = wd;
 
 	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
-	/* EEXIST is not an error */
-	if (ret == -EEXIST)
-		ret = 0;
-
-	/* did event_priv get attached? */
-	if (list_empty(&fsn_event_priv->event_list))
+	if (ret) {
 		inotify_free_event_priv(fsn_event_priv);
+		/* EEXIST says we tail matched, EOVERFLOW isn't something
+		 * to report up the stack. */
+		if ((ret == -EEXIST) ||
+		    (ret == -EOVERFLOW))
+			ret = 0;
+	}
 
 	/*
 	 * If we hold the entry until after the event is on the queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f30d9bbc2e1b..dc32ed8323ba 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -386,6 +386,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	struct fsnotify_event *ignored_event;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
+	int ret;
 
 	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
 					      FSNOTIFY_EVENT_NONE, NULL, 0,
@@ -404,10 +405,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	fsn_event_priv->group = group;
 	event_priv->wd = ientry->wd;
 
-	fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
-
-	/* did the private data get added? */
-	if (list_empty(&fsn_event_priv->event_list))
+	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
+	if (ret)
 		inotify_free_event_priv(fsn_event_priv);
 
 skip_send_ignore:
@@ -568,7 +567,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
-	group->inotify_data.last_wd = 0;
+	group->inotify_data.last_wd = 1;
 	group->inotify_data.user = user;
 	group->inotify_data.fa = NULL;
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 521368574e97..3816d5750dd5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
 				return true;
 			break;
 		case (FSNOTIFY_EVENT_NONE):
+			if (old->mask & FS_Q_OVERFLOW)
+				return true;
+			else if (old->mask & FS_IN_IGNORED)
+				return false;
 			return false;
 		};
 	}
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
 	struct list_head *list = &group->notification_list;
 	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
-
-	/* easy to tell if priv was attached to the event */
-	INIT_LIST_HEAD(&priv->event_list);
+	int ret = 0;
 
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -194,6 +196,7 @@ alloc_holder:
 
 	if (group->q_len >= group->max_events) {
 		event = &q_overflow_event;
+		ret = -EOVERFLOW;
 		/* sorry, no private data on the overflow event */
 		priv = NULL;
 	}
@@ -235,7 +238,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
-	return 0;
+	return ret;
 }
 
 /*
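
Taken together, the notification.c and inotify hunks change fsnotify_add_notify_event() from always returning 0 to returning 0 on success, -EEXIST when the new event was merged with the tail of the queue, and -EOVERFLOW when the queue was full and the overflow event was queued instead. A minimal caller-side sketch of that convention, condensed from the inotify_handle_event() hunk above (no new names are introduced here):

	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
	if (ret) {
		/* priv was not attached to the event; free it ourselves */
		inotify_free_event_priv(fsn_event_priv);
		/* tail merge and queue overflow are not errors to report */
		if (ret == -EEXIST || ret == -EOVERFLOW)
			ret = 0;
	}
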
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 175db258942f..6f742f6658a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 
 	if (!task)
 		return -ESRCH;
-	task_lock(task);
-	if (task->mm)
-		oom_adjust = task->mm->oom_adj;
-	else
-		oom_adjust = OOM_DISABLE;
-	task_unlock(task);
+	oom_adjust = task->oomkilladj;
 	put_task_struct(task);
 
 	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
-	task_lock(task);
-	if (!task->mm) {
-		task_unlock(task);
-		put_task_struct(task);
-		return -EINVAL;
-	}
-	if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
-		task_unlock(task);
+	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
 		put_task_struct(task);
 		return -EACCES;
 	}
-	task->mm->oom_adj = oom_adjust;
-	task_unlock(task);
+	task->oomkilladj = oom_adjust;
 	put_task_struct(task);
 	if (end - buffer == 0)
 		return -EIO;
diff --git a/fs/select.c b/fs/select.c
index d870237e42c7..8084834e123e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq)
 {
 	init_poll_funcptr(&pwq->pt, __pollwait);
 	pwq->polling_task = current;
+	pwq->triggered = 0;
 	pwq->error = 0;
 	pwq->table = NULL;
 	pwq->inline_index = 0;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b619d6b8ca43..98ef624d9baf 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
 	return 0;
 }
 
+void
+__xfs_inode_set_reclaim_tag(
+	struct xfs_perag *pag,
+	struct xfs_inode *ip)
+{
+	radix_tree_tag_set(&pag->pag_ici_root,
+			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+			   XFS_ICI_RECLAIM_TAG);
+}
+
 /*
  * We set the inode flag atomically with the radix tree tag.
  * Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
 
 	read_lock(&pag->pag_ici_lock);
 	spin_lock(&ip->i_flags_lock);
-	radix_tree_tag_set(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	__xfs_inode_set_reclaim_tag(pag, ip);
 	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 	spin_unlock(&ip->i_flags_lock);
 	read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2a10301c99c7..59120602588a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
 void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
 void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 				   struct xfs_inode *ip);
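
The newly exported __xfs_inode_set_reclaim_tag() only sets the radix tree tag; judging from xfs_inode_set_reclaim_tag() in the xfs_sync.c hunk above, callers are expected to hold pag_ici_lock and the inode's i_flags_lock and to set XFS_IRECLAIMABLE themselves. A sketch of that calling pattern, copied from the hunk above rather than stated by the header itself:

	read_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	__xfs_inode_set_reclaim_tag(pag, ip);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
	spin_unlock(&ip->i_flags_lock);
	read_unlock(&pag->pag_ici_lock);
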
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 34ec86923f7e..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -191,80 +191,82 @@ xfs_iget_cache_hit(
 	int flags,
 	int lock_flags) __releases(pag->pag_ici_lock)
 {
+	struct inode *inode = VFS_I(ip);
 	struct xfs_mount *mp = ip->i_mount;
-	int error = EAGAIN;
+	int error;
+
+	spin_lock(&ip->i_flags_lock);
 
 	/*
-	 * If INEW is set this inode is being set up
-	 * If IRECLAIM is set this inode is being torn down
-	 * Pause and try again.
+	 * If we are racing with another cache hit that is currently
+	 * instantiating this inode or currently recycling it out of
+	 * reclaimabe state, wait for the initialisation to complete
+	 * before continuing.
+	 *
+	 * XXX(hch): eventually we should do something equivalent to
+	 * wait_on_inode to wait for these flags to be cleared
+	 * instead of polling for it.
 	 */
-	if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
 		XFS_STATS_INC(xs_ig_frecycle);
+		error = EAGAIN;
 		goto out_error;
 	}
 
-	/* If IRECLAIMABLE is set, we've torn down the vfs inode part */
-	if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
-
-		/*
-		 * If lookup is racing with unlink, then we should return an
-		 * error immediately so we don't remove it from the reclaim
-		 * list and potentially leak the inode.
-		 */
-		if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-			error = ENOENT;
-			goto out_error;
-		}
+	/*
+	 * If lookup is racing with unlink return an error immediately.
+	 */
+	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+		error = ENOENT;
+		goto out_error;
+	}
 
+	/*
+	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
+	 * Need to carefully get it back into useable state.
+	 */
+	if (ip->i_flags & XFS_IRECLAIMABLE) {
 		xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 
 		/*
-		 * We need to re-initialise the VFS inode as it has been
-		 * 'freed' by the VFS. Do this here so we can deal with
-		 * errors cleanly, then tag it so it can be set up correctly
-		 * later.
+		 * We need to set XFS_INEW atomically with clearing the
+		 * reclaimable tag so that we do have an indicator of the
+		 * inode still being initialized.
 		 */
-		if (inode_init_always(mp->m_super, VFS_I(ip))) {
-			error = ENOMEM;
-			goto out_error;
-		}
+		ip->i_flags |= XFS_INEW;
+		ip->i_flags &= ~XFS_IRECLAIMABLE;
+		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
 
-		/*
-		 * We must set the XFS_INEW flag before clearing the
-		 * XFS_IRECLAIMABLE flag so that if a racing lookup does
-		 * not find the XFS_IRECLAIMABLE above but has the igrab()
-		 * below succeed we can safely check XFS_INEW to detect
-		 * that this inode is still being initialised.
-		 */
-		xfs_iflags_set(ip, XFS_INEW);
-		xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+		spin_unlock(&ip->i_flags_lock);
+		read_unlock(&pag->pag_ici_lock);
 
-		/* clear the radix tree reclaim flag as well. */
-		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
-	} else if (!igrab(VFS_I(ip))) {
+		error = -inode_init_always(mp->m_super, inode);
+		if (error) {
+			/*
+			 * Re-initializing the inode failed, and we are in deep
+			 * trouble.  Try to re-add it to the reclaim list.
+			 */
+			read_lock(&pag->pag_ici_lock);
+			spin_lock(&ip->i_flags_lock);
+
+			ip->i_flags &= ~XFS_INEW;
+			ip->i_flags |= XFS_IRECLAIMABLE;
+			__xfs_inode_set_reclaim_tag(pag, ip);
+			goto out_error;
+		}
+		inode->i_state = I_LOCK|I_NEW;
+	} else {
 		/* If the VFS inode is being torn down, pause and try again. */
-		XFS_STATS_INC(xs_ig_frecycle);
-		goto out_error;
-	} else if (xfs_iflags_test(ip, XFS_INEW)) {
-		/*
-		 * We are racing with another cache hit that is
-		 * currently recycling this inode out of the XFS_IRECLAIMABLE
-		 * state. Wait for the initialisation to complete before
-		 * continuing.
-		 */
-		wait_on_inode(VFS_I(ip));
-	}
+		if (!igrab(inode)) {
+			error = EAGAIN;
+			goto out_error;
+		}
 
-	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-		error = ENOENT;
-		iput(VFS_I(ip));
-		goto out_error;
-	}
+		/* We've got a live one. */
+		spin_unlock(&ip->i_flags_lock);
+		read_unlock(&pag->pag_ici_lock);
+	}
 
-	/* We've got a live one. */
-	read_unlock(&pag->pag_ici_lock);
-
 	if (lock_flags != 0)
 		xfs_ilock(ip, lock_flags);
 
@@ -274,6 +276,7 @@ xfs_iget_cache_hit(
 	return 0;
 
 out_error:
+	spin_unlock(&ip->i_flags_lock);
 	read_unlock(&pag->pag_ici_lock);
 	return error;
 }
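
For readability, the reworked recycle path of xfs_iget_cache_hit() condensed into a linear sequence (a sketch assembled from the hunk above, with the non-reclaimable branch omitted): XFS_INEW is set and XFS_IRECLAIMABLE cleared while both locks are held, the locks are dropped before re-initialising the VFS inode, and a failed inode_init_always() puts the inode back on the reclaim list via the new __xfs_inode_set_reclaim_tag() helper.

	/* caller holds pag->pag_ici_lock and ip->i_flags_lock */
	ip->i_flags |= XFS_INEW;		/* mark as still initialising */
	ip->i_flags &= ~XFS_IRECLAIMABLE;
	__xfs_inode_clear_reclaim_tag(mp, pag, ip);

	spin_unlock(&ip->i_flags_lock);
	read_unlock(&pag->pag_ici_lock);

	/* re-initialise the VFS inode outside the locks */
	error = -inode_init_always(mp->m_super, inode);
	if (error) {
		/* re-initialisation failed: re-add to the reclaim list */
		read_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);
		ip->i_flags &= ~XFS_INEW;
		ip->i_flags |= XFS_IRECLAIMABLE;
		__xfs_inode_set_reclaim_tag(pag, ip);
		goto out_error;
	}
	inode->i_state = I_LOCK|I_NEW;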