25 files changed, 736 insertions, 343 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index c8ccbd09048f..df57b493e1cb 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -55,6 +55,9 @@
 #include <net/netlink.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/selinux.h>
+#include "audit.h"
 /* No auditing will take place until audit_initialized != 0.
 * (Initialization happens after skb_init is called.) */
@@ -227,49 +230,103 @@ void audit_log_lost(const char *message)
        }
 }
-static int audit_set_rate_limit(int limit, uid_t loginuid)
+static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 {
-        int old          = audit_rate_limit;
+        int old = audit_rate_limit; /* previous limit; this is the normal return value */
-        audit_rate_limit = limit;
-        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 
+        if (sid) { /* nonzero sid: add the sender's context string as subj= */
+                char *ctx = NULL;
+                u32 len;
+                int rc;
+                if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+                        return rc; /* conversion failed: nothing is logged and the limit is not changed */
+                else
+                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                "audit_rate_limit=%d old=%d by auid=%u subj=%s",
+                                limit, old, loginuid, ctx);
+                kfree(ctx); /* ctx was handed to selinux_ctxid_to_string() by address above */
+        } else
+                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
                        "audit_rate_limit=%d old=%d by auid=%u",
-                        audit_rate_limit, old, loginuid);
+                        limit, old, loginuid);
+        audit_rate_limit = limit; /* applied only after the change record was issued */
        return old;
 }
-static int audit_set_backlog_limit(int limit, uid_t loginuid)
+static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 {
-        int old          = audit_backlog_limit;
+        int old = audit_backlog_limit; /* previous limit; this is the normal return value */
-        audit_backlog_limit = limit;
-        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+        if (sid) { /* nonzero sid: add the sender's context string as subj= */
+                char *ctx = NULL;
+                u32 len;
+                int rc;
+                if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+                        return rc; /* conversion failed: nothing is logged and the limit is not changed */
+                else
+                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                            "audit_backlog_limit=%d old=%d by auid=%u subj=%s",
+                                limit, old, loginuid, ctx);
+                kfree(ctx); /* ctx was handed to selinux_ctxid_to_string() by address above */
+        } else
+                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
                        "audit_backlog_limit=%d old=%d by auid=%u",
-                        audit_backlog_limit, old, loginuid);
+                        limit, old, loginuid);
+        audit_backlog_limit = limit; /* applied only after the change record was issued */
        return old;
 }
-static int audit_set_enabled(int state, uid_t loginuid)
+static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 {
-        int old          = audit_enabled;
+        int old = audit_enabled; /* previous state; this is the normal return value */
        if (state != 0 && state != 1)
                return -EINVAL;
-        audit_enabled = state;
-        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+        if (sid) { /* nonzero sid: add the sender's context string as subj= */
+                char *ctx = NULL;
+                u32 len;
+                int rc;
+                if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+                        return rc; /* conversion failed: nothing is logged and the state is not changed */
+                else
+                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                "audit_enabled=%d old=%d by auid=%u subj=%s",
+                                state, old, loginuid, ctx);
+                kfree(ctx); /* ctx was handed to selinux_ctxid_to_string() by address above */
+        } else
+                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
                        "audit_enabled=%d old=%d by auid=%u",
-                        audit_enabled, old, loginuid);
+                        state, old, loginuid);
+        audit_enabled = state; /* applied only after the change record was issued */
        return old;
 }
-static int audit_set_failure(int state, uid_t loginuid)
+static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 {
-        int old          = audit_failure;
+        int old = audit_failure; /* previous mode; this is the normal return value */
        if (state != AUDIT_FAIL_SILENT
            && state != AUDIT_FAIL_PRINTK
            && state != AUDIT_FAIL_PANIC)
                return -EINVAL;
-        audit_failure = state;
-        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+        if (sid) { /* nonzero sid: add the sender's context string as subj= */
+                char *ctx = NULL;
+                u32 len;
+                int rc;
+                if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+                        return rc; /* conversion failed: nothing is logged and the mode is not changed */
+                else
+                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                "audit_failure=%d old=%d by auid=%u subj=%s",
+                                state, old, loginuid, ctx);
+                kfree(ctx); /* ctx was handed to selinux_ctxid_to_string() by address above */
+        } else
+                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
                        "audit_failure=%d old=%d by auid=%u",
-                        audit_failure, old, loginuid);
+                        state, old, loginuid);
+        audit_failure = state; /* applied only after the change record was issued */
        return old;
 }
@@ -387,7 +444,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-        u32                     uid, pid, seq;
+        u32                     uid, pid, seq, sid;
        void                    *data;
        struct audit_status     *status_get, status_set;
        int                     err;
@@ -413,6 +470,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
        pid  = NETLINK_CREDS(skb)->pid;
        uid  = NETLINK_CREDS(skb)->uid;
        loginuid = NETLINK_CB(skb).loginuid;
+        sid  = NETLINK_CB(skb).sid;
        seq  = nlh->nlmsg_seq;
        data = NLMSG_DATA(nlh);
@@ -433,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                        return -EINVAL;
                status_get   = (struct audit_status *)data;
                if (status_get->mask & AUDIT_STATUS_ENABLED) {
-                        err = audit_set_enabled(status_get->enabled, loginuid);
+                        err = audit_set_enabled(status_get->enabled,
+                                                        loginuid, sid);
                        if (err < 0) return err;
                }
                if (status_get->mask & AUDIT_STATUS_FAILURE) {
-                        err = audit_set_failure(status_get->failure, loginuid);
+                        err = audit_set_failure(status_get->failure,
+                                                         loginuid, sid);
                        if (err < 0) return err;
                }
                if (status_get->mask & AUDIT_STATUS_PID) {
                        int old   = audit_pid;
+                        if (sid) {
+                                char *ctx = NULL;
+                                u32 len;
+                                int rc;
+                                if ((rc = selinux_ctxid_to_string(
+                                                sid, &ctx, &len)))
+                                        return rc;
+                                else
+                                        audit_log(NULL, GFP_KERNEL,
+                                                AUDIT_CONFIG_CHANGE,
+                                                "audit_pid=%d old=%d by auid=%u subj=%s",
+                                                status_get->pid, old,
+                                                loginuid, ctx);
+                                kfree(ctx);
+                        } else
+                                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                        "audit_pid=%d old=%d by auid=%u",
+                                          status_get->pid, old, loginuid);
                        audit_pid = status_get->pid;
-                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-                                "audit_pid=%d old=%d by auid=%u",
-                                  audit_pid, old, loginuid);
                }
                if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
-                        audit_set_rate_limit(status_get->rate_limit, loginuid);
+                        audit_set_rate_limit(status_get->rate_limit,
+                                                         loginuid, sid);
                if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
                        audit_set_backlog_limit(status_get->backlog_limit,
-                                                        loginuid);
+                                                        loginuid, sid);
                break;
        case AUDIT_USER:
        case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG:
@@ -465,8 +541,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                        ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
                        if (ab) {
                                audit_log_format(ab,
-                                                 "user pid=%d uid=%u auid=%u msg='%.1024s'",
+                                                 "user pid=%d uid=%u auid=%u",
-                                                 pid, uid, loginuid, (char *)data);
+                                                 pid, uid, loginuid);
+                                if (sid) {
+                                        char *ctx = NULL;
+                                        u32 len;
+                                        if (selinux_ctxid_to_string(
+                                                        sid, &ctx, &len)) {
+                                                audit_log_format(ab, 
+                                                        " ssid=%u", sid);
+                                                /* Maybe call audit_panic? */
+                                        } else
+                                                audit_log_format(ab, 
+                                                        " subj=%s", ctx);
+                                        kfree(ctx);
+                                }
+                                audit_log_format(ab, " msg='%.1024s'",
+                                         (char *)data);
                                audit_set_pid(ab, pid);
                                audit_log_end(ab);
                        }
@@ -480,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
        case AUDIT_LIST:
                err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
                                           uid, seq, data, nlmsg_len(nlh),
-                                           loginuid);
+                                           loginuid, sid);
                break;
        case AUDIT_ADD_RULE:
        case AUDIT_DEL_RULE:
@@ -490,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
        case AUDIT_LIST_RULES:
                err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
                                           uid, seq, data, nlmsg_len(nlh),
-                                           loginuid);
+                                           loginuid, sid);
                break;
        case AUDIT_SIGNAL_INFO:
                sig_data.uid = audit_sig_uid;
@@ -564,6 +655,11 @@ static int __init audit_init(void)
        skb_queue_head_init(&audit_skb_queue);
        audit_initialized = 1;
        audit_enabled = audit_default;
+        /* Register the callback with selinux.  This callback will be invoked
+         * when a new policy is loaded. */
+        selinux_audit_set_callback(&selinux_audit_rule_update);
        audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
        return 0;
 }
diff --git a/kernel/audit.h b/kernel/audit.h
index bc5392076e2b..6f733920fd32 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -54,9 +54,11 @@ enum audit_state {
 /* Rule lists */
 struct audit_field {
-        u32                     type;
+        u32                             type;   /* field type, e.g. AUDIT_SE_USER */
-        u32                     val;
+        u32                             val;    /* comparison value; for AUDIT_SE_* fields the length given to audit_unpack_string() */
-        u32                     op;
+        u32                             op;     /* operator bits taken from the user-space rule */
+        char                            *se_str;        /* AUDIT_SE_* fields: unpacked rule string, freed with the rule */
+        struct selinux_audit_rule       *se_rule;       /* AUDIT_SE_* fields: opaque rule from selinux_audit_rule_init(); may be NULL */
 };
 struct audit_krule {
@@ -86,3 +88,5 @@ extern void		    audit_send_reply(int pid, int seq, int type,
 extern void                 audit_log_lost(const char *message);
 extern void                 audit_panic(const char *message);
 extern struct mutex audit_netlink_mutex;
+extern int selinux_audit_rule_update(void);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d3a8539f3a83..7c134906d689 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -23,6 +23,7 @@
 #include <linux/audit.h>
 #include <linux/kthread.h>
 #include <linux/netlink.h>
+#include <linux/selinux.h>
 #include "audit.h"
 /* There are three lists of rules -- one to search at task creation
@@ -42,6 +43,13 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 static inline void audit_free_rule(struct audit_entry *e)
 {
+        int i; /* index over the rule's fields */
+        if (e->rule.fields) /* release per-field selinux data before the array itself */
+                for (i = 0; i < e->rule.field_count; i++) {
+                        struct audit_field *f = &e->rule.fields[i];
+                        kfree(f->se_str); /* NULL for fields without a string; kfree(NULL) is a no-op */
+                        selinux_audit_rule_free(f->se_rule); /* NOTE(review): se_rule may be NULL here; confirm the callee accepts that */
+                }
        kfree(e->rule.fields);
        kfree(e);
 }
@@ -52,9 +60,29 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
        audit_free_rule(e);
 }
+/* Allocate a zeroed audit filterlist entry together with a zeroed array of
+ * @field_count fields, which is attached as entry->rule.fields.  All other
+ * members are left zeroed for the caller to fill in (field_count included).
+ * Returns the new entry, or NULL if either allocation fails; nothing is
+ * leaked on failure. */
+static inline struct audit_entry *audit_init_entry(u32 field_count)
+{
+        struct audit_entry *entry;
+        struct audit_field *fields;
+        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+        if (unlikely(!entry))
+                return NULL;
+        /* kcalloc() zeroes the array like kzalloc() did, but also fails
+         * cleanly if field_count * sizeof(*fields) would overflow instead
+         * of silently allocating a short buffer. */
+        fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
+        if (unlikely(!fields)) {
+                kfree(entry);
+                return NULL;
+        }
+        entry->rule.fields = fields;
+        return entry;
+}
 /* Unpack a filter field's string representation from user-space
 * buffer. */
-static __attribute__((unused)) char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
+static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
 {
        char *str;
@@ -84,7 +112,6 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
        unsigned listnr;
        struct audit_entry *entry;
-        struct audit_field *fields;
        int i, err;
        err = -EINVAL;
@@ -108,23 +135,14 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
                goto exit_err;
        err = -ENOMEM;
-        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+        entry = audit_init_entry(rule->field_count);
-        if (unlikely(!entry))
+        if (!entry)
-                goto exit_err;
-        fields = kmalloc(sizeof(*fields) * rule->field_count, GFP_KERNEL);
-        if (unlikely(!fields)) {
-                kfree(entry);
                goto exit_err;
-        }
-        memset(&entry->rule, 0, sizeof(struct audit_krule));
-        memset(fields, 0, sizeof(struct audit_field));
        entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND;
        entry->rule.listnr = listnr;
        entry->rule.action = rule->action;
        entry->rule.field_count = rule->field_count;
-        entry->rule.fields = fields;
        for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
                entry->rule.mask[i] = rule->mask[i];
@@ -150,15 +168,20 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
        for (i = 0; i < rule->field_count; i++) {
                struct audit_field *f = &entry->rule.fields[i];
-                if (rule->fields[i] & AUDIT_UNUSED_BITS) {
-                        err = -EINVAL;
-                        goto exit_free;
-                }
                f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
                f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
                f->val = rule->values[i];
+                if (f->type & AUDIT_UNUSED_BITS ||
+                    f->type == AUDIT_SE_USER ||
+                    f->type == AUDIT_SE_ROLE ||
+                    f->type == AUDIT_SE_TYPE ||
+                    f->type == AUDIT_SE_SEN ||
+                    f->type == AUDIT_SE_CLR) {
+                        err = -EINVAL;
+                        goto exit_free;
+                }
                entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
                /* Support for legacy operators where
@@ -188,8 +211,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
        int err = 0;
        struct audit_entry *entry;
        void *bufp;
-        /* size_t remain = datasz - sizeof(struct audit_rule_data); */
+        size_t remain = datasz - sizeof(struct audit_rule_data);
        int i;
+        char *str;
        entry = audit_to_entry_common((struct audit_rule *)data);
        if (IS_ERR(entry))
@@ -207,10 +231,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
                f->op = data->fieldflags[i] & AUDIT_OPERATORS;
                f->type = data->fields[i];
+                f->val = data->values[i];
+                f->se_str = NULL;
+                f->se_rule = NULL;
                switch(f->type) {
-                /* call type-specific conversion routines here */
+                case AUDIT_SE_USER:
-                default:
+                case AUDIT_SE_ROLE:
-                        f->val = data->values[i];
+                case AUDIT_SE_TYPE:
+                case AUDIT_SE_SEN:
+                case AUDIT_SE_CLR:
+                        str = audit_unpack_string(&bufp, &remain, f->val);
+                        if (IS_ERR(str))
+                                goto exit_free;
+                        entry->rule.buflen += f->val;
+                        err = selinux_audit_rule_init(f->type, f->op, str,
+                                                      &f->se_rule);
+                        /* Keep currently invalid fields around in case they
+                         * become valid after a policy reload. */
+                        if (err == -EINVAL) {
+                                printk(KERN_WARNING "audit rule for selinux "
+                                       "\'%s\' is invalid\n",  str);
+                                err = 0;
+                        }
+                        if (err) {
+                                kfree(str);
+                                goto exit_free;
+                        } else
+                                f->se_str = str;
+                        break;
                }
        }
@@ -286,7 +335,14 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
                data->fields[i] = f->type;
                data->fieldflags[i] = f->op;
                switch(f->type) {
-                /* call type-specific conversion routines here */
+                case AUDIT_SE_USER:
+                case AUDIT_SE_ROLE:
+                case AUDIT_SE_TYPE:
+                case AUDIT_SE_SEN:
+                case AUDIT_SE_CLR:
+                        data->buflen += data->values[i] =
+                                audit_pack_string(&bufp, f->se_str);
+                        break;
                default:
                        data->values[i] = f->val;
                }
@@ -314,7 +370,14 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
                        return 1;
                switch(a->fields[i].type) {
-                /* call type-specific comparison routines here */
+                case AUDIT_SE_USER:
+                case AUDIT_SE_ROLE:
+                case AUDIT_SE_TYPE:
+                case AUDIT_SE_SEN:
+                case AUDIT_SE_CLR:
+                        if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
+                                return 1;
+                        break;
                default:
                        if (a->fields[i].val != b->fields[i].val)
                                return 1;
@@ -328,6 +391,81 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
        return 0;
 }
+/* Duplicate selinux field information.  The se_rule is opaque, so must be
+ * re-initialized.
+ * @df: destination field; its type and op must already be set and it must
+ *      be a different object from @sf
+ * @sf: source field whose se_str is copied
+ * Returns 0 on success, -ENOMEM if the string cannot be copied, or the
+ * error from selinux_audit_rule_init().  An -EINVAL from the latter is
+ * logged and reported as success so the field is kept. */
+static inline int audit_dupe_selinux_field(struct audit_field *df,
+                                           struct audit_field *sf)
+{
+        int ret = 0;
+        char *se_str;
+        /* df may be a bitwise copy of sf (see the memcpy in the caller).
+         * Drop the borrowed pointers first so df never ends up owning
+         * sf's allocations, whatever happens below. */
+        df->se_str = NULL;
+        df->se_rule = NULL;
+        /* our own copy of se_str */
+        se_str = kstrdup(sf->se_str, GFP_KERNEL);
+        /* kstrdup() reports failure by returning NULL, not an ERR_PTR, so
+         * an IS_ERR() test would let the failure through. */
+        if (unlikely(!se_str))
+                return -ENOMEM;
+        df->se_str = se_str;
+        /* our own (refreshed) copy of se_rule */
+        ret = selinux_audit_rule_init(df->type, df->op, df->se_str,
+                                      &df->se_rule);
+        /* Keep currently invalid fields around in case they
+         * become valid after a policy reload. */
+        if (ret == -EINVAL) {
+                printk(KERN_WARNING "audit rule for selinux \'%s\' is "
+                       "invalid\n", df->se_str);
+                ret = 0;
+        }
+        return ret;
+}
+/* Duplicate an audit rule.  This will be a deep copy with the exception
+ * of the watch - that pointer is carried over.  The selinux specific fields
+ * will be updated in the copy.  The point is to be able to replace the old
+ * rule with the new rule in the filterlist, then free the old rule.
+ * @old: rule to copy; it is not modified
+ * Returns the new entry, or an ERR_PTR() on allocation failure or when a
+ * selinux field cannot be duplicated.  On failure nothing belonging to
+ * @old is freed. */
+static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
+{
+        u32 fcount = old->field_count;
+        struct audit_entry *entry;
+        struct audit_krule *new;
+        int i, err = 0;
+        entry = audit_init_entry(fcount);
+        if (unlikely(!entry))
+                return ERR_PTR(-ENOMEM);
+        new = &entry->rule;
+        new->vers_ops = old->vers_ops;
+        new->flags = old->flags;
+        new->listnr = old->listnr;
+        new->action = old->action;
+        for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
+                new->mask[i] = old->mask[i];
+        new->buflen = old->buflen;
+        new->field_count = old->field_count;
+        memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
+        /* The memcpy also copied old's se_str/se_rule pointers.  Clear them
+         * so that audit_free_rule() on a partially built copy (error path
+         * below) cannot free memory that old still owns and will free
+         * again later. */
+        for (i = 0; i < fcount; i++) {
+                new->fields[i].se_str = NULL;
+                new->fields[i].se_rule = NULL;
+        }
+        /* deep copy this information, updating the se_rule fields, because
+         * the originals will all be freed when the old rule is freed. */
+        for (i = 0; i < fcount; i++) {
+                switch (new->fields[i].type) {
+                case AUDIT_SE_USER:
+                case AUDIT_SE_ROLE:
+                case AUDIT_SE_TYPE:
+                case AUDIT_SE_SEN:
+                case AUDIT_SE_CLR:
+                        err = audit_dupe_selinux_field(&new->fields[i],
+                                                       &old->fields[i]);
+                        break;
+                }
+                if (err) {
+                        audit_free_rule(entry);
+                        return ERR_PTR(err);
+                }
+        }
+        return entry;
+}
 /* Add rule to given filterlist if not a duplicate.  Protected by
 * audit_netlink_mutex. */
 static inline int audit_add_rule(struct audit_entry *entry,
@@ -448,9 +586,10 @@ static int audit_list_rules(void *_dest)
 * @data: payload data
 * @datasz: size of payload data
 * @loginuid: loginuid of sender
+ * @sid: SE Linux Security ID of sender
 */
 int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
-                         size_t datasz, uid_t loginuid)
+                         size_t datasz, uid_t loginuid, u32 sid)
 {
        struct task_struct *tsk;
        int *dest;
@@ -493,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
                err = audit_add_rule(entry,
                                     &audit_filter_list[entry->rule.listnr]);
-                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                if (sid) {
-                        "auid=%u add rule to list=%d res=%d\n",
+                        char *ctx = NULL;
-                        loginuid, entry->rule.listnr, !err);
+                        u32 len;
+                        if (selinux_ctxid_to_string(sid, &ctx, &len)) {
+                                /* Maybe call audit_panic? */
+                                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                 "auid=%u ssid=%u add rule to list=%d res=%d",
+                                 loginuid, sid, entry->rule.listnr, !err);
+                        } else
+                                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                 "auid=%u subj=%s add rule to list=%d res=%d",
+                                 loginuid, ctx, entry->rule.listnr, !err);
+                        kfree(ctx);
+                } else
+                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                "auid=%u add rule to list=%d res=%d",
+                                loginuid, entry->rule.listnr, !err);
                if (err)
                        audit_free_rule(entry);
@@ -511,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
                err = audit_del_rule(entry,
                                     &audit_filter_list[entry->rule.listnr]);
-                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-                        "auid=%u remove rule from list=%d res=%d\n",
+                if (sid) {
-                        loginuid, entry->rule.listnr, !err);
+                        char *ctx = NULL;
+                        u32 len;
+                        if (selinux_ctxid_to_string(sid, &ctx, &len)) {
+                                /* Maybe call audit_panic? */
+                                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                        "auid=%u ssid=%u remove rule from list=%d res=%d",
+                                         loginuid, sid, entry->rule.listnr, !err);
+                        } else
+                                audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                        "auid=%u subj=%s remove rule from list=%d res=%d",
+                                         loginuid, ctx, entry->rule.listnr, !err);
+                        kfree(ctx);
+                } else
+                        audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+                                "auid=%u remove rule from list=%d res=%d",
+                                loginuid, entry->rule.listnr, !err);
                audit_free_rule(entry);
                break;
@@ -628,3 +796,62 @@ unlock_and_return:
        rcu_read_unlock();
        return result;
 }
+/* Report whether a rule filters on any selinux field.  Returns 1 as soon as
+   a field of type AUDIT_SE_USER, AUDIT_SE_ROLE, AUDIT_SE_TYPE, AUDIT_SE_SEN
+   or AUDIT_SE_CLR is found, and 0 when the rule has none. */
+static inline int audit_rule_has_selinux(struct audit_krule *rule)
+{
+        int idx = 0;
+        while (idx < rule->field_count) {
+                u32 ftype = rule->fields[idx++].type;
+                if (ftype == AUDIT_SE_USER || ftype == AUDIT_SE_ROLE ||
+                    ftype == AUDIT_SE_TYPE || ftype == AUDIT_SE_SEN ||
+                    ftype == AUDIT_SE_CLR)
+                        return 1;
+        }
+        return 0;
+}
+/* This function will re-initialize the se_rule field of all applicable rules.
+ * It will traverse the filter lists searching for rules that contain selinux
+ * specific filter fields.  When such a rule is found, it is copied, the
+ * selinux field is re-initialized, and the old rule is replaced with the
+ * updated rule.  A rule that cannot be copied is removed from its list and
+ * audit_panic() is called.  Returns 0, or the first copy error encountered. */
+int selinux_audit_rule_update(void)
+{
+        struct audit_entry *entry, *n, *nentry;
+        int i, err = 0;
+        /* audit_netlink_mutex synchronizes the writers */
+        mutex_lock(&audit_netlink_mutex);
+        for (i = 0; i < AUDIT_NR_FILTERS; i++) {
+                list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { /* _safe: entry is unlinked inside the loop */
+                        if (!audit_rule_has_selinux(&entry->rule))
+                                continue;
+                        nentry = audit_dupe_rule(&entry->rule);
+                        if (unlikely(IS_ERR(nentry))) {
+                                /* save the first error encountered for the
+                                 * return value */
+                                if (!err)
+                                        err = PTR_ERR(nentry);
+                                audit_panic("error updating selinux filters");
+                                list_del_rcu(&entry->list); /* no replacement available: the rule is dropped */
+                        } else {
+                                list_replace_rcu(&entry->list, &nentry->list);
+                        }
+                        call_rcu(&entry->rcu, audit_free_rule_rcu); /* old entry is freed via RCU callback in both branches */
+                }
+        }
+        mutex_unlock(&audit_netlink_mutex);
+        return err;
+}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7f160df21a23..1c03a4ed1b27 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -58,6 +58,7 @@
 #include <linux/security.h>
 #include <linux/list.h>
 #include <linux/tty.h>
+#include <linux/selinux.h>
 #include "audit.h"
@@ -89,7 +90,7 @@ struct audit_names {
        uid_t           uid;
        gid_t           gid;
        dev_t           rdev;
-        char            *ctx;
+        u32             osid;
 };
 struct audit_aux_data {
@@ -106,7 +107,7 @@ struct audit_aux_data_ipcctl {
        uid_t                   uid;
        gid_t                   gid;
        mode_t                  mode;
-        char                    *ctx;
+        u32                     osid;
 };
 struct audit_aux_data_socketcall {
@@ -167,7 +168,8 @@ static int audit_filter_rules(struct task_struct *tsk,
                              struct audit_context *ctx,
                              enum audit_state *state)
 {
-        int i, j;
+        int i, j, need_sid = 1;
+        u32 sid;
        for (i = 0; i < rule->field_count; i++) {
                struct audit_field *f = &rule->fields[i];
@@ -257,6 +259,27 @@ static int audit_filter_rules(struct task_struct *tsk,
                        if (ctx)
                                result = audit_comparator(ctx->loginuid, f->op, f->val);
                        break;
+                case AUDIT_SE_USER:
+                case AUDIT_SE_ROLE:
+                case AUDIT_SE_TYPE:
+                case AUDIT_SE_SEN:
+                case AUDIT_SE_CLR:
+                        /* NOTE: this may return negative values indicating
+                           a temporary error.  We simply treat this as a
+                           match for now to avoid losing information that
+                           may be wanted.   An error message will also be
+                           logged upon error */
+                        if (f->se_rule) {
+                                if (need_sid) {
+                                        selinux_task_ctxid(tsk, &sid);
+                                        need_sid = 0;
+                                }
+                                result = selinux_audit_rule_match(sid, f->type,
+                                                                  f->op,
+                                                                  f->se_rule,
+                                                                  ctx);
+                        }
+                        break;
                case AUDIT_ARG0:
                case AUDIT_ARG1:
                case AUDIT_ARG2:
@@ -329,7 +352,6 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
        return AUDIT_BUILD_CONTEXT;
 }
-/* This should be called with task_lock() held. */
 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
                                                      int return_valid,
                                                      int return_code)
@@ -391,9 +413,6 @@ static inline void audit_free_names(struct audit_context *context)
 #endif
        for (i = 0; i < context->name_count; i++) {
-                char *p = context->names[i].ctx;
-                context->names[i].ctx = NULL;
-                kfree(p);
                if (context->names[i].name)
                        __putname(context->names[i].name);
        }
@@ -416,11 +435,6 @@ static inline void audit_free_aux(struct audit_context *context)
                        dput(axi->dentry);
                        mntput(axi->mnt);
                }
-                if ( aux->type == AUDIT_IPC ) {
-                        struct audit_aux_data_ipcctl *axi = (void *)aux;
-                        if (axi->ctx)
-                                kfree(axi->ctx);
-                }
                context->aux = aux->next;
                kfree(aux);
@@ -506,7 +520,7 @@ static inline void audit_free_context(struct audit_context *context)
                printk(KERN_ERR "audit: freed %d contexts\n", count);
 }
-static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask)
+static void audit_log_task_context(struct audit_buffer *ab)
 {
        char *ctx = NULL;
        ssize_t len = 0;
@@ -518,7 +532,7 @@ static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask)
                return;
        }
-        ctx = kmalloc(len, gfp_mask);
+        ctx = kmalloc(len, GFP_KERNEL);
        if (!ctx)
                goto error_path;
@@ -536,47 +550,46 @@ error_path:
        return;
 }
-static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask)
+static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
-        char name[sizeof(current->comm)];
+        char name[sizeof(tsk->comm)];
-        struct mm_struct *mm = current->mm;
+        struct mm_struct *mm = tsk->mm;
        struct vm_area_struct *vma;
-        get_task_comm(name, current);
+        /* tsk == current */
+        get_task_comm(name, tsk);
        audit_log_format(ab, " comm=");
        audit_log_untrustedstring(ab, name);
-        if (!mm)
+        if (mm) {
-                return;
+                down_read(&mm->mmap_sem);
+                vma = mm->mmap;
-        /*
+                while (vma) {
-         * this is brittle; all callers that pass GFP_ATOMIC will have
+                        if ((vma->vm_flags & VM_EXECUTABLE) &&
-         * NULL current->mm and we won't get here.
+                            vma->vm_file) {
-         */
+                                audit_log_d_path(ab, "exe=",
-        down_read(&mm->mmap_sem);
+                                                 vma->vm_file->f_dentry,
-        vma = mm->mmap;
+                                                 vma->vm_file->f_vfsmnt);
-        while (vma) {
+                                break;
-                if ((vma->vm_flags & VM_EXECUTABLE) &&
+                        }
-                    vma->vm_file) {
+                        vma = vma->vm_next;
-                        audit_log_d_path(ab, "exe=",
-                                         vma->vm_file->f_dentry,
-                                         vma->vm_file->f_vfsmnt);
-                        break;
                }
-                vma = vma->vm_next;
+                up_read(&mm->mmap_sem);
        }
-        up_read(&mm->mmap_sem);
+        audit_log_task_context(ab);
-        audit_log_task_context(ab, gfp_mask);
 }
-static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
+static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
-        int i;
+        int i, call_panic = 0;
        struct audit_buffer *ab;
        struct audit_aux_data *aux;
        const char *tty;
-        ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL);
+        /* tsk == current */
+        ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
        if (!ab)
                return;         /* audit_panic has been called */
        audit_log_format(ab, "arch=%x syscall=%d",
@@ -587,8 +600,8 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
                audit_log_format(ab, " success=%s exit=%ld", 
                                 (context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
                                 context->return_code);
-        if (current->signal->tty && current->signal->tty->name)
+        if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
-                tty = current->signal->tty->name;
+                tty = tsk->signal->tty->name;
        else
                tty = "(none)";
        audit_log_format(ab,
@@ -607,12 +620,12 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
                  context->gid,
                  context->euid, context->suid, context->fsuid,
                  context->egid, context->sgid, context->fsgid, tty);
-        audit_log_task_info(ab, gfp_mask);
+        audit_log_task_info(ab, tsk);
        audit_log_end(ab);
        for (aux = context->aux; aux; aux = aux->next) {
-                ab = audit_log_start(context, gfp_mask, aux->type);
+                ab = audit_log_start(context, GFP_KERNEL, aux->type);
                if (!ab)
                        continue; /* audit_panic has been called */
@@ -620,8 +633,39 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
                case AUDIT_IPC: {
                        struct audit_aux_data_ipcctl *axi = (void *)aux;
                        audit_log_format(ab, 
-                                         " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s",
+                                 " qbytes=%lx iuid=%u igid=%u mode=%x",
-                                         axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx);
+                                 axi->qbytes, axi->uid, axi->gid, axi->mode);
+                        if (axi->osid != 0) {
+                                char *ctx = NULL;
+                                u32 len;
+                                if (selinux_ctxid_to_string(
+                                                axi->osid, &ctx, &len)) {
+                                        audit_log_format(ab, " osid=%u",
+                                                        axi->osid);
+                                        call_panic = 1;
+                                } else
+                                        audit_log_format(ab, " obj=%s", ctx);
+                                kfree(ctx);
+                        }
+                        break; }
+                case AUDIT_IPC_SET_PERM: {
+                        struct audit_aux_data_ipcctl *axi = (void *)aux;
+                        audit_log_format(ab,
+                                " new qbytes=%lx new iuid=%u new igid=%u new mode=%x",
+                                axi->qbytes, axi->uid, axi->gid, axi->mode);
+                        if (axi->osid != 0) {
+                                char *ctx = NULL;
+                                u32 len;
+                                if (selinux_ctxid_to_string(
+                                                axi->osid, &ctx, &len)) {
+                                        audit_log_format(ab, " osid=%u",
+                                                        axi->osid);
+                                        call_panic = 1;
+                                } else
+                                        audit_log_format(ab, " obj=%s", ctx);
+                                kfree(ctx);
+                        }
                        break; }
                case AUDIT_SOCKETCALL: {
@@ -649,7 +693,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
        }
        if (context->pwd && context->pwdmnt) {
-                ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
+                ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
                if (ab) {
                        audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
                        audit_log_end(ab);
@@ -659,7 +703,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
                unsigned long ino  = context->names[i].ino;
                unsigned long pino = context->names[i].pino;
-                ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
+                ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
                if (!ab)
                        continue; /* audit_panic has been called */
@@ -685,32 +729,35 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
                                         context->names[i].gid, 
                                         MAJOR(context->names[i].rdev), 
                                         MINOR(context->names[i].rdev));
-                if (context->names[i].ctx) {
+                if (context->names[i].osid != 0) {
-                        audit_log_format(ab, " obj=%s",
+                        char *ctx = NULL;
-                                        context->names[i].ctx);
+                        u32 len;
+                        if (selinux_ctxid_to_string(
+                                context->names[i].osid, &ctx, &len)) {
+                                audit_log_format(ab, " osid=%u",
+                                                context->names[i].osid);
+                                call_panic = 2;
+                        } else
+                                audit_log_format(ab, " obj=%s", ctx);
+                        kfree(ctx);
                }
                audit_log_end(ab);
        }
+        if (call_panic)
+                audit_panic("error converting sid to string");
 }
 /**
 * audit_free - free a per-task audit context
 * @tsk: task whose audit context block to free
 *
- * Called from copy_process and __put_task_struct.
+ * Called from copy_process and do_exit
 */
 void audit_free(struct task_struct *tsk)
 {
        struct audit_context *context;
-        /*
-         * No need to lock the task - when we execute audit_free()
-         * then the task has no external references anymore, and
-         * we are tearing it down. (The locking also confuses
-         * DEBUG_LOCKDEP - this freeing may occur in softirq
-         * contexts as well, via RCU.)
-         */
        context = audit_get_context(tsk, 0, 0);
        if (likely(!context))
                return;
@@ -719,8 +766,9 @@ void audit_free(struct task_struct *tsk)
         * function (e.g., exit_group), then free context block. 
         * We use GFP_ATOMIC here because we might be doing this 
         * in the context of the idle thread */
+        /* that can happen only if we are called from do_exit() */
        if (context->in_syscall && context->auditable)
-                audit_log_exit(context, GFP_ATOMIC);
+                audit_log_exit(context, tsk);
        audit_free_context(context);
 }
@@ -743,10 +791,11 @@ void audit_free(struct task_struct *tsk)
 * will only be written if another part of the kernel requests that it
 * be written).
 */
-void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
+void audit_syscall_entry(int arch, int major,
                         unsigned long a1, unsigned long a2,
                         unsigned long a3, unsigned long a4)
 {
+        struct task_struct *tsk = current;
        struct audit_context *context = tsk->audit_context;
        enum audit_state     state;
@@ -824,22 +873,18 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
 * message), then write out the syscall information.  In call cases,
 * free the names stored from getname().
 */
-void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
+void audit_syscall_exit(int valid, long return_code)
 {
+        struct task_struct *tsk = current;
        struct audit_context *context;
-        get_task_struct(tsk);
-        task_lock(tsk);
        context = audit_get_context(tsk, valid, return_code);
-        task_unlock(tsk);
-        /* Not having a context here is ok, since the parent may have
-         * called __put_task_struct. */
        if (likely(!context))
-                goto out;
+                return;
        if (context->in_syscall && context->auditable)
-                audit_log_exit(context, GFP_KERNEL);
+                audit_log_exit(context, tsk);
        context->in_syscall = 0;
        context->auditable  = 0;
@@ -854,8 +899,6 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
                audit_free_aux(context);
                tsk->audit_context = context;
        }
- out:
-        put_task_struct(tsk);
 }
 /**
@@ -936,40 +979,11 @@ void audit_putname(const char *name)
 #endif
 }
-void audit_inode_context(int idx, const struct inode *inode)
+static void audit_inode_context(int idx, const struct inode *inode)
 {
        struct audit_context *context = current->audit_context;
-        const char *suffix = security_inode_xattr_getsuffix();
-        char *ctx = NULL;
-        int len = 0;
-        if (!suffix)
-                goto ret;
-        len = security_inode_getsecurity(inode, suffix, NULL, 0, 0);
-        if (len == -EOPNOTSUPP)
-                goto ret;
-        if (len < 0) 
-                goto error_path;
-        ctx = kmalloc(len, GFP_KERNEL);
-        if (!ctx) 
-                goto error_path;
-        len = security_inode_getsecurity(inode, suffix, ctx, len, 0);
+        selinux_get_inode_sid(inode, &context->names[idx].osid);
-        if (len < 0)
-                goto error_path;
-        kfree(context->names[idx].ctx);
-        context->names[idx].ctx = ctx;
-        goto ret;
-error_path:
-        if (ctx)
-                kfree(ctx);
-        audit_panic("error in audit_inode_context");
-ret:
-        return;
 }
@@ -1155,40 +1169,37 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
        return ctx ? ctx->loginuid : -1;
 }
-static char *audit_ipc_context(struct kern_ipc_perm *ipcp)
+/**
+ * audit_ipc_obj - record audit data for ipc object
+ * @ipcp: ipc permissions
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
+        struct audit_aux_data_ipcctl *ax;
        struct audit_context *context = current->audit_context;
-        char *ctx = NULL;
-        int len = 0;
        if (likely(!context))
-                return NULL;
+                return 0;
-        len = security_ipc_getsecurity(ipcp, NULL, 0);
-        if (len == -EOPNOTSUPP)
-                goto ret;
-        if (len < 0)
-                goto error_path;
-        ctx = kmalloc(len, GFP_ATOMIC);
-        if (!ctx)
-                goto error_path;
-        len = security_ipc_getsecurity(ipcp, ctx, len);
+        ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-        if (len < 0)
+        if (!ax)
-                goto error_path;
+                return -ENOMEM;
-        return ctx;
+        ax->uid = ipcp->uid;
+        ax->gid = ipcp->gid;
+        ax->mode = ipcp->mode;
+        selinux_get_ipc_sid(ipcp, &ax->osid);
-error_path:
+        ax->d.type = AUDIT_IPC;
-        kfree(ctx);
+        ax->d.next = context->aux;
-        audit_panic("error in audit_ipc_context");
+        context->aux = (void *)ax;
-ret:
+        return 0;
-        return NULL;
 }
 /**
- * audit_ipc_perms - record audit data for ipc
+ * audit_ipc_set_perm - record audit data for new ipc permissions
 * @qbytes: msgq bytes
 * @uid: msgq user id
 * @gid: msgq group id
@@ -1196,7 +1207,7 @@ ret:
 *
 * Returns 0 for success or NULL context or < 0 on error.
 */
-int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
+int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
 {
        struct audit_aux_data_ipcctl *ax;
        struct audit_context *context = current->audit_context;
@@ -1212,9 +1223,9 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str
        ax->uid = uid;
        ax->gid = gid;
        ax->mode = mode;
-        ax->ctx = audit_ipc_context(ipcp);
+        selinux_get_ipc_sid(ipcp, &ax->osid);
-        ax->d.type = AUDIT_IPC;
+        ax->d.type = AUDIT_IPC_SET_PERM;
        ax->d.next = context->aux;
        context->aux = (void *)ax;
        return 0;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3f..ab81fdd4572b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 * So only GFP_KERNEL allocations, if all nodes in the cpuset are
 * short of memory, might require taking the callback_mutex mutex.
 *
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
+ * The first call here from mm/page_alloc:get_page_from_freelist()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * hardwall cpusets - no allocation on a node outside the cpuset is
+ * no allocation on a node outside the cpuset is allowed (unless in
- * allowed (unless in interrupt, of course).
+ * interrupt, of course).
- *
+ *
- * The second loop doesn't even call here for GFP_ATOMIC requests
+ * The second pass through get_page_from_freelist() doesn't even call
- * (if the __alloc_pages() local variable 'wait' is set).  That check
+ * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
- * and the checks below have the combined affect in the second loop of
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
- * the __alloc_pages() routine that:
+ * in alloc_flags.  That logic and the checks below have the combined
+ * effect that:
 *      in_interrupt - any node ok (current task context irrelevant)
 *      GFP_ATOMIC   - any node ok
 *      GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
 *      GFP_USER     - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    pass in the __GFP_HARDWALL flag set in gfp_mask, which disables
+ *    the code that might scan up ancestor cpusets and sleep.
 **/
 int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
        if (in_interrupt())
                return 1;
        node = z->zone_pgdat->node_id;
+        might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
        if (node_isset(node, current->mems_allowed))
                return 1;
        if (gfp_mask & __GFP_HARDWALL)  /* If hardwall request, stop here */
diff --git a/kernel/exit.c b/kernel/exit.c
index 1a9787ac6173..e95b93282210 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -35,6 +35,7 @@
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
+#include <linux/audit.h> /* for audit_free() */
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -56,7 +57,7 @@ static void __unhash_process(struct task_struct *p)
                detach_pid(p, PIDTYPE_PGID);
                detach_pid(p, PIDTYPE_SID);
-                list_del_init(&p->tasks);
+                list_del_rcu(&p->tasks);
                __get_cpu_var(process_counts)--;
        }
        list_del_rcu(&p->thread_group);
@@ -910,6 +911,8 @@ fastcall NORET_TYPE void do_exit(long code)
        if (unlikely(tsk->compat_robust_list))
                compat_exit_robust_list(tsk);
 #endif
+        if (unlikely(tsk->audit_context))
+                audit_free(tsk);
        exit_mm(tsk);
        exit_sem(tsk);
diff --git a/kernel/extable.c b/kernel/extable.c
index 7501b531ceed..7fe262855317 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
        return e;
 }
-static int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
 {
        if (addr >= (unsigned long)_stext &&
            addr <= (unsigned long)_etext)
diff --git a/kernel/fork.c b/kernel/fork.c
index 3384eb89cb1c..ac8100e3088a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -114,8 +114,6 @@ void __put_task_struct(struct task_struct *tsk)
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
-        if (unlikely(tsk->audit_context))
-                audit_free(tsk);
        security_task_free(tsk);
        free_uid(tsk->user);
        put_group_info(tsk->group_info);
@@ -124,12 +122,6 @@ void __put_task_struct(struct task_struct *tsk)
                free_task(tsk);
 }
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
-        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
-        __put_task_struct(tsk);
-}
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -186,6 +178,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
        atomic_set(&tsk->usage,2);
        atomic_set(&tsk->fs_excl, 0);
        tsk->btrace_seq = 0;
+        tsk->splice_pipe = NULL;
        return tsk;
 }
@@ -1210,7 +1203,7 @@ static task_t *copy_process(unsigned long clone_flags,
                        attach_pid(p, PIDTYPE_PGID, process_group(p));
                        attach_pid(p, PIDTYPE_SID, p->signal->session);
-                        list_add_tail(&p->tasks, &init_task.tasks);
+                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
                        __get_cpu_var(process_counts)++;
                }
                attach_pid(p, PIDTYPE_PID, p->pid);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d2a7296c8251..01fa2ae98a85 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -456,6 +456,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
        return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start);
 /**
 * hrtimer_try_to_cancel - try to deactivate a timer
@@ -484,6 +485,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
        return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 /**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
@@ -504,6 +506,7 @@ int hrtimer_cancel(struct hrtimer *timer)
                cpu_relax();
        }
 }
+EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
 * hrtimer_get_remaining - get remaining time for the timer
@@ -522,6 +525,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
        return rem;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 #ifdef CONFIG_NO_IDLE_HZ
 /**
@@ -580,6 +584,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
        timer->base = &bases[clock_id];
        timer->node.rb_parent = HRTIMER_INACTIVE;
 }
+EXPORT_SYMBOL_GPL(hrtimer_init);
 /**
 * hrtimer_get_res - get the timer resolution for a clock
@@ -599,6 +604,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
        return 0;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_res);
 /*
 * Expire the per base hrtimer-queue:
@@ -836,7 +842,7 @@ static void migrate_hrtimers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
+static int hrtimer_cpu_notify(struct notifier_block *self,
                                        unsigned long action, void *hcpu)
 {
        long cpu = (long)hcpu;
@@ -860,7 +866,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
        return NOTIFY_OK;
 }
-static struct notifier_block __devinitdata hrtimers_nb = {
+static struct notifier_block hrtimers_nb = {
        .notifier_call = hrtimer_cpu_notify,
 };
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac766ad573e8..1279e3499534 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -246,8 +246,10 @@ int setup_irq(unsigned int irq, struct irqaction * new)
 mismatch:
        spin_unlock_irqrestore(&desc->lock, flags);
-        printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
+        if (!(new->flags & SA_PROBEIRQ)) {
-        dump_stack();
+                printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
+                dump_stack();
+        }
        return -EBUSY;
 }
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1156eb0977d0..1fbf466a29aa 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -585,6 +585,9 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
        int i;
        rp->kp.pre_handler = pre_handler_kretprobe;
+        rp->kp.post_handler = NULL;
+        rp->kp.fault_handler = NULL;
+        rp->kp.break_handler = NULL;
        /* Pre-allocate memory for max kretprobe instances */
        if (rp->maxactive <= 0) {
diff --git a/kernel/module.c b/kernel/module.c
index d24deb0dbbc9..bbe04862e1b0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put);
 void symbol_put_addr(void *addr)
 {
-        unsigned long flags;
+        struct module *modaddr;
-        spin_lock_irqsave(&modlist_lock, flags);
+        if (core_kernel_text((unsigned long)addr))
-        if (!kernel_text_address((unsigned long)addr))
+                return;
-                BUG();
-        module_put(module_text_address((unsigned long)addr));
+        if (!(modaddr = module_text_address((unsigned long)addr)))
-        spin_unlock_irqrestore(&modlist_lock, flags);
+                BUG();
+        module_put(modaddr);
 }
 EXPORT_SYMBOL_GPL(symbol_put_addr);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index ee371f50ccaa..a6d9ef46009e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -272,7 +272,7 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
                if (*s && !strncmp(buf, *s, len))
                        break;
        }
-        if (*s)
+        if (state < PM_SUSPEND_MAX && *s)
                error = enter_state(state);
        else
                error = -EINVAL;
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 0f6908cce1dd..84063ac8fcfc 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -75,25 +75,6 @@ struct pm_dev *pm_register(pm_dev_t type,
        return dev;
 }
-/**
- *      pm_unregister -  unregister a device with power management
- *      @dev: device to unregister
- *
- *      Remove a device from the power management notification lists. The
- *      dev passed must be a handle previously returned by pm_register.
- */
- 
-void pm_unregister(struct pm_dev *dev)
-{
-        if (dev) {
-                mutex_lock(&pm_devs_lock);
-                list_del(&dev->entry);
-                mutex_unlock(&pm_devs_lock);
-                kfree(dev);
-        }
-}
 static void __pm_unregister(struct pm_dev *dev)
 {
        if (dev) {
@@ -258,7 +239,6 @@ int pm_send_all(pm_request_t rqst, void *data)
 }
 EXPORT_SYMBOL(pm_register);
-EXPORT_SYMBOL(pm_unregister);
 EXPORT_SYMBOL(pm_unregister_all);
 EXPORT_SYMBOL(pm_send_all);
 EXPORT_SYMBOL(pm_active);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index c5863d02c89e..3eeedbb13b78 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -240,14 +240,15 @@ static void copy_data_pages(struct pbe *pblist)
 *      free_pagedir - free pages allocated with alloc_pagedir()
 */
-static void free_pagedir(struct pbe *pblist)
+static void free_pagedir(struct pbe *pblist, int clear_nosave_free)
 {
        struct pbe *pbe;
        while (pblist) {
                pbe = (pblist + PB_PAGE_SKIP)->next;
                ClearPageNosave(virt_to_page(pblist));
-                ClearPageNosaveFree(virt_to_page(pblist));
+                if (clear_nosave_free)
+                        ClearPageNosaveFree(virt_to_page(pblist));
                free_page((unsigned long)pblist);
                pblist = pbe;
        }
@@ -389,7 +390,7 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed
                pbe->next = alloc_image_page(gfp_mask, safe_needed);
        }
        if (!pbe) { /* get_zeroed_page() failed */
-                free_pagedir(pblist);
+                free_pagedir(pblist, 1);
                pblist = NULL;
        } else
                create_pbe_list(pblist, nr_pages);
@@ -736,7 +737,7 @@ static int create_image(struct snapshot_handle *handle)
                pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
                if (pblist)
                        copy_page_backup_list(pblist, p);
-                free_pagedir(p);
+                free_pagedir(p, 0);
                if (!pblist)
                        error = -ENOMEM;
        }
diff --git a/kernel/profile.c b/kernel/profile.c
index 5a730fdb1a2c..68afe121e507 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -299,7 +299,7 @@ out:
 }
 #ifdef CONFIG_HOTPLUG_CPU
-static int __devinit profile_cpu_callback(struct notifier_block *info,
+static int profile_cpu_callback(struct notifier_block *info,
                                        unsigned long action, void *__cpu)
 {
        int node, cpu = (unsigned long)__cpu;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0eeb7e66722c..921c22ad16e4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -56,10 +56,6 @@ void ptrace_untrace(task_t *child)
                        signal_wake_up(child, 1);
                }
        }
-        if (child->signal->flags & SIGNAL_GROUP_EXIT) {
-                sigaddset(&child->pending.signal, SIGKILL);
-                signal_wake_up(child, 1);
-        }
        spin_unlock(&child->sighand->siglock);
 }
@@ -81,7 +77,8 @@ void __ptrace_unlink(task_t *child)
                add_parent(child);
        }
-        ptrace_untrace(child);
+        if (child->state == TASK_TRACED)
+                ptrace_untrace(child);
 }
 /*
@@ -151,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task)
 int ptrace_attach(struct task_struct *task)
 {
        int retval;
-        task_lock(task);
        retval = -EPERM;
        if (task->pid <= 1)
-                goto bad;
+                goto out;
        if (task->tgid == current->tgid)
-                goto bad;
+                goto out;
+repeat:
+        /*
+         * Nasty, nasty.
+         *
+         * We want to hold both the task-lock and the
+         * tasklist_lock for writing at the same time.
+         * But that's against the rules (tasklist_lock
+         * is taken for reading by interrupts on other
+         * cpu's that may have task_lock).
+         */
+        task_lock(task);
+        local_irq_disable();
+        if (!write_trylock(&tasklist_lock)) {
+                local_irq_enable();
+                task_unlock(task);
+                do {
+                        cpu_relax();
+                } while (!write_can_lock(&tasklist_lock));
+                goto repeat;
+        }
        /* the same process cannot be attached many times */
        if (task->ptrace & PT_PTRACED)
                goto bad;
@@ -169,17 +188,15 @@ int ptrace_attach(struct task_struct *task)
                                      ? PT_ATTACHED : 0);
        if (capable(CAP_SYS_PTRACE))
                task->ptrace |= PT_PTRACE_CAP;
-        task_unlock(task);
-        write_lock_irq(&tasklist_lock);
        __ptrace_link(task, current);
-        write_unlock_irq(&tasklist_lock);
        force_sig_specific(SIGSTOP, task);
-        return 0;
 bad:
+        write_unlock_irq(&tasklist_lock);
        task_unlock(task);
+out:
        return retval;
 }
@@ -420,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request,
 */
 int ptrace_traceme(void)
 {
-        int ret;
+        int ret = -EPERM;
        /*
         * Are we already being traced?
         */
-        if (current->ptrace & PT_PTRACED)
+        task_lock(current);
-                return -EPERM;
+        if (!(current->ptrace & PT_PTRACED)) {
-        ret = security_ptrace(current->parent, current);
+                ret = security_ptrace(current->parent, current);
-        if (ret)
+                /*
-                return -EPERM;
+                 * Set the ptrace bit in the process ptrace flags.
-        /*
+                 */
-         * Set the ptrace bit in the process ptrace flags.
+                if (!ret)
-         */
+                        current->ptrace |= PT_PTRACED;
-        current->ptrace |= PT_PTRACED;
+        }
-        return 0;
+        task_unlock(current);
+        return ret;
 }
 /**
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 13458bbaa1be..2058f88c7bbb 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
        return 0;
 }
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the given CPU, returning 1 if so.  This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
 int rcu_pending(int cpu)
 {
        return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
                __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the given CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
+        return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+}
 void rcu_check_callbacks(int cpu, int user)
 {
        if (user || 
@@ -520,7 +539,7 @@ static void __devinit rcu_online_cpu(int cpu)
        tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
 }
-static int __devinit rcu_cpu_notify(struct notifier_block *self, 
+static int rcu_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
 {
        long cpu = (long)hcpu;
@@ -537,7 +556,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
        return NOTIFY_OK;
 }
-static struct notifier_block __devinitdata rcu_nb = {
+static struct notifier_block rcu_nb = {
        .notifier_call  = rcu_cpu_notify,
 };
diff --git a/kernel/sched.c b/kernel/sched.c
index 365f0b90b4de..c13f1bd2df7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,55 +665,13 @@ static int effective_prio(task_t *p)
 }
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
-        int limit;
-        /*
-         * Arrays were recently switched, all is well
-         */
-        if (!rq->expired_timestamp)
-                return 0;
-        limit = STARVATION_LIMIT * rq->nr_running;
-        /*
-         * It's time to switch arrays
-         */
-        if (jiffies - rq->expired_timestamp >= limit)
-                return 1;
-        /*
-         * There's a better selection in the expired array
-         */
-        if (rq->curr->static_prio > rq->best_expired_prio)
-                return 1;
-        /*
-         * All is well
-         */
-        return 0;
-}
-/*
 * __activate_task - move a task to the runqueue.
 */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
        prio_array_t *target = rq->active;
-        if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+        if (batch_task(p))
                target = rq->expired;
        enqueue_task(p, target);
        rq->nr_running++;
@@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switches decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+        ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+                (jiffies - (rq)->expired_timestamp >= \
+                        STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+                        ((rq)->curr->static_prio > (rq)->best_expired_prio))
+/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2666,7 +2640,7 @@ void scheduler_tick(void)
                if (!rq->expired_timestamp)
                        rq->expired_timestamp = jiffies;
-                if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+                if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
                        enqueue_task(p, rq->expired);
                        if (p->static_prio < rq->best_expired_prio)
                                rq->best_expired_prio = p->static_prio;
@@ -4814,7 +4788,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 /* Register at highest priority so that task migration (migrate_all_tasks)
 * happens before everything else.
 */
-static struct notifier_block __devinitdata migration_notifier = {
+static struct notifier_block migration_notifier = {
        .notifier_call = migration_call,
        .priority = 10
 };
diff --git a/kernel/signal.c b/kernel/signal.c
index b14f895027c3..e5f8aea78ffe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1754,9 +1754,9 @@ relock:
                        /* Let the debugger run.  */
                        ptrace_stop(signr, signr, info);
-                        /* We're back.  Did the debugger cancel the sig or group_exit? */
+                        /* We're back.  Did the debugger cancel the sig?  */
                        signr = current->exit_code;
-                        if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT)
+                        if (signr == 0)
                                continue;
                        current->exit_code = 0;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ec8fed42a86f..336f92d64e2e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-static int __devinit cpu_callback(struct notifier_block *nfb,
+static int cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
 {
@@ -484,7 +484,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
        return NOTIFY_OK;
 }
-static struct notifier_block __devinitdata cpu_nfb = {
+static struct notifier_block cpu_nfb = {
        .notifier_call = cpu_callback
 };
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index ced91e1ff564..14c7faf02909 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu)
 /*
 * Create/destroy watchdog threads as CPUs come and go:
 */
-static int __devinit
+static int
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
        int hotcpu = (unsigned long)hcpu;
@@ -140,7 +140,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
        return NOTIFY_OK;
 }
-static struct notifier_block __devinitdata cpu_nfb = {
+static struct notifier_block cpu_nfb = {
        .notifier_call = cpu_callback
 };
diff --git a/kernel/timer.c b/kernel/timer.c
index 883773788836..9e49deed468c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -541,6 +541,22 @@ found:
        }
        spin_unlock(&base->lock);
+        /*
+         * It can happen that other CPUs service timer IRQs and increment
+         * jiffies, but we have not yet got a local timer tick to process
+         * the timer wheels.  In that case, the expiry time can be before
+         * jiffies, but since the high-resolution timer here is relative to
+         * jiffies, the default expression when high-resolution timers are
+         * not active,
+         *
+         *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+         *
+         * would falsely evaluate to true.  If that is the case, just
+         * return jiffies so that we can immediately fire the local timer
+         */
+        if (time_before(expires, jiffies))
+                return jiffies;
        if (time_before(hr_expires, expires))
                return hr_expires;
@@ -1314,7 +1330,7 @@ static void __devinit migrate_timers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-static int __devinit timer_cpu_notify(struct notifier_block *self, 
+static int timer_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
 {
        long cpu = (long)hcpu;
@@ -1334,7 +1350,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
        return NOTIFY_OK;
 }
-static struct notifier_block __devinitdata timers_nb = {
+static struct notifier_block timers_nb = {
        .notifier_call  = timer_cpu_notify,
 };
diff --git a/kernel/uid16.c b/kernel/uid16.c
index aa25605027c8..187e2a423878 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -20,43 +20,67 @@
 asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group)
 {
-        return sys_chown(filename, low2highuid(user), low2highgid(group));
+        long ret = sys_chown(filename, low2highuid(user), low2highgid(group));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group)
 {
-        return sys_lchown(filename, low2highuid(user), low2highgid(group));
+        long ret = sys_lchown(filename, low2highuid(user), low2highgid(group));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
 {
-        return sys_fchown(fd, low2highuid(user), low2highgid(group));
+        long ret = sys_fchown(fd, low2highuid(user), low2highgid(group));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
 {
-        return sys_setregid(low2highgid(rgid), low2highgid(egid));
+        long ret = sys_setregid(low2highgid(rgid), low2highgid(egid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_setgid16(old_gid_t gid)
 {
-        return sys_setgid(low2highgid(gid));
+        long ret = sys_setgid(low2highgid(gid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
 {
-        return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+        long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_setuid16(old_uid_t uid)
 {
-        return sys_setuid(low2highuid(uid));
+        long ret = sys_setuid(low2highuid(uid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
 {
-        return sys_setresuid(low2highuid(ruid), low2highuid(euid),
+        long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid),
-                low2highuid(suid));
+                                 low2highuid(suid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
@@ -72,8 +96,11 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid,
 asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
 {
-        return sys_setresgid(low2highgid(rgid), low2highgid(egid),
+        long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid),
-                low2highgid(sgid));
+                                 low2highgid(sgid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
@@ -89,12 +116,18 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid,
 asmlinkage long sys_setfsuid16(old_uid_t uid)
 {
-        return sys_setfsuid(low2highuid(uid));
+        long ret = sys_setfsuid(low2highuid(uid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 asmlinkage long sys_setfsgid16(old_gid_t gid)
 {
-        return sys_setfsgid(low2highgid(gid));
+        long ret = sys_setfsgid(low2highgid(gid));
+        /* avoid REGPARM breakage on x86: */
+        prevent_tail_call(ret);
+        return ret;
 }
 static int groups16_to_user(old_gid_t __user *grouplist,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e9e464a90376..880fb415a8f6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -547,7 +547,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
 }
 /* We're holding the cpucontrol mutex here */
-static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
+static int workqueue_cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
 {