diff options
Diffstat (limited to 'kernel')
115 files changed, 3199 insertions, 1716 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 76768ee812b2..08561f1acd13 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks | |||
| @@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER | |||
| 231 | def_bool y | 231 | def_bool y |
| 232 | depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | 232 | depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW |
| 233 | 233 | ||
| 234 | config LOCK_SPIN_ON_OWNER | ||
| 235 | def_bool y | ||
| 236 | depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER | ||
| 237 | |||
| 234 | config ARCH_USE_QUEUE_RWLOCK | 238 | config ARCH_USE_QUEUE_RWLOCK |
| 235 | bool | 239 | bool |
| 236 | 240 | ||
diff --git a/kernel/Makefile b/kernel/Makefile index a59481a3fa6c..1408b3353a3c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -13,8 +13,8 @@ obj-y = fork.o exec_domain.o panic.o \ | |||
| 13 | 13 | ||
| 14 | ifdef CONFIG_FUNCTION_TRACER | 14 | ifdef CONFIG_FUNCTION_TRACER |
| 15 | # Do not trace debug files and internal ftrace files | 15 | # Do not trace debug files and internal ftrace files |
| 16 | CFLAGS_REMOVE_cgroup-debug.o = -pg | 16 | CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) |
| 17 | CFLAGS_REMOVE_irq_work.o = -pg | 17 | CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) |
| 18 | endif | 18 | endif |
| 19 | 19 | ||
| 20 | # cond_syscall is currently not LTO compatible | 20 | # cond_syscall is currently not LTO compatible |
| @@ -26,6 +26,7 @@ obj-y += power/ | |||
| 26 | obj-y += printk/ | 26 | obj-y += printk/ |
| 27 | obj-y += irq/ | 27 | obj-y += irq/ |
| 28 | obj-y += rcu/ | 28 | obj-y += rcu/ |
| 29 | obj-y += livepatch/ | ||
| 29 | 30 | ||
| 30 | obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o | 31 | obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o |
| 31 | obj-$(CONFIG_FREEZER) += freezer.o | 32 | obj-$(CONFIG_FREEZER) += freezer.o |
| @@ -142,7 +143,7 @@ endif | |||
| 142 | kernel/system_certificates.o: $(obj)/x509_certificate_list | 143 | kernel/system_certificates.o: $(obj)/x509_certificate_list |
| 143 | 144 | ||
| 144 | quiet_cmd_x509certs = CERTS $@ | 145 | quiet_cmd_x509certs = CERTS $@ |
| 145 | cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)") | 146 | cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; $(kecho) " - Including cert $(X509)") |
| 146 | 147 | ||
| 147 | targets += $(obj)/x509_certificate_list | 148 | targets += $(obj)/x509_certificate_list |
| 148 | $(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list | 149 | $(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list |
diff --git a/kernel/acct.c b/kernel/acct.c index 33738ef972f3..e6c10d1a4058 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
| @@ -76,10 +76,11 @@ int acct_parm[3] = {4, 2, 30}; | |||
| 76 | /* | 76 | /* |
| 77 | * External references and all of the globals. | 77 | * External references and all of the globals. |
| 78 | */ | 78 | */ |
| 79 | static void do_acct_process(struct bsd_acct_struct *acct); | ||
| 80 | 79 | ||
| 81 | struct bsd_acct_struct { | 80 | struct bsd_acct_struct { |
| 82 | struct fs_pin pin; | 81 | struct fs_pin pin; |
| 82 | atomic_long_t count; | ||
| 83 | struct rcu_head rcu; | ||
| 83 | struct mutex lock; | 84 | struct mutex lock; |
| 84 | int active; | 85 | int active; |
| 85 | unsigned long needcheck; | 86 | unsigned long needcheck; |
| @@ -89,6 +90,8 @@ struct bsd_acct_struct { | |||
| 89 | struct completion done; | 90 | struct completion done; |
| 90 | }; | 91 | }; |
| 91 | 92 | ||
| 93 | static void do_acct_process(struct bsd_acct_struct *acct); | ||
| 94 | |||
| 92 | /* | 95 | /* |
| 93 | * Check the amount of free space and suspend/resume accordingly. | 96 | * Check the amount of free space and suspend/resume accordingly. |
| 94 | */ | 97 | */ |
| @@ -124,32 +127,56 @@ out: | |||
| 124 | return acct->active; | 127 | return acct->active; |
| 125 | } | 128 | } |
| 126 | 129 | ||
| 130 | static void acct_put(struct bsd_acct_struct *p) | ||
| 131 | { | ||
| 132 | if (atomic_long_dec_and_test(&p->count)) | ||
| 133 | kfree_rcu(p, rcu); | ||
| 134 | } | ||
| 135 | |||
| 136 | static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) | ||
| 137 | { | ||
| 138 | return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; | ||
| 139 | } | ||
| 140 | |||
| 127 | static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) | 141 | static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) |
| 128 | { | 142 | { |
| 129 | struct bsd_acct_struct *res; | 143 | struct bsd_acct_struct *res; |
| 130 | again: | 144 | again: |
| 131 | smp_rmb(); | 145 | smp_rmb(); |
| 132 | rcu_read_lock(); | 146 | rcu_read_lock(); |
| 133 | res = ACCESS_ONCE(ns->bacct); | 147 | res = to_acct(ACCESS_ONCE(ns->bacct)); |
| 134 | if (!res) { | 148 | if (!res) { |
| 135 | rcu_read_unlock(); | 149 | rcu_read_unlock(); |
| 136 | return NULL; | 150 | return NULL; |
| 137 | } | 151 | } |
| 138 | if (!atomic_long_inc_not_zero(&res->pin.count)) { | 152 | if (!atomic_long_inc_not_zero(&res->count)) { |
| 139 | rcu_read_unlock(); | 153 | rcu_read_unlock(); |
| 140 | cpu_relax(); | 154 | cpu_relax(); |
| 141 | goto again; | 155 | goto again; |
| 142 | } | 156 | } |
| 143 | rcu_read_unlock(); | 157 | rcu_read_unlock(); |
| 144 | mutex_lock(&res->lock); | 158 | mutex_lock(&res->lock); |
| 145 | if (!res->ns) { | 159 | if (res != to_acct(ACCESS_ONCE(ns->bacct))) { |
| 146 | mutex_unlock(&res->lock); | 160 | mutex_unlock(&res->lock); |
| 147 | pin_put(&res->pin); | 161 | acct_put(res); |
| 148 | goto again; | 162 | goto again; |
| 149 | } | 163 | } |
| 150 | return res; | 164 | return res; |
| 151 | } | 165 | } |
| 152 | 166 | ||
| 167 | static void acct_pin_kill(struct fs_pin *pin) | ||
| 168 | { | ||
| 169 | struct bsd_acct_struct *acct = to_acct(pin); | ||
| 170 | mutex_lock(&acct->lock); | ||
| 171 | do_acct_process(acct); | ||
| 172 | schedule_work(&acct->work); | ||
| 173 | wait_for_completion(&acct->done); | ||
| 174 | cmpxchg(&acct->ns->bacct, pin, NULL); | ||
| 175 | mutex_unlock(&acct->lock); | ||
| 176 | pin_remove(pin); | ||
| 177 | acct_put(acct); | ||
| 178 | } | ||
| 179 | |||
| 153 | static void close_work(struct work_struct *work) | 180 | static void close_work(struct work_struct *work) |
| 154 | { | 181 | { |
| 155 | struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); | 182 | struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); |
| @@ -160,44 +187,13 @@ static void close_work(struct work_struct *work) | |||
| 160 | complete(&acct->done); | 187 | complete(&acct->done); |
| 161 | } | 188 | } |
| 162 | 189 | ||
| 163 | static void acct_kill(struct bsd_acct_struct *acct, | ||
| 164 | struct bsd_acct_struct *new) | ||
| 165 | { | ||
| 166 | if (acct) { | ||
| 167 | struct pid_namespace *ns = acct->ns; | ||
| 168 | do_acct_process(acct); | ||
| 169 | INIT_WORK(&acct->work, close_work); | ||
| 170 | init_completion(&acct->done); | ||
| 171 | schedule_work(&acct->work); | ||
| 172 | wait_for_completion(&acct->done); | ||
| 173 | pin_remove(&acct->pin); | ||
| 174 | ns->bacct = new; | ||
| 175 | acct->ns = NULL; | ||
| 176 | atomic_long_dec(&acct->pin.count); | ||
| 177 | mutex_unlock(&acct->lock); | ||
| 178 | pin_put(&acct->pin); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | static void acct_pin_kill(struct fs_pin *pin) | ||
| 183 | { | ||
| 184 | struct bsd_acct_struct *acct; | ||
| 185 | acct = container_of(pin, struct bsd_acct_struct, pin); | ||
| 186 | mutex_lock(&acct->lock); | ||
| 187 | if (!acct->ns) { | ||
| 188 | mutex_unlock(&acct->lock); | ||
| 189 | pin_put(pin); | ||
| 190 | acct = NULL; | ||
| 191 | } | ||
| 192 | acct_kill(acct, NULL); | ||
| 193 | } | ||
| 194 | |||
| 195 | static int acct_on(struct filename *pathname) | 190 | static int acct_on(struct filename *pathname) |
| 196 | { | 191 | { |
| 197 | struct file *file; | 192 | struct file *file; |
| 198 | struct vfsmount *mnt, *internal; | 193 | struct vfsmount *mnt, *internal; |
| 199 | struct pid_namespace *ns = task_active_pid_ns(current); | 194 | struct pid_namespace *ns = task_active_pid_ns(current); |
| 200 | struct bsd_acct_struct *acct, *old; | 195 | struct bsd_acct_struct *acct; |
| 196 | struct fs_pin *old; | ||
| 201 | int err; | 197 | int err; |
| 202 | 198 | ||
| 203 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); | 199 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); |
| @@ -238,21 +234,21 @@ static int acct_on(struct filename *pathname) | |||
| 238 | mnt = file->f_path.mnt; | 234 | mnt = file->f_path.mnt; |
| 239 | file->f_path.mnt = internal; | 235 | file->f_path.mnt = internal; |
| 240 | 236 | ||
| 241 | atomic_long_set(&acct->pin.count, 1); | 237 | atomic_long_set(&acct->count, 1); |
| 242 | acct->pin.kill = acct_pin_kill; | 238 | init_fs_pin(&acct->pin, acct_pin_kill); |
| 243 | acct->file = file; | 239 | acct->file = file; |
| 244 | acct->needcheck = jiffies; | 240 | acct->needcheck = jiffies; |
| 245 | acct->ns = ns; | 241 | acct->ns = ns; |
| 246 | mutex_init(&acct->lock); | 242 | mutex_init(&acct->lock); |
| 243 | INIT_WORK(&acct->work, close_work); | ||
| 244 | init_completion(&acct->done); | ||
| 247 | mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ | 245 | mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ |
| 248 | pin_insert(&acct->pin, mnt); | 246 | pin_insert(&acct->pin, mnt); |
| 249 | 247 | ||
| 250 | old = acct_get(ns); | 248 | rcu_read_lock(); |
| 251 | if (old) | 249 | old = xchg(&ns->bacct, &acct->pin); |
| 252 | acct_kill(old, acct); | ||
| 253 | else | ||
| 254 | ns->bacct = acct; | ||
| 255 | mutex_unlock(&acct->lock); | 250 | mutex_unlock(&acct->lock); |
| 251 | pin_kill(old); | ||
| 256 | mnt_drop_write(mnt); | 252 | mnt_drop_write(mnt); |
| 257 | mntput(mnt); | 253 | mntput(mnt); |
| 258 | return 0; | 254 | return 0; |
| @@ -288,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
| 288 | mutex_unlock(&acct_on_mutex); | 284 | mutex_unlock(&acct_on_mutex); |
| 289 | putname(tmp); | 285 | putname(tmp); |
| 290 | } else { | 286 | } else { |
| 291 | acct_kill(acct_get(task_active_pid_ns(current)), NULL); | 287 | rcu_read_lock(); |
| 288 | pin_kill(task_active_pid_ns(current)->bacct); | ||
| 292 | } | 289 | } |
| 293 | 290 | ||
| 294 | return error; | 291 | return error; |
| @@ -296,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
| 296 | 293 | ||
| 297 | void acct_exit_ns(struct pid_namespace *ns) | 294 | void acct_exit_ns(struct pid_namespace *ns) |
| 298 | { | 295 | { |
| 299 | acct_kill(acct_get(ns), NULL); | 296 | rcu_read_lock(); |
| 297 | pin_kill(ns->bacct); | ||
| 300 | } | 298 | } |
| 301 | 299 | ||
| 302 | /* | 300 | /* |
| @@ -576,7 +574,7 @@ static void slow_acct_process(struct pid_namespace *ns) | |||
| 576 | if (acct) { | 574 | if (acct) { |
| 577 | do_acct_process(acct); | 575 | do_acct_process(acct); |
| 578 | mutex_unlock(&acct->lock); | 576 | mutex_unlock(&acct->lock); |
| 579 | pin_put(&acct->pin); | 577 | acct_put(acct); |
| 580 | } | 578 | } |
| 581 | } | 579 | } |
| 582 | } | 580 | } |
diff --git a/kernel/audit.h b/kernel/audit.h index 3cdffad5a1d9..1caa0d345d90 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
| @@ -24,12 +24,6 @@ | |||
| 24 | #include <linux/skbuff.h> | 24 | #include <linux/skbuff.h> |
| 25 | #include <uapi/linux/mqueue.h> | 25 | #include <uapi/linux/mqueue.h> |
| 26 | 26 | ||
| 27 | /* 0 = no checking | ||
| 28 | 1 = put_count checking | ||
| 29 | 2 = verbose put_count checking | ||
| 30 | */ | ||
| 31 | #define AUDIT_DEBUG 0 | ||
| 32 | |||
| 33 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context | 27 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context |
| 34 | * for saving names from getname(). If we get more names we will allocate | 28 | * for saving names from getname(). If we get more names we will allocate |
| 35 | * a name dynamically and also add those to the list anchored by names_list. */ | 29 | * a name dynamically and also add those to the list anchored by names_list. */ |
| @@ -74,9 +68,8 @@ struct audit_cap_data { | |||
| 74 | }; | 68 | }; |
| 75 | }; | 69 | }; |
| 76 | 70 | ||
| 77 | /* When fs/namei.c:getname() is called, we store the pointer in name and | 71 | /* When fs/namei.c:getname() is called, we store the pointer in name and bump |
| 78 | * we don't let putname() free it (instead we free all of the saved | 72 | * the refcnt in the associated filename struct. |
| 79 | * pointers at syscall exit time). | ||
| 80 | * | 73 | * |
| 81 | * Further, in fs/namei.c:path_lookup() we store the inode and device. | 74 | * Further, in fs/namei.c:path_lookup() we store the inode and device. |
| 82 | */ | 75 | */ |
| @@ -86,7 +79,6 @@ struct audit_names { | |||
| 86 | struct filename *name; | 79 | struct filename *name; |
| 87 | int name_len; /* number of chars to log */ | 80 | int name_len; /* number of chars to log */ |
| 88 | bool hidden; /* don't log this record */ | 81 | bool hidden; /* don't log this record */ |
| 89 | bool name_put; /* call __putname()? */ | ||
| 90 | 82 | ||
| 91 | unsigned long ino; | 83 | unsigned long ino; |
| 92 | dev_t dev; | 84 | dev_t dev; |
| @@ -208,11 +200,6 @@ struct audit_context { | |||
| 208 | }; | 200 | }; |
| 209 | int fds[2]; | 201 | int fds[2]; |
| 210 | struct audit_proctitle proctitle; | 202 | struct audit_proctitle proctitle; |
| 211 | |||
| 212 | #if AUDIT_DEBUG | ||
| 213 | int put_count; | ||
| 214 | int ino_count; | ||
| 215 | #endif | ||
| 216 | }; | 203 | }; |
| 217 | 204 | ||
| 218 | extern u32 audit_ever_enabled; | 205 | extern u32 audit_ever_enabled; |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 4f68a326d92e..72e1660a79a3 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
| @@ -425,7 +425,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
| 425 | goto exit_nofree; | 425 | goto exit_nofree; |
| 426 | 426 | ||
| 427 | bufp = data->buf; | 427 | bufp = data->buf; |
| 428 | entry->rule.vers_ops = 2; | ||
| 429 | for (i = 0; i < data->field_count; i++) { | 428 | for (i = 0; i < data->field_count; i++) { |
| 430 | struct audit_field *f = &entry->rule.fields[i]; | 429 | struct audit_field *f = &entry->rule.fields[i]; |
| 431 | 430 | ||
| @@ -758,7 +757,6 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old) | |||
| 758 | return ERR_PTR(-ENOMEM); | 757 | return ERR_PTR(-ENOMEM); |
| 759 | 758 | ||
| 760 | new = &entry->rule; | 759 | new = &entry->rule; |
| 761 | new->vers_ops = old->vers_ops; | ||
| 762 | new->flags = old->flags; | 760 | new->flags = old->flags; |
| 763 | new->pflags = old->pflags; | 761 | new->pflags = old->pflags; |
| 764 | new->listnr = old->listnr; | 762 | new->listnr = old->listnr; |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 072566dd0caf..dc4ae70a7413 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
| @@ -866,33 +866,10 @@ static inline void audit_free_names(struct audit_context *context) | |||
| 866 | { | 866 | { |
| 867 | struct audit_names *n, *next; | 867 | struct audit_names *n, *next; |
| 868 | 868 | ||
| 869 | #if AUDIT_DEBUG == 2 | ||
| 870 | if (context->put_count + context->ino_count != context->name_count) { | ||
| 871 | int i = 0; | ||
| 872 | |||
| 873 | pr_err("%s:%d(:%d): major=%d in_syscall=%d" | ||
| 874 | " name_count=%d put_count=%d ino_count=%d" | ||
| 875 | " [NOT freeing]\n", __FILE__, __LINE__, | ||
| 876 | context->serial, context->major, context->in_syscall, | ||
| 877 | context->name_count, context->put_count, | ||
| 878 | context->ino_count); | ||
| 879 | list_for_each_entry(n, &context->names_list, list) { | ||
| 880 | pr_err("names[%d] = %p = %s\n", i++, n->name, | ||
| 881 | n->name->name ?: "(null)"); | ||
| 882 | } | ||
| 883 | dump_stack(); | ||
| 884 | return; | ||
| 885 | } | ||
| 886 | #endif | ||
| 887 | #if AUDIT_DEBUG | ||
| 888 | context->put_count = 0; | ||
| 889 | context->ino_count = 0; | ||
| 890 | #endif | ||
| 891 | |||
| 892 | list_for_each_entry_safe(n, next, &context->names_list, list) { | 869 | list_for_each_entry_safe(n, next, &context->names_list, list) { |
| 893 | list_del(&n->list); | 870 | list_del(&n->list); |
| 894 | if (n->name && n->name_put) | 871 | if (n->name) |
| 895 | final_putname(n->name); | 872 | putname(n->name); |
| 896 | if (n->should_free) | 873 | if (n->should_free) |
| 897 | kfree(n); | 874 | kfree(n); |
| 898 | } | 875 | } |
| @@ -1711,9 +1688,6 @@ static struct audit_names *audit_alloc_name(struct audit_context *context, | |||
| 1711 | list_add_tail(&aname->list, &context->names_list); | 1688 | list_add_tail(&aname->list, &context->names_list); |
| 1712 | 1689 | ||
| 1713 | context->name_count++; | 1690 | context->name_count++; |
| 1714 | #if AUDIT_DEBUG | ||
| 1715 | context->ino_count++; | ||
| 1716 | #endif | ||
| 1717 | return aname; | 1691 | return aname; |
| 1718 | } | 1692 | } |
| 1719 | 1693 | ||
| @@ -1734,8 +1708,10 @@ __audit_reusename(const __user char *uptr) | |||
| 1734 | list_for_each_entry(n, &context->names_list, list) { | 1708 | list_for_each_entry(n, &context->names_list, list) { |
| 1735 | if (!n->name) | 1709 | if (!n->name) |
| 1736 | continue; | 1710 | continue; |
| 1737 | if (n->name->uptr == uptr) | 1711 | if (n->name->uptr == uptr) { |
| 1712 | n->name->refcnt++; | ||
| 1738 | return n->name; | 1713 | return n->name; |
| 1714 | } | ||
| 1739 | } | 1715 | } |
| 1740 | return NULL; | 1716 | return NULL; |
| 1741 | } | 1717 | } |
| @@ -1752,19 +1728,8 @@ void __audit_getname(struct filename *name) | |||
| 1752 | struct audit_context *context = current->audit_context; | 1728 | struct audit_context *context = current->audit_context; |
| 1753 | struct audit_names *n; | 1729 | struct audit_names *n; |
| 1754 | 1730 | ||
| 1755 | if (!context->in_syscall) { | 1731 | if (!context->in_syscall) |
| 1756 | #if AUDIT_DEBUG == 2 | ||
| 1757 | pr_err("%s:%d(:%d): ignoring getname(%p)\n", | ||
| 1758 | __FILE__, __LINE__, context->serial, name); | ||
| 1759 | dump_stack(); | ||
| 1760 | #endif | ||
| 1761 | return; | 1732 | return; |
| 1762 | } | ||
| 1763 | |||
| 1764 | #if AUDIT_DEBUG | ||
| 1765 | /* The filename _must_ have a populated ->name */ | ||
| 1766 | BUG_ON(!name->name); | ||
| 1767 | #endif | ||
| 1768 | 1733 | ||
| 1769 | n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); | 1734 | n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); |
| 1770 | if (!n) | 1735 | if (!n) |
| @@ -1772,56 +1737,13 @@ void __audit_getname(struct filename *name) | |||
| 1772 | 1737 | ||
| 1773 | n->name = name; | 1738 | n->name = name; |
| 1774 | n->name_len = AUDIT_NAME_FULL; | 1739 | n->name_len = AUDIT_NAME_FULL; |
| 1775 | n->name_put = true; | ||
| 1776 | name->aname = n; | 1740 | name->aname = n; |
| 1741 | name->refcnt++; | ||
| 1777 | 1742 | ||
| 1778 | if (!context->pwd.dentry) | 1743 | if (!context->pwd.dentry) |
| 1779 | get_fs_pwd(current->fs, &context->pwd); | 1744 | get_fs_pwd(current->fs, &context->pwd); |
| 1780 | } | 1745 | } |
| 1781 | 1746 | ||
| 1782 | /* audit_putname - intercept a putname request | ||
| 1783 | * @name: name to intercept and delay for putname | ||
| 1784 | * | ||
| 1785 | * If we have stored the name from getname in the audit context, | ||
| 1786 | * then we delay the putname until syscall exit. | ||
| 1787 | * Called from include/linux/fs.h:putname(). | ||
| 1788 | */ | ||
| 1789 | void audit_putname(struct filename *name) | ||
| 1790 | { | ||
| 1791 | struct audit_context *context = current->audit_context; | ||
| 1792 | |||
| 1793 | BUG_ON(!context); | ||
| 1794 | if (!name->aname || !context->in_syscall) { | ||
| 1795 | #if AUDIT_DEBUG == 2 | ||
| 1796 | pr_err("%s:%d(:%d): final_putname(%p)\n", | ||
| 1797 | __FILE__, __LINE__, context->serial, name); | ||
| 1798 | if (context->name_count) { | ||
| 1799 | struct audit_names *n; | ||
| 1800 | int i = 0; | ||
| 1801 | |||
| 1802 | list_for_each_entry(n, &context->names_list, list) | ||
| 1803 | pr_err("name[%d] = %p = %s\n", i++, n->name, | ||
| 1804 | n->name->name ?: "(null)"); | ||
| 1805 | } | ||
| 1806 | #endif | ||
| 1807 | final_putname(name); | ||
| 1808 | } | ||
| 1809 | #if AUDIT_DEBUG | ||
| 1810 | else { | ||
| 1811 | ++context->put_count; | ||
| 1812 | if (context->put_count > context->name_count) { | ||
| 1813 | pr_err("%s:%d(:%d): major=%d in_syscall=%d putname(%p)" | ||
| 1814 | " name_count=%d put_count=%d\n", | ||
| 1815 | __FILE__, __LINE__, | ||
| 1816 | context->serial, context->major, | ||
| 1817 | context->in_syscall, name->name, | ||
| 1818 | context->name_count, context->put_count); | ||
| 1819 | dump_stack(); | ||
| 1820 | } | ||
| 1821 | } | ||
| 1822 | #endif | ||
| 1823 | } | ||
| 1824 | |||
| 1825 | /** | 1747 | /** |
| 1826 | * __audit_inode - store the inode and device from a lookup | 1748 | * __audit_inode - store the inode and device from a lookup |
| 1827 | * @name: name being audited | 1749 | * @name: name being audited |
| @@ -1842,10 +1764,6 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, | |||
| 1842 | if (!name) | 1764 | if (!name) |
| 1843 | goto out_alloc; | 1765 | goto out_alloc; |
| 1844 | 1766 | ||
| 1845 | #if AUDIT_DEBUG | ||
| 1846 | /* The struct filename _must_ have a populated ->name */ | ||
| 1847 | BUG_ON(!name->name); | ||
| 1848 | #endif | ||
| 1849 | /* | 1767 | /* |
| 1850 | * If we have a pointer to an audit_names entry already, then we can | 1768 | * If we have a pointer to an audit_names entry already, then we can |
| 1851 | * just use it directly if the type is correct. | 1769 | * just use it directly if the type is correct. |
| @@ -1863,7 +1781,17 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, | |||
| 1863 | } | 1781 | } |
| 1864 | 1782 | ||
| 1865 | list_for_each_entry_reverse(n, &context->names_list, list) { | 1783 | list_for_each_entry_reverse(n, &context->names_list, list) { |
| 1866 | if (!n->name || strcmp(n->name->name, name->name)) | 1784 | if (n->ino) { |
| 1785 | /* valid inode number, use that for the comparison */ | ||
| 1786 | if (n->ino != inode->i_ino || | ||
| 1787 | n->dev != inode->i_sb->s_dev) | ||
| 1788 | continue; | ||
| 1789 | } else if (n->name) { | ||
| 1790 | /* inode number has not been set, check the name */ | ||
| 1791 | if (strcmp(n->name->name, name->name)) | ||
| 1792 | continue; | ||
| 1793 | } else | ||
| 1794 | /* no inode and no name (?!) ... this is odd ... */ | ||
| 1867 | continue; | 1795 | continue; |
| 1868 | 1796 | ||
| 1869 | /* match the correct record type */ | 1797 | /* match the correct record type */ |
| @@ -1882,44 +1810,11 @@ out_alloc: | |||
| 1882 | n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); | 1810 | n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); |
| 1883 | if (!n) | 1811 | if (!n) |
| 1884 | return; | 1812 | return; |
| 1885 | /* unfortunately, while we may have a path name to record with the | ||
| 1886 | * inode, we can't always rely on the string lasting until the end of | ||
| 1887 | * the syscall so we need to create our own copy, it may fail due to | ||
| 1888 | * memory allocation issues, but we do our best */ | ||
| 1889 | if (name) { | 1813 | if (name) { |
| 1890 | /* we can't use getname_kernel() due to size limits */ | 1814 | n->name = name; |
| 1891 | size_t len = strlen(name->name) + 1; | 1815 | name->refcnt++; |
| 1892 | struct filename *new = __getname(); | ||
| 1893 | |||
| 1894 | if (unlikely(!new)) | ||
| 1895 | goto out; | ||
| 1896 | |||
| 1897 | if (len <= (PATH_MAX - sizeof(*new))) { | ||
| 1898 | new->name = (char *)(new) + sizeof(*new); | ||
| 1899 | new->separate = false; | ||
| 1900 | } else if (len <= PATH_MAX) { | ||
| 1901 | /* this looks odd, but is due to final_putname() */ | ||
| 1902 | struct filename *new2; | ||
| 1903 | |||
| 1904 | new2 = kmalloc(sizeof(*new2), GFP_KERNEL); | ||
| 1905 | if (unlikely(!new2)) { | ||
| 1906 | __putname(new); | ||
| 1907 | goto out; | ||
| 1908 | } | ||
| 1909 | new2->name = (char *)new; | ||
| 1910 | new2->separate = true; | ||
| 1911 | new = new2; | ||
| 1912 | } else { | ||
| 1913 | /* we should never get here, but let's be safe */ | ||
| 1914 | __putname(new); | ||
| 1915 | goto out; | ||
| 1916 | } | ||
| 1917 | strlcpy((char *)new->name, name->name, len); | ||
| 1918 | new->uptr = NULL; | ||
| 1919 | new->aname = n; | ||
| 1920 | n->name = new; | ||
| 1921 | n->name_put = true; | ||
| 1922 | } | 1816 | } |
| 1817 | |||
| 1923 | out: | 1818 | out: |
| 1924 | if (parent) { | 1819 | if (parent) { |
| 1925 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; | 1820 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; |
| @@ -1970,11 +1865,16 @@ void __audit_inode_child(const struct inode *parent, | |||
| 1970 | 1865 | ||
| 1971 | /* look for a parent entry first */ | 1866 | /* look for a parent entry first */ |
| 1972 | list_for_each_entry(n, &context->names_list, list) { | 1867 | list_for_each_entry(n, &context->names_list, list) { |
| 1973 | if (!n->name || n->type != AUDIT_TYPE_PARENT) | 1868 | if (!n->name || |
| 1869 | (n->type != AUDIT_TYPE_PARENT && | ||
| 1870 | n->type != AUDIT_TYPE_UNKNOWN)) | ||
| 1974 | continue; | 1871 | continue; |
| 1975 | 1872 | ||
| 1976 | if (n->ino == parent->i_ino && | 1873 | if (n->ino == parent->i_ino && n->dev == parent->i_sb->s_dev && |
| 1977 | !audit_compare_dname_path(dname, n->name->name, n->name_len)) { | 1874 | !audit_compare_dname_path(dname, |
| 1875 | n->name->name, n->name_len)) { | ||
| 1876 | if (n->type == AUDIT_TYPE_UNKNOWN) | ||
| 1877 | n->type = AUDIT_TYPE_PARENT; | ||
| 1978 | found_parent = n; | 1878 | found_parent = n; |
| 1979 | break; | 1879 | break; |
| 1980 | } | 1880 | } |
| @@ -1983,11 +1883,8 @@ void __audit_inode_child(const struct inode *parent, | |||
| 1983 | /* is there a matching child entry? */ | 1883 | /* is there a matching child entry? */ |
| 1984 | list_for_each_entry(n, &context->names_list, list) { | 1884 | list_for_each_entry(n, &context->names_list, list) { |
| 1985 | /* can only match entries that have a name */ | 1885 | /* can only match entries that have a name */ |
| 1986 | if (!n->name || n->type != type) | 1886 | if (!n->name || |
| 1987 | continue; | 1887 | (n->type != type && n->type != AUDIT_TYPE_UNKNOWN)) |
| 1988 | |||
| 1989 | /* if we found a parent, make sure this one is a child of it */ | ||
| 1990 | if (found_parent && (n->name != found_parent->name)) | ||
| 1991 | continue; | 1888 | continue; |
| 1992 | 1889 | ||
| 1993 | if (!strcmp(dname, n->name->name) || | 1890 | if (!strcmp(dname, n->name->name) || |
| @@ -1995,6 +1892,8 @@ void __audit_inode_child(const struct inode *parent, | |||
| 1995 | found_parent ? | 1892 | found_parent ? |
| 1996 | found_parent->name_len : | 1893 | found_parent->name_len : |
| 1997 | AUDIT_NAME_FULL)) { | 1894 | AUDIT_NAME_FULL)) { |
| 1895 | if (n->type == AUDIT_TYPE_UNKNOWN) | ||
| 1896 | n->type = type; | ||
| 1998 | found_child = n; | 1897 | found_child = n; |
| 1999 | break; | 1898 | break; |
| 2000 | } | 1899 | } |
| @@ -2019,10 +1918,10 @@ void __audit_inode_child(const struct inode *parent, | |||
| 2019 | if (found_parent) { | 1918 | if (found_parent) { |
| 2020 | found_child->name = found_parent->name; | 1919 | found_child->name = found_parent->name; |
| 2021 | found_child->name_len = AUDIT_NAME_FULL; | 1920 | found_child->name_len = AUDIT_NAME_FULL; |
| 2022 | /* don't call __putname() */ | 1921 | found_child->name->refcnt++; |
| 2023 | found_child->name_put = false; | ||
| 2024 | } | 1922 | } |
| 2025 | } | 1923 | } |
| 1924 | |||
| 2026 | if (inode) | 1925 | if (inode) |
| 2027 | audit_copy_inode(found_child, dentry, inode); | 1926 | audit_copy_inode(found_child, dentry, inode); |
| 2028 | else | 1927 | else |
| @@ -2405,7 +2304,6 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, | |||
| 2405 | struct audit_aux_data_bprm_fcaps *ax; | 2304 | struct audit_aux_data_bprm_fcaps *ax; |
| 2406 | struct audit_context *context = current->audit_context; | 2305 | struct audit_context *context = current->audit_context; |
| 2407 | struct cpu_vfs_cap_data vcaps; | 2306 | struct cpu_vfs_cap_data vcaps; |
| 2408 | struct dentry *dentry; | ||
| 2409 | 2307 | ||
| 2410 | ax = kmalloc(sizeof(*ax), GFP_KERNEL); | 2308 | ax = kmalloc(sizeof(*ax), GFP_KERNEL); |
| 2411 | if (!ax) | 2309 | if (!ax) |
| @@ -2415,9 +2313,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, | |||
| 2415 | ax->d.next = context->aux; | 2313 | ax->d.next = context->aux; |
| 2416 | context->aux = (void *)ax; | 2314 | context->aux = (void *)ax; |
| 2417 | 2315 | ||
| 2418 | dentry = dget(bprm->file->f_path.dentry); | 2316 | get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); |
| 2419 | get_vfs_caps_from_disk(dentry, &vcaps); | ||
| 2420 | dput(dentry); | ||
| 2421 | 2317 | ||
| 2422 | ax->fcap.permitted = vcaps.permitted; | 2318 | ax->fcap.permitted = vcaps.permitted; |
| 2423 | ax->fcap.inheritable = vcaps.inheritable; | 2319 | ax->fcap.inheritable = vcaps.inheritable; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 04cfe8ace520..29a7b2cc593e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -3077,7 +3077,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) | |||
| 3077 | #endif | 3077 | #endif |
| 3078 | kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), | 3078 | kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), |
| 3079 | cgroup_file_mode(cft), 0, cft->kf_ops, cft, | 3079 | cgroup_file_mode(cft), 0, cft->kf_ops, cft, |
| 3080 | NULL, false, key); | 3080 | NULL, key); |
| 3081 | if (IS_ERR(kn)) | 3081 | if (IS_ERR(kn)) |
| 3082 | return PTR_ERR(kn); | 3082 | return PTR_ERR(kn); |
| 3083 | 3083 | ||
| @@ -4373,16 +4373,20 @@ static void css_free_work_fn(struct work_struct *work) | |||
| 4373 | { | 4373 | { |
| 4374 | struct cgroup_subsys_state *css = | 4374 | struct cgroup_subsys_state *css = |
| 4375 | container_of(work, struct cgroup_subsys_state, destroy_work); | 4375 | container_of(work, struct cgroup_subsys_state, destroy_work); |
| 4376 | struct cgroup_subsys *ss = css->ss; | ||
| 4376 | struct cgroup *cgrp = css->cgroup; | 4377 | struct cgroup *cgrp = css->cgroup; |
| 4377 | 4378 | ||
| 4378 | percpu_ref_exit(&css->refcnt); | 4379 | percpu_ref_exit(&css->refcnt); |
| 4379 | 4380 | ||
| 4380 | if (css->ss) { | 4381 | if (ss) { |
| 4381 | /* css free path */ | 4382 | /* css free path */ |
| 4383 | int id = css->id; | ||
| 4384 | |||
| 4382 | if (css->parent) | 4385 | if (css->parent) |
| 4383 | css_put(css->parent); | 4386 | css_put(css->parent); |
| 4384 | 4387 | ||
| 4385 | css->ss->css_free(css); | 4388 | ss->css_free(css); |
| 4389 | cgroup_idr_remove(&ss->css_idr, id); | ||
| 4386 | cgroup_put(cgrp); | 4390 | cgroup_put(cgrp); |
| 4387 | } else { | 4391 | } else { |
| 4388 | /* cgroup free path */ | 4392 | /* cgroup free path */ |
| @@ -4434,7 +4438,7 @@ static void css_release_work_fn(struct work_struct *work) | |||
| 4434 | 4438 | ||
| 4435 | if (ss) { | 4439 | if (ss) { |
| 4436 | /* css release path */ | 4440 | /* css release path */ |
| 4437 | cgroup_idr_remove(&ss->css_idr, css->id); | 4441 | cgroup_idr_replace(&ss->css_idr, NULL, css->id); |
| 4438 | if (ss->css_released) | 4442 | if (ss->css_released) |
| 4439 | ss->css_released(css); | 4443 | ss->css_released(css); |
| 4440 | } else { | 4444 | } else { |
diff --git a/kernel/compat.c b/kernel/compat.c index ebb3c369d03d..24f00610c575 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
| @@ -276,8 +276,7 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, | |||
| 276 | * core implementation decides to return random nonsense. | 276 | * core implementation decides to return random nonsense. |
| 277 | */ | 277 | */ |
| 278 | if (ret == -ERESTART_RESTARTBLOCK) { | 278 | if (ret == -ERESTART_RESTARTBLOCK) { |
| 279 | struct restart_block *restart | 279 | struct restart_block *restart = ¤t->restart_block; |
| 280 | = ¤t_thread_info()->restart_block; | ||
| 281 | 280 | ||
| 282 | restart->fn = compat_nanosleep_restart; | 281 | restart->fn = compat_nanosleep_restart; |
| 283 | restart->nanosleep.compat_rmtp = rmtp; | 282 | restart->nanosleep.compat_rmtp = rmtp; |
| @@ -860,7 +859,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, | |||
| 860 | return -EFAULT; | 859 | return -EFAULT; |
| 861 | 860 | ||
| 862 | if (err == -ERESTART_RESTARTBLOCK) { | 861 | if (err == -ERESTART_RESTARTBLOCK) { |
| 863 | restart = ¤t_thread_info()->restart_block; | 862 | restart = ¤t->restart_block; |
| 864 | restart->fn = compat_clock_nanosleep_restart; | 863 | restart->fn = compat_clock_nanosleep_restart; |
| 865 | restart->nanosleep.compat_rmtp = rmtp; | 864 | restart->nanosleep.compat_rmtp = rmtp; |
| 866 | } | 865 | } |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 5d220234b3ca..1972b161c61e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -58,22 +58,23 @@ static int cpu_hotplug_disabled; | |||
| 58 | 58 | ||
| 59 | static struct { | 59 | static struct { |
| 60 | struct task_struct *active_writer; | 60 | struct task_struct *active_writer; |
| 61 | struct mutex lock; /* Synchronizes accesses to refcount, */ | 61 | /* wait queue to wake up the active_writer */ |
| 62 | wait_queue_head_t wq; | ||
| 63 | /* verifies that no writer will get active while readers are active */ | ||
| 64 | struct mutex lock; | ||
| 62 | /* | 65 | /* |
| 63 | * Also blocks the new readers during | 66 | * Also blocks the new readers during |
| 64 | * an ongoing cpu hotplug operation. | 67 | * an ongoing cpu hotplug operation. |
| 65 | */ | 68 | */ |
| 66 | int refcount; | 69 | atomic_t refcount; |
| 67 | /* And allows lockless put_online_cpus(). */ | ||
| 68 | atomic_t puts_pending; | ||
| 69 | 70 | ||
| 70 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 71 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 71 | struct lockdep_map dep_map; | 72 | struct lockdep_map dep_map; |
| 72 | #endif | 73 | #endif |
| 73 | } cpu_hotplug = { | 74 | } cpu_hotplug = { |
| 74 | .active_writer = NULL, | 75 | .active_writer = NULL, |
| 76 | .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), | ||
| 75 | .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), | 77 | .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), |
| 76 | .refcount = 0, | ||
| 77 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 78 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 78 | .dep_map = {.name = "cpu_hotplug.lock" }, | 79 | .dep_map = {.name = "cpu_hotplug.lock" }, |
| 79 | #endif | 80 | #endif |
| @@ -86,15 +87,6 @@ static struct { | |||
| 86 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) | 87 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) |
| 87 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | 88 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) |
| 88 | 89 | ||
| 89 | static void apply_puts_pending(int max) | ||
| 90 | { | ||
| 91 | int delta; | ||
| 92 | |||
| 93 | if (atomic_read(&cpu_hotplug.puts_pending) >= max) { | ||
| 94 | delta = atomic_xchg(&cpu_hotplug.puts_pending, 0); | ||
| 95 | cpu_hotplug.refcount -= delta; | ||
| 96 | } | ||
| 97 | } | ||
| 98 | 90 | ||
| 99 | void get_online_cpus(void) | 91 | void get_online_cpus(void) |
| 100 | { | 92 | { |
| @@ -103,8 +95,7 @@ void get_online_cpus(void) | |||
| 103 | return; | 95 | return; |
| 104 | cpuhp_lock_acquire_read(); | 96 | cpuhp_lock_acquire_read(); |
| 105 | mutex_lock(&cpu_hotplug.lock); | 97 | mutex_lock(&cpu_hotplug.lock); |
| 106 | apply_puts_pending(65536); | 98 | atomic_inc(&cpu_hotplug.refcount); |
| 107 | cpu_hotplug.refcount++; | ||
| 108 | mutex_unlock(&cpu_hotplug.lock); | 99 | mutex_unlock(&cpu_hotplug.lock); |
| 109 | } | 100 | } |
| 110 | EXPORT_SYMBOL_GPL(get_online_cpus); | 101 | EXPORT_SYMBOL_GPL(get_online_cpus); |
| @@ -116,8 +107,7 @@ bool try_get_online_cpus(void) | |||
| 116 | if (!mutex_trylock(&cpu_hotplug.lock)) | 107 | if (!mutex_trylock(&cpu_hotplug.lock)) |
| 117 | return false; | 108 | return false; |
| 118 | cpuhp_lock_acquire_tryread(); | 109 | cpuhp_lock_acquire_tryread(); |
| 119 | apply_puts_pending(65536); | 110 | atomic_inc(&cpu_hotplug.refcount); |
| 120 | cpu_hotplug.refcount++; | ||
| 121 | mutex_unlock(&cpu_hotplug.lock); | 111 | mutex_unlock(&cpu_hotplug.lock); |
| 122 | return true; | 112 | return true; |
| 123 | } | 113 | } |
| @@ -125,20 +115,18 @@ EXPORT_SYMBOL_GPL(try_get_online_cpus); | |||
| 125 | 115 | ||
| 126 | void put_online_cpus(void) | 116 | void put_online_cpus(void) |
| 127 | { | 117 | { |
| 118 | int refcount; | ||
| 119 | |||
| 128 | if (cpu_hotplug.active_writer == current) | 120 | if (cpu_hotplug.active_writer == current) |
| 129 | return; | 121 | return; |
| 130 | if (!mutex_trylock(&cpu_hotplug.lock)) { | ||
| 131 | atomic_inc(&cpu_hotplug.puts_pending); | ||
| 132 | cpuhp_lock_release(); | ||
| 133 | return; | ||
| 134 | } | ||
| 135 | 122 | ||
| 136 | if (WARN_ON(!cpu_hotplug.refcount)) | 123 | refcount = atomic_dec_return(&cpu_hotplug.refcount); |
| 137 | cpu_hotplug.refcount++; /* try to fix things up */ | 124 | if (WARN_ON(refcount < 0)) /* try to fix things up */ |
| 125 | atomic_inc(&cpu_hotplug.refcount); | ||
| 126 | |||
| 127 | if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq)) | ||
| 128 | wake_up(&cpu_hotplug.wq); | ||
| 138 | 129 | ||
| 139 | if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) | ||
| 140 | wake_up_process(cpu_hotplug.active_writer); | ||
| 141 | mutex_unlock(&cpu_hotplug.lock); | ||
| 142 | cpuhp_lock_release(); | 130 | cpuhp_lock_release(); |
| 143 | 131 | ||
| 144 | } | 132 | } |
| @@ -168,18 +156,20 @@ EXPORT_SYMBOL_GPL(put_online_cpus); | |||
| 168 | */ | 156 | */ |
| 169 | void cpu_hotplug_begin(void) | 157 | void cpu_hotplug_begin(void) |
| 170 | { | 158 | { |
| 171 | cpu_hotplug.active_writer = current; | 159 | DEFINE_WAIT(wait); |
| 172 | 160 | ||
| 161 | cpu_hotplug.active_writer = current; | ||
| 173 | cpuhp_lock_acquire(); | 162 | cpuhp_lock_acquire(); |
| 163 | |||
| 174 | for (;;) { | 164 | for (;;) { |
| 175 | mutex_lock(&cpu_hotplug.lock); | 165 | mutex_lock(&cpu_hotplug.lock); |
| 176 | apply_puts_pending(1); | 166 | prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); |
| 177 | if (likely(!cpu_hotplug.refcount)) | 167 | if (likely(!atomic_read(&cpu_hotplug.refcount))) |
| 178 | break; | 168 | break; |
| 179 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 180 | mutex_unlock(&cpu_hotplug.lock); | 169 | mutex_unlock(&cpu_hotplug.lock); |
| 181 | schedule(); | 170 | schedule(); |
| 182 | } | 171 | } |
| 172 | finish_wait(&cpu_hotplug.wq, &wait); | ||
| 183 | } | 173 | } |
| 184 | 174 | ||
| 185 | void cpu_hotplug_done(void) | 175 | void cpu_hotplug_done(void) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 64b257f6bca2..1d1fe9361d29 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -1707,40 +1707,27 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |||
| 1707 | { | 1707 | { |
| 1708 | struct cpuset *cs = css_cs(seq_css(sf)); | 1708 | struct cpuset *cs = css_cs(seq_css(sf)); |
| 1709 | cpuset_filetype_t type = seq_cft(sf)->private; | 1709 | cpuset_filetype_t type = seq_cft(sf)->private; |
| 1710 | ssize_t count; | ||
| 1711 | char *buf, *s; | ||
| 1712 | int ret = 0; | 1710 | int ret = 0; |
| 1713 | 1711 | ||
| 1714 | count = seq_get_buf(sf, &buf); | ||
| 1715 | s = buf; | ||
| 1716 | |||
| 1717 | spin_lock_irq(&callback_lock); | 1712 | spin_lock_irq(&callback_lock); |
| 1718 | 1713 | ||
| 1719 | switch (type) { | 1714 | switch (type) { |
| 1720 | case FILE_CPULIST: | 1715 | case FILE_CPULIST: |
| 1721 | s += cpulist_scnprintf(s, count, cs->cpus_allowed); | 1716 | seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); |
| 1722 | break; | 1717 | break; |
| 1723 | case FILE_MEMLIST: | 1718 | case FILE_MEMLIST: |
| 1724 | s += nodelist_scnprintf(s, count, cs->mems_allowed); | 1719 | seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); |
| 1725 | break; | 1720 | break; |
| 1726 | case FILE_EFFECTIVE_CPULIST: | 1721 | case FILE_EFFECTIVE_CPULIST: |
| 1727 | s += cpulist_scnprintf(s, count, cs->effective_cpus); | 1722 | seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); |
| 1728 | break; | 1723 | break; |
| 1729 | case FILE_EFFECTIVE_MEMLIST: | 1724 | case FILE_EFFECTIVE_MEMLIST: |
| 1730 | s += nodelist_scnprintf(s, count, cs->effective_mems); | 1725 | seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); |
| 1731 | break; | 1726 | break; |
| 1732 | default: | 1727 | default: |
| 1733 | ret = -EINVAL; | 1728 | ret = -EINVAL; |
| 1734 | goto out_unlock; | ||
| 1735 | } | 1729 | } |
| 1736 | 1730 | ||
| 1737 | if (s < buf + count - 1) { | ||
| 1738 | *s++ = '\n'; | ||
| 1739 | seq_commit(sf, s - buf); | ||
| 1740 | } else { | ||
| 1741 | seq_commit(sf, -1); | ||
| 1742 | } | ||
| 1743 | out_unlock: | ||
| 1744 | spin_unlock_irq(&callback_lock); | 1731 | spin_unlock_irq(&callback_lock); |
| 1745 | return ret; | 1732 | return ret; |
| 1746 | } | 1733 | } |
| @@ -2400,7 +2387,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |||
| 2400 | */ | 2387 | */ |
| 2401 | } | 2388 | } |
| 2402 | 2389 | ||
| 2403 | void cpuset_init_current_mems_allowed(void) | 2390 | void __init cpuset_init_current_mems_allowed(void) |
| 2404 | { | 2391 | { |
| 2405 | nodes_setall(current->mems_allowed); | 2392 | nodes_setall(current->mems_allowed); |
| 2406 | } | 2393 | } |
| @@ -2610,8 +2597,6 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, | |||
| 2610 | return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); | 2597 | return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); |
| 2611 | } | 2598 | } |
| 2612 | 2599 | ||
| 2613 | #define CPUSET_NODELIST_LEN (256) | ||
| 2614 | |||
| 2615 | /** | 2600 | /** |
| 2616 | * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed | 2601 | * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed |
| 2617 | * @tsk: pointer to task_struct of some task. | 2602 | * @tsk: pointer to task_struct of some task. |
| @@ -2621,23 +2606,16 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, | |||
| 2621 | */ | 2606 | */ |
| 2622 | void cpuset_print_task_mems_allowed(struct task_struct *tsk) | 2607 | void cpuset_print_task_mems_allowed(struct task_struct *tsk) |
| 2623 | { | 2608 | { |
| 2624 | /* Statically allocated to prevent using excess stack. */ | ||
| 2625 | static char cpuset_nodelist[CPUSET_NODELIST_LEN]; | ||
| 2626 | static DEFINE_SPINLOCK(cpuset_buffer_lock); | ||
| 2627 | struct cgroup *cgrp; | 2609 | struct cgroup *cgrp; |
| 2628 | 2610 | ||
| 2629 | spin_lock(&cpuset_buffer_lock); | ||
| 2630 | rcu_read_lock(); | 2611 | rcu_read_lock(); |
| 2631 | 2612 | ||
| 2632 | cgrp = task_cs(tsk)->css.cgroup; | 2613 | cgrp = task_cs(tsk)->css.cgroup; |
| 2633 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, | ||
| 2634 | tsk->mems_allowed); | ||
| 2635 | pr_info("%s cpuset=", tsk->comm); | 2614 | pr_info("%s cpuset=", tsk->comm); |
| 2636 | pr_cont_cgroup_name(cgrp); | 2615 | pr_cont_cgroup_name(cgrp); |
| 2637 | pr_cont(" mems_allowed=%s\n", cpuset_nodelist); | 2616 | pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&tsk->mems_allowed)); |
| 2638 | 2617 | ||
| 2639 | rcu_read_unlock(); | 2618 | rcu_read_unlock(); |
| 2640 | spin_unlock(&cpuset_buffer_lock); | ||
| 2641 | } | 2619 | } |
| 2642 | 2620 | ||
| 2643 | /* | 2621 | /* |
| @@ -2715,10 +2693,8 @@ out: | |||
| 2715 | /* Display task mems_allowed in /proc/<pid>/status file. */ | 2693 | /* Display task mems_allowed in /proc/<pid>/status file. */ |
| 2716 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) | 2694 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) |
| 2717 | { | 2695 | { |
| 2718 | seq_puts(m, "Mems_allowed:\t"); | 2696 | seq_printf(m, "Mems_allowed:\t%*pb\n", |
| 2719 | seq_nodemask(m, &task->mems_allowed); | 2697 | nodemask_pr_args(&task->mems_allowed)); |
| 2720 | seq_puts(m, "\n"); | 2698 | seq_printf(m, "Mems_allowed_list:\t%*pbl\n", |
| 2721 | seq_puts(m, "Mems_allowed_list:\t"); | 2699 | nodemask_pr_args(&task->mems_allowed)); |
| 2722 | seq_nodemask_list(m, &task->mems_allowed); | ||
| 2723 | seq_puts(m, "\n"); | ||
| 2724 | } | 2700 | } |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 07ce18ca71e0..0874e2edd275 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c | |||
| @@ -604,7 +604,7 @@ return_normal: | |||
| 604 | online_cpus) | 604 | online_cpus) |
| 605 | cpu_relax(); | 605 | cpu_relax(); |
| 606 | if (!time_left) | 606 | if (!time_left) |
| 607 | pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); | 607 | pr_crit("Timed out waiting for secondary CPUs.\n"); |
| 608 | 608 | ||
| 609 | /* | 609 | /* |
| 610 | * At this point the primary processor is completely | 610 | * At this point the primary processor is completely |
| @@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
| 696 | 696 | ||
| 697 | if (arch_kgdb_ops.enable_nmi) | 697 | if (arch_kgdb_ops.enable_nmi) |
| 698 | arch_kgdb_ops.enable_nmi(0); | 698 | arch_kgdb_ops.enable_nmi(0); |
| 699 | /* | ||
| 700 | * Avoid entering the debugger if we were triggered due to an oops | ||
| 701 | * but panic_timeout indicates the system should automatically | ||
| 702 | * reboot on panic. We don't want to get stuck waiting for input | ||
| 703 | * on such systems, especially if its "just" an oops. | ||
| 704 | */ | ||
| 705 | if (signo != SIGTRAP && panic_timeout) | ||
| 706 | return 1; | ||
| 699 | 707 | ||
| 700 | memset(ks, 0, sizeof(struct kgdb_state)); | 708 | memset(ks, 0, sizeof(struct kgdb_state)); |
| 701 | ks->cpu = raw_smp_processor_id(); | 709 | ks->cpu = raw_smp_processor_id(); |
| @@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self, | |||
| 828 | unsigned long val, | 836 | unsigned long val, |
| 829 | void *data) | 837 | void *data) |
| 830 | { | 838 | { |
| 839 | /* | ||
| 840 | * Avoid entering the debugger if we were triggered due to a panic | ||
| 841 | * We don't want to get stuck waiting for input from user in such case. | ||
| 842 | * panic_timeout indicates the system should automatically | ||
| 843 | * reboot on panic. | ||
| 844 | */ | ||
| 845 | if (panic_timeout) | ||
| 846 | return NOTIFY_DONE; | ||
| 847 | |||
| 831 | if (dbg_kdb_mode) | 848 | if (dbg_kdb_mode) |
| 832 | kdb_printf("PANIC: %s\n", (char *)data); | 849 | kdb_printf("PANIC: %s\n", (char *)data); |
| 833 | kgdb_breakpoint(); | 850 | kgdb_breakpoint(); |
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 7c70812caea5..fc1ef736253c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
| @@ -439,7 +439,7 @@ poll_again: | |||
| 439 | * substituted for %d, %x or %o in the prompt. | 439 | * substituted for %d, %x or %o in the prompt. |
| 440 | */ | 440 | */ |
| 441 | 441 | ||
| 442 | char *kdb_getstr(char *buffer, size_t bufsize, char *prompt) | 442 | char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt) |
| 443 | { | 443 | { |
| 444 | if (prompt && kdb_prompt_str != prompt) | 444 | if (prompt && kdb_prompt_str != prompt) |
| 445 | strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); | 445 | strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); |
| @@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor) | |||
| 548 | return 0; | 548 | return 0; |
| 549 | } | 549 | } |
| 550 | 550 | ||
| 551 | int vkdb_printf(const char *fmt, va_list ap) | 551 | int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) |
| 552 | { | 552 | { |
| 553 | int diag; | 553 | int diag; |
| 554 | int linecount; | 554 | int linecount; |
| @@ -680,6 +680,12 @@ int vkdb_printf(const char *fmt, va_list ap) | |||
| 680 | size_avail = sizeof(kdb_buffer) - len; | 680 | size_avail = sizeof(kdb_buffer) - len; |
| 681 | goto kdb_print_out; | 681 | goto kdb_print_out; |
| 682 | } | 682 | } |
| 683 | if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH) | ||
| 684 | /* | ||
| 685 | * This was a interactive search (using '/' at more | ||
| 686 | * prompt) and it has completed. Clear the flag. | ||
| 687 | */ | ||
| 688 | kdb_grepping_flag = 0; | ||
| 683 | /* | 689 | /* |
| 684 | * at this point the string is a full line and | 690 | * at this point the string is a full line and |
| 685 | * should be printed, up to the null. | 691 | * should be printed, up to the null. |
| @@ -691,19 +697,20 @@ kdb_printit: | |||
| 691 | * Write to all consoles. | 697 | * Write to all consoles. |
| 692 | */ | 698 | */ |
| 693 | retlen = strlen(kdb_buffer); | 699 | retlen = strlen(kdb_buffer); |
| 700 | cp = (char *) printk_skip_level(kdb_buffer); | ||
| 694 | if (!dbg_kdb_mode && kgdb_connected) { | 701 | if (!dbg_kdb_mode && kgdb_connected) { |
| 695 | gdbstub_msg_write(kdb_buffer, retlen); | 702 | gdbstub_msg_write(cp, retlen - (cp - kdb_buffer)); |
| 696 | } else { | 703 | } else { |
| 697 | if (dbg_io_ops && !dbg_io_ops->is_console) { | 704 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
| 698 | len = retlen; | 705 | len = retlen - (cp - kdb_buffer); |
| 699 | cp = kdb_buffer; | 706 | cp2 = cp; |
| 700 | while (len--) { | 707 | while (len--) { |
| 701 | dbg_io_ops->write_char(*cp); | 708 | dbg_io_ops->write_char(*cp2); |
| 702 | cp++; | 709 | cp2++; |
| 703 | } | 710 | } |
| 704 | } | 711 | } |
| 705 | while (c) { | 712 | while (c) { |
| 706 | c->write(c, kdb_buffer, retlen); | 713 | c->write(c, cp, retlen - (cp - kdb_buffer)); |
| 707 | touch_nmi_watchdog(); | 714 | touch_nmi_watchdog(); |
| 708 | c = c->next; | 715 | c = c->next; |
| 709 | } | 716 | } |
| @@ -711,7 +718,10 @@ kdb_printit: | |||
| 711 | if (logging) { | 718 | if (logging) { |
| 712 | saved_loglevel = console_loglevel; | 719 | saved_loglevel = console_loglevel; |
| 713 | console_loglevel = CONSOLE_LOGLEVEL_SILENT; | 720 | console_loglevel = CONSOLE_LOGLEVEL_SILENT; |
| 714 | printk(KERN_INFO "%s", kdb_buffer); | 721 | if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK) |
| 722 | printk("%s", kdb_buffer); | ||
| 723 | else | ||
| 724 | pr_info("%s", kdb_buffer); | ||
| 715 | } | 725 | } |
| 716 | 726 | ||
| 717 | if (KDB_STATE(PAGER)) { | 727 | if (KDB_STATE(PAGER)) { |
| @@ -794,11 +804,23 @@ kdb_printit: | |||
| 794 | kdb_nextline = linecount - 1; | 804 | kdb_nextline = linecount - 1; |
| 795 | kdb_printf("\r"); | 805 | kdb_printf("\r"); |
| 796 | suspend_grep = 1; /* for this recursion */ | 806 | suspend_grep = 1; /* for this recursion */ |
| 807 | } else if (buf1[0] == '/' && !kdb_grepping_flag) { | ||
| 808 | kdb_printf("\r"); | ||
| 809 | kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN, | ||
| 810 | kdbgetenv("SEARCHPROMPT") ?: "search> "); | ||
| 811 | *strchrnul(kdb_grep_string, '\n') = '\0'; | ||
| 812 | kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH; | ||
| 813 | suspend_grep = 1; /* for this recursion */ | ||
| 797 | } else if (buf1[0] && buf1[0] != '\n') { | 814 | } else if (buf1[0] && buf1[0] != '\n') { |
| 798 | /* user hit something other than enter */ | 815 | /* user hit something other than enter */ |
| 799 | suspend_grep = 1; /* for this recursion */ | 816 | suspend_grep = 1; /* for this recursion */ |
| 800 | kdb_printf("\nOnly 'q' or 'Q' are processed at more " | 817 | if (buf1[0] != '/') |
| 801 | "prompt, input ignored\n"); | 818 | kdb_printf( |
| 819 | "\nOnly 'q', 'Q' or '/' are processed at " | ||
| 820 | "more prompt, input ignored\n"); | ||
| 821 | else | ||
| 822 | kdb_printf("\n'/' cannot be used during | " | ||
| 823 | "grep filtering, input ignored\n"); | ||
| 802 | } else if (kdb_grepping_flag) { | 824 | } else if (kdb_grepping_flag) { |
| 803 | /* user hit enter */ | 825 | /* user hit enter */ |
| 804 | suspend_grep = 1; /* for this recursion */ | 826 | suspend_grep = 1; /* for this recursion */ |
| @@ -844,7 +866,7 @@ int kdb_printf(const char *fmt, ...) | |||
| 844 | int r; | 866 | int r; |
| 845 | 867 | ||
| 846 | va_start(ap, fmt); | 868 | va_start(ap, fmt); |
| 847 | r = vkdb_printf(fmt, ap); | 869 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); |
| 848 | va_end(ap); | 870 | va_end(ap); |
| 849 | 871 | ||
| 850 | return r; | 872 | return r; |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 7b40c5f07dce..4121345498e0 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
| @@ -50,8 +50,7 @@ | |||
| 50 | static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; | 50 | static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; |
| 51 | module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); | 51 | module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); |
| 52 | 52 | ||
| 53 | #define GREP_LEN 256 | 53 | char kdb_grep_string[KDB_GREP_STRLEN]; |
| 54 | char kdb_grep_string[GREP_LEN]; | ||
| 55 | int kdb_grepping_flag; | 54 | int kdb_grepping_flag; |
| 56 | EXPORT_SYMBOL(kdb_grepping_flag); | 55 | EXPORT_SYMBOL(kdb_grepping_flag); |
| 57 | int kdb_grep_leading; | 56 | int kdb_grep_leading; |
| @@ -870,7 +869,7 @@ static void parse_grep(const char *str) | |||
| 870 | len = strlen(cp); | 869 | len = strlen(cp); |
| 871 | if (!len) | 870 | if (!len) |
| 872 | return; | 871 | return; |
| 873 | if (len >= GREP_LEN) { | 872 | if (len >= KDB_GREP_STRLEN) { |
| 874 | kdb_printf("search string too long\n"); | 873 | kdb_printf("search string too long\n"); |
| 875 | return; | 874 | return; |
| 876 | } | 875 | } |
| @@ -915,13 +914,12 @@ int kdb_parse(const char *cmdstr) | |||
| 915 | char *cp; | 914 | char *cp; |
| 916 | char *cpp, quoted; | 915 | char *cpp, quoted; |
| 917 | kdbtab_t *tp; | 916 | kdbtab_t *tp; |
| 918 | int i, escaped, ignore_errors = 0, check_grep; | 917 | int i, escaped, ignore_errors = 0, check_grep = 0; |
| 919 | 918 | ||
| 920 | /* | 919 | /* |
| 921 | * First tokenize the command string. | 920 | * First tokenize the command string. |
| 922 | */ | 921 | */ |
| 923 | cp = (char *)cmdstr; | 922 | cp = (char *)cmdstr; |
| 924 | kdb_grepping_flag = check_grep = 0; | ||
| 925 | 923 | ||
| 926 | if (KDB_FLAG(CMD_INTERRUPT)) { | 924 | if (KDB_FLAG(CMD_INTERRUPT)) { |
| 927 | /* Previous command was interrupted, newline must not | 925 | /* Previous command was interrupted, newline must not |
| @@ -1247,7 +1245,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
| 1247 | kdb_printf("due to NonMaskable Interrupt @ " | 1245 | kdb_printf("due to NonMaskable Interrupt @ " |
| 1248 | kdb_machreg_fmt "\n", | 1246 | kdb_machreg_fmt "\n", |
| 1249 | instruction_pointer(regs)); | 1247 | instruction_pointer(regs)); |
| 1250 | kdb_dumpregs(regs); | ||
| 1251 | break; | 1248 | break; |
| 1252 | case KDB_REASON_SSTEP: | 1249 | case KDB_REASON_SSTEP: |
| 1253 | case KDB_REASON_BREAK: | 1250 | case KDB_REASON_BREAK: |
| @@ -1281,6 +1278,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
| 1281 | */ | 1278 | */ |
| 1282 | kdb_nextline = 1; | 1279 | kdb_nextline = 1; |
| 1283 | KDB_STATE_CLEAR(SUPPRESS); | 1280 | KDB_STATE_CLEAR(SUPPRESS); |
| 1281 | kdb_grepping_flag = 0; | ||
| 1282 | /* ensure the old search does not leak into '/' commands */ | ||
| 1283 | kdb_grep_string[0] = '\0'; | ||
| 1284 | 1284 | ||
| 1285 | cmdbuf = cmd_cur; | 1285 | cmdbuf = cmd_cur; |
| 1286 | *cmdbuf = '\0'; | 1286 | *cmdbuf = '\0'; |
| @@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv) | |||
| 2256 | /* | 2256 | /* |
| 2257 | * Validate cpunum | 2257 | * Validate cpunum |
| 2258 | */ | 2258 | */ |
| 2259 | if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) | 2259 | if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) |
| 2260 | return KDB_BADCPUNUM; | 2260 | return KDB_BADCPUNUM; |
| 2261 | 2261 | ||
| 2262 | dbg_switch_cpu = cpunum; | 2262 | dbg_switch_cpu = cpunum; |
| @@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv) | |||
| 2583 | #define K(x) ((x) << (PAGE_SHIFT - 10)) | 2583 | #define K(x) ((x) << (PAGE_SHIFT - 10)) |
| 2584 | kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" | 2584 | kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" |
| 2585 | "Buffers: %8lu kB\n", | 2585 | "Buffers: %8lu kB\n", |
| 2586 | val.totalram, val.freeram, val.bufferram); | 2586 | K(val.totalram), K(val.freeram), K(val.bufferram)); |
| 2587 | return 0; | 2587 | return 0; |
| 2588 | } | 2588 | } |
| 2589 | 2589 | ||
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index eaacd1693954..75014d7f4568 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
| @@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t, | |||
| 196 | 196 | ||
| 197 | /* Miscellaneous functions and data areas */ | 197 | /* Miscellaneous functions and data areas */ |
| 198 | extern int kdb_grepping_flag; | 198 | extern int kdb_grepping_flag; |
| 199 | #define KDB_GREPPING_FLAG_SEARCH 0x8000 | ||
| 199 | extern char kdb_grep_string[]; | 200 | extern char kdb_grep_string[]; |
| 201 | #define KDB_GREP_STRLEN 256 | ||
| 200 | extern int kdb_grep_leading; | 202 | extern int kdb_grep_leading; |
| 201 | extern int kdb_grep_trailing; | 203 | extern int kdb_grep_trailing; |
| 202 | extern char *kdb_cmds[]; | 204 | extern char *kdb_cmds[]; |
| @@ -209,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p); | |||
| 209 | extern void kdb_print_nameval(const char *name, unsigned long val); | 211 | extern void kdb_print_nameval(const char *name, unsigned long val); |
| 210 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); | 212 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); |
| 211 | extern void kdb_meminfo_proc_show(void); | 213 | extern void kdb_meminfo_proc_show(void); |
| 212 | extern char *kdb_getstr(char *, size_t, char *); | 214 | extern char *kdb_getstr(char *, size_t, const char *); |
| 213 | extern void kdb_gdb_state_pass(char *buf); | 215 | extern void kdb_gdb_state_pass(char *buf); |
| 214 | 216 | ||
| 215 | /* Defines for kdb_symbol_print */ | 217 | /* Defines for kdb_symbol_print */ |
diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 103f5d147b2f..2925188f50ea 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ifdef CONFIG_FUNCTION_TRACER | 1 | ifdef CONFIG_FUNCTION_TRACER |
| 2 | CFLAGS_REMOVE_core.o = -pg | 2 | CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) |
| 3 | endif | 3 | endif |
| 4 | 4 | ||
| 5 | obj-y := core.o ring_buffer.o callchain.o | 5 | obj-y := core.o ring_buffer.o callchain.o |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 19efcf13375a..f04daabfd1cf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu) | |||
| 872 | pmu->pmu_enable(pmu); | 872 | pmu->pmu_enable(pmu); |
| 873 | } | 873 | } |
| 874 | 874 | ||
| 875 | static DEFINE_PER_CPU(struct list_head, rotation_list); | 875 | static DEFINE_PER_CPU(struct list_head, active_ctx_list); |
| 876 | 876 | ||
| 877 | /* | 877 | /* |
| 878 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | 878 | * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and |
| 879 | * because they're strictly cpu affine and rotate_start is called with IRQs | 879 | * perf_event_task_tick() are fully serialized because they're strictly cpu |
| 880 | * disabled, while rotate_context is called from IRQ context. | 880 | * affine and perf_event_ctx{activate,deactivate} are called with IRQs |
| 881 | * disabled, while perf_event_task_tick is called from IRQ context. | ||
| 881 | */ | 882 | */ |
| 882 | static void perf_pmu_rotate_start(struct pmu *pmu) | 883 | static void perf_event_ctx_activate(struct perf_event_context *ctx) |
| 883 | { | 884 | { |
| 884 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 885 | struct list_head *head = this_cpu_ptr(&active_ctx_list); |
| 885 | struct list_head *head = this_cpu_ptr(&rotation_list); | ||
| 886 | 886 | ||
| 887 | WARN_ON(!irqs_disabled()); | 887 | WARN_ON(!irqs_disabled()); |
| 888 | 888 | ||
| 889 | if (list_empty(&cpuctx->rotation_list)) | 889 | WARN_ON(!list_empty(&ctx->active_ctx_list)); |
| 890 | list_add(&cpuctx->rotation_list, head); | 890 | |
| 891 | list_add(&ctx->active_ctx_list, head); | ||
| 892 | } | ||
| 893 | |||
| 894 | static void perf_event_ctx_deactivate(struct perf_event_context *ctx) | ||
| 895 | { | ||
| 896 | WARN_ON(!irqs_disabled()); | ||
| 897 | |||
| 898 | WARN_ON(list_empty(&ctx->active_ctx_list)); | ||
| 899 | |||
| 900 | list_del_init(&ctx->active_ctx_list); | ||
| 891 | } | 901 | } |
| 892 | 902 | ||
| 893 | static void get_ctx(struct perf_event_context *ctx) | 903 | static void get_ctx(struct perf_event_context *ctx) |
| @@ -907,6 +917,84 @@ static void put_ctx(struct perf_event_context *ctx) | |||
| 907 | } | 917 | } |
| 908 | 918 | ||
| 909 | /* | 919 | /* |
| 920 | * Because of perf_event::ctx migration in sys_perf_event_open::move_group and | ||
| 921 | * perf_pmu_migrate_context() we need some magic. | ||
| 922 | * | ||
| 923 | * Those places that change perf_event::ctx will hold both | ||
| 924 | * perf_event_ctx::mutex of the 'old' and 'new' ctx value. | ||
| 925 | * | ||
| 926 | * Lock ordering is by mutex address. There is one other site where | ||
| 927 | * perf_event_context::mutex nests and that is put_event(). But remember that | ||
| 928 | * that is a parent<->child context relation, and migration does not affect | ||
| 929 | * children, therefore these two orderings should not interact. | ||
| 930 | * | ||
| 931 | * The change in perf_event::ctx does not affect children (as claimed above) | ||
| 932 | * because the sys_perf_event_open() case will install a new event and break | ||
| 933 | * the ctx parent<->child relation, and perf_pmu_migrate_context() is only | ||
| 934 | * concerned with cpuctx and that doesn't have children. | ||
| 935 | * | ||
| 936 | * The places that change perf_event::ctx will issue: | ||
| 937 | * | ||
| 938 | * perf_remove_from_context(); | ||
| 939 | * synchronize_rcu(); | ||
| 940 | * perf_install_in_context(); | ||
| 941 | * | ||
| 942 | * to affect the change. The remove_from_context() + synchronize_rcu() should | ||
| 943 | * quiesce the event, after which we can install it in the new location. This | ||
| 944 | * means that only external vectors (perf_fops, prctl) can perturb the event | ||
| 945 | * while in transit. Therefore all such accessors should also acquire | ||
| 946 | * perf_event_context::mutex to serialize against this. | ||
| 947 | * | ||
| 948 | * However; because event->ctx can change while we're waiting to acquire | ||
| 949 | * ctx->mutex we must be careful and use the below perf_event_ctx_lock() | ||
| 950 | * function. | ||
| 951 | * | ||
| 952 | * Lock order: | ||
| 953 | * task_struct::perf_event_mutex | ||
| 954 | * perf_event_context::mutex | ||
| 955 | * perf_event_context::lock | ||
| 956 | * perf_event::child_mutex; | ||
| 957 | * perf_event::mmap_mutex | ||
| 958 | * mmap_sem | ||
| 959 | */ | ||
| 960 | static struct perf_event_context * | ||
| 961 | perf_event_ctx_lock_nested(struct perf_event *event, int nesting) | ||
| 962 | { | ||
| 963 | struct perf_event_context *ctx; | ||
| 964 | |||
| 965 | again: | ||
| 966 | rcu_read_lock(); | ||
| 967 | ctx = ACCESS_ONCE(event->ctx); | ||
| 968 | if (!atomic_inc_not_zero(&ctx->refcount)) { | ||
| 969 | rcu_read_unlock(); | ||
| 970 | goto again; | ||
| 971 | } | ||
| 972 | rcu_read_unlock(); | ||
| 973 | |||
| 974 | mutex_lock_nested(&ctx->mutex, nesting); | ||
| 975 | if (event->ctx != ctx) { | ||
| 976 | mutex_unlock(&ctx->mutex); | ||
| 977 | put_ctx(ctx); | ||
| 978 | goto again; | ||
| 979 | } | ||
| 980 | |||
| 981 | return ctx; | ||
| 982 | } | ||
| 983 | |||
| 984 | static inline struct perf_event_context * | ||
| 985 | perf_event_ctx_lock(struct perf_event *event) | ||
| 986 | { | ||
| 987 | return perf_event_ctx_lock_nested(event, 0); | ||
| 988 | } | ||
| 989 | |||
| 990 | static void perf_event_ctx_unlock(struct perf_event *event, | ||
| 991 | struct perf_event_context *ctx) | ||
| 992 | { | ||
| 993 | mutex_unlock(&ctx->mutex); | ||
| 994 | put_ctx(ctx); | ||
| 995 | } | ||
| 996 | |||
| 997 | /* | ||
| 910 | * This must be done under the ctx->lock, such as to serialize against | 998 | * This must be done under the ctx->lock, such as to serialize against |
| 911 | * context_equiv(), therefore we cannot call put_ctx() since that might end up | 999 | * context_equiv(), therefore we cannot call put_ctx() since that might end up |
| 912 | * calling scheduler related locks and ctx->lock nests inside those. | 1000 | * calling scheduler related locks and ctx->lock nests inside those. |
| @@ -1155,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 1155 | ctx->nr_branch_stack++; | 1243 | ctx->nr_branch_stack++; |
| 1156 | 1244 | ||
| 1157 | list_add_rcu(&event->event_entry, &ctx->event_list); | 1245 | list_add_rcu(&event->event_entry, &ctx->event_list); |
| 1158 | if (!ctx->nr_events) | ||
| 1159 | perf_pmu_rotate_start(ctx->pmu); | ||
| 1160 | ctx->nr_events++; | 1246 | ctx->nr_events++; |
| 1161 | if (event->attr.inherit_stat) | 1247 | if (event->attr.inherit_stat) |
| 1162 | ctx->nr_stat++; | 1248 | ctx->nr_stat++; |
| @@ -1275,6 +1361,8 @@ static void perf_group_attach(struct perf_event *event) | |||
| 1275 | if (group_leader == event) | 1361 | if (group_leader == event) |
| 1276 | return; | 1362 | return; |
| 1277 | 1363 | ||
| 1364 | WARN_ON_ONCE(group_leader->ctx != event->ctx); | ||
| 1365 | |||
| 1278 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | 1366 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && |
| 1279 | !is_software_event(event)) | 1367 | !is_software_event(event)) |
| 1280 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | 1368 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; |
| @@ -1296,6 +1384,10 @@ static void | |||
| 1296 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 1384 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
| 1297 | { | 1385 | { |
| 1298 | struct perf_cpu_context *cpuctx; | 1386 | struct perf_cpu_context *cpuctx; |
| 1387 | |||
| 1388 | WARN_ON_ONCE(event->ctx != ctx); | ||
| 1389 | lockdep_assert_held(&ctx->lock); | ||
| 1390 | |||
| 1299 | /* | 1391 | /* |
| 1300 | * We can have double detach due to exit/hot-unplug + close. | 1392 | * We can have double detach due to exit/hot-unplug + close. |
| 1301 | */ | 1393 | */ |
| @@ -1380,6 +1472,8 @@ static void perf_group_detach(struct perf_event *event) | |||
| 1380 | 1472 | ||
| 1381 | /* Inherit group flags from the previous leader */ | 1473 | /* Inherit group flags from the previous leader */ |
| 1382 | sibling->group_flags = event->group_flags; | 1474 | sibling->group_flags = event->group_flags; |
| 1475 | |||
| 1476 | WARN_ON_ONCE(sibling->ctx != event->ctx); | ||
| 1383 | } | 1477 | } |
| 1384 | 1478 | ||
| 1385 | out: | 1479 | out: |
| @@ -1442,6 +1536,10 @@ event_sched_out(struct perf_event *event, | |||
| 1442 | { | 1536 | { |
| 1443 | u64 tstamp = perf_event_time(event); | 1537 | u64 tstamp = perf_event_time(event); |
| 1444 | u64 delta; | 1538 | u64 delta; |
| 1539 | |||
| 1540 | WARN_ON_ONCE(event->ctx != ctx); | ||
| 1541 | lockdep_assert_held(&ctx->lock); | ||
| 1542 | |||
| 1445 | /* | 1543 | /* |
| 1446 | * An event which could not be activated because of | 1544 | * An event which could not be activated because of |
| 1447 | * filter mismatch still needs to have its timings | 1545 | * filter mismatch still needs to have its timings |
| @@ -1471,7 +1569,8 @@ event_sched_out(struct perf_event *event, | |||
| 1471 | 1569 | ||
| 1472 | if (!is_software_event(event)) | 1570 | if (!is_software_event(event)) |
| 1473 | cpuctx->active_oncpu--; | 1571 | cpuctx->active_oncpu--; |
| 1474 | ctx->nr_active--; | 1572 | if (!--ctx->nr_active) |
| 1573 | perf_event_ctx_deactivate(ctx); | ||
| 1475 | if (event->attr.freq && event->attr.sample_freq) | 1574 | if (event->attr.freq && event->attr.sample_freq) |
| 1476 | ctx->nr_freq--; | 1575 | ctx->nr_freq--; |
| 1477 | if (event->attr.exclusive || !cpuctx->active_oncpu) | 1576 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
| @@ -1654,7 +1753,7 @@ int __perf_event_disable(void *info) | |||
| 1654 | * is the current context on this CPU and preemption is disabled, | 1753 | * is the current context on this CPU and preemption is disabled, |
| 1655 | * hence we can't get into perf_event_task_sched_out for this context. | 1754 | * hence we can't get into perf_event_task_sched_out for this context. |
| 1656 | */ | 1755 | */ |
| 1657 | void perf_event_disable(struct perf_event *event) | 1756 | static void _perf_event_disable(struct perf_event *event) |
| 1658 | { | 1757 | { |
| 1659 | struct perf_event_context *ctx = event->ctx; | 1758 | struct perf_event_context *ctx = event->ctx; |
| 1660 | struct task_struct *task = ctx->task; | 1759 | struct task_struct *task = ctx->task; |
| @@ -1695,6 +1794,19 @@ retry: | |||
| 1695 | } | 1794 | } |
| 1696 | raw_spin_unlock_irq(&ctx->lock); | 1795 | raw_spin_unlock_irq(&ctx->lock); |
| 1697 | } | 1796 | } |
| 1797 | |||
| 1798 | /* | ||
| 1799 | * Strictly speaking kernel users cannot create groups and therefore this | ||
| 1800 | * interface does not need the perf_event_ctx_lock() magic. | ||
| 1801 | */ | ||
| 1802 | void perf_event_disable(struct perf_event *event) | ||
| 1803 | { | ||
| 1804 | struct perf_event_context *ctx; | ||
| 1805 | |||
| 1806 | ctx = perf_event_ctx_lock(event); | ||
| 1807 | _perf_event_disable(event); | ||
| 1808 | perf_event_ctx_unlock(event, ctx); | ||
| 1809 | } | ||
| 1698 | EXPORT_SYMBOL_GPL(perf_event_disable); | 1810 | EXPORT_SYMBOL_GPL(perf_event_disable); |
| 1699 | 1811 | ||
| 1700 | static void perf_set_shadow_time(struct perf_event *event, | 1812 | static void perf_set_shadow_time(struct perf_event *event, |
| @@ -1782,7 +1894,8 @@ event_sched_in(struct perf_event *event, | |||
| 1782 | 1894 | ||
| 1783 | if (!is_software_event(event)) | 1895 | if (!is_software_event(event)) |
| 1784 | cpuctx->active_oncpu++; | 1896 | cpuctx->active_oncpu++; |
| 1785 | ctx->nr_active++; | 1897 | if (!ctx->nr_active++) |
| 1898 | perf_event_ctx_activate(ctx); | ||
| 1786 | if (event->attr.freq && event->attr.sample_freq) | 1899 | if (event->attr.freq && event->attr.sample_freq) |
| 1787 | ctx->nr_freq++; | 1900 | ctx->nr_freq++; |
| 1788 | 1901 | ||
| @@ -2158,7 +2271,7 @@ unlock: | |||
| 2158 | * perf_event_for_each_child or perf_event_for_each as described | 2271 | * perf_event_for_each_child or perf_event_for_each as described |
| 2159 | * for perf_event_disable. | 2272 | * for perf_event_disable. |
| 2160 | */ | 2273 | */ |
| 2161 | void perf_event_enable(struct perf_event *event) | 2274 | static void _perf_event_enable(struct perf_event *event) |
| 2162 | { | 2275 | { |
| 2163 | struct perf_event_context *ctx = event->ctx; | 2276 | struct perf_event_context *ctx = event->ctx; |
| 2164 | struct task_struct *task = ctx->task; | 2277 | struct task_struct *task = ctx->task; |
| @@ -2214,9 +2327,21 @@ retry: | |||
| 2214 | out: | 2327 | out: |
| 2215 | raw_spin_unlock_irq(&ctx->lock); | 2328 | raw_spin_unlock_irq(&ctx->lock); |
| 2216 | } | 2329 | } |
| 2330 | |||
| 2331 | /* | ||
| 2332 | * See perf_event_disable(); | ||
| 2333 | */ | ||
| 2334 | void perf_event_enable(struct perf_event *event) | ||
| 2335 | { | ||
| 2336 | struct perf_event_context *ctx; | ||
| 2337 | |||
| 2338 | ctx = perf_event_ctx_lock(event); | ||
| 2339 | _perf_event_enable(event); | ||
| 2340 | perf_event_ctx_unlock(event, ctx); | ||
| 2341 | } | ||
| 2217 | EXPORT_SYMBOL_GPL(perf_event_enable); | 2342 | EXPORT_SYMBOL_GPL(perf_event_enable); |
| 2218 | 2343 | ||
| 2219 | int perf_event_refresh(struct perf_event *event, int refresh) | 2344 | static int _perf_event_refresh(struct perf_event *event, int refresh) |
| 2220 | { | 2345 | { |
| 2221 | /* | 2346 | /* |
| 2222 | * not supported on inherited events | 2347 | * not supported on inherited events |
| @@ -2225,10 +2350,25 @@ int perf_event_refresh(struct perf_event *event, int refresh) | |||
| 2225 | return -EINVAL; | 2350 | return -EINVAL; |
| 2226 | 2351 | ||
| 2227 | atomic_add(refresh, &event->event_limit); | 2352 | atomic_add(refresh, &event->event_limit); |
| 2228 | perf_event_enable(event); | 2353 | _perf_event_enable(event); |
| 2229 | 2354 | ||
| 2230 | return 0; | 2355 | return 0; |
| 2231 | } | 2356 | } |
| 2357 | |||
| 2358 | /* | ||
| 2359 | * See perf_event_disable() | ||
| 2360 | */ | ||
| 2361 | int perf_event_refresh(struct perf_event *event, int refresh) | ||
| 2362 | { | ||
| 2363 | struct perf_event_context *ctx; | ||
| 2364 | int ret; | ||
| 2365 | |||
| 2366 | ctx = perf_event_ctx_lock(event); | ||
| 2367 | ret = _perf_event_refresh(event, refresh); | ||
| 2368 | perf_event_ctx_unlock(event, ctx); | ||
| 2369 | |||
| 2370 | return ret; | ||
| 2371 | } | ||
| 2232 | EXPORT_SYMBOL_GPL(perf_event_refresh); | 2372 | EXPORT_SYMBOL_GPL(perf_event_refresh); |
| 2233 | 2373 | ||
| 2234 | static void ctx_sched_out(struct perf_event_context *ctx, | 2374 | static void ctx_sched_out(struct perf_event_context *ctx, |
| @@ -2612,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
| 2612 | 2752 | ||
| 2613 | perf_pmu_enable(ctx->pmu); | 2753 | perf_pmu_enable(ctx->pmu); |
| 2614 | perf_ctx_unlock(cpuctx, ctx); | 2754 | perf_ctx_unlock(cpuctx, ctx); |
| 2615 | |||
| 2616 | /* | ||
| 2617 | * Since these rotations are per-cpu, we need to ensure the | ||
| 2618 | * cpu-context we got scheduled on is actually rotating. | ||
| 2619 | */ | ||
| 2620 | perf_pmu_rotate_start(ctx->pmu); | ||
| 2621 | } | 2755 | } |
| 2622 | 2756 | ||
| 2623 | /* | 2757 | /* |
| @@ -2905,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
| 2905 | list_rotate_left(&ctx->flexible_groups); | 3039 | list_rotate_left(&ctx->flexible_groups); |
| 2906 | } | 3040 | } |
| 2907 | 3041 | ||
| 2908 | /* | ||
| 2909 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
| 2910 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
| 2911 | * disabled, while rotate_context is called from IRQ context. | ||
| 2912 | */ | ||
| 2913 | static int perf_rotate_context(struct perf_cpu_context *cpuctx) | 3042 | static int perf_rotate_context(struct perf_cpu_context *cpuctx) |
| 2914 | { | 3043 | { |
| 2915 | struct perf_event_context *ctx = NULL; | 3044 | struct perf_event_context *ctx = NULL; |
| 2916 | int rotate = 0, remove = 1; | 3045 | int rotate = 0; |
| 2917 | 3046 | ||
| 2918 | if (cpuctx->ctx.nr_events) { | 3047 | if (cpuctx->ctx.nr_events) { |
| 2919 | remove = 0; | ||
| 2920 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) | 3048 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) |
| 2921 | rotate = 1; | 3049 | rotate = 1; |
| 2922 | } | 3050 | } |
| 2923 | 3051 | ||
| 2924 | ctx = cpuctx->task_ctx; | 3052 | ctx = cpuctx->task_ctx; |
| 2925 | if (ctx && ctx->nr_events) { | 3053 | if (ctx && ctx->nr_events) { |
| 2926 | remove = 0; | ||
| 2927 | if (ctx->nr_events != ctx->nr_active) | 3054 | if (ctx->nr_events != ctx->nr_active) |
| 2928 | rotate = 1; | 3055 | rotate = 1; |
| 2929 | } | 3056 | } |
| @@ -2947,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
| 2947 | perf_pmu_enable(cpuctx->ctx.pmu); | 3074 | perf_pmu_enable(cpuctx->ctx.pmu); |
| 2948 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | 3075 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); |
| 2949 | done: | 3076 | done: |
| 2950 | if (remove) | ||
| 2951 | list_del_init(&cpuctx->rotation_list); | ||
| 2952 | 3077 | ||
| 2953 | return rotate; | 3078 | return rotate; |
| 2954 | } | 3079 | } |
| @@ -2966,9 +3091,8 @@ bool perf_event_can_stop_tick(void) | |||
| 2966 | 3091 | ||
| 2967 | void perf_event_task_tick(void) | 3092 | void perf_event_task_tick(void) |
| 2968 | { | 3093 | { |
| 2969 | struct list_head *head = this_cpu_ptr(&rotation_list); | 3094 | struct list_head *head = this_cpu_ptr(&active_ctx_list); |
| 2970 | struct perf_cpu_context *cpuctx, *tmp; | 3095 | struct perf_event_context *ctx, *tmp; |
| 2971 | struct perf_event_context *ctx; | ||
| 2972 | int throttled; | 3096 | int throttled; |
| 2973 | 3097 | ||
| 2974 | WARN_ON(!irqs_disabled()); | 3098 | WARN_ON(!irqs_disabled()); |
| @@ -2976,14 +3100,8 @@ void perf_event_task_tick(void) | |||
| 2976 | __this_cpu_inc(perf_throttled_seq); | 3100 | __this_cpu_inc(perf_throttled_seq); |
| 2977 | throttled = __this_cpu_xchg(perf_throttled_count, 0); | 3101 | throttled = __this_cpu_xchg(perf_throttled_count, 0); |
| 2978 | 3102 | ||
| 2979 | list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { | 3103 | list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) |
| 2980 | ctx = &cpuctx->ctx; | ||
| 2981 | perf_adjust_freq_unthr_context(ctx, throttled); | 3104 | perf_adjust_freq_unthr_context(ctx, throttled); |
| 2982 | |||
| 2983 | ctx = cpuctx->task_ctx; | ||
| 2984 | if (ctx) | ||
| 2985 | perf_adjust_freq_unthr_context(ctx, throttled); | ||
| 2986 | } | ||
| 2987 | } | 3105 | } |
| 2988 | 3106 | ||
| 2989 | static int event_enable_on_exec(struct perf_event *event, | 3107 | static int event_enable_on_exec(struct perf_event *event, |
| @@ -3142,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) | |||
| 3142 | { | 3260 | { |
| 3143 | raw_spin_lock_init(&ctx->lock); | 3261 | raw_spin_lock_init(&ctx->lock); |
| 3144 | mutex_init(&ctx->mutex); | 3262 | mutex_init(&ctx->mutex); |
| 3263 | INIT_LIST_HEAD(&ctx->active_ctx_list); | ||
| 3145 | INIT_LIST_HEAD(&ctx->pinned_groups); | 3264 | INIT_LIST_HEAD(&ctx->pinned_groups); |
| 3146 | INIT_LIST_HEAD(&ctx->flexible_groups); | 3265 | INIT_LIST_HEAD(&ctx->flexible_groups); |
| 3147 | INIT_LIST_HEAD(&ctx->event_list); | 3266 | INIT_LIST_HEAD(&ctx->event_list); |
| @@ -3421,7 +3540,16 @@ static void perf_remove_from_owner(struct perf_event *event) | |||
| 3421 | rcu_read_unlock(); | 3540 | rcu_read_unlock(); |
| 3422 | 3541 | ||
| 3423 | if (owner) { | 3542 | if (owner) { |
| 3424 | mutex_lock(&owner->perf_event_mutex); | 3543 | /* |
| 3544 | * If we're here through perf_event_exit_task() we're already | ||
| 3545 | * holding ctx->mutex which would be an inversion wrt. the | ||
| 3546 | * normal lock order. | ||
| 3547 | * | ||
| 3548 | * However we can safely take this lock because its the child | ||
| 3549 | * ctx->mutex. | ||
| 3550 | */ | ||
| 3551 | mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING); | ||
| 3552 | |||
| 3425 | /* | 3553 | /* |
| 3426 | * We have to re-check the event->owner field, if it is cleared | 3554 | * We have to re-check the event->owner field, if it is cleared |
| 3427 | * we raced with perf_event_exit_task(), acquiring the mutex | 3555 | * we raced with perf_event_exit_task(), acquiring the mutex |
| @@ -3440,7 +3568,7 @@ static void perf_remove_from_owner(struct perf_event *event) | |||
| 3440 | */ | 3568 | */ |
| 3441 | static void put_event(struct perf_event *event) | 3569 | static void put_event(struct perf_event *event) |
| 3442 | { | 3570 | { |
| 3443 | struct perf_event_context *ctx = event->ctx; | 3571 | struct perf_event_context *ctx; |
| 3444 | 3572 | ||
| 3445 | if (!atomic_long_dec_and_test(&event->refcount)) | 3573 | if (!atomic_long_dec_and_test(&event->refcount)) |
| 3446 | return; | 3574 | return; |
| @@ -3448,7 +3576,6 @@ static void put_event(struct perf_event *event) | |||
| 3448 | if (!is_kernel_event(event)) | 3576 | if (!is_kernel_event(event)) |
| 3449 | perf_remove_from_owner(event); | 3577 | perf_remove_from_owner(event); |
| 3450 | 3578 | ||
| 3451 | WARN_ON_ONCE(ctx->parent_ctx); | ||
| 3452 | /* | 3579 | /* |
| 3453 | * There are two ways this annotation is useful: | 3580 | * There are two ways this annotation is useful: |
| 3454 | * | 3581 | * |
| @@ -3461,7 +3588,8 @@ static void put_event(struct perf_event *event) | |||
| 3461 | * the last filedesc died, so there is no possibility | 3588 | * the last filedesc died, so there is no possibility |
| 3462 | * to trigger the AB-BA case. | 3589 | * to trigger the AB-BA case. |
| 3463 | */ | 3590 | */ |
| 3464 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 3591 | ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING); |
| 3592 | WARN_ON_ONCE(ctx->parent_ctx); | ||
| 3465 | perf_remove_from_context(event, true); | 3593 | perf_remove_from_context(event, true); |
| 3466 | mutex_unlock(&ctx->mutex); | 3594 | mutex_unlock(&ctx->mutex); |
| 3467 | 3595 | ||
| @@ -3547,12 +3675,13 @@ static int perf_event_read_group(struct perf_event *event, | |||
| 3547 | u64 read_format, char __user *buf) | 3675 | u64 read_format, char __user *buf) |
| 3548 | { | 3676 | { |
| 3549 | struct perf_event *leader = event->group_leader, *sub; | 3677 | struct perf_event *leader = event->group_leader, *sub; |
| 3550 | int n = 0, size = 0, ret = -EFAULT; | ||
| 3551 | struct perf_event_context *ctx = leader->ctx; | 3678 | struct perf_event_context *ctx = leader->ctx; |
| 3552 | u64 values[5]; | 3679 | int n = 0, size = 0, ret; |
| 3553 | u64 count, enabled, running; | 3680 | u64 count, enabled, running; |
| 3681 | u64 values[5]; | ||
| 3682 | |||
| 3683 | lockdep_assert_held(&ctx->mutex); | ||
| 3554 | 3684 | ||
| 3555 | mutex_lock(&ctx->mutex); | ||
| 3556 | count = perf_event_read_value(leader, &enabled, &running); | 3685 | count = perf_event_read_value(leader, &enabled, &running); |
| 3557 | 3686 | ||
| 3558 | values[n++] = 1 + leader->nr_siblings; | 3687 | values[n++] = 1 + leader->nr_siblings; |
| @@ -3567,7 +3696,7 @@ static int perf_event_read_group(struct perf_event *event, | |||
| 3567 | size = n * sizeof(u64); | 3696 | size = n * sizeof(u64); |
| 3568 | 3697 | ||
| 3569 | if (copy_to_user(buf, values, size)) | 3698 | if (copy_to_user(buf, values, size)) |
| 3570 | goto unlock; | 3699 | return -EFAULT; |
| 3571 | 3700 | ||
| 3572 | ret = size; | 3701 | ret = size; |
| 3573 | 3702 | ||
| @@ -3581,14 +3710,11 @@ static int perf_event_read_group(struct perf_event *event, | |||
| 3581 | size = n * sizeof(u64); | 3710 | size = n * sizeof(u64); |
| 3582 | 3711 | ||
| 3583 | if (copy_to_user(buf + ret, values, size)) { | 3712 | if (copy_to_user(buf + ret, values, size)) { |
| 3584 | ret = -EFAULT; | 3713 | return -EFAULT; |
| 3585 | goto unlock; | ||
| 3586 | } | 3714 | } |
| 3587 | 3715 | ||
| 3588 | ret += size; | 3716 | ret += size; |
| 3589 | } | 3717 | } |
| 3590 | unlock: | ||
| 3591 | mutex_unlock(&ctx->mutex); | ||
| 3592 | 3718 | ||
| 3593 | return ret; | 3719 | return ret; |
| 3594 | } | 3720 | } |
| @@ -3660,8 +3786,14 @@ static ssize_t | |||
| 3660 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | 3786 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
| 3661 | { | 3787 | { |
| 3662 | struct perf_event *event = file->private_data; | 3788 | struct perf_event *event = file->private_data; |
| 3789 | struct perf_event_context *ctx; | ||
| 3790 | int ret; | ||
| 3791 | |||
| 3792 | ctx = perf_event_ctx_lock(event); | ||
| 3793 | ret = perf_read_hw(event, buf, count); | ||
| 3794 | perf_event_ctx_unlock(event, ctx); | ||
| 3663 | 3795 | ||
| 3664 | return perf_read_hw(event, buf, count); | 3796 | return ret; |
| 3665 | } | 3797 | } |
| 3666 | 3798 | ||
| 3667 | static unsigned int perf_poll(struct file *file, poll_table *wait) | 3799 | static unsigned int perf_poll(struct file *file, poll_table *wait) |
| @@ -3687,7 +3819,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) | |||
| 3687 | return events; | 3819 | return events; |
| 3688 | } | 3820 | } |
| 3689 | 3821 | ||
| 3690 | static void perf_event_reset(struct perf_event *event) | 3822 | static void _perf_event_reset(struct perf_event *event) |
| 3691 | { | 3823 | { |
| 3692 | (void)perf_event_read(event); | 3824 | (void)perf_event_read(event); |
| 3693 | local64_set(&event->count, 0); | 3825 | local64_set(&event->count, 0); |
| @@ -3706,6 +3838,7 @@ static void perf_event_for_each_child(struct perf_event *event, | |||
| 3706 | struct perf_event *child; | 3838 | struct perf_event *child; |
| 3707 | 3839 | ||
| 3708 | WARN_ON_ONCE(event->ctx->parent_ctx); | 3840 | WARN_ON_ONCE(event->ctx->parent_ctx); |
| 3841 | |||
| 3709 | mutex_lock(&event->child_mutex); | 3842 | mutex_lock(&event->child_mutex); |
| 3710 | func(event); | 3843 | func(event); |
| 3711 | list_for_each_entry(child, &event->child_list, child_list) | 3844 | list_for_each_entry(child, &event->child_list, child_list) |
| @@ -3719,14 +3852,13 @@ static void perf_event_for_each(struct perf_event *event, | |||
| 3719 | struct perf_event_context *ctx = event->ctx; | 3852 | struct perf_event_context *ctx = event->ctx; |
| 3720 | struct perf_event *sibling; | 3853 | struct perf_event *sibling; |
| 3721 | 3854 | ||
| 3722 | WARN_ON_ONCE(ctx->parent_ctx); | 3855 | lockdep_assert_held(&ctx->mutex); |
| 3723 | mutex_lock(&ctx->mutex); | 3856 | |
| 3724 | event = event->group_leader; | 3857 | event = event->group_leader; |
| 3725 | 3858 | ||
| 3726 | perf_event_for_each_child(event, func); | 3859 | perf_event_for_each_child(event, func); |
| 3727 | list_for_each_entry(sibling, &event->sibling_list, group_entry) | 3860 | list_for_each_entry(sibling, &event->sibling_list, group_entry) |
| 3728 | perf_event_for_each_child(sibling, func); | 3861 | perf_event_for_each_child(sibling, func); |
| 3729 | mutex_unlock(&ctx->mutex); | ||
| 3730 | } | 3862 | } |
| 3731 | 3863 | ||
| 3732 | static int perf_event_period(struct perf_event *event, u64 __user *arg) | 3864 | static int perf_event_period(struct perf_event *event, u64 __user *arg) |
| @@ -3796,25 +3928,24 @@ static int perf_event_set_output(struct perf_event *event, | |||
| 3796 | struct perf_event *output_event); | 3928 | struct perf_event *output_event); |
| 3797 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | 3929 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); |
| 3798 | 3930 | ||
| 3799 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 3931 | static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) |
| 3800 | { | 3932 | { |
| 3801 | struct perf_event *event = file->private_data; | ||
| 3802 | void (*func)(struct perf_event *); | 3933 | void (*func)(struct perf_event *); |
| 3803 | u32 flags = arg; | 3934 | u32 flags = arg; |
| 3804 | 3935 | ||
| 3805 | switch (cmd) { | 3936 | switch (cmd) { |
| 3806 | case PERF_EVENT_IOC_ENABLE: | 3937 | case PERF_EVENT_IOC_ENABLE: |
| 3807 | func = perf_event_enable; | 3938 | func = _perf_event_enable; |
| 3808 | break; | 3939 | break; |
| 3809 | case PERF_EVENT_IOC_DISABLE: | 3940 | case PERF_EVENT_IOC_DISABLE: |
| 3810 | func = perf_event_disable; | 3941 | func = _perf_event_disable; |
| 3811 | break; | 3942 | break; |
| 3812 | case PERF_EVENT_IOC_RESET: | 3943 | case PERF_EVENT_IOC_RESET: |
| 3813 | func = perf_event_reset; | 3944 | func = _perf_event_reset; |
| 3814 | break; | 3945 | break; |
| 3815 | 3946 | ||
| 3816 | case PERF_EVENT_IOC_REFRESH: | 3947 | case PERF_EVENT_IOC_REFRESH: |
| 3817 | return perf_event_refresh(event, arg); | 3948 | return _perf_event_refresh(event, arg); |
| 3818 | 3949 | ||
| 3819 | case PERF_EVENT_IOC_PERIOD: | 3950 | case PERF_EVENT_IOC_PERIOD: |
| 3820 | return perf_event_period(event, (u64 __user *)arg); | 3951 | return perf_event_period(event, (u64 __user *)arg); |
| @@ -3861,6 +3992,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 3861 | return 0; | 3992 | return 0; |
| 3862 | } | 3993 | } |
| 3863 | 3994 | ||
| 3995 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 3996 | { | ||
| 3997 | struct perf_event *event = file->private_data; | ||
| 3998 | struct perf_event_context *ctx; | ||
| 3999 | long ret; | ||
| 4000 | |||
| 4001 | ctx = perf_event_ctx_lock(event); | ||
| 4002 | ret = _perf_ioctl(event, cmd, arg); | ||
| 4003 | perf_event_ctx_unlock(event, ctx); | ||
| 4004 | |||
| 4005 | return ret; | ||
| 4006 | } | ||
| 4007 | |||
| 3864 | #ifdef CONFIG_COMPAT | 4008 | #ifdef CONFIG_COMPAT |
| 3865 | static long perf_compat_ioctl(struct file *file, unsigned int cmd, | 4009 | static long perf_compat_ioctl(struct file *file, unsigned int cmd, |
| 3866 | unsigned long arg) | 4010 | unsigned long arg) |
| @@ -3883,11 +4027,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd, | |||
| 3883 | 4027 | ||
| 3884 | int perf_event_task_enable(void) | 4028 | int perf_event_task_enable(void) |
| 3885 | { | 4029 | { |
| 4030 | struct perf_event_context *ctx; | ||
| 3886 | struct perf_event *event; | 4031 | struct perf_event *event; |
| 3887 | 4032 | ||
| 3888 | mutex_lock(¤t->perf_event_mutex); | 4033 | mutex_lock(¤t->perf_event_mutex); |
| 3889 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) | 4034 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { |
| 3890 | perf_event_for_each_child(event, perf_event_enable); | 4035 | ctx = perf_event_ctx_lock(event); |
| 4036 | perf_event_for_each_child(event, _perf_event_enable); | ||
| 4037 | perf_event_ctx_unlock(event, ctx); | ||
| 4038 | } | ||
| 3891 | mutex_unlock(¤t->perf_event_mutex); | 4039 | mutex_unlock(¤t->perf_event_mutex); |
| 3892 | 4040 | ||
| 3893 | return 0; | 4041 | return 0; |
| @@ -3895,11 +4043,15 @@ int perf_event_task_enable(void) | |||
| 3895 | 4043 | ||
| 3896 | int perf_event_task_disable(void) | 4044 | int perf_event_task_disable(void) |
| 3897 | { | 4045 | { |
| 4046 | struct perf_event_context *ctx; | ||
| 3898 | struct perf_event *event; | 4047 | struct perf_event *event; |
| 3899 | 4048 | ||
| 3900 | mutex_lock(¤t->perf_event_mutex); | 4049 | mutex_lock(¤t->perf_event_mutex); |
| 3901 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) | 4050 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { |
| 3902 | perf_event_for_each_child(event, perf_event_disable); | 4051 | ctx = perf_event_ctx_lock(event); |
| 4052 | perf_event_for_each_child(event, _perf_event_disable); | ||
| 4053 | perf_event_ctx_unlock(event, ctx); | ||
| 4054 | } | ||
| 3903 | mutex_unlock(¤t->perf_event_mutex); | 4055 | mutex_unlock(¤t->perf_event_mutex); |
| 3904 | 4056 | ||
| 3905 | return 0; | 4057 | return 0; |
| @@ -3949,7 +4101,8 @@ unlock: | |||
| 3949 | rcu_read_unlock(); | 4101 | rcu_read_unlock(); |
| 3950 | } | 4102 | } |
| 3951 | 4103 | ||
| 3952 | void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 4104 | void __weak arch_perf_update_userpage( |
| 4105 | struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) | ||
| 3953 | { | 4106 | { |
| 3954 | } | 4107 | } |
| 3955 | 4108 | ||
| @@ -3999,7 +4152,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3999 | userpg->time_running = running + | 4152 | userpg->time_running = running + |
| 4000 | atomic64_read(&event->child_total_time_running); | 4153 | atomic64_read(&event->child_total_time_running); |
| 4001 | 4154 | ||
| 4002 | arch_perf_update_userpage(userpg, now); | 4155 | arch_perf_update_userpage(event, userpg, now); |
| 4003 | 4156 | ||
| 4004 | barrier(); | 4157 | barrier(); |
| 4005 | ++userpg->lock; | 4158 | ++userpg->lock; |
| @@ -4141,6 +4294,9 @@ static void perf_mmap_open(struct vm_area_struct *vma) | |||
| 4141 | 4294 | ||
| 4142 | atomic_inc(&event->mmap_count); | 4295 | atomic_inc(&event->mmap_count); |
| 4143 | atomic_inc(&event->rb->mmap_count); | 4296 | atomic_inc(&event->rb->mmap_count); |
| 4297 | |||
| 4298 | if (event->pmu->event_mapped) | ||
| 4299 | event->pmu->event_mapped(event); | ||
| 4144 | } | 4300 | } |
| 4145 | 4301 | ||
| 4146 | /* | 4302 | /* |
| @@ -4160,6 +4316,9 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
| 4160 | int mmap_locked = rb->mmap_locked; | 4316 | int mmap_locked = rb->mmap_locked; |
| 4161 | unsigned long size = perf_data_size(rb); | 4317 | unsigned long size = perf_data_size(rb); |
| 4162 | 4318 | ||
| 4319 | if (event->pmu->event_unmapped) | ||
| 4320 | event->pmu->event_unmapped(event); | ||
| 4321 | |||
| 4163 | atomic_dec(&rb->mmap_count); | 4322 | atomic_dec(&rb->mmap_count); |
| 4164 | 4323 | ||
| 4165 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) | 4324 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) |
| @@ -4361,6 +4520,9 @@ unlock: | |||
| 4361 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; | 4520 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; |
| 4362 | vma->vm_ops = &perf_mmap_vmops; | 4521 | vma->vm_ops = &perf_mmap_vmops; |
| 4363 | 4522 | ||
| 4523 | if (event->pmu->event_mapped) | ||
| 4524 | event->pmu->event_mapped(event); | ||
| 4525 | |||
| 4364 | return ret; | 4526 | return ret; |
| 4365 | } | 4527 | } |
| 4366 | 4528 | ||
| @@ -5889,6 +6051,8 @@ end: | |||
| 5889 | rcu_read_unlock(); | 6051 | rcu_read_unlock(); |
| 5890 | } | 6052 | } |
| 5891 | 6053 | ||
| 6054 | DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); | ||
| 6055 | |||
| 5892 | int perf_swevent_get_recursion_context(void) | 6056 | int perf_swevent_get_recursion_context(void) |
| 5893 | { | 6057 | { |
| 5894 | struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); | 6058 | struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); |
| @@ -5904,21 +6068,30 @@ inline void perf_swevent_put_recursion_context(int rctx) | |||
| 5904 | put_recursion_context(swhash->recursion, rctx); | 6068 | put_recursion_context(swhash->recursion, rctx); |
| 5905 | } | 6069 | } |
| 5906 | 6070 | ||
| 5907 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | 6071 | void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
| 5908 | { | 6072 | { |
| 5909 | struct perf_sample_data data; | 6073 | struct perf_sample_data data; |
| 5910 | int rctx; | ||
| 5911 | 6074 | ||
| 5912 | preempt_disable_notrace(); | 6075 | if (WARN_ON_ONCE(!regs)) |
| 5913 | rctx = perf_swevent_get_recursion_context(); | ||
| 5914 | if (rctx < 0) | ||
| 5915 | return; | 6076 | return; |
| 5916 | 6077 | ||
| 5917 | perf_sample_data_init(&data, addr, 0); | 6078 | perf_sample_data_init(&data, addr, 0); |
| 5918 | |||
| 5919 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); | 6079 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); |
| 6080 | } | ||
| 6081 | |||
| 6082 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | ||
| 6083 | { | ||
| 6084 | int rctx; | ||
| 6085 | |||
| 6086 | preempt_disable_notrace(); | ||
| 6087 | rctx = perf_swevent_get_recursion_context(); | ||
| 6088 | if (unlikely(rctx < 0)) | ||
| 6089 | goto fail; | ||
| 6090 | |||
| 6091 | ___perf_sw_event(event_id, nr, regs, addr); | ||
| 5920 | 6092 | ||
| 5921 | perf_swevent_put_recursion_context(rctx); | 6093 | perf_swevent_put_recursion_context(rctx); |
| 6094 | fail: | ||
| 5922 | preempt_enable_notrace(); | 6095 | preempt_enable_notrace(); |
| 5923 | } | 6096 | } |
| 5924 | 6097 | ||
| @@ -6780,7 +6953,6 @@ skip_type: | |||
| 6780 | 6953 | ||
| 6781 | __perf_cpu_hrtimer_init(cpuctx, cpu); | 6954 | __perf_cpu_hrtimer_init(cpuctx, cpu); |
| 6782 | 6955 | ||
| 6783 | INIT_LIST_HEAD(&cpuctx->rotation_list); | ||
| 6784 | cpuctx->unique_pmu = pmu; | 6956 | cpuctx->unique_pmu = pmu; |
| 6785 | } | 6957 | } |
| 6786 | 6958 | ||
| @@ -6853,6 +7025,20 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
| 6853 | } | 7025 | } |
| 6854 | EXPORT_SYMBOL_GPL(perf_pmu_unregister); | 7026 | EXPORT_SYMBOL_GPL(perf_pmu_unregister); |
| 6855 | 7027 | ||
| 7028 | static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) | ||
| 7029 | { | ||
| 7030 | int ret; | ||
| 7031 | |||
| 7032 | if (!try_module_get(pmu->module)) | ||
| 7033 | return -ENODEV; | ||
| 7034 | event->pmu = pmu; | ||
| 7035 | ret = pmu->event_init(event); | ||
| 7036 | if (ret) | ||
| 7037 | module_put(pmu->module); | ||
| 7038 | |||
| 7039 | return ret; | ||
| 7040 | } | ||
| 7041 | |||
| 6856 | struct pmu *perf_init_event(struct perf_event *event) | 7042 | struct pmu *perf_init_event(struct perf_event *event) |
| 6857 | { | 7043 | { |
| 6858 | struct pmu *pmu = NULL; | 7044 | struct pmu *pmu = NULL; |
| @@ -6865,24 +7051,14 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
| 6865 | pmu = idr_find(&pmu_idr, event->attr.type); | 7051 | pmu = idr_find(&pmu_idr, event->attr.type); |
| 6866 | rcu_read_unlock(); | 7052 | rcu_read_unlock(); |
| 6867 | if (pmu) { | 7053 | if (pmu) { |
| 6868 | if (!try_module_get(pmu->module)) { | 7054 | ret = perf_try_init_event(pmu, event); |
| 6869 | pmu = ERR_PTR(-ENODEV); | ||
| 6870 | goto unlock; | ||
| 6871 | } | ||
| 6872 | event->pmu = pmu; | ||
| 6873 | ret = pmu->event_init(event); | ||
| 6874 | if (ret) | 7055 | if (ret) |
| 6875 | pmu = ERR_PTR(ret); | 7056 | pmu = ERR_PTR(ret); |
| 6876 | goto unlock; | 7057 | goto unlock; |
| 6877 | } | 7058 | } |
| 6878 | 7059 | ||
| 6879 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 7060 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 6880 | if (!try_module_get(pmu->module)) { | 7061 | ret = perf_try_init_event(pmu, event); |
| 6881 | pmu = ERR_PTR(-ENODEV); | ||
| 6882 | goto unlock; | ||
| 6883 | } | ||
| 6884 | event->pmu = pmu; | ||
| 6885 | ret = pmu->event_init(event); | ||
| 6886 | if (!ret) | 7062 | if (!ret) |
| 6887 | goto unlock; | 7063 | goto unlock; |
| 6888 | 7064 | ||
| @@ -7246,6 +7422,15 @@ out: | |||
| 7246 | return ret; | 7422 | return ret; |
| 7247 | } | 7423 | } |
| 7248 | 7424 | ||
| 7425 | static void mutex_lock_double(struct mutex *a, struct mutex *b) | ||
| 7426 | { | ||
| 7427 | if (b < a) | ||
| 7428 | swap(a, b); | ||
| 7429 | |||
| 7430 | mutex_lock(a); | ||
| 7431 | mutex_lock_nested(b, SINGLE_DEPTH_NESTING); | ||
| 7432 | } | ||
| 7433 | |||
| 7249 | /** | 7434 | /** |
| 7250 | * sys_perf_event_open - open a performance event, associate it to a task/cpu | 7435 | * sys_perf_event_open - open a performance event, associate it to a task/cpu |
| 7251 | * | 7436 | * |
| @@ -7261,7 +7446,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7261 | struct perf_event *group_leader = NULL, *output_event = NULL; | 7446 | struct perf_event *group_leader = NULL, *output_event = NULL; |
| 7262 | struct perf_event *event, *sibling; | 7447 | struct perf_event *event, *sibling; |
| 7263 | struct perf_event_attr attr; | 7448 | struct perf_event_attr attr; |
| 7264 | struct perf_event_context *ctx; | 7449 | struct perf_event_context *ctx, *uninitialized_var(gctx); |
| 7265 | struct file *event_file = NULL; | 7450 | struct file *event_file = NULL; |
| 7266 | struct fd group = {NULL, 0}; | 7451 | struct fd group = {NULL, 0}; |
| 7267 | struct task_struct *task = NULL; | 7452 | struct task_struct *task = NULL; |
| @@ -7459,43 +7644,68 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7459 | } | 7644 | } |
| 7460 | 7645 | ||
| 7461 | if (move_group) { | 7646 | if (move_group) { |
| 7462 | struct perf_event_context *gctx = group_leader->ctx; | 7647 | gctx = group_leader->ctx; |
| 7463 | |||
| 7464 | mutex_lock(&gctx->mutex); | ||
| 7465 | perf_remove_from_context(group_leader, false); | ||
| 7466 | 7648 | ||
| 7467 | /* | 7649 | /* |
| 7468 | * Removing from the context ends up with disabled | 7650 | * See perf_event_ctx_lock() for comments on the details |
| 7469 | * event. What we want here is event in the initial | 7651 | * of swizzling perf_event::ctx. |
| 7470 | * startup state, ready to be add into new context. | ||
| 7471 | */ | 7652 | */ |
| 7472 | perf_event__state_init(group_leader); | 7653 | mutex_lock_double(&gctx->mutex, &ctx->mutex); |
| 7654 | |||
| 7655 | perf_remove_from_context(group_leader, false); | ||
| 7656 | |||
| 7473 | list_for_each_entry(sibling, &group_leader->sibling_list, | 7657 | list_for_each_entry(sibling, &group_leader->sibling_list, |
| 7474 | group_entry) { | 7658 | group_entry) { |
| 7475 | perf_remove_from_context(sibling, false); | 7659 | perf_remove_from_context(sibling, false); |
| 7476 | perf_event__state_init(sibling); | ||
| 7477 | put_ctx(gctx); | 7660 | put_ctx(gctx); |
| 7478 | } | 7661 | } |
| 7479 | mutex_unlock(&gctx->mutex); | 7662 | } else { |
| 7480 | put_ctx(gctx); | 7663 | mutex_lock(&ctx->mutex); |
| 7481 | } | 7664 | } |
| 7482 | 7665 | ||
| 7483 | WARN_ON_ONCE(ctx->parent_ctx); | 7666 | WARN_ON_ONCE(ctx->parent_ctx); |
| 7484 | mutex_lock(&ctx->mutex); | ||
| 7485 | 7667 | ||
| 7486 | if (move_group) { | 7668 | if (move_group) { |
| 7669 | /* | ||
| 7670 | * Wait for everybody to stop referencing the events through | ||
| 7671 | * the old lists, before installing it on new lists. | ||
| 7672 | */ | ||
| 7487 | synchronize_rcu(); | 7673 | synchronize_rcu(); |
| 7488 | perf_install_in_context(ctx, group_leader, group_leader->cpu); | 7674 | |
| 7489 | get_ctx(ctx); | 7675 | /* |
| 7676 | * Install the group siblings before the group leader. | ||
| 7677 | * | ||
| 7678 | * Because a group leader will try and install the entire group | ||
| 7679 | * (through the sibling list, which is still in-tact), we can | ||
| 7680 | * end up with siblings installed in the wrong context. | ||
| 7681 | * | ||
| 7682 | * By installing siblings first we NO-OP because they're not | ||
| 7683 | * reachable through the group lists. | ||
| 7684 | */ | ||
| 7490 | list_for_each_entry(sibling, &group_leader->sibling_list, | 7685 | list_for_each_entry(sibling, &group_leader->sibling_list, |
| 7491 | group_entry) { | 7686 | group_entry) { |
| 7687 | perf_event__state_init(sibling); | ||
| 7492 | perf_install_in_context(ctx, sibling, sibling->cpu); | 7688 | perf_install_in_context(ctx, sibling, sibling->cpu); |
| 7493 | get_ctx(ctx); | 7689 | get_ctx(ctx); |
| 7494 | } | 7690 | } |
| 7691 | |||
| 7692 | /* | ||
| 7693 | * Removing from the context ends up with disabled | ||
| 7694 | * event. What we want here is event in the initial | ||
| 7695 | * startup state, ready to be add into new context. | ||
| 7696 | */ | ||
| 7697 | perf_event__state_init(group_leader); | ||
| 7698 | perf_install_in_context(ctx, group_leader, group_leader->cpu); | ||
| 7699 | get_ctx(ctx); | ||
| 7495 | } | 7700 | } |
| 7496 | 7701 | ||
| 7497 | perf_install_in_context(ctx, event, event->cpu); | 7702 | perf_install_in_context(ctx, event, event->cpu); |
| 7498 | perf_unpin_context(ctx); | 7703 | perf_unpin_context(ctx); |
| 7704 | |||
| 7705 | if (move_group) { | ||
| 7706 | mutex_unlock(&gctx->mutex); | ||
| 7707 | put_ctx(gctx); | ||
| 7708 | } | ||
| 7499 | mutex_unlock(&ctx->mutex); | 7709 | mutex_unlock(&ctx->mutex); |
| 7500 | 7710 | ||
| 7501 | put_online_cpus(); | 7711 | put_online_cpus(); |
| @@ -7603,7 +7813,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
| 7603 | src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; | 7813 | src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; |
| 7604 | dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; | 7814 | dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; |
| 7605 | 7815 | ||
| 7606 | mutex_lock(&src_ctx->mutex); | 7816 | /* |
| 7817 | * See perf_event_ctx_lock() for comments on the details | ||
| 7818 | * of swizzling perf_event::ctx. | ||
| 7819 | */ | ||
| 7820 | mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); | ||
| 7607 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, | 7821 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, |
| 7608 | event_entry) { | 7822 | event_entry) { |
| 7609 | perf_remove_from_context(event, false); | 7823 | perf_remove_from_context(event, false); |
| @@ -7611,11 +7825,36 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
| 7611 | put_ctx(src_ctx); | 7825 | put_ctx(src_ctx); |
| 7612 | list_add(&event->migrate_entry, &events); | 7826 | list_add(&event->migrate_entry, &events); |
| 7613 | } | 7827 | } |
| 7614 | mutex_unlock(&src_ctx->mutex); | ||
| 7615 | 7828 | ||
| 7829 | /* | ||
| 7830 | * Wait for the events to quiesce before re-instating them. | ||
| 7831 | */ | ||
| 7616 | synchronize_rcu(); | 7832 | synchronize_rcu(); |
| 7617 | 7833 | ||
| 7618 | mutex_lock(&dst_ctx->mutex); | 7834 | /* |
| 7835 | * Re-instate events in 2 passes. | ||
| 7836 | * | ||
| 7837 | * Skip over group leaders and only install siblings on this first | ||
| 7838 | * pass, siblings will not get enabled without a leader, however a | ||
| 7839 | * leader will enable its siblings, even if those are still on the old | ||
| 7840 | * context. | ||
| 7841 | */ | ||
| 7842 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { | ||
| 7843 | if (event->group_leader == event) | ||
| 7844 | continue; | ||
| 7845 | |||
| 7846 | list_del(&event->migrate_entry); | ||
| 7847 | if (event->state >= PERF_EVENT_STATE_OFF) | ||
| 7848 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
| 7849 | account_event_cpu(event, dst_cpu); | ||
| 7850 | perf_install_in_context(dst_ctx, event, dst_cpu); | ||
| 7851 | get_ctx(dst_ctx); | ||
| 7852 | } | ||
| 7853 | |||
| 7854 | /* | ||
| 7855 | * Once all the siblings are setup properly, install the group leaders | ||
| 7856 | * to make it go. | ||
| 7857 | */ | ||
| 7619 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { | 7858 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { |
| 7620 | list_del(&event->migrate_entry); | 7859 | list_del(&event->migrate_entry); |
| 7621 | if (event->state >= PERF_EVENT_STATE_OFF) | 7860 | if (event->state >= PERF_EVENT_STATE_OFF) |
| @@ -7625,6 +7864,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
| 7625 | get_ctx(dst_ctx); | 7864 | get_ctx(dst_ctx); |
| 7626 | } | 7865 | } |
| 7627 | mutex_unlock(&dst_ctx->mutex); | 7866 | mutex_unlock(&dst_ctx->mutex); |
| 7867 | mutex_unlock(&src_ctx->mutex); | ||
| 7628 | } | 7868 | } |
| 7629 | EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); | 7869 | EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); |
| 7630 | 7870 | ||
| @@ -7811,14 +8051,19 @@ static void perf_free_event(struct perf_event *event, | |||
| 7811 | 8051 | ||
| 7812 | put_event(parent); | 8052 | put_event(parent); |
| 7813 | 8053 | ||
| 8054 | raw_spin_lock_irq(&ctx->lock); | ||
| 7814 | perf_group_detach(event); | 8055 | perf_group_detach(event); |
| 7815 | list_del_event(event, ctx); | 8056 | list_del_event(event, ctx); |
| 8057 | raw_spin_unlock_irq(&ctx->lock); | ||
| 7816 | free_event(event); | 8058 | free_event(event); |
| 7817 | } | 8059 | } |
| 7818 | 8060 | ||
| 7819 | /* | 8061 | /* |
| 7820 | * free an unexposed, unused context as created by inheritance by | 8062 | * Free an unexposed, unused context as created by inheritance by |
| 7821 | * perf_event_init_task below, used by fork() in case of fail. | 8063 | * perf_event_init_task below, used by fork() in case of fail. |
| 8064 | * | ||
| 8065 | * Not all locks are strictly required, but take them anyway to be nice and | ||
| 8066 | * help out with the lockdep assertions. | ||
| 7822 | */ | 8067 | */ |
| 7823 | void perf_event_free_task(struct task_struct *task) | 8068 | void perf_event_free_task(struct task_struct *task) |
| 7824 | { | 8069 | { |
| @@ -8137,7 +8382,7 @@ static void __init perf_event_init_all_cpus(void) | |||
| 8137 | for_each_possible_cpu(cpu) { | 8382 | for_each_possible_cpu(cpu) { |
| 8138 | swhash = &per_cpu(swevent_htable, cpu); | 8383 | swhash = &per_cpu(swevent_htable, cpu); |
| 8139 | mutex_init(&swhash->hlist_mutex); | 8384 | mutex_init(&swhash->hlist_mutex); |
| 8140 | INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); | 8385 | INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu)); |
| 8141 | } | 8386 | } |
| 8142 | } | 8387 | } |
| 8143 | 8388 | ||
| @@ -8158,22 +8403,11 @@ static void perf_event_init_cpu(int cpu) | |||
| 8158 | } | 8403 | } |
| 8159 | 8404 | ||
| 8160 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC | 8405 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC |
| 8161 | static void perf_pmu_rotate_stop(struct pmu *pmu) | ||
| 8162 | { | ||
| 8163 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
| 8164 | |||
| 8165 | WARN_ON(!irqs_disabled()); | ||
| 8166 | |||
| 8167 | list_del_init(&cpuctx->rotation_list); | ||
| 8168 | } | ||
| 8169 | |||
| 8170 | static void __perf_event_exit_context(void *__info) | 8406 | static void __perf_event_exit_context(void *__info) |
| 8171 | { | 8407 | { |
| 8172 | struct remove_event re = { .detach_group = true }; | 8408 | struct remove_event re = { .detach_group = true }; |
| 8173 | struct perf_event_context *ctx = __info; | 8409 | struct perf_event_context *ctx = __info; |
| 8174 | 8410 | ||
| 8175 | perf_pmu_rotate_stop(ctx->pmu); | ||
| 8176 | |||
| 8177 | rcu_read_lock(); | 8411 | rcu_read_lock(); |
| 8178 | list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) | 8412 | list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) |
| 8179 | __perf_remove_from_context(&re); | 8413 | __perf_remove_from_context(&re); |
| @@ -8284,6 +8518,18 @@ void __init perf_event_init(void) | |||
| 8284 | != 1024); | 8518 | != 1024); |
| 8285 | } | 8519 | } |
| 8286 | 8520 | ||
| 8521 | ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, | ||
| 8522 | char *page) | ||
| 8523 | { | ||
| 8524 | struct perf_pmu_events_attr *pmu_attr = | ||
| 8525 | container_of(attr, struct perf_pmu_events_attr, attr); | ||
| 8526 | |||
| 8527 | if (pmu_attr->event_str) | ||
| 8528 | return sprintf(page, "%s\n", pmu_attr->event_str); | ||
| 8529 | |||
| 8530 | return 0; | ||
| 8531 | } | ||
| 8532 | |||
| 8287 | static int __init perf_event_sysfs_init(void) | 8533 | static int __init perf_event_sysfs_init(void) |
| 8288 | { | 8534 | { |
| 8289 | struct pmu *pmu; | 8535 | struct pmu *pmu; |
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 146a5792b1d2..eadb95ce7aac 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c | |||
| @@ -13,12 +13,13 @@ | |||
| 13 | #include <linux/vmalloc.h> | 13 | #include <linux/vmalloc.h> |
| 14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| 15 | #include <linux/circ_buf.h> | 15 | #include <linux/circ_buf.h> |
| 16 | #include <linux/poll.h> | ||
| 16 | 17 | ||
| 17 | #include "internal.h" | 18 | #include "internal.h" |
| 18 | 19 | ||
| 19 | static void perf_output_wakeup(struct perf_output_handle *handle) | 20 | static void perf_output_wakeup(struct perf_output_handle *handle) |
| 20 | { | 21 | { |
| 21 | atomic_set(&handle->rb->poll, POLL_IN); | 22 | atomic_set(&handle->rb->poll, POLLIN); |
| 22 | 23 | ||
| 23 | handle->event->pending_wakeup = 1; | 24 | handle->event->pending_wakeup = 1; |
| 24 | irq_work_queue(&handle->event->pending); | 25 | irq_work_queue(&handle->event->pending); |
diff --git a/kernel/exit.c b/kernel/exit.c index 6806c55475ee..feff10bbb307 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -435,7 +435,8 @@ static void exit_mm(struct task_struct *tsk) | |||
| 435 | task_unlock(tsk); | 435 | task_unlock(tsk); |
| 436 | mm_update_next_owner(mm); | 436 | mm_update_next_owner(mm); |
| 437 | mmput(mm); | 437 | mmput(mm); |
| 438 | clear_thread_flag(TIF_MEMDIE); | 438 | if (test_thread_flag(TIF_MEMDIE)) |
| 439 | unmark_oom_victim(); | ||
| 439 | } | 440 | } |
| 440 | 441 | ||
| 441 | static struct task_struct *find_alive_thread(struct task_struct *p) | 442 | static struct task_struct *find_alive_thread(struct task_struct *p) |
diff --git a/kernel/fork.c b/kernel/fork.c index 4dc2ddade9f1..cf65139615a0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -438,12 +438,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 438 | atomic_inc(&mapping->i_mmap_writable); | 438 | atomic_inc(&mapping->i_mmap_writable); |
| 439 | flush_dcache_mmap_lock(mapping); | 439 | flush_dcache_mmap_lock(mapping); |
| 440 | /* insert tmp into the share list, just after mpnt */ | 440 | /* insert tmp into the share list, just after mpnt */ |
| 441 | if (unlikely(tmp->vm_flags & VM_NONLINEAR)) | 441 | vma_interval_tree_insert_after(tmp, mpnt, |
| 442 | vma_nonlinear_insert(tmp, | 442 | &mapping->i_mmap); |
| 443 | &mapping->i_mmap_nonlinear); | ||
| 444 | else | ||
| 445 | vma_interval_tree_insert_after(tmp, mpnt, | ||
| 446 | &mapping->i_mmap); | ||
| 447 | flush_dcache_mmap_unlock(mapping); | 443 | flush_dcache_mmap_unlock(mapping); |
| 448 | i_mmap_unlock_write(mapping); | 444 | i_mmap_unlock_write(mapping); |
| 449 | } | 445 | } |
| @@ -559,6 +555,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) | |||
| 559 | INIT_LIST_HEAD(&mm->mmlist); | 555 | INIT_LIST_HEAD(&mm->mmlist); |
| 560 | mm->core_state = NULL; | 556 | mm->core_state = NULL; |
| 561 | atomic_long_set(&mm->nr_ptes, 0); | 557 | atomic_long_set(&mm->nr_ptes, 0); |
| 558 | mm_nr_pmds_init(mm); | ||
| 562 | mm->map_count = 0; | 559 | mm->map_count = 0; |
| 563 | mm->locked_vm = 0; | 560 | mm->locked_vm = 0; |
| 564 | mm->pinned_vm = 0; | 561 | mm->pinned_vm = 0; |
| @@ -607,6 +604,14 @@ static void check_mm(struct mm_struct *mm) | |||
| 607 | printk(KERN_ALERT "BUG: Bad rss-counter state " | 604 | printk(KERN_ALERT "BUG: Bad rss-counter state " |
| 608 | "mm:%p idx:%d val:%ld\n", mm, i, x); | 605 | "mm:%p idx:%d val:%ld\n", mm, i, x); |
| 609 | } | 606 | } |
| 607 | |||
| 608 | if (atomic_long_read(&mm->nr_ptes)) | ||
| 609 | pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n", | ||
| 610 | atomic_long_read(&mm->nr_ptes)); | ||
| 611 | if (mm_nr_pmds(mm)) | ||
| 612 | pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n", | ||
| 613 | mm_nr_pmds(mm)); | ||
| 614 | |||
| 610 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS | 615 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS |
| 611 | VM_BUG_ON_MM(mm->pmd_huge_pte, mm); | 616 | VM_BUG_ON_MM(mm->pmd_huge_pte, mm); |
| 612 | #endif | 617 | #endif |
diff --git a/kernel/futex.c b/kernel/futex.c index 63678b573d61..2a5e3830e953 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -2217,7 +2217,7 @@ retry: | |||
| 2217 | if (!abs_time) | 2217 | if (!abs_time) |
| 2218 | goto out; | 2218 | goto out; |
| 2219 | 2219 | ||
| 2220 | restart = ¤t_thread_info()->restart_block; | 2220 | restart = ¤t->restart_block; |
| 2221 | restart->fn = futex_wait_restart; | 2221 | restart->fn = futex_wait_restart; |
| 2222 | restart->futex.uaddr = uaddr; | 2222 | restart->futex.uaddr = uaddr; |
| 2223 | restart->futex.val = val; | 2223 | restart->futex.val = val; |
| @@ -2258,7 +2258,7 @@ static long futex_wait_restart(struct restart_block *restart) | |||
| 2258 | * if there are waiters then it will block, it does PI, etc. (Due to | 2258 | * if there are waiters then it will block, it does PI, etc. (Due to |
| 2259 | * races the kernel might see a 0 value of the futex too.) | 2259 | * races the kernel might see a 0 value of the futex too.) |
| 2260 | */ | 2260 | */ |
| 2261 | static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, | 2261 | static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, |
| 2262 | ktime_t *time, int trylock) | 2262 | ktime_t *time, int trylock) |
| 2263 | { | 2263 | { |
| 2264 | struct hrtimer_sleeper timeout, *to = NULL; | 2264 | struct hrtimer_sleeper timeout, *to = NULL; |
| @@ -2953,11 +2953,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
| 2953 | case FUTEX_WAKE_OP: | 2953 | case FUTEX_WAKE_OP: |
| 2954 | return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); | 2954 | return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); |
| 2955 | case FUTEX_LOCK_PI: | 2955 | case FUTEX_LOCK_PI: |
| 2956 | return futex_lock_pi(uaddr, flags, val, timeout, 0); | 2956 | return futex_lock_pi(uaddr, flags, timeout, 0); |
| 2957 | case FUTEX_UNLOCK_PI: | 2957 | case FUTEX_UNLOCK_PI: |
| 2958 | return futex_unlock_pi(uaddr, flags); | 2958 | return futex_unlock_pi(uaddr, flags); |
| 2959 | case FUTEX_TRYLOCK_PI: | 2959 | case FUTEX_TRYLOCK_PI: |
| 2960 | return futex_lock_pi(uaddr, flags, 0, timeout, 1); | 2960 | return futex_lock_pi(uaddr, flags, NULL, 1); |
| 2961 | case FUTEX_WAIT_REQUEUE_PI: | 2961 | case FUTEX_WAIT_REQUEUE_PI: |
| 2962 | val3 = FUTEX_BITSET_MATCH_ANY; | 2962 | val3 = FUTEX_BITSET_MATCH_ANY; |
| 2963 | return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, | 2963 | return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, |
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index 52aa7e8de927..752d6486b67e 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile | |||
| @@ -1,33 +1,7 @@ | |||
| 1 | ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' | 1 | ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' |
| 2 | 2 | ||
| 3 | # if-lt | 3 | obj-y := base.o fs.o |
| 4 | # Usage VAR := $(call if-lt, $(a), $(b)) | 4 | obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o |
| 5 | # Returns 1 if (a < b) | 5 | obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o |
| 6 | if-lt = $(shell [ $(1) -lt $(2) ] && echo 1) | 6 | obj-$(CONFIG_GCOV_FORMAT_AUTODETECT) += $(call cc-ifversion, -lt, 0407, \ |
| 7 | 7 | gcc_3_4.o, gcc_4_7.o) | |
| 8 | ifeq ($(CONFIG_GCOV_FORMAT_3_4),y) | ||
| 9 | cc-ver := 0304 | ||
| 10 | else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y) | ||
| 11 | cc-ver := 0407 | ||
| 12 | else | ||
| 13 | # Use cc-version if available, otherwise set 0 | ||
| 14 | # | ||
| 15 | # scripts/Kbuild.include, which contains cc-version function, is not included | ||
| 16 | # during make clean "make -f scripts/Makefile.clean obj=kernel/gcov" | ||
| 17 | # Meaning cc-ver is empty causing if-lt test to fail with | ||
| 18 | # "/bin/sh: line 0: [: -lt: unary operator expected" error mesage. | ||
| 19 | # This has no affect on the clean phase, but the error message could be | ||
| 20 | # confusing/annoying. So this dummy workaround sets cc-ver to zero if cc-version | ||
| 21 | # is not available. We can probably move if-lt to Kbuild.include, so it's also | ||
| 22 | # not defined during clean or to include Kbuild.include in | ||
| 23 | # scripts/Makefile.clean. But the following workaround seems least invasive. | ||
| 24 | cc-ver := $(if $(call cc-version),$(call cc-version),0) | ||
| 25 | endif | ||
| 26 | |||
| 27 | obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o | ||
| 28 | |||
| 29 | ifeq ($(call if-lt, $(cc-ver), 0407),1) | ||
| 30 | obj-$(CONFIG_GCOV_KERNEL) += gcc_3_4.o | ||
| 31 | else | ||
| 32 | obj-$(CONFIG_GCOV_KERNEL) += gcc_4_7.o | ||
| 33 | endif | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 80692373abd6..196a06fbc122 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -243,6 +243,9 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | |||
| 243 | return -EINVAL; | 243 | return -EINVAL; |
| 244 | desc->affinity_hint = m; | 244 | desc->affinity_hint = m; |
| 245 | irq_put_desc_unlock(desc, flags); | 245 | irq_put_desc_unlock(desc, flags); |
| 246 | /* set the initial affinity to prevent every interrupt being on CPU0 */ | ||
| 247 | if (m) | ||
| 248 | __irq_set_affinity(irq, m, false); | ||
| 246 | return 0; | 249 | return 0; |
| 247 | } | 250 | } |
| 248 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | 251 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 9dc9bfd8a678..df2f4642d1e7 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -46,10 +46,9 @@ static int show_irq_affinity(int type, struct seq_file *m, void *v) | |||
| 46 | mask = desc->pending_mask; | 46 | mask = desc->pending_mask; |
| 47 | #endif | 47 | #endif |
| 48 | if (type) | 48 | if (type) |
| 49 | seq_cpumask_list(m, mask); | 49 | seq_printf(m, "%*pbl\n", cpumask_pr_args(mask)); |
| 50 | else | 50 | else |
| 51 | seq_cpumask(m, mask); | 51 | seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); |
| 52 | seq_putc(m, '\n'); | ||
| 53 | return 0; | 52 | return 0; |
| 54 | } | 53 | } |
| 55 | 54 | ||
| @@ -67,8 +66,7 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) | |||
| 67 | cpumask_copy(mask, desc->affinity_hint); | 66 | cpumask_copy(mask, desc->affinity_hint); |
| 68 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 67 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 69 | 68 | ||
| 70 | seq_cpumask(m, mask); | 69 | seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); |
| 71 | seq_putc(m, '\n'); | ||
| 72 | free_cpumask_var(mask); | 70 | free_cpumask_var(mask); |
| 73 | 71 | ||
| 74 | return 0; | 72 | return 0; |
| @@ -186,8 +184,7 @@ static const struct file_operations irq_affinity_list_proc_fops = { | |||
| 186 | 184 | ||
| 187 | static int default_affinity_show(struct seq_file *m, void *v) | 185 | static int default_affinity_show(struct seq_file *m, void *v) |
| 188 | { | 186 | { |
| 189 | seq_cpumask(m, irq_default_affinity); | 187 | seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity)); |
| 190 | seq_putc(m, '\n'); | ||
| 191 | return 0; | 188 | return 0; |
| 192 | } | 189 | } |
| 193 | 190 | ||
diff --git a/kernel/kexec.c b/kernel/kexec.c index 9a8a01abbaed..38c25b1f2fd5 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -444,7 +444,7 @@ arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | |||
| 444 | } | 444 | } |
| 445 | 445 | ||
| 446 | /* | 446 | /* |
| 447 | * Free up memory used by kernel, initrd, and comand line. This is temporary | 447 | * Free up memory used by kernel, initrd, and command line. This is temporary |
| 448 | * memory allocation which is not needed any more after these buffers have | 448 | * memory allocation which is not needed any more after these buffers have |
| 449 | * been loaded into separate segments and have been copied elsewhere. | 449 | * been loaded into separate segments and have been copied elsewhere. |
| 450 | */ | 450 | */ |
| @@ -856,8 +856,6 @@ static int kimage_set_destination(struct kimage *image, | |||
| 856 | 856 | ||
| 857 | destination &= PAGE_MASK; | 857 | destination &= PAGE_MASK; |
| 858 | result = kimage_add_entry(image, destination | IND_DESTINATION); | 858 | result = kimage_add_entry(image, destination | IND_DESTINATION); |
| 859 | if (result == 0) | ||
| 860 | image->destination = destination; | ||
| 861 | 859 | ||
| 862 | return result; | 860 | return result; |
| 863 | } | 861 | } |
| @@ -869,8 +867,6 @@ static int kimage_add_page(struct kimage *image, unsigned long page) | |||
| 869 | 867 | ||
| 870 | page &= PAGE_MASK; | 868 | page &= PAGE_MASK; |
| 871 | result = kimage_add_entry(image, page | IND_SOURCE); | 869 | result = kimage_add_entry(image, page | IND_SOURCE); |
| 872 | if (result == 0) | ||
| 873 | image->destination += PAGE_SIZE; | ||
| 874 | 870 | ||
| 875 | return result; | 871 | return result; |
| 876 | } | 872 | } |
| @@ -1288,19 +1284,22 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, | |||
| 1288 | if (nr_segments > 0) { | 1284 | if (nr_segments > 0) { |
| 1289 | unsigned long i; | 1285 | unsigned long i; |
| 1290 | 1286 | ||
| 1291 | /* Loading another kernel to reboot into */ | 1287 | if (flags & KEXEC_ON_CRASH) { |
| 1292 | if ((flags & KEXEC_ON_CRASH) == 0) | 1288 | /* |
| 1293 | result = kimage_alloc_init(&image, entry, nr_segments, | 1289 | * Loading another kernel to switch to if this one |
| 1294 | segments, flags); | 1290 | * crashes. Free any current crash dump kernel before |
| 1295 | /* Loading another kernel to switch to if this one crashes */ | ||
| 1296 | else if (flags & KEXEC_ON_CRASH) { | ||
| 1297 | /* Free any current crash dump kernel before | ||
| 1298 | * we corrupt it. | 1291 | * we corrupt it. |
| 1299 | */ | 1292 | */ |
| 1293 | |||
| 1300 | kimage_free(xchg(&kexec_crash_image, NULL)); | 1294 | kimage_free(xchg(&kexec_crash_image, NULL)); |
| 1301 | result = kimage_alloc_init(&image, entry, nr_segments, | 1295 | result = kimage_alloc_init(&image, entry, nr_segments, |
| 1302 | segments, flags); | 1296 | segments, flags); |
| 1303 | crash_map_reserved_pages(); | 1297 | crash_map_reserved_pages(); |
| 1298 | } else { | ||
| 1299 | /* Loading another kernel to reboot into. */ | ||
| 1300 | |||
| 1301 | result = kimage_alloc_init(&image, entry, nr_segments, | ||
| 1302 | segments, flags); | ||
| 1304 | } | 1303 | } |
| 1305 | if (result) | 1304 | if (result) |
| 1306 | goto out; | 1305 | goto out; |
| @@ -2512,7 +2511,7 @@ static int kexec_apply_relocations(struct kimage *image) | |||
| 2512 | continue; | 2511 | continue; |
| 2513 | 2512 | ||
| 2514 | /* | 2513 | /* |
| 2515 | * Respective archicture needs to provide support for applying | 2514 | * Respective architecture needs to provide support for applying |
| 2516 | * relocations of type SHT_RELA/SHT_REL. | 2515 | * relocations of type SHT_RELA/SHT_REL. |
| 2517 | */ | 2516 | */ |
| 2518 | if (sechdrs[i].sh_type == SHT_RELA) | 2517 | if (sechdrs[i].sh_type == SHT_RELA) |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ee619929cf90..c90e417bb963 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -717,7 +717,7 @@ static void prepare_optimized_kprobe(struct kprobe *p) | |||
| 717 | struct optimized_kprobe *op; | 717 | struct optimized_kprobe *op; |
| 718 | 718 | ||
| 719 | op = container_of(p, struct optimized_kprobe, kp); | 719 | op = container_of(p, struct optimized_kprobe, kp); |
| 720 | arch_prepare_optimized_kprobe(op); | 720 | arch_prepare_optimized_kprobe(op, p); |
| 721 | } | 721 | } |
| 722 | 722 | ||
| 723 | /* Allocate new optimized_kprobe and try to prepare optimized instructions */ | 723 | /* Allocate new optimized_kprobe and try to prepare optimized instructions */ |
| @@ -731,7 +731,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) | |||
| 731 | 731 | ||
| 732 | INIT_LIST_HEAD(&op->list); | 732 | INIT_LIST_HEAD(&op->list); |
| 733 | op->kp.addr = p->addr; | 733 | op->kp.addr = p->addr; |
| 734 | arch_prepare_optimized_kprobe(op); | 734 | arch_prepare_optimized_kprobe(op, p); |
| 735 | 735 | ||
| 736 | return &op->kp; | 736 | return &op->kp; |
| 737 | } | 737 | } |
| @@ -869,7 +869,8 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt) | |||
| 869 | { | 869 | { |
| 870 | struct kprobe *_p; | 870 | struct kprobe *_p; |
| 871 | 871 | ||
| 872 | unoptimize_kprobe(p, false); /* Try to unoptimize */ | 872 | /* Try to unoptimize */ |
| 873 | unoptimize_kprobe(p, kprobes_all_disarmed); | ||
| 873 | 874 | ||
| 874 | if (!kprobe_queued(p)) { | 875 | if (!kprobe_queued(p)) { |
| 875 | arch_disarm_kprobe(p); | 876 | arch_disarm_kprobe(p); |
| @@ -1571,7 +1572,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) | |||
| 1571 | 1572 | ||
| 1572 | /* Try to disarm and disable this/parent probe */ | 1573 | /* Try to disarm and disable this/parent probe */ |
| 1573 | if (p == orig_p || aggr_kprobe_disabled(orig_p)) { | 1574 | if (p == orig_p || aggr_kprobe_disabled(orig_p)) { |
| 1574 | disarm_kprobe(orig_p, true); | 1575 | /* |
| 1576 | * If kprobes_all_disarmed is set, orig_p | ||
| 1577 | * should have already been disarmed, so | ||
| 1578 | * skip unneed disarming process. | ||
| 1579 | */ | ||
| 1580 | if (!kprobes_all_disarmed) | ||
| 1581 | disarm_kprobe(orig_p, true); | ||
| 1575 | orig_p->flags |= KPROBE_FLAG_DISABLED; | 1582 | orig_p->flags |= KPROBE_FLAG_DISABLED; |
| 1576 | } | 1583 | } |
| 1577 | } | 1584 | } |
| @@ -2320,6 +2327,12 @@ static void arm_all_kprobes(void) | |||
| 2320 | if (!kprobes_all_disarmed) | 2327 | if (!kprobes_all_disarmed) |
| 2321 | goto already_enabled; | 2328 | goto already_enabled; |
| 2322 | 2329 | ||
| 2330 | /* | ||
| 2331 | * optimize_kprobe() called by arm_kprobe() checks | ||
| 2332 | * kprobes_all_disarmed, so set kprobes_all_disarmed before | ||
| 2333 | * arm_kprobe. | ||
| 2334 | */ | ||
| 2335 | kprobes_all_disarmed = false; | ||
| 2323 | /* Arming kprobes doesn't optimize kprobe itself */ | 2336 | /* Arming kprobes doesn't optimize kprobe itself */ |
| 2324 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 2337 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| 2325 | head = &kprobe_table[i]; | 2338 | head = &kprobe_table[i]; |
| @@ -2328,7 +2341,6 @@ static void arm_all_kprobes(void) | |||
| 2328 | arm_kprobe(p); | 2341 | arm_kprobe(p); |
| 2329 | } | 2342 | } |
| 2330 | 2343 | ||
| 2331 | kprobes_all_disarmed = false; | ||
| 2332 | printk(KERN_INFO "Kprobes globally enabled\n"); | 2344 | printk(KERN_INFO "Kprobes globally enabled\n"); |
| 2333 | 2345 | ||
| 2334 | already_enabled: | 2346 | already_enabled: |
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig new file mode 100644 index 000000000000..045022557936 --- /dev/null +++ b/kernel/livepatch/Kconfig | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | config HAVE_LIVEPATCH | ||
| 2 | bool | ||
| 3 | help | ||
| 4 | Arch supports kernel live patching | ||
| 5 | |||
| 6 | config LIVEPATCH | ||
| 7 | bool "Kernel Live Patching" | ||
| 8 | depends on DYNAMIC_FTRACE_WITH_REGS | ||
| 9 | depends on MODULES | ||
| 10 | depends on SYSFS | ||
| 11 | depends on KALLSYMS_ALL | ||
| 12 | depends on HAVE_LIVEPATCH | ||
| 13 | help | ||
| 14 | Say Y here if you want to support kernel live patching. | ||
| 15 | This option has no runtime impact until a kernel "patch" | ||
| 16 | module uses the interface provided by this option to register | ||
| 17 | a patch, causing calls to patched functions to be redirected | ||
| 18 | to new function code contained in the patch module. | ||
diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile new file mode 100644 index 000000000000..e8780c0901d9 --- /dev/null +++ b/kernel/livepatch/Makefile | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | obj-$(CONFIG_LIVEPATCH) += livepatch.o | ||
| 2 | |||
| 3 | livepatch-objs := core.o | ||
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c new file mode 100644 index 000000000000..ff7f47d026ac --- /dev/null +++ b/kernel/livepatch/core.c | |||
| @@ -0,0 +1,1015 @@ | |||
| 1 | /* | ||
| 2 | * core.c - Kernel Live Patching Core | ||
| 3 | * | ||
| 4 | * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> | ||
| 5 | * Copyright (C) 2014 SUSE | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version 2 | ||
| 10 | * of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 22 | |||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/kernel.h> | ||
| 25 | #include <linux/mutex.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | #include <linux/ftrace.h> | ||
| 28 | #include <linux/list.h> | ||
| 29 | #include <linux/kallsyms.h> | ||
| 30 | #include <linux/livepatch.h> | ||
| 31 | |||
| 32 | /** | ||
| 33 | * struct klp_ops - structure for tracking registered ftrace ops structs | ||
| 34 | * | ||
| 35 | * A single ftrace_ops is shared between all enabled replacement functions | ||
| 36 | * (klp_func structs) which have the same old_addr. This allows the switch | ||
| 37 | * between function versions to happen instantaneously by updating the klp_ops | ||
| 38 | * struct's func_stack list. The winner is the klp_func at the top of the | ||
| 39 | * func_stack (front of the list). | ||
| 40 | * | ||
| 41 | * @node: node for the global klp_ops list | ||
| 42 | * @func_stack: list head for the stack of klp_func's (active func is on top) | ||
| 43 | * @fops: registered ftrace ops struct | ||
| 44 | */ | ||
| 45 | struct klp_ops { | ||
| 46 | struct list_head node; | ||
| 47 | struct list_head func_stack; | ||
| 48 | struct ftrace_ops fops; | ||
| 49 | }; | ||
| 50 | |||
| 51 | /* | ||
| 52 | * The klp_mutex protects the global lists and state transitions of any | ||
| 53 | * structure reachable from them. References to any structure must be obtained | ||
| 54 | * under mutex protection (except in klp_ftrace_handler(), which uses RCU to | ||
| 55 | * ensure it gets consistent data). | ||
| 56 | */ | ||
| 57 | static DEFINE_MUTEX(klp_mutex); | ||
| 58 | |||
| 59 | static LIST_HEAD(klp_patches); | ||
| 60 | static LIST_HEAD(klp_ops); | ||
| 61 | |||
| 62 | static struct kobject *klp_root_kobj; | ||
| 63 | |||
| 64 | static struct klp_ops *klp_find_ops(unsigned long old_addr) | ||
| 65 | { | ||
| 66 | struct klp_ops *ops; | ||
| 67 | struct klp_func *func; | ||
| 68 | |||
| 69 | list_for_each_entry(ops, &klp_ops, node) { | ||
| 70 | func = list_first_entry(&ops->func_stack, struct klp_func, | ||
| 71 | stack_node); | ||
| 72 | if (func->old_addr == old_addr) | ||
| 73 | return ops; | ||
| 74 | } | ||
| 75 | |||
| 76 | return NULL; | ||
| 77 | } | ||
| 78 | |||
| 79 | static bool klp_is_module(struct klp_object *obj) | ||
| 80 | { | ||
| 81 | return obj->name; | ||
| 82 | } | ||
| 83 | |||
| 84 | static bool klp_is_object_loaded(struct klp_object *obj) | ||
| 85 | { | ||
| 86 | return !obj->name || obj->mod; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* sets obj->mod if object is not vmlinux and module is found */ | ||
| 90 | static void klp_find_object_module(struct klp_object *obj) | ||
| 91 | { | ||
| 92 | if (!klp_is_module(obj)) | ||
| 93 | return; | ||
| 94 | |||
| 95 | mutex_lock(&module_mutex); | ||
| 96 | /* | ||
| 97 | * We don't need to take a reference on the module here because we have | ||
| 98 | * the klp_mutex, which is also taken by the module notifier. This | ||
| 99 | * prevents any module from unloading until we release the klp_mutex. | ||
| 100 | */ | ||
| 101 | obj->mod = find_module(obj->name); | ||
| 102 | mutex_unlock(&module_mutex); | ||
| 103 | } | ||
| 104 | |||
| 105 | /* klp_mutex must be held by caller */ | ||
| 106 | static bool klp_is_patch_registered(struct klp_patch *patch) | ||
| 107 | { | ||
| 108 | struct klp_patch *mypatch; | ||
| 109 | |||
| 110 | list_for_each_entry(mypatch, &klp_patches, list) | ||
| 111 | if (mypatch == patch) | ||
| 112 | return true; | ||
| 113 | |||
| 114 | return false; | ||
| 115 | } | ||
| 116 | |||
| 117 | static bool klp_initialized(void) | ||
| 118 | { | ||
| 119 | return klp_root_kobj; | ||
| 120 | } | ||
| 121 | |||
| 122 | struct klp_find_arg { | ||
| 123 | const char *objname; | ||
| 124 | const char *name; | ||
| 125 | unsigned long addr; | ||
| 126 | /* | ||
| 127 | * If count == 0, the symbol was not found. If count == 1, a unique | ||
| 128 | * match was found and addr is set. If count > 1, there is | ||
| 129 | * unresolvable ambiguity among "count" number of symbols with the same | ||
| 130 | * name in the same object. | ||
| 131 | */ | ||
| 132 | unsigned long count; | ||
| 133 | }; | ||
| 134 | |||
| 135 | static int klp_find_callback(void *data, const char *name, | ||
| 136 | struct module *mod, unsigned long addr) | ||
| 137 | { | ||
| 138 | struct klp_find_arg *args = data; | ||
| 139 | |||
| 140 | if ((mod && !args->objname) || (!mod && args->objname)) | ||
| 141 | return 0; | ||
| 142 | |||
| 143 | if (strcmp(args->name, name)) | ||
| 144 | return 0; | ||
| 145 | |||
| 146 | if (args->objname && strcmp(args->objname, mod->name)) | ||
| 147 | return 0; | ||
| 148 | |||
| 149 | /* | ||
| 150 | * args->addr might be overwritten if another match is found | ||
| 151 | * but klp_find_object_symbol() handles this and only returns the | ||
| 152 | * addr if count == 1. | ||
| 153 | */ | ||
| 154 | args->addr = addr; | ||
| 155 | args->count++; | ||
| 156 | |||
| 157 | return 0; | ||
| 158 | } | ||
| 159 | |||
| 160 | static int klp_find_object_symbol(const char *objname, const char *name, | ||
| 161 | unsigned long *addr) | ||
| 162 | { | ||
| 163 | struct klp_find_arg args = { | ||
| 164 | .objname = objname, | ||
| 165 | .name = name, | ||
| 166 | .addr = 0, | ||
| 167 | .count = 0 | ||
| 168 | }; | ||
| 169 | |||
| 170 | kallsyms_on_each_symbol(klp_find_callback, &args); | ||
| 171 | |||
| 172 | if (args.count == 0) | ||
| 173 | pr_err("symbol '%s' not found in symbol table\n", name); | ||
| 174 | else if (args.count > 1) | ||
| 175 | pr_err("unresolvable ambiguity (%lu matches) on symbol '%s' in object '%s'\n", | ||
| 176 | args.count, name, objname); | ||
| 177 | else { | ||
| 178 | *addr = args.addr; | ||
| 179 | return 0; | ||
| 180 | } | ||
| 181 | |||
| 182 | *addr = 0; | ||
| 183 | return -EINVAL; | ||
| 184 | } | ||
| 185 | |||
| 186 | struct klp_verify_args { | ||
| 187 | const char *name; | ||
| 188 | const unsigned long addr; | ||
| 189 | }; | ||
| 190 | |||
| 191 | static int klp_verify_callback(void *data, const char *name, | ||
| 192 | struct module *mod, unsigned long addr) | ||
| 193 | { | ||
| 194 | struct klp_verify_args *args = data; | ||
| 195 | |||
| 196 | if (!mod && | ||
| 197 | !strcmp(args->name, name) && | ||
| 198 | args->addr == addr) | ||
| 199 | return 1; | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | static int klp_verify_vmlinux_symbol(const char *name, unsigned long addr) | ||
| 205 | { | ||
| 206 | struct klp_verify_args args = { | ||
| 207 | .name = name, | ||
| 208 | .addr = addr, | ||
| 209 | }; | ||
| 210 | |||
| 211 | if (kallsyms_on_each_symbol(klp_verify_callback, &args)) | ||
| 212 | return 0; | ||
| 213 | |||
| 214 | pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", | ||
| 215 | name, addr); | ||
| 216 | return -EINVAL; | ||
| 217 | } | ||
| 218 | |||
| 219 | static int klp_find_verify_func_addr(struct klp_object *obj, | ||
| 220 | struct klp_func *func) | ||
| 221 | { | ||
| 222 | int ret; | ||
| 223 | |||
| 224 | #if defined(CONFIG_RANDOMIZE_BASE) | ||
| 225 | /* KASLR is enabled, disregard old_addr from user */ | ||
| 226 | func->old_addr = 0; | ||
| 227 | #endif | ||
| 228 | |||
| 229 | if (!func->old_addr || klp_is_module(obj)) | ||
| 230 | ret = klp_find_object_symbol(obj->name, func->old_name, | ||
| 231 | &func->old_addr); | ||
| 232 | else | ||
| 233 | ret = klp_verify_vmlinux_symbol(func->old_name, | ||
| 234 | func->old_addr); | ||
| 235 | |||
| 236 | return ret; | ||
| 237 | } | ||
| 238 | |||
| 239 | /* | ||
| 240 | * external symbols are located outside the parent object (where the parent | ||
| 241 | * object is either vmlinux or the kmod being patched). | ||
| 242 | */ | ||
| 243 | static int klp_find_external_symbol(struct module *pmod, const char *name, | ||
| 244 | unsigned long *addr) | ||
| 245 | { | ||
| 246 | const struct kernel_symbol *sym; | ||
| 247 | |||
| 248 | /* first, check if it's an exported symbol */ | ||
| 249 | preempt_disable(); | ||
| 250 | sym = find_symbol(name, NULL, NULL, true, true); | ||
| 251 | preempt_enable(); | ||
| 252 | if (sym) { | ||
| 253 | *addr = sym->value; | ||
| 254 | return 0; | ||
| 255 | } | ||
| 256 | |||
| 257 | /* otherwise check if it's in another .o within the patch module */ | ||
| 258 | return klp_find_object_symbol(pmod->name, name, addr); | ||
| 259 | } | ||
| 260 | |||
| 261 | static int klp_write_object_relocations(struct module *pmod, | ||
| 262 | struct klp_object *obj) | ||
| 263 | { | ||
| 264 | int ret; | ||
| 265 | struct klp_reloc *reloc; | ||
| 266 | |||
| 267 | if (WARN_ON(!klp_is_object_loaded(obj))) | ||
| 268 | return -EINVAL; | ||
| 269 | |||
| 270 | if (WARN_ON(!obj->relocs)) | ||
| 271 | return -EINVAL; | ||
| 272 | |||
| 273 | for (reloc = obj->relocs; reloc->name; reloc++) { | ||
| 274 | if (!klp_is_module(obj)) { | ||
| 275 | ret = klp_verify_vmlinux_symbol(reloc->name, | ||
| 276 | reloc->val); | ||
| 277 | if (ret) | ||
| 278 | return ret; | ||
| 279 | } else { | ||
| 280 | /* module, reloc->val needs to be discovered */ | ||
| 281 | if (reloc->external) | ||
| 282 | ret = klp_find_external_symbol(pmod, | ||
| 283 | reloc->name, | ||
| 284 | &reloc->val); | ||
| 285 | else | ||
| 286 | ret = klp_find_object_symbol(obj->mod->name, | ||
| 287 | reloc->name, | ||
| 288 | &reloc->val); | ||
| 289 | if (ret) | ||
| 290 | return ret; | ||
| 291 | } | ||
| 292 | ret = klp_write_module_reloc(pmod, reloc->type, reloc->loc, | ||
| 293 | reloc->val + reloc->addend); | ||
| 294 | if (ret) { | ||
| 295 | pr_err("relocation failed for symbol '%s' at 0x%016lx (%d)\n", | ||
| 296 | reloc->name, reloc->val, ret); | ||
| 297 | return ret; | ||
| 298 | } | ||
| 299 | } | ||
| 300 | |||
| 301 | return 0; | ||
| 302 | } | ||
| 303 | |||
| 304 | static void notrace klp_ftrace_handler(unsigned long ip, | ||
| 305 | unsigned long parent_ip, | ||
| 306 | struct ftrace_ops *fops, | ||
| 307 | struct pt_regs *regs) | ||
| 308 | { | ||
| 309 | struct klp_ops *ops; | ||
| 310 | struct klp_func *func; | ||
| 311 | |||
| 312 | ops = container_of(fops, struct klp_ops, fops); | ||
| 313 | |||
| 314 | rcu_read_lock(); | ||
| 315 | func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, | ||
| 316 | stack_node); | ||
| 317 | rcu_read_unlock(); | ||
| 318 | |||
| 319 | if (WARN_ON_ONCE(!func)) | ||
| 320 | return; | ||
| 321 | |||
| 322 | klp_arch_set_pc(regs, (unsigned long)func->new_func); | ||
| 323 | } | ||
| 324 | |||
| 325 | static int klp_disable_func(struct klp_func *func) | ||
| 326 | { | ||
| 327 | struct klp_ops *ops; | ||
| 328 | int ret; | ||
| 329 | |||
| 330 | if (WARN_ON(func->state != KLP_ENABLED)) | ||
| 331 | return -EINVAL; | ||
| 332 | |||
| 333 | if (WARN_ON(!func->old_addr)) | ||
| 334 | return -EINVAL; | ||
| 335 | |||
| 336 | ops = klp_find_ops(func->old_addr); | ||
| 337 | if (WARN_ON(!ops)) | ||
| 338 | return -EINVAL; | ||
| 339 | |||
| 340 | if (list_is_singular(&ops->func_stack)) { | ||
| 341 | ret = unregister_ftrace_function(&ops->fops); | ||
| 342 | if (ret) { | ||
| 343 | pr_err("failed to unregister ftrace handler for function '%s' (%d)\n", | ||
| 344 | func->old_name, ret); | ||
| 345 | return ret; | ||
| 346 | } | ||
| 347 | |||
| 348 | ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0); | ||
| 349 | if (ret) | ||
| 350 | pr_warn("function unregister succeeded but failed to clear the filter\n"); | ||
| 351 | |||
| 352 | list_del_rcu(&func->stack_node); | ||
| 353 | list_del(&ops->node); | ||
| 354 | kfree(ops); | ||
| 355 | } else { | ||
| 356 | list_del_rcu(&func->stack_node); | ||
| 357 | } | ||
| 358 | |||
| 359 | func->state = KLP_DISABLED; | ||
| 360 | |||
| 361 | return 0; | ||
| 362 | } | ||
| 363 | |||
| 364 | static int klp_enable_func(struct klp_func *func) | ||
| 365 | { | ||
| 366 | struct klp_ops *ops; | ||
| 367 | int ret; | ||
| 368 | |||
| 369 | if (WARN_ON(!func->old_addr)) | ||
| 370 | return -EINVAL; | ||
| 371 | |||
| 372 | if (WARN_ON(func->state != KLP_DISABLED)) | ||
| 373 | return -EINVAL; | ||
| 374 | |||
| 375 | ops = klp_find_ops(func->old_addr); | ||
| 376 | if (!ops) { | ||
| 377 | ops = kzalloc(sizeof(*ops), GFP_KERNEL); | ||
| 378 | if (!ops) | ||
| 379 | return -ENOMEM; | ||
| 380 | |||
| 381 | ops->fops.func = klp_ftrace_handler; | ||
| 382 | ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | | ||
| 383 | FTRACE_OPS_FL_DYNAMIC | | ||
| 384 | FTRACE_OPS_FL_IPMODIFY; | ||
| 385 | |||
| 386 | list_add(&ops->node, &klp_ops); | ||
| 387 | |||
| 388 | INIT_LIST_HEAD(&ops->func_stack); | ||
| 389 | list_add_rcu(&func->stack_node, &ops->func_stack); | ||
| 390 | |||
| 391 | ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 0, 0); | ||
| 392 | if (ret) { | ||
| 393 | pr_err("failed to set ftrace filter for function '%s' (%d)\n", | ||
| 394 | func->old_name, ret); | ||
| 395 | goto err; | ||
| 396 | } | ||
| 397 | |||
| 398 | ret = register_ftrace_function(&ops->fops); | ||
| 399 | if (ret) { | ||
| 400 | pr_err("failed to register ftrace handler for function '%s' (%d)\n", | ||
| 401 | func->old_name, ret); | ||
| 402 | ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0); | ||
| 403 | goto err; | ||
| 404 | } | ||
| 405 | |||
| 406 | |||
| 407 | } else { | ||
| 408 | list_add_rcu(&func->stack_node, &ops->func_stack); | ||
| 409 | } | ||
| 410 | |||
| 411 | func->state = KLP_ENABLED; | ||
| 412 | |||
| 413 | return 0; | ||
| 414 | |||
| 415 | err: | ||
| 416 | list_del_rcu(&func->stack_node); | ||
| 417 | list_del(&ops->node); | ||
| 418 | kfree(ops); | ||
| 419 | return ret; | ||
| 420 | } | ||
| 421 | |||
| 422 | static int klp_disable_object(struct klp_object *obj) | ||
| 423 | { | ||
| 424 | struct klp_func *func; | ||
| 425 | int ret; | ||
| 426 | |||
| 427 | for (func = obj->funcs; func->old_name; func++) { | ||
| 428 | if (func->state != KLP_ENABLED) | ||
| 429 | continue; | ||
| 430 | |||
| 431 | ret = klp_disable_func(func); | ||
| 432 | if (ret) | ||
| 433 | return ret; | ||
| 434 | } | ||
| 435 | |||
| 436 | obj->state = KLP_DISABLED; | ||
| 437 | |||
| 438 | return 0; | ||
| 439 | } | ||
| 440 | |||
| 441 | static int klp_enable_object(struct klp_object *obj) | ||
| 442 | { | ||
| 443 | struct klp_func *func; | ||
| 444 | int ret; | ||
| 445 | |||
| 446 | if (WARN_ON(obj->state != KLP_DISABLED)) | ||
| 447 | return -EINVAL; | ||
| 448 | |||
| 449 | if (WARN_ON(!klp_is_object_loaded(obj))) | ||
| 450 | return -EINVAL; | ||
| 451 | |||
| 452 | for (func = obj->funcs; func->old_name; func++) { | ||
| 453 | ret = klp_enable_func(func); | ||
| 454 | if (ret) | ||
| 455 | goto unregister; | ||
| 456 | } | ||
| 457 | obj->state = KLP_ENABLED; | ||
| 458 | |||
| 459 | return 0; | ||
| 460 | |||
| 461 | unregister: | ||
| 462 | WARN_ON(klp_disable_object(obj)); | ||
| 463 | return ret; | ||
| 464 | } | ||
| 465 | |||
| 466 | static int __klp_disable_patch(struct klp_patch *patch) | ||
| 467 | { | ||
| 468 | struct klp_object *obj; | ||
| 469 | int ret; | ||
| 470 | |||
| 471 | /* enforce stacking: only the last enabled patch can be disabled */ | ||
| 472 | if (!list_is_last(&patch->list, &klp_patches) && | ||
| 473 | list_next_entry(patch, list)->state == KLP_ENABLED) | ||
| 474 | return -EBUSY; | ||
| 475 | |||
| 476 | pr_notice("disabling patch '%s'\n", patch->mod->name); | ||
| 477 | |||
| 478 | for (obj = patch->objs; obj->funcs; obj++) { | ||
| 479 | if (obj->state != KLP_ENABLED) | ||
| 480 | continue; | ||
| 481 | |||
| 482 | ret = klp_disable_object(obj); | ||
| 483 | if (ret) | ||
| 484 | return ret; | ||
| 485 | } | ||
| 486 | |||
| 487 | patch->state = KLP_DISABLED; | ||
| 488 | |||
| 489 | return 0; | ||
| 490 | } | ||
| 491 | |||
| 492 | /** | ||
| 493 | * klp_disable_patch() - disables a registered patch | ||
| 494 | * @patch: The registered, enabled patch to be disabled | ||
| 495 | * | ||
| 496 | * Unregisters the patched functions from ftrace. | ||
| 497 | * | ||
| 498 | * Return: 0 on success, otherwise error | ||
| 499 | */ | ||
| 500 | int klp_disable_patch(struct klp_patch *patch) | ||
| 501 | { | ||
| 502 | int ret; | ||
| 503 | |||
| 504 | mutex_lock(&klp_mutex); | ||
| 505 | |||
| 506 | if (!klp_is_patch_registered(patch)) { | ||
| 507 | ret = -EINVAL; | ||
| 508 | goto err; | ||
| 509 | } | ||
| 510 | |||
| 511 | if (patch->state == KLP_DISABLED) { | ||
| 512 | ret = -EINVAL; | ||
| 513 | goto err; | ||
| 514 | } | ||
| 515 | |||
| 516 | ret = __klp_disable_patch(patch); | ||
| 517 | |||
| 518 | err: | ||
| 519 | mutex_unlock(&klp_mutex); | ||
| 520 | return ret; | ||
| 521 | } | ||
| 522 | EXPORT_SYMBOL_GPL(klp_disable_patch); | ||
| 523 | |||
| 524 | static int __klp_enable_patch(struct klp_patch *patch) | ||
| 525 | { | ||
| 526 | struct klp_object *obj; | ||
| 527 | int ret; | ||
| 528 | |||
| 529 | if (WARN_ON(patch->state != KLP_DISABLED)) | ||
| 530 | return -EINVAL; | ||
| 531 | |||
| 532 | /* enforce stacking: only the first disabled patch can be enabled */ | ||
| 533 | if (patch->list.prev != &klp_patches && | ||
| 534 | list_prev_entry(patch, list)->state == KLP_DISABLED) | ||
| 535 | return -EBUSY; | ||
| 536 | |||
| 537 | pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n"); | ||
| 538 | add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK); | ||
| 539 | |||
| 540 | pr_notice("enabling patch '%s'\n", patch->mod->name); | ||
| 541 | |||
| 542 | for (obj = patch->objs; obj->funcs; obj++) { | ||
| 543 | klp_find_object_module(obj); | ||
| 544 | |||
| 545 | if (!klp_is_object_loaded(obj)) | ||
| 546 | continue; | ||
| 547 | |||
| 548 | ret = klp_enable_object(obj); | ||
| 549 | if (ret) | ||
| 550 | goto unregister; | ||
| 551 | } | ||
| 552 | |||
| 553 | patch->state = KLP_ENABLED; | ||
| 554 | |||
| 555 | return 0; | ||
| 556 | |||
| 557 | unregister: | ||
| 558 | WARN_ON(__klp_disable_patch(patch)); | ||
| 559 | return ret; | ||
| 560 | } | ||
| 561 | |||
| 562 | /** | ||
| 563 | * klp_enable_patch() - enables a registered patch | ||
| 564 | * @patch: The registered, disabled patch to be enabled | ||
| 565 | * | ||
| 566 | * Performs the needed symbol lookups and code relocations, | ||
| 567 | * then registers the patched functions with ftrace. | ||
| 568 | * | ||
| 569 | * Return: 0 on success, otherwise error | ||
| 570 | */ | ||
| 571 | int klp_enable_patch(struct klp_patch *patch) | ||
| 572 | { | ||
| 573 | int ret; | ||
| 574 | |||
| 575 | mutex_lock(&klp_mutex); | ||
| 576 | |||
| 577 | if (!klp_is_patch_registered(patch)) { | ||
| 578 | ret = -EINVAL; | ||
| 579 | goto err; | ||
| 580 | } | ||
| 581 | |||
| 582 | ret = __klp_enable_patch(patch); | ||
| 583 | |||
| 584 | err: | ||
| 585 | mutex_unlock(&klp_mutex); | ||
| 586 | return ret; | ||
| 587 | } | ||
| 588 | EXPORT_SYMBOL_GPL(klp_enable_patch); | ||
| 589 | |||
| 590 | /* | ||
| 591 | * Sysfs Interface | ||
| 592 | * | ||
| 593 | * /sys/kernel/livepatch | ||
| 594 | * /sys/kernel/livepatch/<patch> | ||
| 595 | * /sys/kernel/livepatch/<patch>/enabled | ||
| 596 | * /sys/kernel/livepatch/<patch>/<object> | ||
| 597 | * /sys/kernel/livepatch/<patch>/<object>/<func> | ||
| 598 | */ | ||
| 599 | |||
| 600 | static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, | ||
| 601 | const char *buf, size_t count) | ||
| 602 | { | ||
| 603 | struct klp_patch *patch; | ||
| 604 | int ret; | ||
| 605 | unsigned long val; | ||
| 606 | |||
| 607 | ret = kstrtoul(buf, 10, &val); | ||
| 608 | if (ret) | ||
| 609 | return -EINVAL; | ||
| 610 | |||
| 611 | if (val != KLP_DISABLED && val != KLP_ENABLED) | ||
| 612 | return -EINVAL; | ||
| 613 | |||
| 614 | patch = container_of(kobj, struct klp_patch, kobj); | ||
| 615 | |||
| 616 | mutex_lock(&klp_mutex); | ||
| 617 | |||
| 618 | if (val == patch->state) { | ||
| 619 | /* already in requested state */ | ||
| 620 | ret = -EINVAL; | ||
| 621 | goto err; | ||
| 622 | } | ||
| 623 | |||
| 624 | if (val == KLP_ENABLED) { | ||
| 625 | ret = __klp_enable_patch(patch); | ||
| 626 | if (ret) | ||
| 627 | goto err; | ||
| 628 | } else { | ||
| 629 | ret = __klp_disable_patch(patch); | ||
| 630 | if (ret) | ||
| 631 | goto err; | ||
| 632 | } | ||
| 633 | |||
| 634 | mutex_unlock(&klp_mutex); | ||
| 635 | |||
| 636 | return count; | ||
| 637 | |||
| 638 | err: | ||
| 639 | mutex_unlock(&klp_mutex); | ||
| 640 | return ret; | ||
| 641 | } | ||
| 642 | |||
| 643 | static ssize_t enabled_show(struct kobject *kobj, | ||
| 644 | struct kobj_attribute *attr, char *buf) | ||
| 645 | { | ||
| 646 | struct klp_patch *patch; | ||
| 647 | |||
| 648 | patch = container_of(kobj, struct klp_patch, kobj); | ||
| 649 | return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->state); | ||
| 650 | } | ||
| 651 | |||
| 652 | static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled); | ||
| 653 | static struct attribute *klp_patch_attrs[] = { | ||
| 654 | &enabled_kobj_attr.attr, | ||
| 655 | NULL | ||
| 656 | }; | ||
| 657 | |||
| 658 | static void klp_kobj_release_patch(struct kobject *kobj) | ||
| 659 | { | ||
| 660 | /* | ||
| 661 | * Once we have a consistency model we'll need to module_put() the | ||
| 662 | * patch module here. See klp_register_patch() for more details. | ||
| 663 | */ | ||
| 664 | } | ||
| 665 | |||
| 666 | static struct kobj_type klp_ktype_patch = { | ||
| 667 | .release = klp_kobj_release_patch, | ||
| 668 | .sysfs_ops = &kobj_sysfs_ops, | ||
| 669 | .default_attrs = klp_patch_attrs, | ||
| 670 | }; | ||
| 671 | |||
| 672 | static void klp_kobj_release_func(struct kobject *kobj) | ||
| 673 | { | ||
| 674 | } | ||
| 675 | |||
| 676 | static struct kobj_type klp_ktype_func = { | ||
| 677 | .release = klp_kobj_release_func, | ||
| 678 | .sysfs_ops = &kobj_sysfs_ops, | ||
| 679 | }; | ||
| 680 | |||
| 681 | /* | ||
| 682 | * Free all functions' kobjects in the array up to some limit. When limit is | ||
| 683 | * NULL, all kobjects are freed. | ||
| 684 | */ | ||
| 685 | static void klp_free_funcs_limited(struct klp_object *obj, | ||
| 686 | struct klp_func *limit) | ||
| 687 | { | ||
| 688 | struct klp_func *func; | ||
| 689 | |||
| 690 | for (func = obj->funcs; func->old_name && func != limit; func++) | ||
| 691 | kobject_put(&func->kobj); | ||
| 692 | } | ||
| 693 | |||
| 694 | /* Clean up when a patched object is unloaded */ | ||
| 695 | static void klp_free_object_loaded(struct klp_object *obj) | ||
| 696 | { | ||
| 697 | struct klp_func *func; | ||
| 698 | |||
| 699 | obj->mod = NULL; | ||
| 700 | |||
| 701 | for (func = obj->funcs; func->old_name; func++) | ||
| 702 | func->old_addr = 0; | ||
| 703 | } | ||
| 704 | |||
| 705 | /* | ||
| 706 | * Free all objects' kobjects in the array up to some limit. When limit is | ||
| 707 | * NULL, all kobjects are freed. | ||
| 708 | */ | ||
| 709 | static void klp_free_objects_limited(struct klp_patch *patch, | ||
| 710 | struct klp_object *limit) | ||
| 711 | { | ||
| 712 | struct klp_object *obj; | ||
| 713 | |||
| 714 | for (obj = patch->objs; obj->funcs && obj != limit; obj++) { | ||
| 715 | klp_free_funcs_limited(obj, NULL); | ||
| 716 | kobject_put(obj->kobj); | ||
| 717 | } | ||
| 718 | } | ||
| 719 | |||
| 720 | static void klp_free_patch(struct klp_patch *patch) | ||
| 721 | { | ||
| 722 | klp_free_objects_limited(patch, NULL); | ||
| 723 | if (!list_empty(&patch->list)) | ||
| 724 | list_del(&patch->list); | ||
| 725 | kobject_put(&patch->kobj); | ||
| 726 | } | ||
| 727 | |||
| 728 | static int klp_init_func(struct klp_object *obj, struct klp_func *func) | ||
| 729 | { | ||
| 730 | INIT_LIST_HEAD(&func->stack_node); | ||
| 731 | func->state = KLP_DISABLED; | ||
| 732 | |||
| 733 | return kobject_init_and_add(&func->kobj, &klp_ktype_func, | ||
| 734 | obj->kobj, func->old_name); | ||
| 735 | } | ||
| 736 | |||
| 737 | /* parts of the initialization that is done only when the object is loaded */ | ||
| 738 | static int klp_init_object_loaded(struct klp_patch *patch, | ||
| 739 | struct klp_object *obj) | ||
| 740 | { | ||
| 741 | struct klp_func *func; | ||
| 742 | int ret; | ||
| 743 | |||
| 744 | if (obj->relocs) { | ||
| 745 | ret = klp_write_object_relocations(patch->mod, obj); | ||
| 746 | if (ret) | ||
| 747 | return ret; | ||
| 748 | } | ||
| 749 | |||
| 750 | for (func = obj->funcs; func->old_name; func++) { | ||
| 751 | ret = klp_find_verify_func_addr(obj, func); | ||
| 752 | if (ret) | ||
| 753 | return ret; | ||
| 754 | } | ||
| 755 | |||
| 756 | return 0; | ||
| 757 | } | ||
| 758 | |||
| 759 | static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) | ||
| 760 | { | ||
| 761 | struct klp_func *func; | ||
| 762 | int ret; | ||
| 763 | const char *name; | ||
| 764 | |||
| 765 | if (!obj->funcs) | ||
| 766 | return -EINVAL; | ||
| 767 | |||
| 768 | obj->state = KLP_DISABLED; | ||
| 769 | |||
| 770 | klp_find_object_module(obj); | ||
| 771 | |||
| 772 | name = klp_is_module(obj) ? obj->name : "vmlinux"; | ||
| 773 | obj->kobj = kobject_create_and_add(name, &patch->kobj); | ||
| 774 | if (!obj->kobj) | ||
| 775 | return -ENOMEM; | ||
| 776 | |||
| 777 | for (func = obj->funcs; func->old_name; func++) { | ||
| 778 | ret = klp_init_func(obj, func); | ||
| 779 | if (ret) | ||
| 780 | goto free; | ||
| 781 | } | ||
| 782 | |||
| 783 | if (klp_is_object_loaded(obj)) { | ||
| 784 | ret = klp_init_object_loaded(patch, obj); | ||
| 785 | if (ret) | ||
| 786 | goto free; | ||
| 787 | } | ||
| 788 | |||
| 789 | return 0; | ||
| 790 | |||
| 791 | free: | ||
| 792 | klp_free_funcs_limited(obj, func); | ||
| 793 | kobject_put(obj->kobj); | ||
| 794 | return ret; | ||
| 795 | } | ||
| 796 | |||
| 797 | static int klp_init_patch(struct klp_patch *patch) | ||
| 798 | { | ||
| 799 | struct klp_object *obj; | ||
| 800 | int ret; | ||
| 801 | |||
| 802 | if (!patch->objs) | ||
| 803 | return -EINVAL; | ||
| 804 | |||
| 805 | mutex_lock(&klp_mutex); | ||
| 806 | |||
| 807 | patch->state = KLP_DISABLED; | ||
| 808 | |||
| 809 | ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, | ||
| 810 | klp_root_kobj, patch->mod->name); | ||
| 811 | if (ret) | ||
| 812 | goto unlock; | ||
| 813 | |||
| 814 | for (obj = patch->objs; obj->funcs; obj++) { | ||
| 815 | ret = klp_init_object(patch, obj); | ||
| 816 | if (ret) | ||
| 817 | goto free; | ||
| 818 | } | ||
| 819 | |||
| 820 | list_add_tail(&patch->list, &klp_patches); | ||
| 821 | |||
| 822 | mutex_unlock(&klp_mutex); | ||
| 823 | |||
| 824 | return 0; | ||
| 825 | |||
| 826 | free: | ||
| 827 | klp_free_objects_limited(patch, obj); | ||
| 828 | kobject_put(&patch->kobj); | ||
| 829 | unlock: | ||
| 830 | mutex_unlock(&klp_mutex); | ||
| 831 | return ret; | ||
| 832 | } | ||
| 833 | |||
| 834 | /** | ||
| 835 | * klp_unregister_patch() - unregisters a patch | ||
| 836 | * @patch: Disabled patch to be unregistered | ||
| 837 | * | ||
| 838 | * Frees the data structures and removes the sysfs interface. | ||
| 839 | * | ||
| 840 | * Return: 0 on success, otherwise error | ||
| 841 | */ | ||
| 842 | int klp_unregister_patch(struct klp_patch *patch) | ||
| 843 | { | ||
| 844 | int ret = 0; | ||
| 845 | |||
| 846 | mutex_lock(&klp_mutex); | ||
| 847 | |||
| 848 | if (!klp_is_patch_registered(patch)) { | ||
| 849 | ret = -EINVAL; | ||
| 850 | goto out; | ||
| 851 | } | ||
| 852 | |||
| 853 | if (patch->state == KLP_ENABLED) { | ||
| 854 | ret = -EBUSY; | ||
| 855 | goto out; | ||
| 856 | } | ||
| 857 | |||
| 858 | klp_free_patch(patch); | ||
| 859 | |||
| 860 | out: | ||
| 861 | mutex_unlock(&klp_mutex); | ||
| 862 | return ret; | ||
| 863 | } | ||
| 864 | EXPORT_SYMBOL_GPL(klp_unregister_patch); | ||
| 865 | |||
| 866 | /** | ||
| 867 | * klp_register_patch() - registers a patch | ||
| 868 | * @patch: Patch to be registered | ||
| 869 | * | ||
| 870 | * Initializes the data structure associated with the patch and | ||
| 871 | * creates the sysfs interface. | ||
| 872 | * | ||
| 873 | * Return: 0 on success, otherwise error | ||
| 874 | */ | ||
| 875 | int klp_register_patch(struct klp_patch *patch) | ||
| 876 | { | ||
| 877 | int ret; | ||
| 878 | |||
| 879 | if (!klp_initialized()) | ||
| 880 | return -ENODEV; | ||
| 881 | |||
| 882 | if (!patch || !patch->mod) | ||
| 883 | return -EINVAL; | ||
| 884 | |||
| 885 | /* | ||
| 886 | * A reference is taken on the patch module to prevent it from being | ||
| 887 | * unloaded. Right now, we don't allow patch modules to unload since | ||
| 888 | * there is currently no method to determine if a thread is still | ||
| 889 | * running in the patched code contained in the patch module once | ||
| 890 | * the ftrace registration is successful. | ||
| 891 | */ | ||
| 892 | if (!try_module_get(patch->mod)) | ||
| 893 | return -ENODEV; | ||
| 894 | |||
| 895 | ret = klp_init_patch(patch); | ||
| 896 | if (ret) | ||
| 897 | module_put(patch->mod); | ||
| 898 | |||
| 899 | return ret; | ||
| 900 | } | ||
| 901 | EXPORT_SYMBOL_GPL(klp_register_patch); | ||
| 902 | |||
| 903 | static void klp_module_notify_coming(struct klp_patch *patch, | ||
| 904 | struct klp_object *obj) | ||
| 905 | { | ||
| 906 | struct module *pmod = patch->mod; | ||
| 907 | struct module *mod = obj->mod; | ||
| 908 | int ret; | ||
| 909 | |||
| 910 | ret = klp_init_object_loaded(patch, obj); | ||
| 911 | if (ret) | ||
| 912 | goto err; | ||
| 913 | |||
| 914 | if (patch->state == KLP_DISABLED) | ||
| 915 | return; | ||
| 916 | |||
| 917 | pr_notice("applying patch '%s' to loading module '%s'\n", | ||
| 918 | pmod->name, mod->name); | ||
| 919 | |||
| 920 | ret = klp_enable_object(obj); | ||
| 921 | if (!ret) | ||
| 922 | return; | ||
| 923 | |||
| 924 | err: | ||
| 925 | pr_warn("failed to apply patch '%s' to module '%s' (%d)\n", | ||
| 926 | pmod->name, mod->name, ret); | ||
| 927 | } | ||
| 928 | |||
| 929 | static void klp_module_notify_going(struct klp_patch *patch, | ||
| 930 | struct klp_object *obj) | ||
| 931 | { | ||
| 932 | struct module *pmod = patch->mod; | ||
| 933 | struct module *mod = obj->mod; | ||
| 934 | int ret; | ||
| 935 | |||
| 936 | if (patch->state == KLP_DISABLED) | ||
| 937 | goto disabled; | ||
| 938 | |||
| 939 | pr_notice("reverting patch '%s' on unloading module '%s'\n", | ||
| 940 | pmod->name, mod->name); | ||
| 941 | |||
| 942 | ret = klp_disable_object(obj); | ||
| 943 | if (ret) | ||
| 944 | pr_warn("failed to revert patch '%s' on module '%s' (%d)\n", | ||
| 945 | pmod->name, mod->name, ret); | ||
| 946 | |||
| 947 | disabled: | ||
| 948 | klp_free_object_loaded(obj); | ||
| 949 | } | ||
| 950 | |||
| 951 | static int klp_module_notify(struct notifier_block *nb, unsigned long action, | ||
| 952 | void *data) | ||
| 953 | { | ||
| 954 | struct module *mod = data; | ||
| 955 | struct klp_patch *patch; | ||
| 956 | struct klp_object *obj; | ||
| 957 | |||
| 958 | if (action != MODULE_STATE_COMING && action != MODULE_STATE_GOING) | ||
| 959 | return 0; | ||
| 960 | |||
| 961 | mutex_lock(&klp_mutex); | ||
| 962 | |||
| 963 | list_for_each_entry(patch, &klp_patches, list) { | ||
| 964 | for (obj = patch->objs; obj->funcs; obj++) { | ||
| 965 | if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) | ||
| 966 | continue; | ||
| 967 | |||
| 968 | if (action == MODULE_STATE_COMING) { | ||
| 969 | obj->mod = mod; | ||
| 970 | klp_module_notify_coming(patch, obj); | ||
| 971 | } else /* MODULE_STATE_GOING */ | ||
| 972 | klp_module_notify_going(patch, obj); | ||
| 973 | |||
| 974 | break; | ||
| 975 | } | ||
| 976 | } | ||
| 977 | |||
| 978 | mutex_unlock(&klp_mutex); | ||
| 979 | |||
| 980 | return 0; | ||
| 981 | } | ||
| 982 | |||
| 983 | static struct notifier_block klp_module_nb = { | ||
| 984 | .notifier_call = klp_module_notify, | ||
| 985 | .priority = INT_MIN+1, /* called late but before ftrace notifier */ | ||
| 986 | }; | ||
| 987 | |||
| 988 | static int klp_init(void) | ||
| 989 | { | ||
| 990 | int ret; | ||
| 991 | |||
| 992 | ret = klp_check_compiler_support(); | ||
| 993 | if (ret) { | ||
| 994 | pr_info("Your compiler is too old; turning off.\n"); | ||
| 995 | return -EINVAL; | ||
| 996 | } | ||
| 997 | |||
| 998 | ret = register_module_notifier(&klp_module_nb); | ||
| 999 | if (ret) | ||
| 1000 | return ret; | ||
| 1001 | |||
| 1002 | klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj); | ||
| 1003 | if (!klp_root_kobj) { | ||
| 1004 | ret = -ENOMEM; | ||
| 1005 | goto unregister; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | return 0; | ||
| 1009 | |||
| 1010 | unregister: | ||
| 1011 | unregister_module_notifier(&klp_module_nb); | ||
| 1012 | return ret; | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | module_init(klp_init); | ||
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 8541bfdfd232..de7a416cca2a 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | 1 | ||
| 2 | obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o | 2 | obj-y += mutex.o semaphore.o rwsem.o |
| 3 | 3 | ||
| 4 | ifdef CONFIG_FUNCTION_TRACER | 4 | ifdef CONFIG_FUNCTION_TRACER |
| 5 | CFLAGS_REMOVE_lockdep.o = -pg | 5 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) |
| 6 | CFLAGS_REMOVE_lockdep_proc.o = -pg | 6 | CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE) |
| 7 | CFLAGS_REMOVE_mutex-debug.o = -pg | 7 | CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) |
| 8 | CFLAGS_REMOVE_rtmutex-debug.o = -pg | 8 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) |
| 9 | endif | 9 | endif |
| 10 | 10 | ||
| 11 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 11 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
| @@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y) | |||
| 14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | 14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o |
| 15 | endif | 15 | endif |
| 16 | obj-$(CONFIG_SMP) += spinlock.o | 16 | obj-$(CONFIG_SMP) += spinlock.o |
| 17 | obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o | ||
| 17 | obj-$(CONFIG_SMP) += lglock.o | 18 | obj-$(CONFIG_SMP) += lglock.o |
| 18 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | 19 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
| 19 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | 20 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o |
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 4d60986fcbee..d1fe2ba5bac9 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h | |||
| @@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | |||
| 108 | arch_mcs_spin_unlock_contended(&next->locked); | 108 | arch_mcs_spin_unlock_contended(&next->locked); |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | /* | ||
| 112 | * Cancellable version of the MCS lock above. | ||
| 113 | * | ||
| 114 | * Intended for adaptive spinning of sleeping locks: | ||
| 115 | * mutex_lock()/rwsem_down_{read,write}() etc. | ||
| 116 | */ | ||
| 117 | |||
| 118 | struct optimistic_spin_node { | ||
| 119 | struct optimistic_spin_node *next, *prev; | ||
| 120 | int locked; /* 1 if lock acquired */ | ||
| 121 | int cpu; /* encoded CPU # value */ | ||
| 122 | }; | ||
| 123 | |||
| 124 | extern bool osq_lock(struct optimistic_spin_queue *lock); | ||
| 125 | extern void osq_unlock(struct optimistic_spin_queue *lock); | ||
| 126 | |||
| 127 | #endif /* __LINUX_MCS_SPINLOCK_H */ | 111 | #endif /* __LINUX_MCS_SPINLOCK_H */ |
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 454195194d4a..94674e5919cb 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c | |||
| @@ -81,7 +81,7 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); | |||
| 81 | * The mutex must later on be released by the same task that | 81 | * The mutex must later on be released by the same task that |
| 82 | * acquired it. Recursive locking is not allowed. The task | 82 | * acquired it. Recursive locking is not allowed. The task |
| 83 | * may not exit without first unlocking the mutex. Also, kernel | 83 | * may not exit without first unlocking the mutex. Also, kernel |
| 84 | * memory where the mutex resides mutex must not be freed with | 84 | * memory where the mutex resides must not be freed with |
| 85 | * the mutex still locked. The mutex must first be initialized | 85 | * the mutex still locked. The mutex must first be initialized |
| 86 | * (or statically defined) before it can be locked. memset()-ing | 86 | * (or statically defined) before it can be locked. memset()-ing |
| 87 | * the mutex to 0 is not allowed. | 87 | * the mutex to 0 is not allowed. |
| @@ -147,7 +147,7 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |||
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | /* | 149 | /* |
| 150 | * after acquiring lock with fastpath or when we lost out in contested | 150 | * After acquiring lock with fastpath or when we lost out in contested |
| 151 | * slowpath, set ctx and wake up any waiters so they can recheck. | 151 | * slowpath, set ctx and wake up any waiters so they can recheck. |
| 152 | * | 152 | * |
| 153 | * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, | 153 | * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, |
| @@ -191,19 +191,32 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, | |||
| 191 | spin_unlock_mutex(&lock->base.wait_lock, flags); | 191 | spin_unlock_mutex(&lock->base.wait_lock, flags); |
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | |||
| 195 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | ||
| 196 | /* | 194 | /* |
| 197 | * In order to avoid a stampede of mutex spinners from acquiring the mutex | 195 | * After acquiring lock in the slowpath set ctx and wake up any |
| 198 | * more or less simultaneously, the spinners need to acquire a MCS lock | 196 | * waiters so they can recheck. |
| 199 | * first before spinning on the owner field. | ||
| 200 | * | 197 | * |
| 198 | * Callers must hold the mutex wait_lock. | ||
| 201 | */ | 199 | */ |
| 200 | static __always_inline void | ||
| 201 | ww_mutex_set_context_slowpath(struct ww_mutex *lock, | ||
| 202 | struct ww_acquire_ctx *ctx) | ||
| 203 | { | ||
| 204 | struct mutex_waiter *cur; | ||
| 202 | 205 | ||
| 203 | /* | 206 | ww_mutex_lock_acquired(lock, ctx); |
| 204 | * Mutex spinning code migrated from kernel/sched/core.c | 207 | lock->ctx = ctx; |
| 205 | */ | 208 | |
| 209 | /* | ||
| 210 | * Give any possible sleeping processes the chance to wake up, | ||
| 211 | * so they can recheck if they have to back off. | ||
| 212 | */ | ||
| 213 | list_for_each_entry(cur, &lock->base.wait_list, list) { | ||
| 214 | debug_mutex_wake_waiter(&lock->base, cur); | ||
| 215 | wake_up_process(cur->task); | ||
| 216 | } | ||
| 217 | } | ||
| 206 | 218 | ||
| 219 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | ||
| 207 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | 220 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
| 208 | { | 221 | { |
| 209 | if (lock->owner != owner) | 222 | if (lock->owner != owner) |
| @@ -307,6 +320,11 @@ static bool mutex_optimistic_spin(struct mutex *lock, | |||
| 307 | if (!mutex_can_spin_on_owner(lock)) | 320 | if (!mutex_can_spin_on_owner(lock)) |
| 308 | goto done; | 321 | goto done; |
| 309 | 322 | ||
| 323 | /* | ||
| 324 | * In order to avoid a stampede of mutex spinners trying to | ||
| 325 | * acquire the mutex all at once, the spinners need to take a | ||
| 326 | * MCS (queued) lock first before spinning on the owner field. | ||
| 327 | */ | ||
| 310 | if (!osq_lock(&lock->osq)) | 328 | if (!osq_lock(&lock->osq)) |
| 311 | goto done; | 329 | goto done; |
| 312 | 330 | ||
| @@ -469,7 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) | |||
| 469 | EXPORT_SYMBOL(ww_mutex_unlock); | 487 | EXPORT_SYMBOL(ww_mutex_unlock); |
| 470 | 488 | ||
| 471 | static inline int __sched | 489 | static inline int __sched |
| 472 | __mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) | 490 | __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) |
| 473 | { | 491 | { |
| 474 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | 492 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); |
| 475 | struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | 493 | struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); |
| @@ -557,7 +575,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 557 | } | 575 | } |
| 558 | 576 | ||
| 559 | if (use_ww_ctx && ww_ctx->acquired > 0) { | 577 | if (use_ww_ctx && ww_ctx->acquired > 0) { |
| 560 | ret = __mutex_lock_check_stamp(lock, ww_ctx); | 578 | ret = __ww_mutex_lock_check_stamp(lock, ww_ctx); |
| 561 | if (ret) | 579 | if (ret) |
| 562 | goto err; | 580 | goto err; |
| 563 | } | 581 | } |
| @@ -569,6 +587,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 569 | schedule_preempt_disabled(); | 587 | schedule_preempt_disabled(); |
| 570 | spin_lock_mutex(&lock->wait_lock, flags); | 588 | spin_lock_mutex(&lock->wait_lock, flags); |
| 571 | } | 589 | } |
| 590 | __set_task_state(task, TASK_RUNNING); | ||
| 591 | |||
| 572 | mutex_remove_waiter(lock, &waiter, current_thread_info()); | 592 | mutex_remove_waiter(lock, &waiter, current_thread_info()); |
| 573 | /* set it to 0 if there are no waiters left: */ | 593 | /* set it to 0 if there are no waiters left: */ |
| 574 | if (likely(list_empty(&lock->wait_list))) | 594 | if (likely(list_empty(&lock->wait_list))) |
| @@ -582,23 +602,7 @@ skip_wait: | |||
| 582 | 602 | ||
| 583 | if (use_ww_ctx) { | 603 | if (use_ww_ctx) { |
| 584 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | 604 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); |
| 585 | struct mutex_waiter *cur; | 605 | ww_mutex_set_context_slowpath(ww, ww_ctx); |
| 586 | |||
| 587 | /* | ||
| 588 | * This branch gets optimized out for the common case, | ||
| 589 | * and is only important for ww_mutex_lock. | ||
| 590 | */ | ||
| 591 | ww_mutex_lock_acquired(ww, ww_ctx); | ||
| 592 | ww->ctx = ww_ctx; | ||
| 593 | |||
| 594 | /* | ||
| 595 | * Give any possible sleeping processes the chance to wake up, | ||
| 596 | * so they can recheck if they have to back off. | ||
| 597 | */ | ||
| 598 | list_for_each_entry(cur, &lock->wait_list, list) { | ||
| 599 | debug_mutex_wake_waiter(lock, cur); | ||
| 600 | wake_up_process(cur->task); | ||
| 601 | } | ||
| 602 | } | 606 | } |
| 603 | 607 | ||
| 604 | spin_unlock_mutex(&lock->wait_lock, flags); | 608 | spin_unlock_mutex(&lock->wait_lock, flags); |
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/osq_lock.c index 9887a905a762..c112d00341b0 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/osq_lock.c | |||
| @@ -1,8 +1,6 @@ | |||
| 1 | #include <linux/percpu.h> | 1 | #include <linux/percpu.h> |
| 2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
| 3 | #include "mcs_spinlock.h" | 3 | #include <linux/osq_lock.h> |
| 4 | |||
| 5 | #ifdef CONFIG_SMP | ||
| 6 | 4 | ||
| 7 | /* | 5 | /* |
| 8 | * An MCS like lock especially tailored for optimistic spinning for sleeping | 6 | * An MCS like lock especially tailored for optimistic spinning for sleeping |
| @@ -111,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) | |||
| 111 | * cmpxchg in an attempt to undo our queueing. | 109 | * cmpxchg in an attempt to undo our queueing. |
| 112 | */ | 110 | */ |
| 113 | 111 | ||
| 114 | while (!smp_load_acquire(&node->locked)) { | 112 | while (!ACCESS_ONCE(node->locked)) { |
| 115 | /* | 113 | /* |
| 116 | * If we need to reschedule bail... so we can block. | 114 | * If we need to reschedule bail... so we can block. |
| 117 | */ | 115 | */ |
| @@ -203,6 +201,3 @@ void osq_unlock(struct optimistic_spin_queue *lock) | |||
| 203 | if (next) | 201 | if (next) |
| 204 | ACCESS_ONCE(next->locked) = 1; | 202 | ACCESS_ONCE(next->locked) = 1; |
| 205 | } | 203 | } |
| 206 | |||
| 207 | #endif | ||
| 208 | |||
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 7c98873a3077..e16e5542bf13 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
| @@ -1130,6 +1130,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 1130 | set_current_state(state); | 1130 | set_current_state(state); |
| 1131 | } | 1131 | } |
| 1132 | 1132 | ||
| 1133 | __set_current_state(TASK_RUNNING); | ||
| 1133 | return ret; | 1134 | return ret; |
| 1134 | } | 1135 | } |
| 1135 | 1136 | ||
| @@ -1188,12 +1189,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 1188 | ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); | 1189 | ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); |
| 1189 | 1190 | ||
| 1190 | if (likely(!ret)) | 1191 | if (likely(!ret)) |
| 1192 | /* sleep on the mutex */ | ||
| 1191 | ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | 1193 | ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); |
| 1192 | 1194 | ||
| 1193 | set_current_state(TASK_RUNNING); | ||
| 1194 | |||
| 1195 | if (unlikely(ret)) { | 1195 | if (unlikely(ret)) { |
| 1196 | remove_waiter(lock, &waiter); | 1196 | if (rt_mutex_has_waiters(lock)) |
| 1197 | remove_waiter(lock, &waiter); | ||
| 1197 | rt_mutex_handle_deadlock(ret, chwalk, &waiter); | 1198 | rt_mutex_handle_deadlock(ret, chwalk, &waiter); |
| 1198 | } | 1199 | } |
| 1199 | 1200 | ||
| @@ -1626,10 +1627,9 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |||
| 1626 | 1627 | ||
| 1627 | set_current_state(TASK_INTERRUPTIBLE); | 1628 | set_current_state(TASK_INTERRUPTIBLE); |
| 1628 | 1629 | ||
| 1630 | /* sleep on the mutex */ | ||
| 1629 | ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | 1631 | ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); |
| 1630 | 1632 | ||
| 1631 | set_current_state(TASK_RUNNING); | ||
| 1632 | |||
| 1633 | if (unlikely(ret)) | 1633 | if (unlikely(ret)) |
| 1634 | remove_waiter(lock, waiter); | 1634 | remove_waiter(lock, waiter); |
| 1635 | 1635 | ||
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 2c93571162cb..2555ae15ec14 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c | |||
| @@ -154,7 +154,7 @@ void __sched __down_read(struct rw_semaphore *sem) | |||
| 154 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 154 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
| 155 | } | 155 | } |
| 156 | 156 | ||
| 157 | tsk->state = TASK_RUNNING; | 157 | __set_task_state(tsk, TASK_RUNNING); |
| 158 | out: | 158 | out: |
| 159 | ; | 159 | ; |
| 160 | } | 160 | } |
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 7628c3fc37ca..2f7cc4076f50 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -242,8 +242,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
| 242 | schedule(); | 242 | schedule(); |
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | tsk->state = TASK_RUNNING; | 245 | __set_task_state(tsk, TASK_RUNNING); |
| 246 | |||
| 247 | return sem; | 246 | return sem; |
| 248 | } | 247 | } |
| 249 | EXPORT_SYMBOL(rwsem_down_read_failed); | 248 | EXPORT_SYMBOL(rwsem_down_read_failed); |
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 4b082b5cac9e..db3ccb1dd614 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c | |||
| @@ -363,6 +363,14 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |||
| 363 | } | 363 | } |
| 364 | EXPORT_SYMBOL(_raw_spin_lock_nested); | 364 | EXPORT_SYMBOL(_raw_spin_lock_nested); |
| 365 | 365 | ||
| 366 | void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) | ||
| 367 | { | ||
| 368 | __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); | ||
| 369 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | ||
| 370 | LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); | ||
| 371 | } | ||
| 372 | EXPORT_SYMBOL(_raw_spin_lock_bh_nested); | ||
| 373 | |||
| 366 | unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, | 374 | unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, |
| 367 | int subclass) | 375 | int subclass) |
| 368 | { | 376 | { |
diff --git a/kernel/module.c b/kernel/module.c index d856e96a3cce..b34813f725e9 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -56,6 +56,7 @@ | |||
| 56 | #include <linux/async.h> | 56 | #include <linux/async.h> |
| 57 | #include <linux/percpu.h> | 57 | #include <linux/percpu.h> |
| 58 | #include <linux/kmemleak.h> | 58 | #include <linux/kmemleak.h> |
| 59 | #include <linux/kasan.h> | ||
| 59 | #include <linux/jump_label.h> | 60 | #include <linux/jump_label.h> |
| 60 | #include <linux/pfn.h> | 61 | #include <linux/pfn.h> |
| 61 | #include <linux/bsearch.h> | 62 | #include <linux/bsearch.h> |
| @@ -1225,6 +1226,12 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, | |||
| 1225 | const unsigned long *crc; | 1226 | const unsigned long *crc; |
| 1226 | int err; | 1227 | int err; |
| 1227 | 1228 | ||
| 1229 | /* | ||
| 1230 | * The module_mutex should not be a heavily contended lock; | ||
| 1231 | * if we get the occasional sleep here, we'll go an extra iteration | ||
| 1232 | * in the wait_event_interruptible(), which is harmless. | ||
| 1233 | */ | ||
| 1234 | sched_annotate_sleep(); | ||
| 1228 | mutex_lock(&module_mutex); | 1235 | mutex_lock(&module_mutex); |
| 1229 | sym = find_symbol(name, &owner, &crc, | 1236 | sym = find_symbol(name, &owner, &crc, |
| 1230 | !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); | 1237 | !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); |
| @@ -1807,6 +1814,7 @@ static void unset_module_init_ro_nx(struct module *mod) { } | |||
| 1807 | void __weak module_memfree(void *module_region) | 1814 | void __weak module_memfree(void *module_region) |
| 1808 | { | 1815 | { |
| 1809 | vfree(module_region); | 1816 | vfree(module_region); |
| 1817 | kasan_module_free(module_region); | ||
| 1810 | } | 1818 | } |
| 1811 | 1819 | ||
| 1812 | void __weak module_arch_cleanup(struct module *mod) | 1820 | void __weak module_arch_cleanup(struct module *mod) |
| @@ -2978,6 +2986,12 @@ static bool finished_loading(const char *name) | |||
| 2978 | struct module *mod; | 2986 | struct module *mod; |
| 2979 | bool ret; | 2987 | bool ret; |
| 2980 | 2988 | ||
| 2989 | /* | ||
| 2990 | * The module_mutex should not be a heavily contended lock; | ||
| 2991 | * if we get the occasional sleep here, we'll go an extra iteration | ||
| 2992 | * in the wait_event_interruptible(), which is harmless. | ||
| 2993 | */ | ||
| 2994 | sched_annotate_sleep(); | ||
| 2981 | mutex_lock(&module_mutex); | 2995 | mutex_lock(&module_mutex); |
| 2982 | mod = find_module_all(name, strlen(name), true); | 2996 | mod = find_module_all(name, strlen(name), true); |
| 2983 | ret = !mod || mod->state == MODULE_STATE_LIVE | 2997 | ret = !mod || mod->state == MODULE_STATE_LIVE |
| @@ -3011,8 +3025,13 @@ static void do_free_init(struct rcu_head *head) | |||
| 3011 | kfree(m); | 3025 | kfree(m); |
| 3012 | } | 3026 | } |
| 3013 | 3027 | ||
| 3014 | /* This is where the real work happens */ | 3028 | /* |
| 3015 | static int do_init_module(struct module *mod) | 3029 | * This is where the real work happens. |
| 3030 | * | ||
| 3031 | * Keep it uninlined to provide a reliable breakpoint target, e.g. for the gdb | ||
| 3032 | * helper command 'lx-symbols'. | ||
| 3033 | */ | ||
| 3034 | static noinline int do_init_module(struct module *mod) | ||
| 3016 | { | 3035 | { |
| 3017 | int ret = 0; | 3036 | int ret = 0; |
| 3018 | struct mod_initfree *freeinit; | 3037 | struct mod_initfree *freeinit; |
| @@ -3120,32 +3139,6 @@ static int may_init_module(void) | |||
| 3120 | } | 3139 | } |
| 3121 | 3140 | ||
| 3122 | /* | 3141 | /* |
| 3123 | * Can't use wait_event_interruptible() because our condition | ||
| 3124 | * 'finished_loading()' contains a blocking primitive itself (mutex_lock). | ||
| 3125 | */ | ||
| 3126 | static int wait_finished_loading(struct module *mod) | ||
| 3127 | { | ||
| 3128 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | ||
| 3129 | int ret = 0; | ||
| 3130 | |||
| 3131 | add_wait_queue(&module_wq, &wait); | ||
| 3132 | for (;;) { | ||
| 3133 | if (finished_loading(mod->name)) | ||
| 3134 | break; | ||
| 3135 | |||
| 3136 | if (signal_pending(current)) { | ||
| 3137 | ret = -ERESTARTSYS; | ||
| 3138 | break; | ||
| 3139 | } | ||
| 3140 | |||
| 3141 | wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
| 3142 | } | ||
| 3143 | remove_wait_queue(&module_wq, &wait); | ||
| 3144 | |||
| 3145 | return ret; | ||
| 3146 | } | ||
| 3147 | |||
| 3148 | /* | ||
| 3149 | * We try to place it in the list now to make sure it's unique before | 3142 | * We try to place it in the list now to make sure it's unique before |
| 3150 | * we dedicate too many resources. In particular, temporary percpu | 3143 | * we dedicate too many resources. In particular, temporary percpu |
| 3151 | * memory exhaustion. | 3144 | * memory exhaustion. |
| @@ -3165,8 +3158,8 @@ again: | |||
| 3165 | || old->state == MODULE_STATE_UNFORMED) { | 3158 | || old->state == MODULE_STATE_UNFORMED) { |
| 3166 | /* Wait in case it fails to load. */ | 3159 | /* Wait in case it fails to load. */ |
| 3167 | mutex_unlock(&module_mutex); | 3160 | mutex_unlock(&module_mutex); |
| 3168 | 3161 | err = wait_event_interruptible(module_wq, | |
| 3169 | err = wait_finished_loading(mod); | 3162 | finished_loading(mod->name)); |
| 3170 | if (err) | 3163 | if (err) |
| 3171 | goto out_unlocked; | 3164 | goto out_unlocked; |
| 3172 | goto again; | 3165 | goto again; |
| @@ -3265,7 +3258,7 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
| 3265 | mod->sig_ok = info->sig_ok; | 3258 | mod->sig_ok = info->sig_ok; |
| 3266 | if (!mod->sig_ok) { | 3259 | if (!mod->sig_ok) { |
| 3267 | pr_notice_once("%s: module verification failed: signature " | 3260 | pr_notice_once("%s: module verification failed: signature " |
| 3268 | "and/or required key missing - tainting " | 3261 | "and/or required key missing - tainting " |
| 3269 | "kernel\n", mod->name); | 3262 | "kernel\n", mod->name); |
| 3270 | add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); | 3263 | add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); |
| 3271 | } | 3264 | } |
| @@ -3356,6 +3349,9 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
| 3356 | module_bug_cleanup(mod); | 3349 | module_bug_cleanup(mod); |
| 3357 | mutex_unlock(&module_mutex); | 3350 | mutex_unlock(&module_mutex); |
| 3358 | 3351 | ||
| 3352 | /* Free lock-classes: */ | ||
| 3353 | lockdep_free_key_range(mod->module_core, mod->core_size); | ||
| 3354 | |||
| 3359 | /* we can't deallocate the module until we clear memory protection */ | 3355 | /* we can't deallocate the module until we clear memory protection */ |
| 3360 | unset_module_init_ro_nx(mod); | 3356 | unset_module_init_ro_nx(mod); |
| 3361 | unset_module_core_ro_nx(mod); | 3357 | unset_module_core_ro_nx(mod); |
diff --git a/kernel/notifier.c b/kernel/notifier.c index 4803da6eab62..ae9fc7cc360e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
| @@ -402,6 +402,7 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh, | |||
| 402 | } | 402 | } |
| 403 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); | 403 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); |
| 404 | 404 | ||
| 405 | #ifdef CONFIG_SRCU | ||
| 405 | /* | 406 | /* |
| 406 | * SRCU notifier chain routines. Registration and unregistration | 407 | * SRCU notifier chain routines. Registration and unregistration |
| 407 | * use a mutex, and call_chain is synchronized by SRCU (no locks). | 408 | * use a mutex, and call_chain is synchronized by SRCU (no locks). |
| @@ -528,6 +529,8 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh) | |||
| 528 | } | 529 | } |
| 529 | EXPORT_SYMBOL_GPL(srcu_init_notifier_head); | 530 | EXPORT_SYMBOL_GPL(srcu_init_notifier_head); |
| 530 | 531 | ||
| 532 | #endif /* CONFIG_SRCU */ | ||
| 533 | |||
| 531 | static ATOMIC_NOTIFIER_HEAD(die_chain); | 534 | static ATOMIC_NOTIFIER_HEAD(die_chain); |
| 532 | 535 | ||
| 533 | int notrace notify_die(enum die_val val, const char *str, | 536 | int notrace notify_die(enum die_val val, const char *str, |
diff --git a/kernel/padata.c b/kernel/padata.c index 161402f0b517..b38bea9c466a 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
| @@ -917,15 +917,10 @@ static ssize_t show_cpumask(struct padata_instance *pinst, | |||
| 917 | else | 917 | else |
| 918 | cpumask = pinst->cpumask.pcpu; | 918 | cpumask = pinst->cpumask.pcpu; |
| 919 | 919 | ||
| 920 | len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask), | 920 | len = snprintf(buf, PAGE_SIZE, "%*pb\n", |
| 921 | nr_cpu_ids); | 921 | nr_cpu_ids, cpumask_bits(cpumask)); |
| 922 | if (PAGE_SIZE - len < 2) | ||
| 923 | len = -EINVAL; | ||
| 924 | else | ||
| 925 | len += sprintf(buf + len, "\n"); | ||
| 926 | |||
| 927 | mutex_unlock(&pinst->lock); | 922 | mutex_unlock(&pinst->lock); |
| 928 | return len; | 923 | return len < PAGE_SIZE ? len : -EINVAL; |
| 929 | } | 924 | } |
| 930 | 925 | ||
| 931 | static ssize_t store_cpumask(struct padata_instance *pinst, | 926 | static ssize_t store_cpumask(struct padata_instance *pinst, |
diff --git a/kernel/panic.c b/kernel/panic.c index 4d8d6f906dec..8136ad76e5fd 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -226,6 +226,7 @@ static const struct tnt tnts[] = { | |||
| 226 | { TAINT_OOT_MODULE, 'O', ' ' }, | 226 | { TAINT_OOT_MODULE, 'O', ' ' }, |
| 227 | { TAINT_UNSIGNED_MODULE, 'E', ' ' }, | 227 | { TAINT_UNSIGNED_MODULE, 'E', ' ' }, |
| 228 | { TAINT_SOFTLOCKUP, 'L', ' ' }, | 228 | { TAINT_SOFTLOCKUP, 'L', ' ' }, |
| 229 | { TAINT_LIVEPATCH, 'K', ' ' }, | ||
| 229 | }; | 230 | }; |
| 230 | 231 | ||
| 231 | /** | 232 | /** |
| @@ -246,6 +247,7 @@ static const struct tnt tnts[] = { | |||
| 246 | * 'O' - Out-of-tree module has been loaded. | 247 | * 'O' - Out-of-tree module has been loaded. |
| 247 | * 'E' - Unsigned module has been loaded. | 248 | * 'E' - Unsigned module has been loaded. |
| 248 | * 'L' - A soft lockup has previously occurred. | 249 | * 'L' - A soft lockup has previously occurred. |
| 250 | * 'K' - Kernel has been live patched. | ||
| 249 | * | 251 | * |
| 250 | * The string is overwritten by the next call to print_tainted(). | 252 | * The string is overwritten by the next call to print_tainted(). |
| 251 | */ | 253 | */ |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 48b28d387c7f..7e01f78f0417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
| @@ -251,6 +251,7 @@ config APM_EMULATION | |||
| 251 | 251 | ||
| 252 | config PM_OPP | 252 | config PM_OPP |
| 253 | bool | 253 | bool |
| 254 | select SRCU | ||
| 254 | ---help--- | 255 | ---help--- |
| 255 | SOCs have a standard set of tuples consisting of frequency and | 256 | SOCs have a standard set of tuples consisting of frequency and |
| 256 | voltage pairs that the device will support per voltage domain. This | 257 | voltage pairs that the device will support per voltage domain. This |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 5a6ec8678b9a..564f786df470 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -84,8 +84,8 @@ static int try_to_freeze_tasks(bool user_only) | |||
| 84 | elapsed_msecs = elapsed_msecs64; | 84 | elapsed_msecs = elapsed_msecs64; |
| 85 | 85 | ||
| 86 | if (todo) { | 86 | if (todo) { |
| 87 | printk("\n"); | 87 | pr_cont("\n"); |
| 88 | printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds " | 88 | pr_err("Freezing of tasks %s after %d.%03d seconds " |
| 89 | "(%d tasks refusing to freeze, wq_busy=%d):\n", | 89 | "(%d tasks refusing to freeze, wq_busy=%d):\n", |
| 90 | wakeup ? "aborted" : "failed", | 90 | wakeup ? "aborted" : "failed", |
| 91 | elapsed_msecs / 1000, elapsed_msecs % 1000, | 91 | elapsed_msecs / 1000, elapsed_msecs % 1000, |
| @@ -101,37 +101,13 @@ static int try_to_freeze_tasks(bool user_only) | |||
| 101 | read_unlock(&tasklist_lock); | 101 | read_unlock(&tasklist_lock); |
| 102 | } | 102 | } |
| 103 | } else { | 103 | } else { |
| 104 | printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, | 104 | pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, |
| 105 | elapsed_msecs % 1000); | 105 | elapsed_msecs % 1000); |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | return todo ? -EBUSY : 0; | 108 | return todo ? -EBUSY : 0; |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | static bool __check_frozen_processes(void) | ||
| 112 | { | ||
| 113 | struct task_struct *g, *p; | ||
| 114 | |||
| 115 | for_each_process_thread(g, p) | ||
| 116 | if (p != current && !freezer_should_skip(p) && !frozen(p)) | ||
| 117 | return false; | ||
| 118 | |||
| 119 | return true; | ||
| 120 | } | ||
| 121 | |||
| 122 | /* | ||
| 123 | * Returns true if all freezable tasks (except for current) are frozen already | ||
| 124 | */ | ||
| 125 | static bool check_frozen_processes(void) | ||
| 126 | { | ||
| 127 | bool ret; | ||
| 128 | |||
| 129 | read_lock(&tasklist_lock); | ||
| 130 | ret = __check_frozen_processes(); | ||
| 131 | read_unlock(&tasklist_lock); | ||
| 132 | return ret; | ||
| 133 | } | ||
| 134 | |||
| 135 | /** | 111 | /** |
| 136 | * freeze_processes - Signal user space processes to enter the refrigerator. | 112 | * freeze_processes - Signal user space processes to enter the refrigerator. |
| 137 | * The current thread will not be frozen. The same process that calls | 113 | * The current thread will not be frozen. The same process that calls |
| @@ -142,7 +118,6 @@ static bool check_frozen_processes(void) | |||
| 142 | int freeze_processes(void) | 118 | int freeze_processes(void) |
| 143 | { | 119 | { |
| 144 | int error; | 120 | int error; |
| 145 | int oom_kills_saved; | ||
| 146 | 121 | ||
| 147 | error = __usermodehelper_disable(UMH_FREEZING); | 122 | error = __usermodehelper_disable(UMH_FREEZING); |
| 148 | if (error) | 123 | if (error) |
| @@ -155,31 +130,24 @@ int freeze_processes(void) | |||
| 155 | atomic_inc(&system_freezing_cnt); | 130 | atomic_inc(&system_freezing_cnt); |
| 156 | 131 | ||
| 157 | pm_wakeup_clear(); | 132 | pm_wakeup_clear(); |
| 158 | printk("Freezing user space processes ... "); | 133 | pr_info("Freezing user space processes ... "); |
| 159 | pm_freezing = true; | 134 | pm_freezing = true; |
| 160 | oom_kills_saved = oom_kills_count(); | ||
| 161 | error = try_to_freeze_tasks(true); | 135 | error = try_to_freeze_tasks(true); |
| 162 | if (!error) { | 136 | if (!error) { |
| 163 | __usermodehelper_set_disable_depth(UMH_DISABLED); | 137 | __usermodehelper_set_disable_depth(UMH_DISABLED); |
| 164 | oom_killer_disable(); | 138 | pr_cont("done."); |
| 165 | |||
| 166 | /* | ||
| 167 | * There might have been an OOM kill while we were | ||
| 168 | * freezing tasks and the killed task might be still | ||
| 169 | * on the way out so we have to double check for race. | ||
| 170 | */ | ||
| 171 | if (oom_kills_count() != oom_kills_saved && | ||
| 172 | !check_frozen_processes()) { | ||
| 173 | __usermodehelper_set_disable_depth(UMH_ENABLED); | ||
| 174 | printk("OOM in progress."); | ||
| 175 | error = -EBUSY; | ||
| 176 | } else { | ||
| 177 | printk("done."); | ||
| 178 | } | ||
| 179 | } | 139 | } |
| 180 | printk("\n"); | 140 | pr_cont("\n"); |
| 181 | BUG_ON(in_atomic()); | 141 | BUG_ON(in_atomic()); |
| 182 | 142 | ||
| 143 | /* | ||
| 144 | * Now that the whole userspace is frozen we need to disable | ||
| 145 | * the OOM killer to disallow any further interference with | ||
| 146 | * killable tasks. | ||
| 147 | */ | ||
| 148 | if (!error && !oom_killer_disable()) | ||
| 149 | error = -EBUSY; | ||
| 150 | |||
| 183 | if (error) | 151 | if (error) |
| 184 | thaw_processes(); | 152 | thaw_processes(); |
| 185 | return error; | 153 | return error; |
| @@ -197,13 +165,14 @@ int freeze_kernel_threads(void) | |||
| 197 | { | 165 | { |
| 198 | int error; | 166 | int error; |
| 199 | 167 | ||
| 200 | printk("Freezing remaining freezable tasks ... "); | 168 | pr_info("Freezing remaining freezable tasks ... "); |
| 169 | |||
| 201 | pm_nosig_freezing = true; | 170 | pm_nosig_freezing = true; |
| 202 | error = try_to_freeze_tasks(false); | 171 | error = try_to_freeze_tasks(false); |
| 203 | if (!error) | 172 | if (!error) |
| 204 | printk("done."); | 173 | pr_cont("done."); |
| 205 | 174 | ||
| 206 | printk("\n"); | 175 | pr_cont("\n"); |
| 207 | BUG_ON(in_atomic()); | 176 | BUG_ON(in_atomic()); |
| 208 | 177 | ||
| 209 | if (error) | 178 | if (error) |
| @@ -224,7 +193,7 @@ void thaw_processes(void) | |||
| 224 | 193 | ||
| 225 | oom_killer_enable(); | 194 | oom_killer_enable(); |
| 226 | 195 | ||
| 227 | printk("Restarting tasks ... "); | 196 | pr_info("Restarting tasks ... "); |
| 228 | 197 | ||
| 229 | __usermodehelper_set_disable_depth(UMH_FREEZING); | 198 | __usermodehelper_set_disable_depth(UMH_FREEZING); |
| 230 | thaw_workqueues(); | 199 | thaw_workqueues(); |
| @@ -243,7 +212,7 @@ void thaw_processes(void) | |||
| 243 | usermodehelper_enable(); | 212 | usermodehelper_enable(); |
| 244 | 213 | ||
| 245 | schedule(); | 214 | schedule(); |
| 246 | printk("done.\n"); | 215 | pr_cont("done.\n"); |
| 247 | trace_suspend_resume(TPS("thaw_processes"), 0, false); | 216 | trace_suspend_resume(TPS("thaw_processes"), 0, false); |
| 248 | } | 217 | } |
| 249 | 218 | ||
| @@ -252,7 +221,7 @@ void thaw_kernel_threads(void) | |||
| 252 | struct task_struct *g, *p; | 221 | struct task_struct *g, *p; |
| 253 | 222 | ||
| 254 | pm_nosig_freezing = false; | 223 | pm_nosig_freezing = false; |
| 255 | printk("Restarting kernel threads ... "); | 224 | pr_info("Restarting kernel threads ... "); |
| 256 | 225 | ||
| 257 | thaw_workqueues(); | 226 | thaw_workqueues(); |
| 258 | 227 | ||
| @@ -264,5 +233,5 @@ void thaw_kernel_threads(void) | |||
| 264 | read_unlock(&tasklist_lock); | 233 | read_unlock(&tasklist_lock); |
| 265 | 234 | ||
| 266 | schedule(); | 235 | schedule(); |
| 267 | printk("done.\n"); | 236 | pr_cont("done.\n"); |
| 268 | } | 237 | } |
diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 5f4c006c4b1e..97b0df71303e 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c | |||
| @@ -41,6 +41,8 @@ | |||
| 41 | #include <linux/platform_device.h> | 41 | #include <linux/platform_device.h> |
| 42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
| 43 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
| 44 | #include <linux/debugfs.h> | ||
| 45 | #include <linux/seq_file.h> | ||
| 44 | 46 | ||
| 45 | #include <linux/uaccess.h> | 47 | #include <linux/uaccess.h> |
| 46 | #include <linux/export.h> | 48 | #include <linux/export.h> |
| @@ -182,6 +184,81 @@ static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) | |||
| 182 | c->target_value = value; | 184 | c->target_value = value; |
| 183 | } | 185 | } |
| 184 | 186 | ||
| 187 | static inline int pm_qos_get_value(struct pm_qos_constraints *c); | ||
| 188 | static int pm_qos_dbg_show_requests(struct seq_file *s, void *unused) | ||
| 189 | { | ||
| 190 | struct pm_qos_object *qos = (struct pm_qos_object *)s->private; | ||
| 191 | struct pm_qos_constraints *c; | ||
| 192 | struct pm_qos_request *req; | ||
| 193 | char *type; | ||
| 194 | unsigned long flags; | ||
| 195 | int tot_reqs = 0; | ||
| 196 | int active_reqs = 0; | ||
| 197 | |||
| 198 | if (IS_ERR_OR_NULL(qos)) { | ||
| 199 | pr_err("%s: bad qos param!\n", __func__); | ||
| 200 | return -EINVAL; | ||
| 201 | } | ||
| 202 | c = qos->constraints; | ||
| 203 | if (IS_ERR_OR_NULL(c)) { | ||
| 204 | pr_err("%s: Bad constraints on qos?\n", __func__); | ||
| 205 | return -EINVAL; | ||
| 206 | } | ||
| 207 | |||
| 208 | /* Lock to ensure we have a snapshot */ | ||
| 209 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
| 210 | if (plist_head_empty(&c->list)) { | ||
| 211 | seq_puts(s, "Empty!\n"); | ||
| 212 | goto out; | ||
| 213 | } | ||
| 214 | |||
| 215 | switch (c->type) { | ||
| 216 | case PM_QOS_MIN: | ||
| 217 | type = "Minimum"; | ||
| 218 | break; | ||
| 219 | case PM_QOS_MAX: | ||
| 220 | type = "Maximum"; | ||
| 221 | break; | ||
| 222 | case PM_QOS_SUM: | ||
| 223 | type = "Sum"; | ||
| 224 | break; | ||
| 225 | default: | ||
| 226 | type = "Unknown"; | ||
| 227 | } | ||
| 228 | |||
| 229 | plist_for_each_entry(req, &c->list, node) { | ||
| 230 | char *state = "Default"; | ||
| 231 | |||
| 232 | if ((req->node).prio != c->default_value) { | ||
| 233 | active_reqs++; | ||
| 234 | state = "Active"; | ||
| 235 | } | ||
| 236 | tot_reqs++; | ||
| 237 | seq_printf(s, "%d: %d: %s\n", tot_reqs, | ||
| 238 | (req->node).prio, state); | ||
| 239 | } | ||
| 240 | |||
| 241 | seq_printf(s, "Type=%s, Value=%d, Requests: active=%d / total=%d\n", | ||
| 242 | type, pm_qos_get_value(c), active_reqs, tot_reqs); | ||
| 243 | |||
| 244 | out: | ||
| 245 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
| 246 | return 0; | ||
| 247 | } | ||
| 248 | |||
| 249 | static int pm_qos_dbg_open(struct inode *inode, struct file *file) | ||
| 250 | { | ||
| 251 | return single_open(file, pm_qos_dbg_show_requests, | ||
| 252 | inode->i_private); | ||
| 253 | } | ||
| 254 | |||
| 255 | static const struct file_operations pm_qos_debug_fops = { | ||
| 256 | .open = pm_qos_dbg_open, | ||
| 257 | .read = seq_read, | ||
| 258 | .llseek = seq_lseek, | ||
| 259 | .release = single_release, | ||
| 260 | }; | ||
| 261 | |||
| 185 | /** | 262 | /** |
| 186 | * pm_qos_update_target - manages the constraints list and calls the notifiers | 263 | * pm_qos_update_target - manages the constraints list and calls the notifiers |
| 187 | * if needed | 264 | * if needed |
| @@ -509,12 +586,17 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) | |||
| 509 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); | 586 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); |
| 510 | 587 | ||
| 511 | /* User space interface to PM QoS classes via misc devices */ | 588 | /* User space interface to PM QoS classes via misc devices */ |
| 512 | static int register_pm_qos_misc(struct pm_qos_object *qos) | 589 | static int register_pm_qos_misc(struct pm_qos_object *qos, struct dentry *d) |
| 513 | { | 590 | { |
| 514 | qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; | 591 | qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; |
| 515 | qos->pm_qos_power_miscdev.name = qos->name; | 592 | qos->pm_qos_power_miscdev.name = qos->name; |
| 516 | qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; | 593 | qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; |
| 517 | 594 | ||
| 595 | if (d) { | ||
| 596 | (void)debugfs_create_file(qos->name, S_IRUGO, d, | ||
| 597 | (void *)qos, &pm_qos_debug_fops); | ||
| 598 | } | ||
| 599 | |||
| 518 | return misc_register(&qos->pm_qos_power_miscdev); | 600 | return misc_register(&qos->pm_qos_power_miscdev); |
| 519 | } | 601 | } |
| 520 | 602 | ||
| @@ -608,11 +690,16 @@ static int __init pm_qos_power_init(void) | |||
| 608 | { | 690 | { |
| 609 | int ret = 0; | 691 | int ret = 0; |
| 610 | int i; | 692 | int i; |
| 693 | struct dentry *d; | ||
| 611 | 694 | ||
| 612 | BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); | 695 | BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); |
| 613 | 696 | ||
| 697 | d = debugfs_create_dir("pm_qos", NULL); | ||
| 698 | if (IS_ERR_OR_NULL(d)) | ||
| 699 | d = NULL; | ||
| 700 | |||
| 614 | for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) { | 701 | for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) { |
| 615 | ret = register_pm_qos_misc(pm_qos_array[i]); | 702 | ret = register_pm_qos_misc(pm_qos_array[i], d); |
| 616 | if (ret < 0) { | 703 | if (ret < 0) { |
| 617 | printk(KERN_ERR "pm_qos_param: %s setup failed\n", | 704 | printk(KERN_ERR "pm_qos_param: %s setup failed\n", |
| 618 | pm_qos_array[i]->name); | 705 | pm_qos_array[i]->name); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0c40c16174b4..c24d5a23bf93 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -1472,9 +1472,9 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, | |||
| 1472 | /** | 1472 | /** |
| 1473 | * free_unnecessary_pages - Release preallocated pages not needed for the image | 1473 | * free_unnecessary_pages - Release preallocated pages not needed for the image |
| 1474 | */ | 1474 | */ |
| 1475 | static void free_unnecessary_pages(void) | 1475 | static unsigned long free_unnecessary_pages(void) |
| 1476 | { | 1476 | { |
| 1477 | unsigned long save, to_free_normal, to_free_highmem; | 1477 | unsigned long save, to_free_normal, to_free_highmem, free; |
| 1478 | 1478 | ||
| 1479 | save = count_data_pages(); | 1479 | save = count_data_pages(); |
| 1480 | if (alloc_normal >= save) { | 1480 | if (alloc_normal >= save) { |
| @@ -1495,6 +1495,7 @@ static void free_unnecessary_pages(void) | |||
| 1495 | else | 1495 | else |
| 1496 | to_free_normal = 0; | 1496 | to_free_normal = 0; |
| 1497 | } | 1497 | } |
| 1498 | free = to_free_normal + to_free_highmem; | ||
| 1498 | 1499 | ||
| 1499 | memory_bm_position_reset(©_bm); | 1500 | memory_bm_position_reset(©_bm); |
| 1500 | 1501 | ||
| @@ -1518,6 +1519,8 @@ static void free_unnecessary_pages(void) | |||
| 1518 | swsusp_unset_page_free(page); | 1519 | swsusp_unset_page_free(page); |
| 1519 | __free_page(page); | 1520 | __free_page(page); |
| 1520 | } | 1521 | } |
| 1522 | |||
| 1523 | return free; | ||
| 1521 | } | 1524 | } |
| 1522 | 1525 | ||
| 1523 | /** | 1526 | /** |
| @@ -1707,7 +1710,7 @@ int hibernate_preallocate_memory(void) | |||
| 1707 | * pages in memory, but we have allocated more. Release the excessive | 1710 | * pages in memory, but we have allocated more. Release the excessive |
| 1708 | * ones now. | 1711 | * ones now. |
| 1709 | */ | 1712 | */ |
| 1710 | free_unnecessary_pages(); | 1713 | pages -= free_unnecessary_pages(); |
| 1711 | 1714 | ||
| 1712 | out: | 1715 | out: |
| 1713 | stop = ktime_get(); | 1716 | stop = ktime_get(); |
| @@ -2310,8 +2313,6 @@ static inline void free_highmem_data(void) | |||
| 2310 | free_image_page(buffer, PG_UNSAFE_CLEAR); | 2313 | free_image_page(buffer, PG_UNSAFE_CLEAR); |
| 2311 | } | 2314 | } |
| 2312 | #else | 2315 | #else |
| 2313 | static inline int get_safe_write_buffer(void) { return 0; } | ||
| 2314 | |||
| 2315 | static unsigned int | 2316 | static unsigned int |
| 2316 | count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } | 2317 | count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } |
| 2317 | 2318 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c347e3ce3a55..b7d6b3a721b1 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
| @@ -37,7 +37,9 @@ const char *pm_states[PM_SUSPEND_MAX]; | |||
| 37 | static const struct platform_suspend_ops *suspend_ops; | 37 | static const struct platform_suspend_ops *suspend_ops; |
| 38 | static const struct platform_freeze_ops *freeze_ops; | 38 | static const struct platform_freeze_ops *freeze_ops; |
| 39 | static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); | 39 | static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); |
| 40 | static bool suspend_freeze_wake; | 40 | |
| 41 | enum freeze_state __read_mostly suspend_freeze_state; | ||
| 42 | static DEFINE_SPINLOCK(suspend_freeze_lock); | ||
| 41 | 43 | ||
| 42 | void freeze_set_ops(const struct platform_freeze_ops *ops) | 44 | void freeze_set_ops(const struct platform_freeze_ops *ops) |
| 43 | { | 45 | { |
| @@ -48,22 +50,49 @@ void freeze_set_ops(const struct platform_freeze_ops *ops) | |||
| 48 | 50 | ||
| 49 | static void freeze_begin(void) | 51 | static void freeze_begin(void) |
| 50 | { | 52 | { |
| 51 | suspend_freeze_wake = false; | 53 | suspend_freeze_state = FREEZE_STATE_NONE; |
| 52 | } | 54 | } |
| 53 | 55 | ||
| 54 | static void freeze_enter(void) | 56 | static void freeze_enter(void) |
| 55 | { | 57 | { |
| 56 | cpuidle_use_deepest_state(true); | 58 | spin_lock_irq(&suspend_freeze_lock); |
| 59 | if (pm_wakeup_pending()) | ||
| 60 | goto out; | ||
| 61 | |||
| 62 | suspend_freeze_state = FREEZE_STATE_ENTER; | ||
| 63 | spin_unlock_irq(&suspend_freeze_lock); | ||
| 64 | |||
| 65 | get_online_cpus(); | ||
| 57 | cpuidle_resume(); | 66 | cpuidle_resume(); |
| 58 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); | 67 | |
| 68 | /* Push all the CPUs into the idle loop. */ | ||
| 69 | wake_up_all_idle_cpus(); | ||
| 70 | pr_debug("PM: suspend-to-idle\n"); | ||
| 71 | /* Make the current CPU wait so it can enter the idle loop too. */ | ||
| 72 | wait_event(suspend_freeze_wait_head, | ||
| 73 | suspend_freeze_state == FREEZE_STATE_WAKE); | ||
| 74 | pr_debug("PM: resume from suspend-to-idle\n"); | ||
| 75 | |||
| 59 | cpuidle_pause(); | 76 | cpuidle_pause(); |
| 60 | cpuidle_use_deepest_state(false); | 77 | put_online_cpus(); |
| 78 | |||
| 79 | spin_lock_irq(&suspend_freeze_lock); | ||
| 80 | |||
| 81 | out: | ||
| 82 | suspend_freeze_state = FREEZE_STATE_NONE; | ||
| 83 | spin_unlock_irq(&suspend_freeze_lock); | ||
| 61 | } | 84 | } |
| 62 | 85 | ||
| 63 | void freeze_wake(void) | 86 | void freeze_wake(void) |
| 64 | { | 87 | { |
| 65 | suspend_freeze_wake = true; | 88 | unsigned long flags; |
| 66 | wake_up(&suspend_freeze_wait_head); | 89 | |
| 90 | spin_lock_irqsave(&suspend_freeze_lock, flags); | ||
| 91 | if (suspend_freeze_state > FREEZE_STATE_NONE) { | ||
| 92 | suspend_freeze_state = FREEZE_STATE_WAKE; | ||
| 93 | wake_up(&suspend_freeze_wait_head); | ||
| 94 | } | ||
| 95 | spin_unlock_irqrestore(&suspend_freeze_lock, flags); | ||
| 67 | } | 96 | } |
| 68 | EXPORT_SYMBOL_GPL(freeze_wake); | 97 | EXPORT_SYMBOL_GPL(freeze_wake); |
| 69 | 98 | ||
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 02d6b6d28796..01cfd69c54c6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
| @@ -935,8 +935,8 @@ static int __init ignore_loglevel_setup(char *str) | |||
| 935 | 935 | ||
| 936 | early_param("ignore_loglevel", ignore_loglevel_setup); | 936 | early_param("ignore_loglevel", ignore_loglevel_setup); |
| 937 | module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); | 937 | module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); |
| 938 | MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | 938 | MODULE_PARM_DESC(ignore_loglevel, |
| 939 | "print all kernel messages to the console."); | 939 | "ignore loglevel setting (prints all kernel messages to the console)"); |
| 940 | 940 | ||
| 941 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 941 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
| 942 | 942 | ||
| @@ -1419,16 +1419,16 @@ static void call_console_drivers(int level, const char *text, size_t len) | |||
| 1419 | } | 1419 | } |
| 1420 | 1420 | ||
| 1421 | /* | 1421 | /* |
| 1422 | * Zap console related locks when oopsing. Only zap at most once | 1422 | * Zap console related locks when oopsing. |
| 1423 | * every 10 seconds, to leave time for slow consoles to print a | 1423 | * To leave time for slow consoles to print a full oops, |
| 1424 | * full oops. | 1424 | * only zap at most once every 30 seconds. |
| 1425 | */ | 1425 | */ |
| 1426 | static void zap_locks(void) | 1426 | static void zap_locks(void) |
| 1427 | { | 1427 | { |
| 1428 | static unsigned long oops_timestamp; | 1428 | static unsigned long oops_timestamp; |
| 1429 | 1429 | ||
| 1430 | if (time_after_eq(jiffies, oops_timestamp) && | 1430 | if (time_after_eq(jiffies, oops_timestamp) && |
| 1431 | !time_after(jiffies, oops_timestamp + 30 * HZ)) | 1431 | !time_after(jiffies, oops_timestamp + 30 * HZ)) |
| 1432 | return; | 1432 | return; |
| 1433 | 1433 | ||
| 1434 | oops_timestamp = jiffies; | 1434 | oops_timestamp = jiffies; |
| @@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args) | |||
| 1811 | 1811 | ||
| 1812 | #ifdef CONFIG_KGDB_KDB | 1812 | #ifdef CONFIG_KGDB_KDB |
| 1813 | if (unlikely(kdb_trap_printk)) { | 1813 | if (unlikely(kdb_trap_printk)) { |
| 1814 | r = vkdb_printf(fmt, args); | 1814 | r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); |
| 1815 | return r; | 1815 | return r; |
| 1816 | } | 1816 | } |
| 1817 | #endif | 1817 | #endif |
diff --git a/kernel/profile.c b/kernel/profile.c index 54bf5ba26420..a7bcd28d6e9f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
| @@ -422,8 +422,7 @@ void profile_tick(int type) | |||
| 422 | 422 | ||
| 423 | static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) | 423 | static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) |
| 424 | { | 424 | { |
| 425 | seq_cpumask(m, prof_cpu_mask); | 425 | seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); |
| 426 | seq_putc(m, '\n'); | ||
| 427 | return 0; | 426 | return 0; |
| 428 | } | 427 | } |
| 429 | 428 | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1eb9d90c3af9..227fec36b12a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -1077,7 +1077,6 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, | |||
| 1077 | } | 1077 | } |
| 1078 | 1078 | ||
| 1079 | #if defined CONFIG_COMPAT | 1079 | #if defined CONFIG_COMPAT |
| 1080 | #include <linux/compat.h> | ||
| 1081 | 1080 | ||
| 1082 | int compat_ptrace_request(struct task_struct *child, compat_long_t request, | 1081 | int compat_ptrace_request(struct task_struct *child, compat_long_t request, |
| 1083 | compat_ulong_t addr, compat_ulong_t data) | 1082 | compat_ulong_t addr, compat_ulong_t data) |
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index e6fae503d1bc..50a808424b06 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | obj-y += update.o srcu.o | 1 | obj-y += update.o |
| 2 | obj-$(CONFIG_SRCU) += srcu.o | ||
| 2 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 3 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
| 3 | obj-$(CONFIG_TREE_RCU) += tree.o | 4 | obj-$(CONFIG_TREE_RCU) += tree.o |
| 4 | obj-$(CONFIG_PREEMPT_RCU) += tree.o | 5 | obj-$(CONFIG_PREEMPT_RCU) += tree.o |
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 07bb02eda844..80adef7d4c3d 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h | |||
| @@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void); | |||
| 137 | 137 | ||
| 138 | void rcu_early_boot_tests(void); | 138 | void rcu_early_boot_tests(void); |
| 139 | 139 | ||
| 140 | /* | ||
| 141 | * This function really isn't for public consumption, but RCU is special in | ||
| 142 | * that context switches can allow the state machine to make progress. | ||
| 143 | */ | ||
| 144 | extern void resched_cpu(int cpu); | ||
| 145 | |||
| 140 | #endif /* __LINUX_RCU_H */ | 146 | #endif /* __LINUX_RCU_H */ |
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 4d559baf06e0..30d42aa55d83 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c | |||
| @@ -244,7 +244,8 @@ struct rcu_torture_ops { | |||
| 244 | int (*readlock)(void); | 244 | int (*readlock)(void); |
| 245 | void (*read_delay)(struct torture_random_state *rrsp); | 245 | void (*read_delay)(struct torture_random_state *rrsp); |
| 246 | void (*readunlock)(int idx); | 246 | void (*readunlock)(int idx); |
| 247 | int (*completed)(void); | 247 | unsigned long (*started)(void); |
| 248 | unsigned long (*completed)(void); | ||
| 248 | void (*deferred_free)(struct rcu_torture *p); | 249 | void (*deferred_free)(struct rcu_torture *p); |
| 249 | void (*sync)(void); | 250 | void (*sync)(void); |
| 250 | void (*exp_sync)(void); | 251 | void (*exp_sync)(void); |
| @@ -296,11 +297,6 @@ static void rcu_torture_read_unlock(int idx) __releases(RCU) | |||
| 296 | rcu_read_unlock(); | 297 | rcu_read_unlock(); |
| 297 | } | 298 | } |
| 298 | 299 | ||
| 299 | static int rcu_torture_completed(void) | ||
| 300 | { | ||
| 301 | return rcu_batches_completed(); | ||
| 302 | } | ||
| 303 | |||
| 304 | /* | 300 | /* |
| 305 | * Update callback in the pipe. This should be invoked after a grace period. | 301 | * Update callback in the pipe. This should be invoked after a grace period. |
| 306 | */ | 302 | */ |
| @@ -356,7 +352,7 @@ rcu_torture_cb(struct rcu_head *p) | |||
| 356 | cur_ops->deferred_free(rp); | 352 | cur_ops->deferred_free(rp); |
| 357 | } | 353 | } |
| 358 | 354 | ||
| 359 | static int rcu_no_completed(void) | 355 | static unsigned long rcu_no_completed(void) |
| 360 | { | 356 | { |
| 361 | return 0; | 357 | return 0; |
| 362 | } | 358 | } |
| @@ -377,7 +373,8 @@ static struct rcu_torture_ops rcu_ops = { | |||
| 377 | .readlock = rcu_torture_read_lock, | 373 | .readlock = rcu_torture_read_lock, |
| 378 | .read_delay = rcu_read_delay, | 374 | .read_delay = rcu_read_delay, |
| 379 | .readunlock = rcu_torture_read_unlock, | 375 | .readunlock = rcu_torture_read_unlock, |
| 380 | .completed = rcu_torture_completed, | 376 | .started = rcu_batches_started, |
| 377 | .completed = rcu_batches_completed, | ||
| 381 | .deferred_free = rcu_torture_deferred_free, | 378 | .deferred_free = rcu_torture_deferred_free, |
| 382 | .sync = synchronize_rcu, | 379 | .sync = synchronize_rcu, |
| 383 | .exp_sync = synchronize_rcu_expedited, | 380 | .exp_sync = synchronize_rcu_expedited, |
| @@ -407,11 +404,6 @@ static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH) | |||
| 407 | rcu_read_unlock_bh(); | 404 | rcu_read_unlock_bh(); |
| 408 | } | 405 | } |
| 409 | 406 | ||
| 410 | static int rcu_bh_torture_completed(void) | ||
| 411 | { | ||
| 412 | return rcu_batches_completed_bh(); | ||
| 413 | } | ||
| 414 | |||
| 415 | static void rcu_bh_torture_deferred_free(struct rcu_torture *p) | 407 | static void rcu_bh_torture_deferred_free(struct rcu_torture *p) |
| 416 | { | 408 | { |
| 417 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); | 409 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); |
| @@ -423,7 +415,8 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
| 423 | .readlock = rcu_bh_torture_read_lock, | 415 | .readlock = rcu_bh_torture_read_lock, |
| 424 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 416 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
| 425 | .readunlock = rcu_bh_torture_read_unlock, | 417 | .readunlock = rcu_bh_torture_read_unlock, |
| 426 | .completed = rcu_bh_torture_completed, | 418 | .started = rcu_batches_started_bh, |
| 419 | .completed = rcu_batches_completed_bh, | ||
| 427 | .deferred_free = rcu_bh_torture_deferred_free, | 420 | .deferred_free = rcu_bh_torture_deferred_free, |
| 428 | .sync = synchronize_rcu_bh, | 421 | .sync = synchronize_rcu_bh, |
| 429 | .exp_sync = synchronize_rcu_bh_expedited, | 422 | .exp_sync = synchronize_rcu_bh_expedited, |
| @@ -466,6 +459,7 @@ static struct rcu_torture_ops rcu_busted_ops = { | |||
| 466 | .readlock = rcu_torture_read_lock, | 459 | .readlock = rcu_torture_read_lock, |
| 467 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 460 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
| 468 | .readunlock = rcu_torture_read_unlock, | 461 | .readunlock = rcu_torture_read_unlock, |
| 462 | .started = rcu_no_completed, | ||
| 469 | .completed = rcu_no_completed, | 463 | .completed = rcu_no_completed, |
| 470 | .deferred_free = rcu_busted_torture_deferred_free, | 464 | .deferred_free = rcu_busted_torture_deferred_free, |
| 471 | .sync = synchronize_rcu_busted, | 465 | .sync = synchronize_rcu_busted, |
| @@ -510,7 +504,7 @@ static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) | |||
| 510 | srcu_read_unlock(&srcu_ctl, idx); | 504 | srcu_read_unlock(&srcu_ctl, idx); |
| 511 | } | 505 | } |
| 512 | 506 | ||
| 513 | static int srcu_torture_completed(void) | 507 | static unsigned long srcu_torture_completed(void) |
| 514 | { | 508 | { |
| 515 | return srcu_batches_completed(&srcu_ctl); | 509 | return srcu_batches_completed(&srcu_ctl); |
| 516 | } | 510 | } |
| @@ -564,6 +558,7 @@ static struct rcu_torture_ops srcu_ops = { | |||
| 564 | .readlock = srcu_torture_read_lock, | 558 | .readlock = srcu_torture_read_lock, |
| 565 | .read_delay = srcu_read_delay, | 559 | .read_delay = srcu_read_delay, |
| 566 | .readunlock = srcu_torture_read_unlock, | 560 | .readunlock = srcu_torture_read_unlock, |
| 561 | .started = NULL, | ||
| 567 | .completed = srcu_torture_completed, | 562 | .completed = srcu_torture_completed, |
| 568 | .deferred_free = srcu_torture_deferred_free, | 563 | .deferred_free = srcu_torture_deferred_free, |
| 569 | .sync = srcu_torture_synchronize, | 564 | .sync = srcu_torture_synchronize, |
| @@ -600,7 +595,8 @@ static struct rcu_torture_ops sched_ops = { | |||
| 600 | .readlock = sched_torture_read_lock, | 595 | .readlock = sched_torture_read_lock, |
| 601 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 596 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
| 602 | .readunlock = sched_torture_read_unlock, | 597 | .readunlock = sched_torture_read_unlock, |
| 603 | .completed = rcu_no_completed, | 598 | .started = rcu_batches_started_sched, |
| 599 | .completed = rcu_batches_completed_sched, | ||
| 604 | .deferred_free = rcu_sched_torture_deferred_free, | 600 | .deferred_free = rcu_sched_torture_deferred_free, |
| 605 | .sync = synchronize_sched, | 601 | .sync = synchronize_sched, |
| 606 | .exp_sync = synchronize_sched_expedited, | 602 | .exp_sync = synchronize_sched_expedited, |
| @@ -638,6 +634,7 @@ static struct rcu_torture_ops tasks_ops = { | |||
| 638 | .readlock = tasks_torture_read_lock, | 634 | .readlock = tasks_torture_read_lock, |
| 639 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 635 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
| 640 | .readunlock = tasks_torture_read_unlock, | 636 | .readunlock = tasks_torture_read_unlock, |
| 637 | .started = rcu_no_completed, | ||
| 641 | .completed = rcu_no_completed, | 638 | .completed = rcu_no_completed, |
| 642 | .deferred_free = rcu_tasks_torture_deferred_free, | 639 | .deferred_free = rcu_tasks_torture_deferred_free, |
| 643 | .sync = synchronize_rcu_tasks, | 640 | .sync = synchronize_rcu_tasks, |
| @@ -1015,8 +1012,8 @@ static void rcutorture_trace_dump(void) | |||
| 1015 | static void rcu_torture_timer(unsigned long unused) | 1012 | static void rcu_torture_timer(unsigned long unused) |
| 1016 | { | 1013 | { |
| 1017 | int idx; | 1014 | int idx; |
| 1018 | int completed; | 1015 | unsigned long started; |
| 1019 | int completed_end; | 1016 | unsigned long completed; |
| 1020 | static DEFINE_TORTURE_RANDOM(rand); | 1017 | static DEFINE_TORTURE_RANDOM(rand); |
| 1021 | static DEFINE_SPINLOCK(rand_lock); | 1018 | static DEFINE_SPINLOCK(rand_lock); |
| 1022 | struct rcu_torture *p; | 1019 | struct rcu_torture *p; |
| @@ -1024,7 +1021,10 @@ static void rcu_torture_timer(unsigned long unused) | |||
| 1024 | unsigned long long ts; | 1021 | unsigned long long ts; |
| 1025 | 1022 | ||
| 1026 | idx = cur_ops->readlock(); | 1023 | idx = cur_ops->readlock(); |
| 1027 | completed = cur_ops->completed(); | 1024 | if (cur_ops->started) |
| 1025 | started = cur_ops->started(); | ||
| 1026 | else | ||
| 1027 | started = cur_ops->completed(); | ||
| 1028 | ts = rcu_trace_clock_local(); | 1028 | ts = rcu_trace_clock_local(); |
| 1029 | p = rcu_dereference_check(rcu_torture_current, | 1029 | p = rcu_dereference_check(rcu_torture_current, |
| 1030 | rcu_read_lock_bh_held() || | 1030 | rcu_read_lock_bh_held() || |
| @@ -1047,14 +1047,16 @@ static void rcu_torture_timer(unsigned long unused) | |||
| 1047 | /* Should not happen, but... */ | 1047 | /* Should not happen, but... */ |
| 1048 | pipe_count = RCU_TORTURE_PIPE_LEN; | 1048 | pipe_count = RCU_TORTURE_PIPE_LEN; |
| 1049 | } | 1049 | } |
| 1050 | completed_end = cur_ops->completed(); | 1050 | completed = cur_ops->completed(); |
| 1051 | if (pipe_count > 1) { | 1051 | if (pipe_count > 1) { |
| 1052 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, | 1052 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, |
| 1053 | completed, completed_end); | 1053 | started, completed); |
| 1054 | rcutorture_trace_dump(); | 1054 | rcutorture_trace_dump(); |
| 1055 | } | 1055 | } |
| 1056 | __this_cpu_inc(rcu_torture_count[pipe_count]); | 1056 | __this_cpu_inc(rcu_torture_count[pipe_count]); |
| 1057 | completed = completed_end - completed; | 1057 | completed = completed - started; |
| 1058 | if (cur_ops->started) | ||
| 1059 | completed++; | ||
| 1058 | if (completed > RCU_TORTURE_PIPE_LEN) { | 1060 | if (completed > RCU_TORTURE_PIPE_LEN) { |
| 1059 | /* Should not happen, but... */ | 1061 | /* Should not happen, but... */ |
| 1060 | completed = RCU_TORTURE_PIPE_LEN; | 1062 | completed = RCU_TORTURE_PIPE_LEN; |
| @@ -1073,8 +1075,8 @@ static void rcu_torture_timer(unsigned long unused) | |||
| 1073 | static int | 1075 | static int |
| 1074 | rcu_torture_reader(void *arg) | 1076 | rcu_torture_reader(void *arg) |
| 1075 | { | 1077 | { |
| 1076 | int completed; | 1078 | unsigned long started; |
| 1077 | int completed_end; | 1079 | unsigned long completed; |
| 1078 | int idx; | 1080 | int idx; |
| 1079 | DEFINE_TORTURE_RANDOM(rand); | 1081 | DEFINE_TORTURE_RANDOM(rand); |
| 1080 | struct rcu_torture *p; | 1082 | struct rcu_torture *p; |
| @@ -1093,7 +1095,10 @@ rcu_torture_reader(void *arg) | |||
| 1093 | mod_timer(&t, jiffies + 1); | 1095 | mod_timer(&t, jiffies + 1); |
| 1094 | } | 1096 | } |
| 1095 | idx = cur_ops->readlock(); | 1097 | idx = cur_ops->readlock(); |
| 1096 | completed = cur_ops->completed(); | 1098 | if (cur_ops->started) |
| 1099 | started = cur_ops->started(); | ||
| 1100 | else | ||
| 1101 | started = cur_ops->completed(); | ||
| 1097 | ts = rcu_trace_clock_local(); | 1102 | ts = rcu_trace_clock_local(); |
| 1098 | p = rcu_dereference_check(rcu_torture_current, | 1103 | p = rcu_dereference_check(rcu_torture_current, |
| 1099 | rcu_read_lock_bh_held() || | 1104 | rcu_read_lock_bh_held() || |
| @@ -1114,14 +1119,16 @@ rcu_torture_reader(void *arg) | |||
| 1114 | /* Should not happen, but... */ | 1119 | /* Should not happen, but... */ |
| 1115 | pipe_count = RCU_TORTURE_PIPE_LEN; | 1120 | pipe_count = RCU_TORTURE_PIPE_LEN; |
| 1116 | } | 1121 | } |
| 1117 | completed_end = cur_ops->completed(); | 1122 | completed = cur_ops->completed(); |
| 1118 | if (pipe_count > 1) { | 1123 | if (pipe_count > 1) { |
| 1119 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, | 1124 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, |
| 1120 | ts, completed, completed_end); | 1125 | ts, started, completed); |
| 1121 | rcutorture_trace_dump(); | 1126 | rcutorture_trace_dump(); |
| 1122 | } | 1127 | } |
| 1123 | __this_cpu_inc(rcu_torture_count[pipe_count]); | 1128 | __this_cpu_inc(rcu_torture_count[pipe_count]); |
| 1124 | completed = completed_end - completed; | 1129 | completed = completed - started; |
| 1130 | if (cur_ops->started) | ||
| 1131 | completed++; | ||
| 1125 | if (completed > RCU_TORTURE_PIPE_LEN) { | 1132 | if (completed > RCU_TORTURE_PIPE_LEN) { |
| 1126 | /* Should not happen, but... */ | 1133 | /* Should not happen, but... */ |
| 1127 | completed = RCU_TORTURE_PIPE_LEN; | 1134 | completed = RCU_TORTURE_PIPE_LEN; |
| @@ -1420,6 +1427,9 @@ static int rcu_torture_barrier(void *arg) | |||
| 1420 | cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ | 1427 | cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ |
| 1421 | if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { | 1428 | if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { |
| 1422 | n_rcu_torture_barrier_error++; | 1429 | n_rcu_torture_barrier_error++; |
| 1430 | pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n", | ||
| 1431 | atomic_read(&barrier_cbs_invoked), | ||
| 1432 | n_barrier_cbs); | ||
| 1423 | WARN_ON_ONCE(1); | 1433 | WARN_ON_ONCE(1); |
| 1424 | } | 1434 | } |
| 1425 | n_barrier_successes++; | 1435 | n_barrier_successes++; |
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index e037f3eb2f7b..445bf8ffe3fb 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c | |||
| @@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier); | |||
| 546 | * Report the number of batches, correlated with, but not necessarily | 546 | * Report the number of batches, correlated with, but not necessarily |
| 547 | * precisely the same as, the number of grace periods that have elapsed. | 547 | * precisely the same as, the number of grace periods that have elapsed. |
| 548 | */ | 548 | */ |
| 549 | long srcu_batches_completed(struct srcu_struct *sp) | 549 | unsigned long srcu_batches_completed(struct srcu_struct *sp) |
| 550 | { | 550 | { |
| 551 | return sp->completed; | 551 | return sp->completed; |
| 552 | } | 552 | } |
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 0db5649f8817..cc9ceca7bde1 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c | |||
| @@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head, | |||
| 47 | void (*func)(struct rcu_head *rcu), | 47 | void (*func)(struct rcu_head *rcu), |
| 48 | struct rcu_ctrlblk *rcp); | 48 | struct rcu_ctrlblk *rcp); |
| 49 | 49 | ||
| 50 | static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | ||
| 51 | |||
| 52 | #include "tiny_plugin.h" | 50 | #include "tiny_plugin.h" |
| 53 | 51 | ||
| 54 | /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */ | ||
| 55 | static void rcu_idle_enter_common(long long newval) | ||
| 56 | { | ||
| 57 | if (newval) { | ||
| 58 | RCU_TRACE(trace_rcu_dyntick(TPS("--="), | ||
| 59 | rcu_dynticks_nesting, newval)); | ||
| 60 | rcu_dynticks_nesting = newval; | ||
| 61 | return; | ||
| 62 | } | ||
| 63 | RCU_TRACE(trace_rcu_dyntick(TPS("Start"), | ||
| 64 | rcu_dynticks_nesting, newval)); | ||
| 65 | if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { | ||
| 66 | struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); | ||
| 67 | |||
| 68 | RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), | ||
| 69 | rcu_dynticks_nesting, newval)); | ||
| 70 | ftrace_dump(DUMP_ALL); | ||
| 71 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
| 72 | current->pid, current->comm, | ||
| 73 | idle->pid, idle->comm); /* must be idle task! */ | ||
| 74 | } | ||
| 75 | rcu_sched_qs(); /* implies rcu_bh_inc() */ | ||
| 76 | barrier(); | ||
| 77 | rcu_dynticks_nesting = newval; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | 52 | /* |
| 81 | * Enter idle, which is an extended quiescent state if we have fully | 53 | * Enter idle, which is an extended quiescent state if we have fully |
| 82 | * entered that mode (i.e., if the new value of dynticks_nesting is zero). | 54 | * entered that mode. |
| 83 | */ | 55 | */ |
| 84 | void rcu_idle_enter(void) | 56 | void rcu_idle_enter(void) |
| 85 | { | 57 | { |
| 86 | unsigned long flags; | ||
| 87 | long long newval; | ||
| 88 | |||
| 89 | local_irq_save(flags); | ||
| 90 | WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); | ||
| 91 | if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == | ||
| 92 | DYNTICK_TASK_NEST_VALUE) | ||
| 93 | newval = 0; | ||
| 94 | else | ||
| 95 | newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; | ||
| 96 | rcu_idle_enter_common(newval); | ||
| 97 | local_irq_restore(flags); | ||
| 98 | } | 58 | } |
| 99 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | 59 | EXPORT_SYMBOL_GPL(rcu_idle_enter); |
| 100 | 60 | ||
| @@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); | |||
| 103 | */ | 63 | */ |
| 104 | void rcu_irq_exit(void) | 64 | void rcu_irq_exit(void) |
| 105 | { | 65 | { |
| 106 | unsigned long flags; | ||
| 107 | long long newval; | ||
| 108 | |||
| 109 | local_irq_save(flags); | ||
| 110 | newval = rcu_dynticks_nesting - 1; | ||
| 111 | WARN_ON_ONCE(newval < 0); | ||
| 112 | rcu_idle_enter_common(newval); | ||
| 113 | local_irq_restore(flags); | ||
| 114 | } | 66 | } |
| 115 | EXPORT_SYMBOL_GPL(rcu_irq_exit); | 67 | EXPORT_SYMBOL_GPL(rcu_irq_exit); |
| 116 | 68 | ||
| 117 | /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */ | ||
| 118 | static void rcu_idle_exit_common(long long oldval) | ||
| 119 | { | ||
| 120 | if (oldval) { | ||
| 121 | RCU_TRACE(trace_rcu_dyntick(TPS("++="), | ||
| 122 | oldval, rcu_dynticks_nesting)); | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); | ||
| 126 | if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { | ||
| 127 | struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); | ||
| 128 | |||
| 129 | RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), | ||
| 130 | oldval, rcu_dynticks_nesting)); | ||
| 131 | ftrace_dump(DUMP_ALL); | ||
| 132 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
| 133 | current->pid, current->comm, | ||
| 134 | idle->pid, idle->comm); /* must be idle task! */ | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | /* | 69 | /* |
| 139 | * Exit idle, so that we are no longer in an extended quiescent state. | 70 | * Exit idle, so that we are no longer in an extended quiescent state. |
| 140 | */ | 71 | */ |
| 141 | void rcu_idle_exit(void) | 72 | void rcu_idle_exit(void) |
| 142 | { | 73 | { |
| 143 | unsigned long flags; | ||
| 144 | long long oldval; | ||
| 145 | |||
| 146 | local_irq_save(flags); | ||
| 147 | oldval = rcu_dynticks_nesting; | ||
| 148 | WARN_ON_ONCE(rcu_dynticks_nesting < 0); | ||
| 149 | if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) | ||
| 150 | rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | ||
| 151 | else | ||
| 152 | rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | ||
| 153 | rcu_idle_exit_common(oldval); | ||
| 154 | local_irq_restore(flags); | ||
| 155 | } | 74 | } |
| 156 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | 75 | EXPORT_SYMBOL_GPL(rcu_idle_exit); |
| 157 | 76 | ||
| @@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit); | |||
| 160 | */ | 79 | */ |
| 161 | void rcu_irq_enter(void) | 80 | void rcu_irq_enter(void) |
| 162 | { | 81 | { |
| 163 | unsigned long flags; | ||
| 164 | long long oldval; | ||
| 165 | |||
| 166 | local_irq_save(flags); | ||
| 167 | oldval = rcu_dynticks_nesting; | ||
| 168 | rcu_dynticks_nesting++; | ||
| 169 | WARN_ON_ONCE(rcu_dynticks_nesting == 0); | ||
| 170 | rcu_idle_exit_common(oldval); | ||
| 171 | local_irq_restore(flags); | ||
| 172 | } | 82 | } |
| 173 | EXPORT_SYMBOL_GPL(rcu_irq_enter); | 83 | EXPORT_SYMBOL_GPL(rcu_irq_enter); |
| 174 | 84 | ||
| @@ -179,23 +89,13 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter); | |||
| 179 | */ | 89 | */ |
| 180 | bool notrace __rcu_is_watching(void) | 90 | bool notrace __rcu_is_watching(void) |
| 181 | { | 91 | { |
| 182 | return rcu_dynticks_nesting; | 92 | return true; |
| 183 | } | 93 | } |
| 184 | EXPORT_SYMBOL(__rcu_is_watching); | 94 | EXPORT_SYMBOL(__rcu_is_watching); |
| 185 | 95 | ||
| 186 | #endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ | 96 | #endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ |
| 187 | 97 | ||
| 188 | /* | 98 | /* |
| 189 | * Test whether the current CPU was interrupted from idle. Nested | ||
| 190 | * interrupts don't count, we must be running at the first interrupt | ||
| 191 | * level. | ||
| 192 | */ | ||
| 193 | static int rcu_is_cpu_rrupt_from_idle(void) | ||
| 194 | { | ||
| 195 | return rcu_dynticks_nesting <= 1; | ||
| 196 | } | ||
| 197 | |||
| 198 | /* | ||
| 199 | * Helper function for rcu_sched_qs() and rcu_bh_qs(). | 99 | * Helper function for rcu_sched_qs() and rcu_bh_qs(). |
| 200 | * Also irqs are disabled to avoid confusion due to interrupt handlers | 100 | * Also irqs are disabled to avoid confusion due to interrupt handlers |
| 201 | * invoking call_rcu(). | 101 | * invoking call_rcu(). |
| @@ -250,7 +150,7 @@ void rcu_bh_qs(void) | |||
| 250 | void rcu_check_callbacks(int user) | 150 | void rcu_check_callbacks(int user) |
| 251 | { | 151 | { |
| 252 | RCU_TRACE(check_cpu_stalls()); | 152 | RCU_TRACE(check_cpu_stalls()); |
| 253 | if (user || rcu_is_cpu_rrupt_from_idle()) | 153 | if (user) |
| 254 | rcu_sched_qs(); | 154 | rcu_sched_qs(); |
| 255 | else if (!in_softirq()) | 155 | else if (!in_softirq()) |
| 256 | rcu_bh_qs(); | 156 | rcu_bh_qs(); |
| @@ -357,6 +257,11 @@ static void __call_rcu(struct rcu_head *head, | |||
| 357 | rcp->curtail = &head->next; | 257 | rcp->curtail = &head->next; |
| 358 | RCU_TRACE(rcp->qlen++); | 258 | RCU_TRACE(rcp->qlen++); |
| 359 | local_irq_restore(flags); | 259 | local_irq_restore(flags); |
| 260 | |||
| 261 | if (unlikely(is_idle_task(current))) { | ||
| 262 | /* force scheduling for rcu_sched_qs() */ | ||
| 263 | resched_cpu(0); | ||
| 264 | } | ||
| 360 | } | 265 | } |
| 361 | 266 | ||
| 362 | /* | 267 | /* |
| @@ -383,6 +288,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
| 383 | void __init rcu_init(void) | 288 | void __init rcu_init(void) |
| 384 | { | 289 | { |
| 385 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 290 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| 291 | RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk)); | ||
| 292 | RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk)); | ||
| 386 | 293 | ||
| 387 | rcu_early_boot_tests(); | 294 | rcu_early_boot_tests(); |
| 388 | } | 295 | } |
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 858c56569127..f94e209a10d6 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h | |||
| @@ -145,17 +145,16 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp) | |||
| 145 | rcp->ticks_this_gp++; | 145 | rcp->ticks_this_gp++; |
| 146 | j = jiffies; | 146 | j = jiffies; |
| 147 | js = ACCESS_ONCE(rcp->jiffies_stall); | 147 | js = ACCESS_ONCE(rcp->jiffies_stall); |
| 148 | if (*rcp->curtail && ULONG_CMP_GE(j, js)) { | 148 | if (rcp->rcucblist && ULONG_CMP_GE(j, js)) { |
| 149 | pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", | 149 | pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", |
| 150 | rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, | 150 | rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE, |
| 151 | jiffies - rcp->gp_start, rcp->qlen); | 151 | jiffies - rcp->gp_start, rcp->qlen); |
| 152 | dump_stack(); | 152 | dump_stack(); |
| 153 | } | ||
| 154 | if (*rcp->curtail && ULONG_CMP_GE(j, js)) | ||
| 155 | ACCESS_ONCE(rcp->jiffies_stall) = jiffies + | 153 | ACCESS_ONCE(rcp->jiffies_stall) = jiffies + |
| 156 | 3 * rcu_jiffies_till_stall_check() + 3; | 154 | 3 * rcu_jiffies_till_stall_check() + 3; |
| 157 | else if (ULONG_CMP_GE(j, js)) | 155 | } else if (ULONG_CMP_GE(j, js)) { |
| 158 | ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); | 156 | ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); |
| 157 | } | ||
| 159 | } | 158 | } |
| 160 | 159 | ||
| 161 | static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) | 160 | static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7680fc275036..48d640ca1a05 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c | |||
| @@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |||
| 156 | static void invoke_rcu_core(void); | 156 | static void invoke_rcu_core(void); |
| 157 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 157 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
| 158 | 158 | ||
| 159 | /* rcuc/rcub kthread realtime priority */ | ||
| 160 | static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; | ||
| 161 | module_param(kthread_prio, int, 0644); | ||
| 162 | |||
| 159 | /* | 163 | /* |
| 160 | * Track the rcutorture test sequence number and the update version | 164 | * Track the rcutorture test sequence number and the update version |
| 161 | * number within a given test. The rcutorture_testseq is incremented | 165 | * number within a given test. The rcutorture_testseq is incremented |
| @@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | |||
| 215 | #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ | 219 | #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ |
| 216 | }; | 220 | }; |
| 217 | 221 | ||
| 222 | DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); | ||
| 223 | EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); | ||
| 224 | |||
| 218 | /* | 225 | /* |
| 219 | * Let the RCU core know that this CPU has gone through the scheduler, | 226 | * Let the RCU core know that this CPU has gone through the scheduler, |
| 220 | * which is a quiescent state. This is called when the need for a | 227 | * which is a quiescent state. This is called when the need for a |
| @@ -284,6 +291,22 @@ void rcu_note_context_switch(void) | |||
| 284 | } | 291 | } |
| 285 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 292 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
| 286 | 293 | ||
| 294 | /* | ||
| 295 | * Register a quiesecent state for all RCU flavors. If there is an | ||
| 296 | * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight | ||
| 297 | * dyntick-idle quiescent state visible to other CPUs (but only for those | ||
| 298 | * RCU flavors in desparate need of a quiescent state, which will normally | ||
| 299 | * be none of them). Either way, do a lightweight quiescent state for | ||
| 300 | * all RCU flavors. | ||
| 301 | */ | ||
| 302 | void rcu_all_qs(void) | ||
| 303 | { | ||
| 304 | if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) | ||
| 305 | rcu_momentary_dyntick_idle(); | ||
| 306 | this_cpu_inc(rcu_qs_ctr); | ||
| 307 | } | ||
| 308 | EXPORT_SYMBOL_GPL(rcu_all_qs); | ||
| 309 | |||
| 287 | static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ | 310 | static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
| 288 | static long qhimark = 10000; /* If this many pending, ignore blimit. */ | 311 | static long qhimark = 10000; /* If this many pending, ignore blimit. */ |
| 289 | static long qlowmark = 100; /* Once only this many pending, use blimit. */ | 312 | static long qlowmark = 100; /* Once only this many pending, use blimit. */ |
| @@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp); | |||
| 315 | static int rcu_pending(void); | 338 | static int rcu_pending(void); |
| 316 | 339 | ||
| 317 | /* | 340 | /* |
| 318 | * Return the number of RCU-sched batches processed thus far for debug & stats. | 341 | * Return the number of RCU batches started thus far for debug & stats. |
| 342 | */ | ||
| 343 | unsigned long rcu_batches_started(void) | ||
| 344 | { | ||
| 345 | return rcu_state_p->gpnum; | ||
| 346 | } | ||
| 347 | EXPORT_SYMBOL_GPL(rcu_batches_started); | ||
| 348 | |||
| 349 | /* | ||
| 350 | * Return the number of RCU-sched batches started thus far for debug & stats. | ||
| 351 | */ | ||
| 352 | unsigned long rcu_batches_started_sched(void) | ||
| 353 | { | ||
| 354 | return rcu_sched_state.gpnum; | ||
| 355 | } | ||
| 356 | EXPORT_SYMBOL_GPL(rcu_batches_started_sched); | ||
| 357 | |||
| 358 | /* | ||
| 359 | * Return the number of RCU BH batches started thus far for debug & stats. | ||
| 319 | */ | 360 | */ |
| 320 | long rcu_batches_completed_sched(void) | 361 | unsigned long rcu_batches_started_bh(void) |
| 362 | { | ||
| 363 | return rcu_bh_state.gpnum; | ||
| 364 | } | ||
| 365 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | ||
| 366 | |||
| 367 | /* | ||
| 368 | * Return the number of RCU batches completed thus far for debug & stats. | ||
| 369 | */ | ||
| 370 | unsigned long rcu_batches_completed(void) | ||
| 371 | { | ||
| 372 | return rcu_state_p->completed; | ||
| 373 | } | ||
| 374 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | ||
| 375 | |||
| 376 | /* | ||
| 377 | * Return the number of RCU-sched batches completed thus far for debug & stats. | ||
| 378 | */ | ||
| 379 | unsigned long rcu_batches_completed_sched(void) | ||
| 321 | { | 380 | { |
| 322 | return rcu_sched_state.completed; | 381 | return rcu_sched_state.completed; |
| 323 | } | 382 | } |
| 324 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | 383 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); |
| 325 | 384 | ||
| 326 | /* | 385 | /* |
| 327 | * Return the number of RCU BH batches processed thus far for debug & stats. | 386 | * Return the number of RCU BH batches completed thus far for debug & stats. |
| 328 | */ | 387 | */ |
| 329 | long rcu_batches_completed_bh(void) | 388 | unsigned long rcu_batches_completed_bh(void) |
| 330 | { | 389 | { |
| 331 | return rcu_bh_state.completed; | 390 | return rcu_bh_state.completed; |
| 332 | } | 391 | } |
| @@ -759,39 +818,71 @@ void rcu_irq_enter(void) | |||
| 759 | /** | 818 | /** |
| 760 | * rcu_nmi_enter - inform RCU of entry to NMI context | 819 | * rcu_nmi_enter - inform RCU of entry to NMI context |
| 761 | * | 820 | * |
| 762 | * If the CPU was idle with dynamic ticks active, and there is no | 821 | * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and |
| 763 | * irq handler running, this updates rdtp->dynticks_nmi to let the | 822 | * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know |
| 764 | * RCU grace-period handling know that the CPU is active. | 823 | * that the CPU is active. This implementation permits nested NMIs, as |
| 824 | * long as the nesting level does not overflow an int. (You will probably | ||
| 825 | * run out of stack space first.) | ||
| 765 | */ | 826 | */ |
| 766 | void rcu_nmi_enter(void) | 827 | void rcu_nmi_enter(void) |
| 767 | { | 828 | { |
| 768 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); | 829 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
| 830 | int incby = 2; | ||
| 769 | 831 | ||
| 770 | if (rdtp->dynticks_nmi_nesting == 0 && | 832 | /* Complain about underflow. */ |
| 771 | (atomic_read(&rdtp->dynticks) & 0x1)) | 833 | WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); |
| 772 | return; | 834 | |
| 773 | rdtp->dynticks_nmi_nesting++; | 835 | /* |
| 774 | smp_mb__before_atomic(); /* Force delay from prior write. */ | 836 | * If idle from RCU viewpoint, atomically increment ->dynticks |
| 775 | atomic_inc(&rdtp->dynticks); | 837 | * to mark non-idle and increment ->dynticks_nmi_nesting by one. |
| 776 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 838 | * Otherwise, increment ->dynticks_nmi_nesting by two. This means |
| 777 | smp_mb__after_atomic(); /* See above. */ | 839 | * if ->dynticks_nmi_nesting is equal to one, we are guaranteed |
| 778 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 840 | * to be in the outermost NMI handler that interrupted an RCU-idle |
| 841 | * period (observation due to Andy Lutomirski). | ||
| 842 | */ | ||
| 843 | if (!(atomic_read(&rdtp->dynticks) & 0x1)) { | ||
| 844 | smp_mb__before_atomic(); /* Force delay from prior write. */ | ||
| 845 | atomic_inc(&rdtp->dynticks); | ||
| 846 | /* atomic_inc() before later RCU read-side crit sects */ | ||
| 847 | smp_mb__after_atomic(); /* See above. */ | ||
| 848 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||
| 849 | incby = 1; | ||
| 850 | } | ||
| 851 | rdtp->dynticks_nmi_nesting += incby; | ||
| 852 | barrier(); | ||
| 779 | } | 853 | } |
| 780 | 854 | ||
| 781 | /** | 855 | /** |
| 782 | * rcu_nmi_exit - inform RCU of exit from NMI context | 856 | * rcu_nmi_exit - inform RCU of exit from NMI context |
| 783 | * | 857 | * |
| 784 | * If the CPU was idle with dynamic ticks active, and there is no | 858 | * If we are returning from the outermost NMI handler that interrupted an |
| 785 | * irq handler running, this updates rdtp->dynticks_nmi to let the | 859 | * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting |
| 786 | * RCU grace-period handling know that the CPU is no longer active. | 860 | * to let the RCU grace-period handling know that the CPU is back to |
| 861 | * being RCU-idle. | ||
| 787 | */ | 862 | */ |
| 788 | void rcu_nmi_exit(void) | 863 | void rcu_nmi_exit(void) |
| 789 | { | 864 | { |
| 790 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); | 865 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
| 791 | 866 | ||
| 792 | if (rdtp->dynticks_nmi_nesting == 0 || | 867 | /* |
| 793 | --rdtp->dynticks_nmi_nesting != 0) | 868 | * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. |
| 869 | * (We are exiting an NMI handler, so RCU better be paying attention | ||
| 870 | * to us!) | ||
| 871 | */ | ||
| 872 | WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); | ||
| 873 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||
| 874 | |||
| 875 | /* | ||
| 876 | * If the nesting level is not 1, the CPU wasn't RCU-idle, so | ||
| 877 | * leave it in non-RCU-idle state. | ||
| 878 | */ | ||
| 879 | if (rdtp->dynticks_nmi_nesting != 1) { | ||
| 880 | rdtp->dynticks_nmi_nesting -= 2; | ||
| 794 | return; | 881 | return; |
| 882 | } | ||
| 883 | |||
| 884 | /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ | ||
| 885 | rdtp->dynticks_nmi_nesting = 0; | ||
| 795 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 886 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
| 796 | smp_mb__before_atomic(); /* See above. */ | 887 | smp_mb__before_atomic(); /* See above. */ |
| 797 | atomic_inc(&rdtp->dynticks); | 888 | atomic_inc(&rdtp->dynticks); |
| @@ -898,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, | |||
| 898 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); | 989 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); |
| 899 | return 1; | 990 | return 1; |
| 900 | } else { | 991 | } else { |
| 992 | if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4, | ||
| 993 | rdp->mynode->gpnum)) | ||
| 994 | ACCESS_ONCE(rdp->gpwrap) = true; | ||
| 901 | return 0; | 995 | return 0; |
| 902 | } | 996 | } |
| 903 | } | 997 | } |
| 904 | 998 | ||
| 905 | /* | 999 | /* |
| 906 | * This function really isn't for public consumption, but RCU is special in | ||
| 907 | * that context switches can allow the state machine to make progress. | ||
| 908 | */ | ||
| 909 | extern void resched_cpu(int cpu); | ||
| 910 | |||
| 911 | /* | ||
| 912 | * Return true if the specified CPU has passed through a quiescent | 1000 | * Return true if the specified CPU has passed through a quiescent |
| 913 | * state by virtue of being in or having passed through an dynticks | 1001 | * state by virtue of being in or having passed through an dynticks |
| 914 | * idle state since the last call to dyntick_save_progress_counter() | 1002 | * idle state since the last call to dyntick_save_progress_counter() |
| @@ -1011,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) | |||
| 1011 | j1 = rcu_jiffies_till_stall_check(); | 1099 | j1 = rcu_jiffies_till_stall_check(); |
| 1012 | ACCESS_ONCE(rsp->jiffies_stall) = j + j1; | 1100 | ACCESS_ONCE(rsp->jiffies_stall) = j + j1; |
| 1013 | rsp->jiffies_resched = j + j1 / 2; | 1101 | rsp->jiffies_resched = j + j1 / 2; |
| 1102 | rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | /* | ||
| 1106 | * Complain about starvation of grace-period kthread. | ||
| 1107 | */ | ||
| 1108 | static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) | ||
| 1109 | { | ||
| 1110 | unsigned long gpa; | ||
| 1111 | unsigned long j; | ||
| 1112 | |||
| 1113 | j = jiffies; | ||
| 1114 | gpa = ACCESS_ONCE(rsp->gp_activity); | ||
| 1115 | if (j - gpa > 2 * HZ) | ||
| 1116 | pr_err("%s kthread starved for %ld jiffies!\n", | ||
| 1117 | rsp->name, j - gpa); | ||
| 1014 | } | 1118 | } |
| 1015 | 1119 | ||
| 1016 | /* | 1120 | /* |
| @@ -1033,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) | |||
| 1033 | } | 1137 | } |
| 1034 | } | 1138 | } |
| 1035 | 1139 | ||
| 1036 | static void print_other_cpu_stall(struct rcu_state *rsp) | 1140 | static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) |
| 1037 | { | 1141 | { |
| 1038 | int cpu; | 1142 | int cpu; |
| 1039 | long delta; | 1143 | long delta; |
| 1040 | unsigned long flags; | 1144 | unsigned long flags; |
| 1145 | unsigned long gpa; | ||
| 1146 | unsigned long j; | ||
| 1041 | int ndetected = 0; | 1147 | int ndetected = 0; |
| 1042 | struct rcu_node *rnp = rcu_get_root(rsp); | 1148 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1043 | long totqlen = 0; | 1149 | long totqlen = 0; |
| @@ -1075,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 1075 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1181 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1076 | } | 1182 | } |
| 1077 | 1183 | ||
| 1078 | /* | ||
| 1079 | * Now rat on any tasks that got kicked up to the root rcu_node | ||
| 1080 | * due to CPU offlining. | ||
| 1081 | */ | ||
| 1082 | rnp = rcu_get_root(rsp); | ||
| 1083 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 1084 | ndetected += rcu_print_task_stall(rnp); | ||
| 1085 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 1086 | |||
| 1087 | print_cpu_stall_info_end(); | 1184 | print_cpu_stall_info_end(); |
| 1088 | for_each_possible_cpu(cpu) | 1185 | for_each_possible_cpu(cpu) |
| 1089 | totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; | 1186 | totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; |
| 1090 | pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", | 1187 | pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", |
| 1091 | smp_processor_id(), (long)(jiffies - rsp->gp_start), | 1188 | smp_processor_id(), (long)(jiffies - rsp->gp_start), |
| 1092 | (long)rsp->gpnum, (long)rsp->completed, totqlen); | 1189 | (long)rsp->gpnum, (long)rsp->completed, totqlen); |
| 1093 | if (ndetected == 0) | 1190 | if (ndetected) { |
| 1094 | pr_err("INFO: Stall ended before state dump start\n"); | ||
| 1095 | else | ||
| 1096 | rcu_dump_cpu_stacks(rsp); | 1191 | rcu_dump_cpu_stacks(rsp); |
| 1192 | } else { | ||
| 1193 | if (ACCESS_ONCE(rsp->gpnum) != gpnum || | ||
| 1194 | ACCESS_ONCE(rsp->completed) == gpnum) { | ||
| 1195 | pr_err("INFO: Stall ended before state dump start\n"); | ||
| 1196 | } else { | ||
| 1197 | j = jiffies; | ||
| 1198 | gpa = ACCESS_ONCE(rsp->gp_activity); | ||
| 1199 | pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n", | ||
| 1200 | rsp->name, j - gpa, j, gpa, | ||
| 1201 | jiffies_till_next_fqs); | ||
| 1202 | /* In this case, the current CPU might be at fault. */ | ||
| 1203 | sched_show_task(current); | ||
| 1204 | } | ||
| 1205 | } | ||
| 1097 | 1206 | ||
| 1098 | /* Complain about tasks blocking the grace period. */ | 1207 | /* Complain about tasks blocking the grace period. */ |
| 1099 | |||
| 1100 | rcu_print_detail_task_stall(rsp); | 1208 | rcu_print_detail_task_stall(rsp); |
| 1101 | 1209 | ||
| 1210 | rcu_check_gp_kthread_starvation(rsp); | ||
| 1211 | |||
| 1102 | force_quiescent_state(rsp); /* Kick them all. */ | 1212 | force_quiescent_state(rsp); /* Kick them all. */ |
| 1103 | } | 1213 | } |
| 1104 | 1214 | ||
| @@ -1123,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
| 1123 | pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", | 1233 | pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", |
| 1124 | jiffies - rsp->gp_start, | 1234 | jiffies - rsp->gp_start, |
| 1125 | (long)rsp->gpnum, (long)rsp->completed, totqlen); | 1235 | (long)rsp->gpnum, (long)rsp->completed, totqlen); |
| 1236 | |||
| 1237 | rcu_check_gp_kthread_starvation(rsp); | ||
| 1238 | |||
| 1126 | rcu_dump_cpu_stacks(rsp); | 1239 | rcu_dump_cpu_stacks(rsp); |
| 1127 | 1240 | ||
| 1128 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1241 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| @@ -1193,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1193 | ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { | 1306 | ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { |
| 1194 | 1307 | ||
| 1195 | /* They had a few time units to dump stack, so complain. */ | 1308 | /* They had a few time units to dump stack, so complain. */ |
| 1196 | print_other_cpu_stall(rsp); | 1309 | print_other_cpu_stall(rsp, gpnum); |
| 1197 | } | 1310 | } |
| 1198 | } | 1311 | } |
| 1199 | 1312 | ||
| @@ -1530,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, | |||
| 1530 | bool ret; | 1643 | bool ret; |
| 1531 | 1644 | ||
| 1532 | /* Handle the ends of any preceding grace periods first. */ | 1645 | /* Handle the ends of any preceding grace periods first. */ |
| 1533 | if (rdp->completed == rnp->completed) { | 1646 | if (rdp->completed == rnp->completed && |
| 1647 | !unlikely(ACCESS_ONCE(rdp->gpwrap))) { | ||
| 1534 | 1648 | ||
| 1535 | /* No grace period end, so just accelerate recent callbacks. */ | 1649 | /* No grace period end, so just accelerate recent callbacks. */ |
| 1536 | ret = rcu_accelerate_cbs(rsp, rnp, rdp); | 1650 | ret = rcu_accelerate_cbs(rsp, rnp, rdp); |
| @@ -1545,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, | |||
| 1545 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); | 1659 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); |
| 1546 | } | 1660 | } |
| 1547 | 1661 | ||
| 1548 | if (rdp->gpnum != rnp->gpnum) { | 1662 | if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) { |
| 1549 | /* | 1663 | /* |
| 1550 | * If the current grace period is waiting for this CPU, | 1664 | * If the current grace period is waiting for this CPU, |
| 1551 | * set up to detect a quiescent state, otherwise don't | 1665 | * set up to detect a quiescent state, otherwise don't |
| @@ -1554,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, | |||
| 1554 | rdp->gpnum = rnp->gpnum; | 1668 | rdp->gpnum = rnp->gpnum; |
| 1555 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); | 1669 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); |
| 1556 | rdp->passed_quiesce = 0; | 1670 | rdp->passed_quiesce = 0; |
| 1671 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); | ||
| 1557 | rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); | 1672 | rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); |
| 1558 | zero_cpu_stall_ticks(rdp); | 1673 | zero_cpu_stall_ticks(rdp); |
| 1674 | ACCESS_ONCE(rdp->gpwrap) = false; | ||
| 1559 | } | 1675 | } |
| 1560 | return ret; | 1676 | return ret; |
| 1561 | } | 1677 | } |
| @@ -1569,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1569 | local_irq_save(flags); | 1685 | local_irq_save(flags); |
| 1570 | rnp = rdp->mynode; | 1686 | rnp = rdp->mynode; |
| 1571 | if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && | 1687 | if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && |
| 1572 | rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */ | 1688 | rdp->completed == ACCESS_ONCE(rnp->completed) && |
| 1689 | !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */ | ||
| 1573 | !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ | 1690 | !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ |
| 1574 | local_irq_restore(flags); | 1691 | local_irq_restore(flags); |
| 1575 | return; | 1692 | return; |
| @@ -1589,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
| 1589 | struct rcu_data *rdp; | 1706 | struct rcu_data *rdp; |
| 1590 | struct rcu_node *rnp = rcu_get_root(rsp); | 1707 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1591 | 1708 | ||
| 1709 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1592 | rcu_bind_gp_kthread(); | 1710 | rcu_bind_gp_kthread(); |
| 1593 | raw_spin_lock_irq(&rnp->lock); | 1711 | raw_spin_lock_irq(&rnp->lock); |
| 1594 | smp_mb__after_unlock_lock(); | 1712 | smp_mb__after_unlock_lock(); |
| @@ -1649,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
| 1649 | rnp->grphi, rnp->qsmask); | 1767 | rnp->grphi, rnp->qsmask); |
| 1650 | raw_spin_unlock_irq(&rnp->lock); | 1768 | raw_spin_unlock_irq(&rnp->lock); |
| 1651 | cond_resched_rcu_qs(); | 1769 | cond_resched_rcu_qs(); |
| 1770 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1652 | } | 1771 | } |
| 1653 | 1772 | ||
| 1654 | mutex_unlock(&rsp->onoff_mutex); | 1773 | mutex_unlock(&rsp->onoff_mutex); |
| @@ -1665,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) | |||
| 1665 | unsigned long maxj; | 1784 | unsigned long maxj; |
| 1666 | struct rcu_node *rnp = rcu_get_root(rsp); | 1785 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1667 | 1786 | ||
| 1787 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1668 | rsp->n_force_qs++; | 1788 | rsp->n_force_qs++; |
| 1669 | if (fqs_state == RCU_SAVE_DYNTICK) { | 1789 | if (fqs_state == RCU_SAVE_DYNTICK) { |
| 1670 | /* Collect dyntick-idle snapshots. */ | 1790 | /* Collect dyntick-idle snapshots. */ |
| @@ -1703,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
| 1703 | struct rcu_data *rdp; | 1823 | struct rcu_data *rdp; |
| 1704 | struct rcu_node *rnp = rcu_get_root(rsp); | 1824 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1705 | 1825 | ||
| 1826 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1706 | raw_spin_lock_irq(&rnp->lock); | 1827 | raw_spin_lock_irq(&rnp->lock); |
| 1707 | smp_mb__after_unlock_lock(); | 1828 | smp_mb__after_unlock_lock(); |
| 1708 | gp_duration = jiffies - rsp->gp_start; | 1829 | gp_duration = jiffies - rsp->gp_start; |
| @@ -1739,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
| 1739 | nocb += rcu_future_gp_cleanup(rsp, rnp); | 1860 | nocb += rcu_future_gp_cleanup(rsp, rnp); |
| 1740 | raw_spin_unlock_irq(&rnp->lock); | 1861 | raw_spin_unlock_irq(&rnp->lock); |
| 1741 | cond_resched_rcu_qs(); | 1862 | cond_resched_rcu_qs(); |
| 1863 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1742 | } | 1864 | } |
| 1743 | rnp = rcu_get_root(rsp); | 1865 | rnp = rcu_get_root(rsp); |
| 1744 | raw_spin_lock_irq(&rnp->lock); | 1866 | raw_spin_lock_irq(&rnp->lock); |
| @@ -1788,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
| 1788 | if (rcu_gp_init(rsp)) | 1910 | if (rcu_gp_init(rsp)) |
| 1789 | break; | 1911 | break; |
| 1790 | cond_resched_rcu_qs(); | 1912 | cond_resched_rcu_qs(); |
| 1913 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1791 | WARN_ON(signal_pending(current)); | 1914 | WARN_ON(signal_pending(current)); |
| 1792 | trace_rcu_grace_period(rsp->name, | 1915 | trace_rcu_grace_period(rsp->name, |
| 1793 | ACCESS_ONCE(rsp->gpnum), | 1916 | ACCESS_ONCE(rsp->gpnum), |
| @@ -1831,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
| 1831 | ACCESS_ONCE(rsp->gpnum), | 1954 | ACCESS_ONCE(rsp->gpnum), |
| 1832 | TPS("fqsend")); | 1955 | TPS("fqsend")); |
| 1833 | cond_resched_rcu_qs(); | 1956 | cond_resched_rcu_qs(); |
| 1957 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1834 | } else { | 1958 | } else { |
| 1835 | /* Deal with stray signal. */ | 1959 | /* Deal with stray signal. */ |
| 1836 | cond_resched_rcu_qs(); | 1960 | cond_resched_rcu_qs(); |
| 1961 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | ||
| 1837 | WARN_ON(signal_pending(current)); | 1962 | WARN_ON(signal_pending(current)); |
| 1838 | trace_rcu_grace_period(rsp->name, | 1963 | trace_rcu_grace_period(rsp->name, |
| 1839 | ACCESS_ONCE(rsp->gpnum), | 1964 | ACCESS_ONCE(rsp->gpnum), |
| @@ -2010,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 2010 | rnp = rdp->mynode; | 2135 | rnp = rdp->mynode; |
| 2011 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2136 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 2012 | smp_mb__after_unlock_lock(); | 2137 | smp_mb__after_unlock_lock(); |
| 2013 | if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || | 2138 | if ((rdp->passed_quiesce == 0 && |
| 2014 | rnp->completed == rnp->gpnum) { | 2139 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || |
| 2140 | rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || | ||
| 2141 | rdp->gpwrap) { | ||
| 2015 | 2142 | ||
| 2016 | /* | 2143 | /* |
| 2017 | * The grace period in which this quiescent state was | 2144 | * The grace period in which this quiescent state was |
| @@ -2020,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 2020 | * within the current grace period. | 2147 | * within the current grace period. |
| 2021 | */ | 2148 | */ |
| 2022 | rdp->passed_quiesce = 0; /* need qs for new gp. */ | 2149 | rdp->passed_quiesce = 0; /* need qs for new gp. */ |
| 2150 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); | ||
| 2023 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2151 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 2024 | return; | 2152 | return; |
| 2025 | } | 2153 | } |
| @@ -2064,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 2064 | * Was there a quiescent state since the beginning of the grace | 2192 | * Was there a quiescent state since the beginning of the grace |
| 2065 | * period? If no, then exit and wait for the next call. | 2193 | * period? If no, then exit and wait for the next call. |
| 2066 | */ | 2194 | */ |
| 2067 | if (!rdp->passed_quiesce) | 2195 | if (!rdp->passed_quiesce && |
| 2196 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) | ||
| 2068 | return; | 2197 | return; |
| 2069 | 2198 | ||
| 2070 | /* | 2199 | /* |
| @@ -2195,6 +2324,46 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | |||
| 2195 | } | 2324 | } |
| 2196 | 2325 | ||
| 2197 | /* | 2326 | /* |
| 2327 | * All CPUs for the specified rcu_node structure have gone offline, | ||
| 2328 | * and all tasks that were preempted within an RCU read-side critical | ||
| 2329 | * section while running on one of those CPUs have since exited their RCU | ||
| 2330 | * read-side critical section. Some other CPU is reporting this fact with | ||
| 2331 | * the specified rcu_node structure's ->lock held and interrupts disabled. | ||
| 2332 | * This function therefore goes up the tree of rcu_node structures, | ||
| 2333 | * clearing the corresponding bits in the ->qsmaskinit fields. Note that | ||
| 2334 | * the leaf rcu_node structure's ->qsmaskinit field has already been | ||
| 2335 | * updated | ||
| 2336 | * | ||
| 2337 | * This function does check that the specified rcu_node structure has | ||
| 2338 | * all CPUs offline and no blocked tasks, so it is OK to invoke it | ||
| 2339 | * prematurely. That said, invoking it after the fact will cost you | ||
| 2340 | * a needless lock acquisition. So once it has done its work, don't | ||
| 2341 | * invoke it again. | ||
| 2342 | */ | ||
| 2343 | static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) | ||
| 2344 | { | ||
| 2345 | long mask; | ||
| 2346 | struct rcu_node *rnp = rnp_leaf; | ||
| 2347 | |||
| 2348 | if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) | ||
| 2349 | return; | ||
| 2350 | for (;;) { | ||
| 2351 | mask = rnp->grpmask; | ||
| 2352 | rnp = rnp->parent; | ||
| 2353 | if (!rnp) | ||
| 2354 | break; | ||
| 2355 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
| 2356 | smp_mb__after_unlock_lock(); /* GP memory ordering. */ | ||
| 2357 | rnp->qsmaskinit &= ~mask; | ||
| 2358 | if (rnp->qsmaskinit) { | ||
| 2359 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 2360 | return; | ||
| 2361 | } | ||
| 2362 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 2363 | } | ||
| 2364 | } | ||
| 2365 | |||
| 2366 | /* | ||
| 2198 | * The CPU has been completely removed, and some other CPU is reporting | 2367 | * The CPU has been completely removed, and some other CPU is reporting |
| 2199 | * this fact from process context. Do the remainder of the cleanup, | 2368 | * this fact from process context. Do the remainder of the cleanup, |
| 2200 | * including orphaning the outgoing CPU's RCU callbacks, and also | 2369 | * including orphaning the outgoing CPU's RCU callbacks, and also |
| @@ -2204,8 +2373,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | |||
| 2204 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | 2373 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
| 2205 | { | 2374 | { |
| 2206 | unsigned long flags; | 2375 | unsigned long flags; |
| 2207 | unsigned long mask; | ||
| 2208 | int need_report = 0; | ||
| 2209 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2376 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 2210 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ | 2377 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
| 2211 | 2378 | ||
| @@ -2219,40 +2386,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
| 2219 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ | 2386 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ |
| 2220 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); | 2387 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); |
| 2221 | rcu_adopt_orphan_cbs(rsp, flags); | 2388 | rcu_adopt_orphan_cbs(rsp, flags); |
| 2389 | raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); | ||
| 2222 | 2390 | ||
| 2223 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ | 2391 | /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ |
| 2224 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 2392 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 2225 | do { | 2393 | smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ |
| 2226 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 2394 | rnp->qsmaskinit &= ~rdp->grpmask; |
| 2227 | smp_mb__after_unlock_lock(); | 2395 | if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp)) |
| 2228 | rnp->qsmaskinit &= ~mask; | 2396 | rcu_cleanup_dead_rnp(rnp); |
| 2229 | if (rnp->qsmaskinit != 0) { | 2397 | rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */ |
| 2230 | if (rnp != rdp->mynode) | ||
| 2231 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 2232 | break; | ||
| 2233 | } | ||
| 2234 | if (rnp == rdp->mynode) | ||
| 2235 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | ||
| 2236 | else | ||
| 2237 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 2238 | mask = rnp->grpmask; | ||
| 2239 | rnp = rnp->parent; | ||
| 2240 | } while (rnp != NULL); | ||
| 2241 | |||
| 2242 | /* | ||
| 2243 | * We still hold the leaf rcu_node structure lock here, and | ||
| 2244 | * irqs are still disabled. The reason for this subterfuge is | ||
| 2245 | * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock | ||
| 2246 | * held leads to deadlock. | ||
| 2247 | */ | ||
| 2248 | raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */ | ||
| 2249 | rnp = rdp->mynode; | ||
| 2250 | if (need_report & RCU_OFL_TASKS_NORM_GP) | ||
| 2251 | rcu_report_unblock_qs_rnp(rnp, flags); | ||
| 2252 | else | ||
| 2253 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 2254 | if (need_report & RCU_OFL_TASKS_EXP_GP) | ||
| 2255 | rcu_report_exp_rnp(rsp, rnp, true); | ||
| 2256 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, | 2398 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, |
| 2257 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", | 2399 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", |
| 2258 | cpu, rdp->qlen, rdp->nxtlist); | 2400 | cpu, rdp->qlen, rdp->nxtlist); |
| @@ -2268,6 +2410,10 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | |||
| 2268 | { | 2410 | { |
| 2269 | } | 2411 | } |
| 2270 | 2412 | ||
| 2413 | static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) | ||
| 2414 | { | ||
| 2415 | } | ||
| 2416 | |||
| 2271 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | 2417 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
| 2272 | { | 2418 | { |
| 2273 | } | 2419 | } |
| @@ -2464,12 +2610,6 @@ static void force_qs_rnp(struct rcu_state *rsp, | |||
| 2464 | } | 2610 | } |
| 2465 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2611 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 2466 | } | 2612 | } |
| 2467 | rnp = rcu_get_root(rsp); | ||
| 2468 | if (rnp->qsmask == 0) { | ||
| 2469 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 2470 | smp_mb__after_unlock_lock(); | ||
| 2471 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | ||
| 2472 | } | ||
| 2473 | } | 2613 | } |
| 2474 | 2614 | ||
| 2475 | /* | 2615 | /* |
| @@ -2569,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
| 2569 | * Schedule RCU callback invocation. If the specified type of RCU | 2709 | * Schedule RCU callback invocation. If the specified type of RCU |
| 2570 | * does not support RCU priority boosting, just do a direct call, | 2710 | * does not support RCU priority boosting, just do a direct call, |
| 2571 | * otherwise wake up the per-CPU kernel kthread. Note that because we | 2711 | * otherwise wake up the per-CPU kernel kthread. Note that because we |
| 2572 | * are running on the current CPU with interrupts disabled, the | 2712 | * are running on the current CPU with softirqs disabled, the |
| 2573 | * rcu_cpu_kthread_task cannot disappear out from under us. | 2713 | * rcu_cpu_kthread_task cannot disappear out from under us. |
| 2574 | */ | 2714 | */ |
| 2575 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 2715 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
| @@ -3109,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 3109 | 3249 | ||
| 3110 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 3250 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
| 3111 | if (rcu_scheduler_fully_active && | 3251 | if (rcu_scheduler_fully_active && |
| 3112 | rdp->qs_pending && !rdp->passed_quiesce) { | 3252 | rdp->qs_pending && !rdp->passed_quiesce && |
| 3253 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { | ||
| 3113 | rdp->n_rp_qs_pending++; | 3254 | rdp->n_rp_qs_pending++; |
| 3114 | } else if (rdp->qs_pending && rdp->passed_quiesce) { | 3255 | } else if (rdp->qs_pending && |
| 3256 | (rdp->passed_quiesce || | ||
| 3257 | rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { | ||
| 3115 | rdp->n_rp_report_qs++; | 3258 | rdp->n_rp_report_qs++; |
| 3116 | return 1; | 3259 | return 1; |
| 3117 | } | 3260 | } |
| @@ -3135,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 3135 | } | 3278 | } |
| 3136 | 3279 | ||
| 3137 | /* Has a new RCU grace period started? */ | 3280 | /* Has a new RCU grace period started? */ |
| 3138 | if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ | 3281 | if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum || |
| 3282 | unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */ | ||
| 3139 | rdp->n_rp_gp_started++; | 3283 | rdp->n_rp_gp_started++; |
| 3140 | return 1; | 3284 | return 1; |
| 3141 | } | 3285 | } |
| @@ -3318,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
| 3318 | } else { | 3462 | } else { |
| 3319 | _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, | 3463 | _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, |
| 3320 | rsp->n_barrier_done); | 3464 | rsp->n_barrier_done); |
| 3465 | smp_mb__before_atomic(); | ||
| 3321 | atomic_inc(&rsp->barrier_cpu_count); | 3466 | atomic_inc(&rsp->barrier_cpu_count); |
| 3322 | __call_rcu(&rdp->barrier_head, | 3467 | __call_rcu(&rdp->barrier_head, |
| 3323 | rcu_barrier_callback, rsp, cpu, 0); | 3468 | rcu_barrier_callback, rsp, cpu, 0); |
| @@ -3385,9 +3530,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 3385 | /* Set up local state, ensuring consistent view of global state. */ | 3530 | /* Set up local state, ensuring consistent view of global state. */ |
| 3386 | raw_spin_lock_irqsave(&rnp->lock, flags); | 3531 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 3387 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); | 3532 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); |
| 3388 | init_callback_list(rdp); | ||
| 3389 | rdp->qlen_lazy = 0; | ||
| 3390 | ACCESS_ONCE(rdp->qlen) = 0; | ||
| 3391 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 3533 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
| 3392 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); | 3534 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); |
| 3393 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 3535 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
| @@ -3444,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 3444 | rdp->gpnum = rnp->completed; | 3586 | rdp->gpnum = rnp->completed; |
| 3445 | rdp->completed = rnp->completed; | 3587 | rdp->completed = rnp->completed; |
| 3446 | rdp->passed_quiesce = 0; | 3588 | rdp->passed_quiesce = 0; |
| 3589 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); | ||
| 3447 | rdp->qs_pending = 0; | 3590 | rdp->qs_pending = 0; |
| 3448 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); | 3591 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); |
| 3449 | } | 3592 | } |
| @@ -3535,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self, | |||
| 3535 | static int __init rcu_spawn_gp_kthread(void) | 3678 | static int __init rcu_spawn_gp_kthread(void) |
| 3536 | { | 3679 | { |
| 3537 | unsigned long flags; | 3680 | unsigned long flags; |
| 3681 | int kthread_prio_in = kthread_prio; | ||
| 3538 | struct rcu_node *rnp; | 3682 | struct rcu_node *rnp; |
| 3539 | struct rcu_state *rsp; | 3683 | struct rcu_state *rsp; |
| 3684 | struct sched_param sp; | ||
| 3540 | struct task_struct *t; | 3685 | struct task_struct *t; |
| 3541 | 3686 | ||
| 3687 | /* Force priority into range. */ | ||
| 3688 | if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) | ||
| 3689 | kthread_prio = 1; | ||
| 3690 | else if (kthread_prio < 0) | ||
| 3691 | kthread_prio = 0; | ||
| 3692 | else if (kthread_prio > 99) | ||
| 3693 | kthread_prio = 99; | ||
| 3694 | if (kthread_prio != kthread_prio_in) | ||
| 3695 | pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", | ||
| 3696 | kthread_prio, kthread_prio_in); | ||
| 3697 | |||
| 3542 | rcu_scheduler_fully_active = 1; | 3698 | rcu_scheduler_fully_active = 1; |
| 3543 | for_each_rcu_flavor(rsp) { | 3699 | for_each_rcu_flavor(rsp) { |
| 3544 | t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); | 3700 | t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); |
| 3545 | BUG_ON(IS_ERR(t)); | 3701 | BUG_ON(IS_ERR(t)); |
| 3546 | rnp = rcu_get_root(rsp); | 3702 | rnp = rcu_get_root(rsp); |
| 3547 | raw_spin_lock_irqsave(&rnp->lock, flags); | 3703 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 3548 | rsp->gp_kthread = t; | 3704 | rsp->gp_kthread = t; |
| 3705 | if (kthread_prio) { | ||
| 3706 | sp.sched_priority = kthread_prio; | ||
| 3707 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 3708 | } | ||
| 3709 | wake_up_process(t); | ||
| 3549 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3710 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 3550 | } | 3711 | } |
| 3551 | rcu_spawn_nocb_kthreads(); | 3712 | rcu_spawn_nocb_kthreads(); |
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8e7b1843896e..119de399eb2f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h | |||
| @@ -27,7 +27,6 @@ | |||
| 27 | #include <linux/threads.h> | 27 | #include <linux/threads.h> |
| 28 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
| 29 | #include <linux/seqlock.h> | 29 | #include <linux/seqlock.h> |
| 30 | #include <linux/irq_work.h> | ||
| 31 | 30 | ||
| 32 | /* | 31 | /* |
| 33 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and | 32 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and |
| @@ -172,11 +171,6 @@ struct rcu_node { | |||
| 172 | /* queued on this rcu_node structure that */ | 171 | /* queued on this rcu_node structure that */ |
| 173 | /* are blocking the current grace period, */ | 172 | /* are blocking the current grace period, */ |
| 174 | /* there can be no such task. */ | 173 | /* there can be no such task. */ |
| 175 | struct completion boost_completion; | ||
| 176 | /* Used to ensure that the rt_mutex used */ | ||
| 177 | /* to carry out the boosting is fully */ | ||
| 178 | /* released with no future boostee accesses */ | ||
| 179 | /* before that rt_mutex is re-initialized. */ | ||
| 180 | struct rt_mutex boost_mtx; | 174 | struct rt_mutex boost_mtx; |
| 181 | /* Used only for the priority-boosting */ | 175 | /* Used only for the priority-boosting */ |
| 182 | /* side effect, not as a lock. */ | 176 | /* side effect, not as a lock. */ |
| @@ -257,9 +251,12 @@ struct rcu_data { | |||
| 257 | /* in order to detect GP end. */ | 251 | /* in order to detect GP end. */ |
| 258 | unsigned long gpnum; /* Highest gp number that this CPU */ | 252 | unsigned long gpnum; /* Highest gp number that this CPU */ |
| 259 | /* is aware of having started. */ | 253 | /* is aware of having started. */ |
| 254 | unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ | ||
| 255 | /* for rcu_all_qs() invocations. */ | ||
| 260 | bool passed_quiesce; /* User-mode/idle loop etc. */ | 256 | bool passed_quiesce; /* User-mode/idle loop etc. */ |
| 261 | bool qs_pending; /* Core waits for quiesc state. */ | 257 | bool qs_pending; /* Core waits for quiesc state. */ |
| 262 | bool beenonline; /* CPU online at least once. */ | 258 | bool beenonline; /* CPU online at least once. */ |
| 259 | bool gpwrap; /* Possible gpnum/completed wrap. */ | ||
| 263 | struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ | 260 | struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ |
| 264 | unsigned long grpmask; /* Mask to apply to leaf qsmask. */ | 261 | unsigned long grpmask; /* Mask to apply to leaf qsmask. */ |
| 265 | #ifdef CONFIG_RCU_CPU_STALL_INFO | 262 | #ifdef CONFIG_RCU_CPU_STALL_INFO |
| @@ -340,14 +337,10 @@ struct rcu_data { | |||
| 340 | #ifdef CONFIG_RCU_NOCB_CPU | 337 | #ifdef CONFIG_RCU_NOCB_CPU |
| 341 | struct rcu_head *nocb_head; /* CBs waiting for kthread. */ | 338 | struct rcu_head *nocb_head; /* CBs waiting for kthread. */ |
| 342 | struct rcu_head **nocb_tail; | 339 | struct rcu_head **nocb_tail; |
| 343 | atomic_long_t nocb_q_count; /* # CBs waiting for kthread */ | 340 | atomic_long_t nocb_q_count; /* # CBs waiting for nocb */ |
| 344 | atomic_long_t nocb_q_count_lazy; /* (approximate). */ | 341 | atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */ |
| 345 | struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */ | 342 | struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */ |
| 346 | struct rcu_head **nocb_follower_tail; | 343 | struct rcu_head **nocb_follower_tail; |
| 347 | atomic_long_t nocb_follower_count; /* # CBs ready to invoke. */ | ||
| 348 | atomic_long_t nocb_follower_count_lazy; /* (approximate). */ | ||
| 349 | int nocb_p_count; /* # CBs being invoked by kthread */ | ||
| 350 | int nocb_p_count_lazy; /* (approximate). */ | ||
| 351 | wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ | 344 | wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ |
| 352 | struct task_struct *nocb_kthread; | 345 | struct task_struct *nocb_kthread; |
| 353 | int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ | 346 | int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ |
| @@ -356,8 +349,6 @@ struct rcu_data { | |||
| 356 | struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; | 349 | struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; |
| 357 | /* CBs waiting for GP. */ | 350 | /* CBs waiting for GP. */ |
| 358 | struct rcu_head **nocb_gp_tail; | 351 | struct rcu_head **nocb_gp_tail; |
| 359 | long nocb_gp_count; | ||
| 360 | long nocb_gp_count_lazy; | ||
| 361 | bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */ | 352 | bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */ |
| 362 | struct rcu_data *nocb_next_follower; | 353 | struct rcu_data *nocb_next_follower; |
| 363 | /* Next follower in wakeup chain. */ | 354 | /* Next follower in wakeup chain. */ |
| @@ -488,10 +479,14 @@ struct rcu_state { | |||
| 488 | /* due to no GP active. */ | 479 | /* due to no GP active. */ |
| 489 | unsigned long gp_start; /* Time at which GP started, */ | 480 | unsigned long gp_start; /* Time at which GP started, */ |
| 490 | /* but in jiffies. */ | 481 | /* but in jiffies. */ |
| 482 | unsigned long gp_activity; /* Time of last GP kthread */ | ||
| 483 | /* activity in jiffies. */ | ||
| 491 | unsigned long jiffies_stall; /* Time at which to check */ | 484 | unsigned long jiffies_stall; /* Time at which to check */ |
| 492 | /* for CPU stalls. */ | 485 | /* for CPU stalls. */ |
| 493 | unsigned long jiffies_resched; /* Time at which to resched */ | 486 | unsigned long jiffies_resched; /* Time at which to resched */ |
| 494 | /* a reluctant CPU. */ | 487 | /* a reluctant CPU. */ |
| 488 | unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */ | ||
| 489 | /* GP start. */ | ||
| 495 | unsigned long gp_max; /* Maximum GP duration in */ | 490 | unsigned long gp_max; /* Maximum GP duration in */ |
| 496 | /* jiffies. */ | 491 | /* jiffies. */ |
| 497 | const char *name; /* Name of structure. */ | 492 | const char *name; /* Name of structure. */ |
| @@ -514,13 +509,6 @@ extern struct list_head rcu_struct_flavors; | |||
| 514 | #define for_each_rcu_flavor(rsp) \ | 509 | #define for_each_rcu_flavor(rsp) \ |
| 515 | list_for_each_entry((rsp), &rcu_struct_flavors, flavors) | 510 | list_for_each_entry((rsp), &rcu_struct_flavors, flavors) |
| 516 | 511 | ||
| 517 | /* Return values for rcu_preempt_offline_tasks(). */ | ||
| 518 | |||
| 519 | #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ | ||
| 520 | /* GP were moved to root. */ | ||
| 521 | #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ | ||
| 522 | /* GP were moved to root. */ | ||
| 523 | |||
| 524 | /* | 512 | /* |
| 525 | * RCU implementation internal declarations: | 513 | * RCU implementation internal declarations: |
| 526 | */ | 514 | */ |
| @@ -546,27 +534,16 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); | |||
| 546 | 534 | ||
| 547 | /* Forward declarations for rcutree_plugin.h */ | 535 | /* Forward declarations for rcutree_plugin.h */ |
| 548 | static void rcu_bootup_announce(void); | 536 | static void rcu_bootup_announce(void); |
| 549 | long rcu_batches_completed(void); | ||
| 550 | static void rcu_preempt_note_context_switch(void); | 537 | static void rcu_preempt_note_context_switch(void); |
| 551 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | 538 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); |
| 552 | #ifdef CONFIG_HOTPLUG_CPU | 539 | #ifdef CONFIG_HOTPLUG_CPU |
| 553 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 540 | static bool rcu_preempt_has_tasks(struct rcu_node *rnp); |
| 554 | unsigned long flags); | ||
| 555 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 541 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 556 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 542 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
| 557 | static int rcu_print_task_stall(struct rcu_node *rnp); | 543 | static int rcu_print_task_stall(struct rcu_node *rnp); |
| 558 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 544 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
| 559 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 560 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | ||
| 561 | struct rcu_node *rnp, | ||
| 562 | struct rcu_data *rdp); | ||
| 563 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 564 | static void rcu_preempt_check_callbacks(void); | 545 | static void rcu_preempt_check_callbacks(void); |
| 565 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 546 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
| 566 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) | ||
| 567 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | ||
| 568 | bool wake); | ||
| 569 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) */ | ||
| 570 | static void __init __rcu_init_preempt(void); | 547 | static void __init __rcu_init_preempt(void); |
| 571 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 548 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
| 572 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 549 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
| @@ -622,24 +599,15 @@ static void rcu_dynticks_task_exit(void); | |||
| 622 | #endif /* #ifndef RCU_TREE_NONCORE */ | 599 | #endif /* #ifndef RCU_TREE_NONCORE */ |
| 623 | 600 | ||
| 624 | #ifdef CONFIG_RCU_TRACE | 601 | #ifdef CONFIG_RCU_TRACE |
| 625 | #ifdef CONFIG_RCU_NOCB_CPU | 602 | /* Read out queue lengths for tracing. */ |
| 626 | /* Sum up queue lengths for tracing. */ | ||
| 627 | static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) | 603 | static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) |
| 628 | { | 604 | { |
| 629 | *ql = atomic_long_read(&rdp->nocb_q_count) + | 605 | #ifdef CONFIG_RCU_NOCB_CPU |
| 630 | rdp->nocb_p_count + | 606 | *ql = atomic_long_read(&rdp->nocb_q_count); |
| 631 | atomic_long_read(&rdp->nocb_follower_count) + | 607 | *qll = atomic_long_read(&rdp->nocb_q_count_lazy); |
| 632 | rdp->nocb_p_count + rdp->nocb_gp_count; | ||
| 633 | *qll = atomic_long_read(&rdp->nocb_q_count_lazy) + | ||
| 634 | rdp->nocb_p_count_lazy + | ||
| 635 | atomic_long_read(&rdp->nocb_follower_count_lazy) + | ||
| 636 | rdp->nocb_p_count_lazy + rdp->nocb_gp_count_lazy; | ||
| 637 | } | ||
| 638 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ | 608 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ |
| 639 | static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) | ||
| 640 | { | ||
| 641 | *ql = 0; | 609 | *ql = 0; |
| 642 | *qll = 0; | 610 | *qll = 0; |
| 643 | } | ||
| 644 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ | 611 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ |
| 612 | } | ||
| 645 | #endif /* #ifdef CONFIG_RCU_TRACE */ | 613 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3ec85cb5d544..0a571e9a0f1d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
| @@ -34,10 +34,6 @@ | |||
| 34 | 34 | ||
| 35 | #include "../locking/rtmutex_common.h" | 35 | #include "../locking/rtmutex_common.h" |
| 36 | 36 | ||
| 37 | /* rcuc/rcub kthread realtime priority */ | ||
| 38 | static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; | ||
| 39 | module_param(kthread_prio, int, 0644); | ||
| 40 | |||
| 41 | /* | 37 | /* |
| 42 | * Control variables for per-CPU and per-rcu_node kthreads. These | 38 | * Control variables for per-CPU and per-rcu_node kthreads. These |
| 43 | * handle all flavors of RCU. | 39 | * handle all flavors of RCU. |
| @@ -53,7 +49,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); | |||
| 53 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | 49 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ |
| 54 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | 50 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ |
| 55 | static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ | 51 | static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ |
| 56 | static char __initdata nocb_buf[NR_CPUS * 5]; | ||
| 57 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | 52 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ |
| 58 | 53 | ||
| 59 | /* | 54 | /* |
| @@ -103,6 +98,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); | |||
| 103 | static struct rcu_state *rcu_state_p = &rcu_preempt_state; | 98 | static struct rcu_state *rcu_state_p = &rcu_preempt_state; |
| 104 | 99 | ||
| 105 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | 100 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); |
| 101 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | ||
| 102 | bool wake); | ||
| 106 | 103 | ||
| 107 | /* | 104 | /* |
| 108 | * Tell them what RCU they are running. | 105 | * Tell them what RCU they are running. |
| @@ -114,25 +111,6 @@ static void __init rcu_bootup_announce(void) | |||
| 114 | } | 111 | } |
| 115 | 112 | ||
| 116 | /* | 113 | /* |
| 117 | * Return the number of RCU-preempt batches processed thus far | ||
| 118 | * for debug and statistics. | ||
| 119 | */ | ||
| 120 | static long rcu_batches_completed_preempt(void) | ||
| 121 | { | ||
| 122 | return rcu_preempt_state.completed; | ||
| 123 | } | ||
| 124 | EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Return the number of RCU batches processed thus far for debug & stats. | ||
| 128 | */ | ||
| 129 | long rcu_batches_completed(void) | ||
| 130 | { | ||
| 131 | return rcu_batches_completed_preempt(); | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | ||
| 134 | |||
| 135 | /* | ||
| 136 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | 114 | * Record a preemptible-RCU quiescent state for the specified CPU. Note |
| 137 | * that this just means that the task currently running on the CPU is | 115 | * that this just means that the task currently running on the CPU is |
| 138 | * not in a quiescent state. There might be any number of tasks blocked | 116 | * not in a quiescent state. There might be any number of tasks blocked |
| @@ -307,15 +285,25 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, | |||
| 307 | } | 285 | } |
| 308 | 286 | ||
| 309 | /* | 287 | /* |
| 288 | * Return true if the specified rcu_node structure has tasks that were | ||
| 289 | * preempted within an RCU read-side critical section. | ||
| 290 | */ | ||
| 291 | static bool rcu_preempt_has_tasks(struct rcu_node *rnp) | ||
| 292 | { | ||
| 293 | return !list_empty(&rnp->blkd_tasks); | ||
| 294 | } | ||
| 295 | |||
| 296 | /* | ||
| 310 | * Handle special cases during rcu_read_unlock(), such as needing to | 297 | * Handle special cases during rcu_read_unlock(), such as needing to |
| 311 | * notify RCU core processing or task having blocked during the RCU | 298 | * notify RCU core processing or task having blocked during the RCU |
| 312 | * read-side critical section. | 299 | * read-side critical section. |
| 313 | */ | 300 | */ |
| 314 | void rcu_read_unlock_special(struct task_struct *t) | 301 | void rcu_read_unlock_special(struct task_struct *t) |
| 315 | { | 302 | { |
| 316 | int empty; | 303 | bool empty; |
| 317 | int empty_exp; | 304 | bool empty_exp; |
| 318 | int empty_exp_now; | 305 | bool empty_norm; |
| 306 | bool empty_exp_now; | ||
| 319 | unsigned long flags; | 307 | unsigned long flags; |
| 320 | struct list_head *np; | 308 | struct list_head *np; |
| 321 | #ifdef CONFIG_RCU_BOOST | 309 | #ifdef CONFIG_RCU_BOOST |
| @@ -338,6 +326,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
| 338 | special = t->rcu_read_unlock_special; | 326 | special = t->rcu_read_unlock_special; |
| 339 | if (special.b.need_qs) { | 327 | if (special.b.need_qs) { |
| 340 | rcu_preempt_qs(); | 328 | rcu_preempt_qs(); |
| 329 | t->rcu_read_unlock_special.b.need_qs = false; | ||
| 341 | if (!t->rcu_read_unlock_special.s) { | 330 | if (!t->rcu_read_unlock_special.s) { |
| 342 | local_irq_restore(flags); | 331 | local_irq_restore(flags); |
| 343 | return; | 332 | return; |
| @@ -367,7 +356,8 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
| 367 | break; | 356 | break; |
| 368 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 357 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 369 | } | 358 | } |
| 370 | empty = !rcu_preempt_blocked_readers_cgp(rnp); | 359 | empty = !rcu_preempt_has_tasks(rnp); |
| 360 | empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); | ||
| 371 | empty_exp = !rcu_preempted_readers_exp(rnp); | 361 | empty_exp = !rcu_preempted_readers_exp(rnp); |
| 372 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 362 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
| 373 | np = rcu_next_node_entry(t, rnp); | 363 | np = rcu_next_node_entry(t, rnp); |
| @@ -387,13 +377,21 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
| 387 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 377 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 388 | 378 | ||
| 389 | /* | 379 | /* |
| 380 | * If this was the last task on the list, go see if we | ||
| 381 | * need to propagate ->qsmaskinit bit clearing up the | ||
| 382 | * rcu_node tree. | ||
| 383 | */ | ||
| 384 | if (!empty && !rcu_preempt_has_tasks(rnp)) | ||
| 385 | rcu_cleanup_dead_rnp(rnp); | ||
| 386 | |||
| 387 | /* | ||
| 390 | * If this was the last task on the current list, and if | 388 | * If this was the last task on the current list, and if |
| 391 | * we aren't waiting on any CPUs, report the quiescent state. | 389 | * we aren't waiting on any CPUs, report the quiescent state. |
| 392 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, | 390 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, |
| 393 | * so we must take a snapshot of the expedited state. | 391 | * so we must take a snapshot of the expedited state. |
| 394 | */ | 392 | */ |
| 395 | empty_exp_now = !rcu_preempted_readers_exp(rnp); | 393 | empty_exp_now = !rcu_preempted_readers_exp(rnp); |
| 396 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { | 394 | if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { |
| 397 | trace_rcu_quiescent_state_report(TPS("preempt_rcu"), | 395 | trace_rcu_quiescent_state_report(TPS("preempt_rcu"), |
| 398 | rnp->gpnum, | 396 | rnp->gpnum, |
| 399 | 0, rnp->qsmask, | 397 | 0, rnp->qsmask, |
| @@ -408,10 +406,8 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
| 408 | 406 | ||
| 409 | #ifdef CONFIG_RCU_BOOST | 407 | #ifdef CONFIG_RCU_BOOST |
| 410 | /* Unboost if we were boosted. */ | 408 | /* Unboost if we were boosted. */ |
| 411 | if (drop_boost_mutex) { | 409 | if (drop_boost_mutex) |
| 412 | rt_mutex_unlock(&rnp->boost_mtx); | 410 | rt_mutex_unlock(&rnp->boost_mtx); |
| 413 | complete(&rnp->boost_completion); | ||
| 414 | } | ||
| 415 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 411 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 416 | 412 | ||
| 417 | /* | 413 | /* |
| @@ -519,99 +515,13 @@ static int rcu_print_task_stall(struct rcu_node *rnp) | |||
| 519 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 515 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
| 520 | { | 516 | { |
| 521 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); | 517 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); |
| 522 | if (!list_empty(&rnp->blkd_tasks)) | 518 | if (rcu_preempt_has_tasks(rnp)) |
| 523 | rnp->gp_tasks = rnp->blkd_tasks.next; | 519 | rnp->gp_tasks = rnp->blkd_tasks.next; |
| 524 | WARN_ON_ONCE(rnp->qsmask); | 520 | WARN_ON_ONCE(rnp->qsmask); |
| 525 | } | 521 | } |
| 526 | 522 | ||
| 527 | #ifdef CONFIG_HOTPLUG_CPU | 523 | #ifdef CONFIG_HOTPLUG_CPU |
| 528 | 524 | ||
| 529 | /* | ||
| 530 | * Handle tasklist migration for case in which all CPUs covered by the | ||
| 531 | * specified rcu_node have gone offline. Move them up to the root | ||
| 532 | * rcu_node. The reason for not just moving them to the immediate | ||
| 533 | * parent is to remove the need for rcu_read_unlock_special() to | ||
| 534 | * make more than two attempts to acquire the target rcu_node's lock. | ||
| 535 | * Returns true if there were tasks blocking the current RCU grace | ||
| 536 | * period. | ||
| 537 | * | ||
| 538 | * Returns 1 if there was previously a task blocking the current grace | ||
| 539 | * period on the specified rcu_node structure. | ||
| 540 | * | ||
| 541 | * The caller must hold rnp->lock with irqs disabled. | ||
| 542 | */ | ||
| 543 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | ||
| 544 | struct rcu_node *rnp, | ||
| 545 | struct rcu_data *rdp) | ||
| 546 | { | ||
| 547 | struct list_head *lp; | ||
| 548 | struct list_head *lp_root; | ||
| 549 | int retval = 0; | ||
| 550 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
| 551 | struct task_struct *t; | ||
| 552 | |||
| 553 | if (rnp == rnp_root) { | ||
| 554 | WARN_ONCE(1, "Last CPU thought to be offlined?"); | ||
| 555 | return 0; /* Shouldn't happen: at least one CPU online. */ | ||
| 556 | } | ||
| 557 | |||
| 558 | /* If we are on an internal node, complain bitterly. */ | ||
| 559 | WARN_ON_ONCE(rnp != rdp->mynode); | ||
| 560 | |||
| 561 | /* | ||
| 562 | * Move tasks up to root rcu_node. Don't try to get fancy for | ||
| 563 | * this corner-case operation -- just put this node's tasks | ||
| 564 | * at the head of the root node's list, and update the root node's | ||
| 565 | * ->gp_tasks and ->exp_tasks pointers to those of this node's, | ||
| 566 | * if non-NULL. This might result in waiting for more tasks than | ||
| 567 | * absolutely necessary, but this is a good performance/complexity | ||
| 568 | * tradeoff. | ||
| 569 | */ | ||
| 570 | if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) | ||
| 571 | retval |= RCU_OFL_TASKS_NORM_GP; | ||
| 572 | if (rcu_preempted_readers_exp(rnp)) | ||
| 573 | retval |= RCU_OFL_TASKS_EXP_GP; | ||
| 574 | lp = &rnp->blkd_tasks; | ||
| 575 | lp_root = &rnp_root->blkd_tasks; | ||
| 576 | while (!list_empty(lp)) { | ||
| 577 | t = list_entry(lp->next, typeof(*t), rcu_node_entry); | ||
| 578 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | ||
| 579 | smp_mb__after_unlock_lock(); | ||
| 580 | list_del(&t->rcu_node_entry); | ||
| 581 | t->rcu_blocked_node = rnp_root; | ||
| 582 | list_add(&t->rcu_node_entry, lp_root); | ||
| 583 | if (&t->rcu_node_entry == rnp->gp_tasks) | ||
| 584 | rnp_root->gp_tasks = rnp->gp_tasks; | ||
| 585 | if (&t->rcu_node_entry == rnp->exp_tasks) | ||
| 586 | rnp_root->exp_tasks = rnp->exp_tasks; | ||
| 587 | #ifdef CONFIG_RCU_BOOST | ||
| 588 | if (&t->rcu_node_entry == rnp->boost_tasks) | ||
| 589 | rnp_root->boost_tasks = rnp->boost_tasks; | ||
| 590 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 591 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | ||
| 592 | } | ||
| 593 | |||
| 594 | rnp->gp_tasks = NULL; | ||
| 595 | rnp->exp_tasks = NULL; | ||
| 596 | #ifdef CONFIG_RCU_BOOST | ||
| 597 | rnp->boost_tasks = NULL; | ||
| 598 | /* | ||
| 599 | * In case root is being boosted and leaf was not. Make sure | ||
| 600 | * that we boost the tasks blocking the current grace period | ||
| 601 | * in this case. | ||
| 602 | */ | ||
| 603 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | ||
| 604 | smp_mb__after_unlock_lock(); | ||
| 605 | if (rnp_root->boost_tasks != NULL && | ||
| 606 | rnp_root->boost_tasks != rnp_root->gp_tasks && | ||
| 607 | rnp_root->boost_tasks != rnp_root->exp_tasks) | ||
| 608 | rnp_root->boost_tasks = rnp_root->gp_tasks; | ||
| 609 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | ||
| 610 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 611 | |||
| 612 | return retval; | ||
| 613 | } | ||
| 614 | |||
| 615 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 525 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 616 | 526 | ||
| 617 | /* | 527 | /* |
| @@ -771,7 +681,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
| 771 | 681 | ||
| 772 | raw_spin_lock_irqsave(&rnp->lock, flags); | 682 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 773 | smp_mb__after_unlock_lock(); | 683 | smp_mb__after_unlock_lock(); |
| 774 | if (list_empty(&rnp->blkd_tasks)) { | 684 | if (!rcu_preempt_has_tasks(rnp)) { |
| 775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 685 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 776 | } else { | 686 | } else { |
| 777 | rnp->exp_tasks = rnp->blkd_tasks.next; | 687 | rnp->exp_tasks = rnp->blkd_tasks.next; |
| @@ -933,15 +843,6 @@ static void __init rcu_bootup_announce(void) | |||
| 933 | } | 843 | } |
| 934 | 844 | ||
| 935 | /* | 845 | /* |
| 936 | * Return the number of RCU batches processed thus far for debug & stats. | ||
| 937 | */ | ||
| 938 | long rcu_batches_completed(void) | ||
| 939 | { | ||
| 940 | return rcu_batches_completed_sched(); | ||
| 941 | } | ||
| 942 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | ||
| 943 | |||
| 944 | /* | ||
| 945 | * Because preemptible RCU does not exist, we never have to check for | 846 | * Because preemptible RCU does not exist, we never have to check for |
| 946 | * CPUs being in quiescent states. | 847 | * CPUs being in quiescent states. |
| 947 | */ | 848 | */ |
| @@ -960,11 +861,12 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) | |||
| 960 | 861 | ||
| 961 | #ifdef CONFIG_HOTPLUG_CPU | 862 | #ifdef CONFIG_HOTPLUG_CPU |
| 962 | 863 | ||
| 963 | /* Because preemptible RCU does not exist, no quieting of tasks. */ | 864 | /* |
| 964 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | 865 | * Because there is no preemptible RCU, there can be no readers blocked. |
| 965 | __releases(rnp->lock) | 866 | */ |
| 867 | static bool rcu_preempt_has_tasks(struct rcu_node *rnp) | ||
| 966 | { | 868 | { |
| 967 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 869 | return false; |
| 968 | } | 870 | } |
| 969 | 871 | ||
| 970 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 872 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
| @@ -996,23 +898,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | |||
| 996 | WARN_ON_ONCE(rnp->qsmask); | 898 | WARN_ON_ONCE(rnp->qsmask); |
| 997 | } | 899 | } |
| 998 | 900 | ||
| 999 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1000 | |||
| 1001 | /* | ||
| 1002 | * Because preemptible RCU does not exist, it never needs to migrate | ||
| 1003 | * tasks that were blocked within RCU read-side critical sections, and | ||
| 1004 | * such non-existent tasks cannot possibly have been blocking the current | ||
| 1005 | * grace period. | ||
| 1006 | */ | ||
| 1007 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | ||
| 1008 | struct rcu_node *rnp, | ||
| 1009 | struct rcu_data *rdp) | ||
| 1010 | { | ||
| 1011 | return 0; | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1015 | |||
| 1016 | /* | 901 | /* |
| 1017 | * Because preemptible RCU does not exist, it never has any callbacks | 902 | * Because preemptible RCU does not exist, it never has any callbacks |
| 1018 | * to check. | 903 | * to check. |
| @@ -1031,20 +916,6 @@ void synchronize_rcu_expedited(void) | |||
| 1031 | } | 916 | } |
| 1032 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 917 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
| 1033 | 918 | ||
| 1034 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1035 | |||
| 1036 | /* | ||
| 1037 | * Because preemptible RCU does not exist, there is never any need to | ||
| 1038 | * report on tasks preempted in RCU read-side critical sections during | ||
| 1039 | * expedited RCU grace periods. | ||
| 1040 | */ | ||
| 1041 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | ||
| 1042 | bool wake) | ||
| 1043 | { | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1047 | |||
| 1048 | /* | 919 | /* |
| 1049 | * Because preemptible RCU does not exist, rcu_barrier() is just | 920 | * Because preemptible RCU does not exist, rcu_barrier() is just |
| 1050 | * another name for rcu_barrier_sched(). | 921 | * another name for rcu_barrier_sched(). |
| @@ -1080,7 +951,7 @@ void exit_rcu(void) | |||
| 1080 | 951 | ||
| 1081 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) | 952 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) |
| 1082 | { | 953 | { |
| 1083 | if (list_empty(&rnp->blkd_tasks)) | 954 | if (!rcu_preempt_has_tasks(rnp)) |
| 1084 | rnp->n_balk_blkd_tasks++; | 955 | rnp->n_balk_blkd_tasks++; |
| 1085 | else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) | 956 | else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) |
| 1086 | rnp->n_balk_exp_gp_tasks++; | 957 | rnp->n_balk_exp_gp_tasks++; |
| @@ -1127,7 +998,8 @@ static int rcu_boost(struct rcu_node *rnp) | |||
| 1127 | struct task_struct *t; | 998 | struct task_struct *t; |
| 1128 | struct list_head *tb; | 999 | struct list_head *tb; |
| 1129 | 1000 | ||
| 1130 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) | 1001 | if (ACCESS_ONCE(rnp->exp_tasks) == NULL && |
| 1002 | ACCESS_ONCE(rnp->boost_tasks) == NULL) | ||
| 1131 | return 0; /* Nothing left to boost. */ | 1003 | return 0; /* Nothing left to boost. */ |
| 1132 | 1004 | ||
| 1133 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1005 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| @@ -1175,15 +1047,11 @@ static int rcu_boost(struct rcu_node *rnp) | |||
| 1175 | */ | 1047 | */ |
| 1176 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1048 | t = container_of(tb, struct task_struct, rcu_node_entry); |
| 1177 | rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); | 1049 | rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); |
| 1178 | init_completion(&rnp->boost_completion); | ||
| 1179 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1050 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1180 | /* Lock only for side effect: boosts task t's priority. */ | 1051 | /* Lock only for side effect: boosts task t's priority. */ |
| 1181 | rt_mutex_lock(&rnp->boost_mtx); | 1052 | rt_mutex_lock(&rnp->boost_mtx); |
| 1182 | rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ | 1053 | rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ |
| 1183 | 1054 | ||
| 1184 | /* Wait for boostee to be done w/boost_mtx before reinitializing. */ | ||
| 1185 | wait_for_completion(&rnp->boost_completion); | ||
| 1186 | |||
| 1187 | return ACCESS_ONCE(rnp->exp_tasks) != NULL || | 1055 | return ACCESS_ONCE(rnp->exp_tasks) != NULL || |
| 1188 | ACCESS_ONCE(rnp->boost_tasks) != NULL; | 1056 | ACCESS_ONCE(rnp->boost_tasks) != NULL; |
| 1189 | } | 1057 | } |
| @@ -1416,12 +1284,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |||
| 1416 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) | 1284 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) |
| 1417 | if ((mask & 0x1) && cpu != outgoingcpu) | 1285 | if ((mask & 0x1) && cpu != outgoingcpu) |
| 1418 | cpumask_set_cpu(cpu, cm); | 1286 | cpumask_set_cpu(cpu, cm); |
| 1419 | if (cpumask_weight(cm) == 0) { | 1287 | if (cpumask_weight(cm) == 0) |
| 1420 | cpumask_setall(cm); | 1288 | cpumask_setall(cm); |
| 1421 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) | ||
| 1422 | cpumask_clear_cpu(cpu, cm); | ||
| 1423 | WARN_ON_ONCE(cpumask_weight(cm) == 0); | ||
| 1424 | } | ||
| 1425 | set_cpus_allowed_ptr(t, cm); | 1289 | set_cpus_allowed_ptr(t, cm); |
| 1426 | free_cpumask_var(cm); | 1290 | free_cpumask_var(cm); |
| 1427 | } | 1291 | } |
| @@ -1446,12 +1310,8 @@ static void __init rcu_spawn_boost_kthreads(void) | |||
| 1446 | for_each_possible_cpu(cpu) | 1310 | for_each_possible_cpu(cpu) |
| 1447 | per_cpu(rcu_cpu_has_work, cpu) = 0; | 1311 | per_cpu(rcu_cpu_has_work, cpu) = 0; |
| 1448 | BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | 1312 | BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); |
| 1449 | rnp = rcu_get_root(rcu_state_p); | 1313 | rcu_for_each_leaf_node(rcu_state_p, rnp) |
| 1450 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | 1314 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); |
| 1451 | if (NUM_RCU_NODES > 1) { | ||
| 1452 | rcu_for_each_leaf_node(rcu_state_p, rnp) | ||
| 1453 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | ||
| 1454 | } | ||
| 1455 | } | 1315 | } |
| 1456 | 1316 | ||
| 1457 | static void rcu_prepare_kthreads(int cpu) | 1317 | static void rcu_prepare_kthreads(int cpu) |
| @@ -1605,7 +1465,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |||
| 1605 | * completed since we last checked and there are | 1465 | * completed since we last checked and there are |
| 1606 | * callbacks not yet ready to invoke. | 1466 | * callbacks not yet ready to invoke. |
| 1607 | */ | 1467 | */ |
| 1608 | if (rdp->completed != rnp->completed && | 1468 | if ((rdp->completed != rnp->completed || |
| 1469 | unlikely(ACCESS_ONCE(rdp->gpwrap))) && | ||
| 1609 | rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) | 1470 | rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) |
| 1610 | note_gp_changes(rsp, rdp); | 1471 | note_gp_changes(rsp, rdp); |
| 1611 | 1472 | ||
| @@ -1898,11 +1759,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) | |||
| 1898 | ticks_value = rsp->gpnum - rdp->gpnum; | 1759 | ticks_value = rsp->gpnum - rdp->gpnum; |
| 1899 | } | 1760 | } |
| 1900 | print_cpu_stall_fast_no_hz(fast_no_hz, cpu); | 1761 | print_cpu_stall_fast_no_hz(fast_no_hz, cpu); |
| 1901 | pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", | 1762 | pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", |
| 1902 | cpu, ticks_value, ticks_title, | 1763 | cpu, ticks_value, ticks_title, |
| 1903 | atomic_read(&rdtp->dynticks) & 0xfff, | 1764 | atomic_read(&rdtp->dynticks) & 0xfff, |
| 1904 | rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, | 1765 | rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, |
| 1905 | rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), | 1766 | rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), |
| 1767 | ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, | ||
| 1906 | fast_no_hz); | 1768 | fast_no_hz); |
| 1907 | } | 1769 | } |
| 1908 | 1770 | ||
| @@ -2056,9 +1918,26 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) | |||
| 2056 | static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | 1918 | static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) |
| 2057 | { | 1919 | { |
| 2058 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1920 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1921 | unsigned long ret; | ||
| 1922 | #ifdef CONFIG_PROVE_RCU | ||
| 2059 | struct rcu_head *rhp; | 1923 | struct rcu_head *rhp; |
| 1924 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
| 2060 | 1925 | ||
| 2061 | /* No-CBs CPUs might have callbacks on any of three lists. */ | 1926 | /* |
| 1927 | * Check count of all no-CBs callbacks awaiting invocation. | ||
| 1928 | * There needs to be a barrier before this function is called, | ||
| 1929 | * but associated with a prior determination that no more | ||
| 1930 | * callbacks would be posted. In the worst case, the first | ||
| 1931 | * barrier in _rcu_barrier() suffices (but the caller cannot | ||
| 1932 | * necessarily rely on this, not a substitute for the caller | ||
| 1933 | * getting the concurrency design right!). There must also be | ||
| 1934 | * a barrier between the following load an posting of a callback | ||
| 1935 | * (if a callback is in fact needed). This is associated with an | ||
| 1936 | * atomic_inc() in the caller. | ||
| 1937 | */ | ||
| 1938 | ret = atomic_long_read(&rdp->nocb_q_count); | ||
| 1939 | |||
| 1940 | #ifdef CONFIG_PROVE_RCU | ||
| 2062 | rhp = ACCESS_ONCE(rdp->nocb_head); | 1941 | rhp = ACCESS_ONCE(rdp->nocb_head); |
| 2063 | if (!rhp) | 1942 | if (!rhp) |
| 2064 | rhp = ACCESS_ONCE(rdp->nocb_gp_head); | 1943 | rhp = ACCESS_ONCE(rdp->nocb_gp_head); |
| @@ -2072,8 +1951,9 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | |||
| 2072 | cpu, rhp->func); | 1951 | cpu, rhp->func); |
| 2073 | WARN_ON_ONCE(1); | 1952 | WARN_ON_ONCE(1); |
| 2074 | } | 1953 | } |
| 1954 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
| 2075 | 1955 | ||
| 2076 | return !!rhp; | 1956 | return !!ret; |
| 2077 | } | 1957 | } |
| 2078 | 1958 | ||
| 2079 | /* | 1959 | /* |
| @@ -2095,9 +1975,10 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, | |||
| 2095 | struct task_struct *t; | 1975 | struct task_struct *t; |
| 2096 | 1976 | ||
| 2097 | /* Enqueue the callback on the nocb list and update counts. */ | 1977 | /* Enqueue the callback on the nocb list and update counts. */ |
| 1978 | atomic_long_add(rhcount, &rdp->nocb_q_count); | ||
| 1979 | /* rcu_barrier() relies on ->nocb_q_count add before xchg. */ | ||
| 2098 | old_rhpp = xchg(&rdp->nocb_tail, rhtp); | 1980 | old_rhpp = xchg(&rdp->nocb_tail, rhtp); |
| 2099 | ACCESS_ONCE(*old_rhpp) = rhp; | 1981 | ACCESS_ONCE(*old_rhpp) = rhp; |
| 2100 | atomic_long_add(rhcount, &rdp->nocb_q_count); | ||
| 2101 | atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); | 1982 | atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); |
| 2102 | smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ | 1983 | smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ |
| 2103 | 1984 | ||
| @@ -2288,9 +2169,6 @@ wait_again: | |||
| 2288 | /* Move callbacks to wait-for-GP list, which is empty. */ | 2169 | /* Move callbacks to wait-for-GP list, which is empty. */ |
| 2289 | ACCESS_ONCE(rdp->nocb_head) = NULL; | 2170 | ACCESS_ONCE(rdp->nocb_head) = NULL; |
| 2290 | rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); | 2171 | rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); |
| 2291 | rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0); | ||
| 2292 | rdp->nocb_gp_count_lazy = | ||
| 2293 | atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); | ||
| 2294 | gotcbs = true; | 2172 | gotcbs = true; |
| 2295 | } | 2173 | } |
| 2296 | 2174 | ||
| @@ -2338,9 +2216,6 @@ wait_again: | |||
| 2338 | /* Append callbacks to follower's "done" list. */ | 2216 | /* Append callbacks to follower's "done" list. */ |
| 2339 | tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); | 2217 | tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); |
| 2340 | *tail = rdp->nocb_gp_head; | 2218 | *tail = rdp->nocb_gp_head; |
| 2341 | atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); | ||
| 2342 | atomic_long_add(rdp->nocb_gp_count_lazy, | ||
| 2343 | &rdp->nocb_follower_count_lazy); | ||
| 2344 | smp_mb__after_atomic(); /* Store *tail before wakeup. */ | 2219 | smp_mb__after_atomic(); /* Store *tail before wakeup. */ |
| 2345 | if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { | 2220 | if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { |
| 2346 | /* | 2221 | /* |
| @@ -2415,13 +2290,11 @@ static int rcu_nocb_kthread(void *arg) | |||
| 2415 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); | 2290 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); |
| 2416 | ACCESS_ONCE(rdp->nocb_follower_head) = NULL; | 2291 | ACCESS_ONCE(rdp->nocb_follower_head) = NULL; |
| 2417 | tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); | 2292 | tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); |
| 2418 | c = atomic_long_xchg(&rdp->nocb_follower_count, 0); | ||
| 2419 | cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0); | ||
| 2420 | rdp->nocb_p_count += c; | ||
| 2421 | rdp->nocb_p_count_lazy += cl; | ||
| 2422 | 2293 | ||
| 2423 | /* Each pass through the following loop invokes a callback. */ | 2294 | /* Each pass through the following loop invokes a callback. */ |
| 2424 | trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); | 2295 | trace_rcu_batch_start(rdp->rsp->name, |
| 2296 | atomic_long_read(&rdp->nocb_q_count_lazy), | ||
| 2297 | atomic_long_read(&rdp->nocb_q_count), -1); | ||
| 2425 | c = cl = 0; | 2298 | c = cl = 0; |
| 2426 | while (list) { | 2299 | while (list) { |
| 2427 | next = list->next; | 2300 | next = list->next; |
| @@ -2443,9 +2316,9 @@ static int rcu_nocb_kthread(void *arg) | |||
| 2443 | list = next; | 2316 | list = next; |
| 2444 | } | 2317 | } |
| 2445 | trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); | 2318 | trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); |
| 2446 | ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c; | 2319 | smp_mb__before_atomic(); /* _add after CB invocation. */ |
| 2447 | ACCESS_ONCE(rdp->nocb_p_count_lazy) = | 2320 | atomic_long_add(-c, &rdp->nocb_q_count); |
| 2448 | rdp->nocb_p_count_lazy - cl; | 2321 | atomic_long_add(-cl, &rdp->nocb_q_count_lazy); |
| 2449 | rdp->n_nocbs_invoked += c; | 2322 | rdp->n_nocbs_invoked += c; |
| 2450 | } | 2323 | } |
| 2451 | return 0; | 2324 | return 0; |
| @@ -2513,8 +2386,8 @@ void __init rcu_init_nohz(void) | |||
| 2513 | cpumask_and(rcu_nocb_mask, cpu_possible_mask, | 2386 | cpumask_and(rcu_nocb_mask, cpu_possible_mask, |
| 2514 | rcu_nocb_mask); | 2387 | rcu_nocb_mask); |
| 2515 | } | 2388 | } |
| 2516 | cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); | 2389 | pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n", |
| 2517 | pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); | 2390 | cpumask_pr_args(rcu_nocb_mask)); |
| 2518 | if (rcu_nocb_poll) | 2391 | if (rcu_nocb_poll) |
| 2519 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); | 2392 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); |
| 2520 | 2393 | ||
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 5cdc62e1beeb..fbb6240509ea 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c | |||
| @@ -46,6 +46,8 @@ | |||
| 46 | #define RCU_TREE_NONCORE | 46 | #define RCU_TREE_NONCORE |
| 47 | #include "tree.h" | 47 | #include "tree.h" |
| 48 | 48 | ||
| 49 | DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); | ||
| 50 | |||
| 49 | static int r_open(struct inode *inode, struct file *file, | 51 | static int r_open(struct inode *inode, struct file *file, |
| 50 | const struct seq_operations *op) | 52 | const struct seq_operations *op) |
| 51 | { | 53 | { |
| @@ -115,11 +117,13 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 115 | 117 | ||
| 116 | if (!rdp->beenonline) | 118 | if (!rdp->beenonline) |
| 117 | return; | 119 | return; |
| 118 | seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d", | 120 | seq_printf(m, "%3d%cc=%ld g=%ld pq=%d/%d qp=%d", |
| 119 | rdp->cpu, | 121 | rdp->cpu, |
| 120 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 122 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
| 121 | ulong2long(rdp->completed), ulong2long(rdp->gpnum), | 123 | ulong2long(rdp->completed), ulong2long(rdp->gpnum), |
| 122 | rdp->passed_quiesce, rdp->qs_pending); | 124 | rdp->passed_quiesce, |
| 125 | rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), | ||
| 126 | rdp->qs_pending); | ||
| 123 | seq_printf(m, " dt=%d/%llx/%d df=%lu", | 127 | seq_printf(m, " dt=%d/%llx/%d df=%lu", |
| 124 | atomic_read(&rdp->dynticks->dynticks), | 128 | atomic_read(&rdp->dynticks->dynticks), |
| 125 | rdp->dynticks->dynticks_nesting, | 129 | rdp->dynticks->dynticks_nesting, |
diff --git a/kernel/resource.c b/kernel/resource.c index 0bcebffc4e77..19f2357dfda3 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/device.h> | 22 | #include <linux/device.h> |
| 23 | #include <linux/pfn.h> | 23 | #include <linux/pfn.h> |
| 24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
| 25 | #include <linux/resource_ext.h> | ||
| 25 | #include <asm/io.h> | 26 | #include <asm/io.h> |
| 26 | 27 | ||
| 27 | 28 | ||
| @@ -1529,6 +1530,30 @@ int iomem_is_exclusive(u64 addr) | |||
| 1529 | return err; | 1530 | return err; |
| 1530 | } | 1531 | } |
| 1531 | 1532 | ||
| 1533 | struct resource_entry *resource_list_create_entry(struct resource *res, | ||
| 1534 | size_t extra_size) | ||
| 1535 | { | ||
| 1536 | struct resource_entry *entry; | ||
| 1537 | |||
| 1538 | entry = kzalloc(sizeof(*entry) + extra_size, GFP_KERNEL); | ||
| 1539 | if (entry) { | ||
| 1540 | INIT_LIST_HEAD(&entry->node); | ||
| 1541 | entry->res = res ? res : &entry->__res; | ||
| 1542 | } | ||
| 1543 | |||
| 1544 | return entry; | ||
| 1545 | } | ||
| 1546 | EXPORT_SYMBOL(resource_list_create_entry); | ||
| 1547 | |||
| 1548 | void resource_list_free(struct list_head *head) | ||
| 1549 | { | ||
| 1550 | struct resource_entry *entry, *tmp; | ||
| 1551 | |||
| 1552 | list_for_each_entry_safe(entry, tmp, head, node) | ||
| 1553 | resource_list_destroy_entry(entry); | ||
| 1554 | } | ||
| 1555 | EXPORT_SYMBOL(resource_list_free); | ||
| 1556 | |||
| 1532 | static int __init strict_iomem(char *str) | 1557 | static int __init strict_iomem(char *str) |
| 1533 | { | 1558 | { |
| 1534 | if (strstr(str, "relaxed")) | 1559 | if (strstr(str, "relaxed")) |
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index ab32b7b0db5c..46be87024875 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ifdef CONFIG_FUNCTION_TRACER | 1 | ifdef CONFIG_FUNCTION_TRACER |
| 2 | CFLAGS_REMOVE_clock.o = -pg | 2 | CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE) |
| 3 | endif | 3 | endif |
| 4 | 4 | ||
| 5 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 5 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 8a2e230fb86a..eae160dd669d 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
| @@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void) | |||
| 87 | * so we don't have to move tasks around upon policy change, | 87 | * so we don't have to move tasks around upon policy change, |
| 88 | * or flail around trying to allocate bandwidth on the fly. | 88 | * or flail around trying to allocate bandwidth on the fly. |
| 89 | * A bandwidth exception in __sched_setscheduler() allows | 89 | * A bandwidth exception in __sched_setscheduler() allows |
| 90 | * the policy change to proceed. Thereafter, task_group() | 90 | * the policy change to proceed. |
| 91 | * returns &root_task_group, so zero bandwidth is required. | ||
| 92 | */ | 91 | */ |
| 93 | free_rt_sched_group(tg); | 92 | free_rt_sched_group(tg); |
| 94 | tg->rt_se = root_task_group.rt_se; | 93 | tg->rt_se = root_task_group.rt_se; |
| @@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
| 115 | if (tg != &root_task_group) | 114 | if (tg != &root_task_group) |
| 116 | return false; | 115 | return false; |
| 117 | 116 | ||
| 118 | if (p->sched_class != &fair_sched_class) | ||
| 119 | return false; | ||
| 120 | |||
| 121 | /* | 117 | /* |
| 122 | * We can only assume the task group can't go away on us if | 118 | * We can only assume the task group can't go away on us if |
| 123 | * autogroup_move_group() can see us on ->thread_group list. | 119 | * autogroup_move_group() can see us on ->thread_group list. |
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c27e4f8f4879..c0a205101c23 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c | |||
| @@ -420,3 +420,16 @@ u64 local_clock(void) | |||
| 420 | 420 | ||
| 421 | EXPORT_SYMBOL_GPL(cpu_clock); | 421 | EXPORT_SYMBOL_GPL(cpu_clock); |
| 422 | EXPORT_SYMBOL_GPL(local_clock); | 422 | EXPORT_SYMBOL_GPL(local_clock); |
| 423 | |||
| 424 | /* | ||
| 425 | * Running clock - returns the time that has elapsed while a guest has been | ||
| 426 | * running. | ||
| 427 | * On a guest this value should be local_clock minus the time the guest was | ||
| 428 | * suspended by the hypervisor (for any reason). | ||
| 429 | * On bare metal this function should return the same as local_clock. | ||
| 430 | * Architectures and sub-architectures can override this. | ||
| 431 | */ | ||
| 432 | u64 __weak running_clock(void) | ||
| 433 | { | ||
| 434 | return local_clock(); | ||
| 435 | } | ||
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 607f852b4d04..8d0f35debf35 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c | |||
| @@ -268,6 +268,15 @@ bool try_wait_for_completion(struct completion *x) | |||
| 268 | unsigned long flags; | 268 | unsigned long flags; |
| 269 | int ret = 1; | 269 | int ret = 1; |
| 270 | 270 | ||
| 271 | /* | ||
| 272 | * Since x->done will need to be locked only | ||
| 273 | * in the non-blocking case, we check x->done | ||
| 274 | * first without taking the lock so we can | ||
| 275 | * return early in the blocking case. | ||
| 276 | */ | ||
| 277 | if (!READ_ONCE(x->done)) | ||
| 278 | return 0; | ||
| 279 | |||
| 271 | spin_lock_irqsave(&x->wait.lock, flags); | 280 | spin_lock_irqsave(&x->wait.lock, flags); |
| 272 | if (!x->done) | 281 | if (!x->done) |
| 273 | ret = 0; | 282 | ret = 0; |
| @@ -288,13 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion); | |||
| 288 | */ | 297 | */ |
| 289 | bool completion_done(struct completion *x) | 298 | bool completion_done(struct completion *x) |
| 290 | { | 299 | { |
| 291 | unsigned long flags; | 300 | if (!READ_ONCE(x->done)) |
| 292 | int ret = 1; | 301 | return false; |
| 293 | 302 | ||
| 294 | spin_lock_irqsave(&x->wait.lock, flags); | 303 | /* |
| 295 | if (!x->done) | 304 | * If ->done, we need to wait for complete() to release ->wait.lock |
| 296 | ret = 0; | 305 | * otherwise we can end up freeing the completion before complete() |
| 297 | spin_unlock_irqrestore(&x->wait.lock, flags); | 306 | * is done referencing it. |
| 298 | return ret; | 307 | * |
| 308 | * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders | ||
| 309 | * the loads of ->done and ->wait.lock such that we cannot observe | ||
| 310 | * the lock before complete() acquires it while observing the ->done | ||
| 311 | * after it's acquired the lock. | ||
| 312 | */ | ||
| 313 | smp_rmb(); | ||
| 314 | spin_unlock_wait(&x->wait.lock); | ||
| 315 | return true; | ||
| 299 | } | 316 | } |
| 300 | EXPORT_SYMBOL(completion_done); | 317 | EXPORT_SYMBOL(completion_done); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5eab11d4b747..f0f831e8a345 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq) | |||
| 119 | { | 119 | { |
| 120 | s64 delta; | 120 | s64 delta; |
| 121 | 121 | ||
| 122 | if (rq->skip_clock_update > 0) | 122 | lockdep_assert_held(&rq->lock); |
| 123 | |||
| 124 | if (rq->clock_skip_update & RQCF_ACT_SKIP) | ||
| 123 | return; | 125 | return; |
| 124 | 126 | ||
| 125 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; | 127 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
| @@ -305,66 +307,6 @@ __read_mostly int scheduler_running; | |||
| 305 | int sysctl_sched_rt_runtime = 950000; | 307 | int sysctl_sched_rt_runtime = 950000; |
| 306 | 308 | ||
| 307 | /* | 309 | /* |
| 308 | * __task_rq_lock - lock the rq @p resides on. | ||
| 309 | */ | ||
| 310 | static inline struct rq *__task_rq_lock(struct task_struct *p) | ||
| 311 | __acquires(rq->lock) | ||
| 312 | { | ||
| 313 | struct rq *rq; | ||
| 314 | |||
| 315 | lockdep_assert_held(&p->pi_lock); | ||
| 316 | |||
| 317 | for (;;) { | ||
| 318 | rq = task_rq(p); | ||
| 319 | raw_spin_lock(&rq->lock); | ||
| 320 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 321 | return rq; | ||
| 322 | raw_spin_unlock(&rq->lock); | ||
| 323 | |||
| 324 | while (unlikely(task_on_rq_migrating(p))) | ||
| 325 | cpu_relax(); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | /* | ||
| 330 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. | ||
| 331 | */ | ||
| 332 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | ||
| 333 | __acquires(p->pi_lock) | ||
| 334 | __acquires(rq->lock) | ||
| 335 | { | ||
| 336 | struct rq *rq; | ||
| 337 | |||
| 338 | for (;;) { | ||
| 339 | raw_spin_lock_irqsave(&p->pi_lock, *flags); | ||
| 340 | rq = task_rq(p); | ||
| 341 | raw_spin_lock(&rq->lock); | ||
| 342 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 343 | return rq; | ||
| 344 | raw_spin_unlock(&rq->lock); | ||
| 345 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 346 | |||
| 347 | while (unlikely(task_on_rq_migrating(p))) | ||
| 348 | cpu_relax(); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | |||
| 352 | static void __task_rq_unlock(struct rq *rq) | ||
| 353 | __releases(rq->lock) | ||
| 354 | { | ||
| 355 | raw_spin_unlock(&rq->lock); | ||
| 356 | } | ||
| 357 | |||
| 358 | static inline void | ||
| 359 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
| 360 | __releases(rq->lock) | ||
| 361 | __releases(p->pi_lock) | ||
| 362 | { | ||
| 363 | raw_spin_unlock(&rq->lock); | ||
| 364 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 365 | } | ||
| 366 | |||
| 367 | /* | ||
| 368 | * this_rq_lock - lock this runqueue and disable interrupts. | 310 | * this_rq_lock - lock this runqueue and disable interrupts. |
| 369 | */ | 311 | */ |
| 370 | static struct rq *this_rq_lock(void) | 312 | static struct rq *this_rq_lock(void) |
| @@ -490,6 +432,11 @@ static __init void init_hrtick(void) | |||
| 490 | */ | 432 | */ |
| 491 | void hrtick_start(struct rq *rq, u64 delay) | 433 | void hrtick_start(struct rq *rq, u64 delay) |
| 492 | { | 434 | { |
| 435 | /* | ||
| 436 | * Don't schedule slices shorter than 10000ns, that just | ||
| 437 | * doesn't make sense. Rely on vruntime for fairness. | ||
| 438 | */ | ||
| 439 | delay = max_t(u64, delay, 10000LL); | ||
| 493 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, | 440 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, |
| 494 | HRTIMER_MODE_REL_PINNED, 0); | 441 | HRTIMER_MODE_REL_PINNED, 0); |
| 495 | } | 442 | } |
| @@ -1046,7 +993,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
| 1046 | * this case, we can save a useless back to back clock update. | 993 | * this case, we can save a useless back to back clock update. |
| 1047 | */ | 994 | */ |
| 1048 | if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) | 995 | if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) |
| 1049 | rq->skip_clock_update = 1; | 996 | rq_clock_skip_update(rq, true); |
| 1050 | } | 997 | } |
| 1051 | 998 | ||
| 1052 | #ifdef CONFIG_SMP | 999 | #ifdef CONFIG_SMP |
| @@ -1082,7 +1029,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 1082 | if (p->sched_class->migrate_task_rq) | 1029 | if (p->sched_class->migrate_task_rq) |
| 1083 | p->sched_class->migrate_task_rq(p, new_cpu); | 1030 | p->sched_class->migrate_task_rq(p, new_cpu); |
| 1084 | p->se.nr_migrations++; | 1031 | p->se.nr_migrations++; |
| 1085 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); | 1032 | perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); |
| 1086 | } | 1033 | } |
| 1087 | 1034 | ||
| 1088 | __set_task_cpu(p, new_cpu); | 1035 | __set_task_cpu(p, new_cpu); |
| @@ -1836,6 +1783,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
| 1836 | p->se.prev_sum_exec_runtime = 0; | 1783 | p->se.prev_sum_exec_runtime = 0; |
| 1837 | p->se.nr_migrations = 0; | 1784 | p->se.nr_migrations = 0; |
| 1838 | p->se.vruntime = 0; | 1785 | p->se.vruntime = 0; |
| 1786 | #ifdef CONFIG_SMP | ||
| 1787 | p->se.avg.decay_count = 0; | ||
| 1788 | #endif | ||
| 1839 | INIT_LIST_HEAD(&p->se.group_node); | 1789 | INIT_LIST_HEAD(&p->se.group_node); |
| 1840 | 1790 | ||
| 1841 | #ifdef CONFIG_SCHEDSTATS | 1791 | #ifdef CONFIG_SCHEDSTATS |
| @@ -2755,6 +2705,10 @@ again: | |||
| 2755 | * - explicit schedule() call | 2705 | * - explicit schedule() call |
| 2756 | * - return from syscall or exception to user-space | 2706 | * - return from syscall or exception to user-space |
| 2757 | * - return from interrupt-handler to user-space | 2707 | * - return from interrupt-handler to user-space |
| 2708 | * | ||
| 2709 | * WARNING: all callers must re-check need_resched() afterward and reschedule | ||
| 2710 | * accordingly in case an event triggered the need for rescheduling (such as | ||
| 2711 | * an interrupt waking up a task) while preemption was disabled in __schedule(). | ||
| 2758 | */ | 2712 | */ |
| 2759 | static void __sched __schedule(void) | 2713 | static void __sched __schedule(void) |
| 2760 | { | 2714 | { |
| @@ -2763,7 +2717,6 @@ static void __sched __schedule(void) | |||
| 2763 | struct rq *rq; | 2717 | struct rq *rq; |
| 2764 | int cpu; | 2718 | int cpu; |
| 2765 | 2719 | ||
| 2766 | need_resched: | ||
| 2767 | preempt_disable(); | 2720 | preempt_disable(); |
| 2768 | cpu = smp_processor_id(); | 2721 | cpu = smp_processor_id(); |
| 2769 | rq = cpu_rq(cpu); | 2722 | rq = cpu_rq(cpu); |
| @@ -2783,6 +2736,8 @@ need_resched: | |||
| 2783 | smp_mb__before_spinlock(); | 2736 | smp_mb__before_spinlock(); |
| 2784 | raw_spin_lock_irq(&rq->lock); | 2737 | raw_spin_lock_irq(&rq->lock); |
| 2785 | 2738 | ||
| 2739 | rq->clock_skip_update <<= 1; /* promote REQ to ACT */ | ||
| 2740 | |||
| 2786 | switch_count = &prev->nivcsw; | 2741 | switch_count = &prev->nivcsw; |
| 2787 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 2742 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
| 2788 | if (unlikely(signal_pending_state(prev->state, prev))) { | 2743 | if (unlikely(signal_pending_state(prev->state, prev))) { |
| @@ -2807,13 +2762,13 @@ need_resched: | |||
| 2807 | switch_count = &prev->nvcsw; | 2762 | switch_count = &prev->nvcsw; |
| 2808 | } | 2763 | } |
| 2809 | 2764 | ||
| 2810 | if (task_on_rq_queued(prev) || rq->skip_clock_update < 0) | 2765 | if (task_on_rq_queued(prev)) |
| 2811 | update_rq_clock(rq); | 2766 | update_rq_clock(rq); |
| 2812 | 2767 | ||
| 2813 | next = pick_next_task(rq, prev); | 2768 | next = pick_next_task(rq, prev); |
| 2814 | clear_tsk_need_resched(prev); | 2769 | clear_tsk_need_resched(prev); |
| 2815 | clear_preempt_need_resched(); | 2770 | clear_preempt_need_resched(); |
| 2816 | rq->skip_clock_update = 0; | 2771 | rq->clock_skip_update = 0; |
| 2817 | 2772 | ||
| 2818 | if (likely(prev != next)) { | 2773 | if (likely(prev != next)) { |
| 2819 | rq->nr_switches++; | 2774 | rq->nr_switches++; |
| @@ -2828,8 +2783,6 @@ need_resched: | |||
| 2828 | post_schedule(rq); | 2783 | post_schedule(rq); |
| 2829 | 2784 | ||
| 2830 | sched_preempt_enable_no_resched(); | 2785 | sched_preempt_enable_no_resched(); |
| 2831 | if (need_resched()) | ||
| 2832 | goto need_resched; | ||
| 2833 | } | 2786 | } |
| 2834 | 2787 | ||
| 2835 | static inline void sched_submit_work(struct task_struct *tsk) | 2788 | static inline void sched_submit_work(struct task_struct *tsk) |
| @@ -2849,7 +2802,9 @@ asmlinkage __visible void __sched schedule(void) | |||
| 2849 | struct task_struct *tsk = current; | 2802 | struct task_struct *tsk = current; |
| 2850 | 2803 | ||
| 2851 | sched_submit_work(tsk); | 2804 | sched_submit_work(tsk); |
| 2852 | __schedule(); | 2805 | do { |
| 2806 | __schedule(); | ||
| 2807 | } while (need_resched()); | ||
| 2853 | } | 2808 | } |
| 2854 | EXPORT_SYMBOL(schedule); | 2809 | EXPORT_SYMBOL(schedule); |
| 2855 | 2810 | ||
| @@ -2884,6 +2839,21 @@ void __sched schedule_preempt_disabled(void) | |||
| 2884 | preempt_disable(); | 2839 | preempt_disable(); |
| 2885 | } | 2840 | } |
| 2886 | 2841 | ||
| 2842 | static void __sched notrace preempt_schedule_common(void) | ||
| 2843 | { | ||
| 2844 | do { | ||
| 2845 | __preempt_count_add(PREEMPT_ACTIVE); | ||
| 2846 | __schedule(); | ||
| 2847 | __preempt_count_sub(PREEMPT_ACTIVE); | ||
| 2848 | |||
| 2849 | /* | ||
| 2850 | * Check again in case we missed a preemption opportunity | ||
| 2851 | * between schedule and now. | ||
| 2852 | */ | ||
| 2853 | barrier(); | ||
| 2854 | } while (need_resched()); | ||
| 2855 | } | ||
| 2856 | |||
| 2887 | #ifdef CONFIG_PREEMPT | 2857 | #ifdef CONFIG_PREEMPT |
| 2888 | /* | 2858 | /* |
| 2889 | * this is the entry point to schedule() from in-kernel preemption | 2859 | * this is the entry point to schedule() from in-kernel preemption |
| @@ -2899,17 +2869,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |||
| 2899 | if (likely(!preemptible())) | 2869 | if (likely(!preemptible())) |
| 2900 | return; | 2870 | return; |
| 2901 | 2871 | ||
| 2902 | do { | 2872 | preempt_schedule_common(); |
| 2903 | __preempt_count_add(PREEMPT_ACTIVE); | ||
| 2904 | __schedule(); | ||
| 2905 | __preempt_count_sub(PREEMPT_ACTIVE); | ||
| 2906 | |||
| 2907 | /* | ||
| 2908 | * Check again in case we missed a preemption opportunity | ||
| 2909 | * between schedule and now. | ||
| 2910 | */ | ||
| 2911 | barrier(); | ||
| 2912 | } while (need_resched()); | ||
| 2913 | } | 2873 | } |
| 2914 | NOKPROBE_SYMBOL(preempt_schedule); | 2874 | NOKPROBE_SYMBOL(preempt_schedule); |
| 2915 | EXPORT_SYMBOL(preempt_schedule); | 2875 | EXPORT_SYMBOL(preempt_schedule); |
| @@ -3405,6 +3365,20 @@ static bool check_same_owner(struct task_struct *p) | |||
| 3405 | return match; | 3365 | return match; |
| 3406 | } | 3366 | } |
| 3407 | 3367 | ||
| 3368 | static bool dl_param_changed(struct task_struct *p, | ||
| 3369 | const struct sched_attr *attr) | ||
| 3370 | { | ||
| 3371 | struct sched_dl_entity *dl_se = &p->dl; | ||
| 3372 | |||
| 3373 | if (dl_se->dl_runtime != attr->sched_runtime || | ||
| 3374 | dl_se->dl_deadline != attr->sched_deadline || | ||
| 3375 | dl_se->dl_period != attr->sched_period || | ||
| 3376 | dl_se->flags != attr->sched_flags) | ||
| 3377 | return true; | ||
| 3378 | |||
| 3379 | return false; | ||
| 3380 | } | ||
| 3381 | |||
| 3408 | static int __sched_setscheduler(struct task_struct *p, | 3382 | static int __sched_setscheduler(struct task_struct *p, |
| 3409 | const struct sched_attr *attr, | 3383 | const struct sched_attr *attr, |
| 3410 | bool user) | 3384 | bool user) |
| @@ -3533,7 +3507,7 @@ recheck: | |||
| 3533 | goto change; | 3507 | goto change; |
| 3534 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) | 3508 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) |
| 3535 | goto change; | 3509 | goto change; |
| 3536 | if (dl_policy(policy)) | 3510 | if (dl_policy(policy) && dl_param_changed(p, attr)) |
| 3537 | goto change; | 3511 | goto change; |
| 3538 | 3512 | ||
| 3539 | p->sched_reset_on_fork = reset_on_fork; | 3513 | p->sched_reset_on_fork = reset_on_fork; |
| @@ -4225,17 +4199,10 @@ SYSCALL_DEFINE0(sched_yield) | |||
| 4225 | return 0; | 4199 | return 0; |
| 4226 | } | 4200 | } |
| 4227 | 4201 | ||
| 4228 | static void __cond_resched(void) | ||
| 4229 | { | ||
| 4230 | __preempt_count_add(PREEMPT_ACTIVE); | ||
| 4231 | __schedule(); | ||
| 4232 | __preempt_count_sub(PREEMPT_ACTIVE); | ||
| 4233 | } | ||
| 4234 | |||
| 4235 | int __sched _cond_resched(void) | 4202 | int __sched _cond_resched(void) |
| 4236 | { | 4203 | { |
| 4237 | if (should_resched()) { | 4204 | if (should_resched()) { |
| 4238 | __cond_resched(); | 4205 | preempt_schedule_common(); |
| 4239 | return 1; | 4206 | return 1; |
| 4240 | } | 4207 | } |
| 4241 | return 0; | 4208 | return 0; |
| @@ -4260,7 +4227,7 @@ int __cond_resched_lock(spinlock_t *lock) | |||
| 4260 | if (spin_needbreak(lock) || resched) { | 4227 | if (spin_needbreak(lock) || resched) { |
| 4261 | spin_unlock(lock); | 4228 | spin_unlock(lock); |
| 4262 | if (resched) | 4229 | if (resched) |
| 4263 | __cond_resched(); | 4230 | preempt_schedule_common(); |
| 4264 | else | 4231 | else |
| 4265 | cpu_relax(); | 4232 | cpu_relax(); |
| 4266 | ret = 1; | 4233 | ret = 1; |
| @@ -4276,7 +4243,7 @@ int __sched __cond_resched_softirq(void) | |||
| 4276 | 4243 | ||
| 4277 | if (should_resched()) { | 4244 | if (should_resched()) { |
| 4278 | local_bh_enable(); | 4245 | local_bh_enable(); |
| 4279 | __cond_resched(); | 4246 | preempt_schedule_common(); |
| 4280 | local_bh_disable(); | 4247 | local_bh_disable(); |
| 4281 | return 1; | 4248 | return 1; |
| 4282 | } | 4249 | } |
| @@ -4391,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to); | |||
| 4391 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 4358 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
| 4392 | * that process accounting knows that this is a task in IO wait state. | 4359 | * that process accounting knows that this is a task in IO wait state. |
| 4393 | */ | 4360 | */ |
| 4394 | void __sched io_schedule(void) | ||
| 4395 | { | ||
| 4396 | struct rq *rq = raw_rq(); | ||
| 4397 | |||
| 4398 | delayacct_blkio_start(); | ||
| 4399 | atomic_inc(&rq->nr_iowait); | ||
| 4400 | blk_flush_plug(current); | ||
| 4401 | current->in_iowait = 1; | ||
| 4402 | schedule(); | ||
| 4403 | current->in_iowait = 0; | ||
| 4404 | atomic_dec(&rq->nr_iowait); | ||
| 4405 | delayacct_blkio_end(); | ||
| 4406 | } | ||
| 4407 | EXPORT_SYMBOL(io_schedule); | ||
| 4408 | |||
| 4409 | long __sched io_schedule_timeout(long timeout) | 4361 | long __sched io_schedule_timeout(long timeout) |
| 4410 | { | 4362 | { |
| 4411 | struct rq *rq = raw_rq(); | 4363 | int old_iowait = current->in_iowait; |
| 4364 | struct rq *rq; | ||
| 4412 | long ret; | 4365 | long ret; |
| 4413 | 4366 | ||
| 4367 | current->in_iowait = 1; | ||
| 4368 | if (old_iowait) | ||
| 4369 | blk_schedule_flush_plug(current); | ||
| 4370 | else | ||
| 4371 | blk_flush_plug(current); | ||
| 4372 | |||
| 4414 | delayacct_blkio_start(); | 4373 | delayacct_blkio_start(); |
| 4374 | rq = raw_rq(); | ||
| 4415 | atomic_inc(&rq->nr_iowait); | 4375 | atomic_inc(&rq->nr_iowait); |
| 4416 | blk_flush_plug(current); | ||
| 4417 | current->in_iowait = 1; | ||
| 4418 | ret = schedule_timeout(timeout); | 4376 | ret = schedule_timeout(timeout); |
| 4419 | current->in_iowait = 0; | 4377 | current->in_iowait = old_iowait; |
| 4420 | atomic_dec(&rq->nr_iowait); | 4378 | atomic_dec(&rq->nr_iowait); |
| 4421 | delayacct_blkio_end(); | 4379 | delayacct_blkio_end(); |
| 4380 | |||
| 4422 | return ret; | 4381 | return ret; |
| 4423 | } | 4382 | } |
| 4383 | EXPORT_SYMBOL(io_schedule_timeout); | ||
| 4424 | 4384 | ||
| 4425 | /** | 4385 | /** |
| 4426 | * sys_sched_get_priority_max - return maximum RT priority. | 4386 | * sys_sched_get_priority_max - return maximum RT priority. |
| @@ -4531,9 +4491,10 @@ void sched_show_task(struct task_struct *p) | |||
| 4531 | { | 4491 | { |
| 4532 | unsigned long free = 0; | 4492 | unsigned long free = 0; |
| 4533 | int ppid; | 4493 | int ppid; |
| 4534 | unsigned state; | 4494 | unsigned long state = p->state; |
| 4535 | 4495 | ||
| 4536 | state = p->state ? __ffs(p->state) + 1 : 0; | 4496 | if (state) |
| 4497 | state = __ffs(state) + 1; | ||
| 4537 | printk(KERN_INFO "%-15.15s %c", p->comm, | 4498 | printk(KERN_INFO "%-15.15s %c", p->comm, |
| 4538 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); | 4499 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); |
| 4539 | #if BITS_PER_LONG == 32 | 4500 | #if BITS_PER_LONG == 32 |
| @@ -4766,7 +4727,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu) | |||
| 4766 | 4727 | ||
| 4767 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | 4728 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
| 4768 | { | 4729 | { |
| 4769 | if (p->sched_class && p->sched_class->set_cpus_allowed) | 4730 | if (p->sched_class->set_cpus_allowed) |
| 4770 | p->sched_class->set_cpus_allowed(p, new_mask); | 4731 | p->sched_class->set_cpus_allowed(p, new_mask); |
| 4771 | 4732 | ||
| 4772 | cpumask_copy(&p->cpus_allowed, new_mask); | 4733 | cpumask_copy(&p->cpus_allowed, new_mask); |
| @@ -5434,9 +5395,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 5434 | struct cpumask *groupmask) | 5395 | struct cpumask *groupmask) |
| 5435 | { | 5396 | { |
| 5436 | struct sched_group *group = sd->groups; | 5397 | struct sched_group *group = sd->groups; |
| 5437 | char str[256]; | ||
| 5438 | 5398 | ||
| 5439 | cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); | ||
| 5440 | cpumask_clear(groupmask); | 5399 | cpumask_clear(groupmask); |
| 5441 | 5400 | ||
| 5442 | printk(KERN_DEBUG "%*s domain %d: ", level, "", level); | 5401 | printk(KERN_DEBUG "%*s domain %d: ", level, "", level); |
| @@ -5449,7 +5408,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 5449 | return -1; | 5408 | return -1; |
| 5450 | } | 5409 | } |
| 5451 | 5410 | ||
| 5452 | printk(KERN_CONT "span %s level %s\n", str, sd->name); | 5411 | printk(KERN_CONT "span %*pbl level %s\n", |
| 5412 | cpumask_pr_args(sched_domain_span(sd)), sd->name); | ||
| 5453 | 5413 | ||
| 5454 | if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { | 5414 | if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { |
| 5455 | printk(KERN_ERR "ERROR: domain->span does not contain " | 5415 | printk(KERN_ERR "ERROR: domain->span does not contain " |
| @@ -5494,9 +5454,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 5494 | 5454 | ||
| 5495 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); | 5455 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); |
| 5496 | 5456 | ||
| 5497 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 5457 | printk(KERN_CONT " %*pbl", |
| 5498 | 5458 | cpumask_pr_args(sched_group_cpus(group))); | |
| 5499 | printk(KERN_CONT " %s", str); | ||
| 5500 | if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { | 5459 | if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { |
| 5501 | printk(KERN_CONT " (cpu_capacity = %d)", | 5460 | printk(KERN_CONT " (cpu_capacity = %d)", |
| 5502 | group->sgc->capacity); | 5461 | group->sgc->capacity); |
| @@ -7276,6 +7235,11 @@ void __init sched_init(void) | |||
| 7276 | enter_lazy_tlb(&init_mm, current); | 7235 | enter_lazy_tlb(&init_mm, current); |
| 7277 | 7236 | ||
| 7278 | /* | 7237 | /* |
| 7238 | * During early bootup we pretend to be a normal task: | ||
| 7239 | */ | ||
| 7240 | current->sched_class = &fair_sched_class; | ||
| 7241 | |||
| 7242 | /* | ||
| 7279 | * Make us the idle thread. Technically, schedule() should not be | 7243 | * Make us the idle thread. Technically, schedule() should not be |
| 7280 | * called from this thread, however somewhere below it might be, | 7244 | * called from this thread, however somewhere below it might be, |
| 7281 | * but because we are the idle thread, we just pick up running again | 7245 | * but because we are the idle thread, we just pick up running again |
| @@ -7285,11 +7249,6 @@ void __init sched_init(void) | |||
| 7285 | 7249 | ||
| 7286 | calc_load_update = jiffies + LOAD_FREQ; | 7250 | calc_load_update = jiffies + LOAD_FREQ; |
| 7287 | 7251 | ||
| 7288 | /* | ||
| 7289 | * During early bootup we pretend to be a normal task: | ||
| 7290 | */ | ||
| 7291 | current->sched_class = &fair_sched_class; | ||
| 7292 | |||
| 7293 | #ifdef CONFIG_SMP | 7252 | #ifdef CONFIG_SMP |
| 7294 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 7253 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); |
| 7295 | /* May be allocated at isolcpus cmdline parse time */ | 7254 | /* May be allocated at isolcpus cmdline parse time */ |
| @@ -7350,6 +7309,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset) | |||
| 7350 | in_atomic(), irqs_disabled(), | 7309 | in_atomic(), irqs_disabled(), |
| 7351 | current->pid, current->comm); | 7310 | current->pid, current->comm); |
| 7352 | 7311 | ||
| 7312 | if (task_stack_end_corrupted(current)) | ||
| 7313 | printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); | ||
| 7314 | |||
| 7353 | debug_show_held_locks(current); | 7315 | debug_show_held_locks(current); |
| 7354 | if (irqs_disabled()) | 7316 | if (irqs_disabled()) |
| 7355 | print_irqtrace_events(current); | 7317 | print_irqtrace_events(current); |
| @@ -7613,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg) | |||
| 7613 | { | 7575 | { |
| 7614 | struct task_struct *g, *p; | 7576 | struct task_struct *g, *p; |
| 7615 | 7577 | ||
| 7578 | /* | ||
| 7579 | * Autogroups do not have RT tasks; see autogroup_create(). | ||
| 7580 | */ | ||
| 7581 | if (task_group_is_autogroup(tg)) | ||
| 7582 | return 0; | ||
| 7583 | |||
| 7616 | for_each_process_thread(g, p) { | 7584 | for_each_process_thread(g, p) { |
| 7617 | if (rt_task(p) && task_group(p) == tg) | 7585 | if (rt_task(p) && task_group(p) == tg) |
| 7618 | return 1; | 7586 | return 1; |
| @@ -7705,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg, | |||
| 7705 | { | 7673 | { |
| 7706 | int i, err = 0; | 7674 | int i, err = 0; |
| 7707 | 7675 | ||
| 7676 | /* | ||
| 7677 | * Disallowing the root group RT runtime is BAD, it would disallow the | ||
| 7678 | * kernel creating (and or operating) RT threads. | ||
| 7679 | */ | ||
| 7680 | if (tg == &root_task_group && rt_runtime == 0) | ||
| 7681 | return -EINVAL; | ||
| 7682 | |||
| 7683 | /* No period doesn't make any sense. */ | ||
| 7684 | if (rt_period == 0) | ||
| 7685 | return -EINVAL; | ||
| 7686 | |||
| 7708 | mutex_lock(&rt_constraints_mutex); | 7687 | mutex_lock(&rt_constraints_mutex); |
| 7709 | read_lock(&tasklist_lock); | 7688 | read_lock(&tasklist_lock); |
| 7710 | err = __rt_schedulable(tg, rt_period, rt_runtime); | 7689 | err = __rt_schedulable(tg, rt_period, rt_runtime); |
| @@ -7761,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
| 7761 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; | 7740 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; |
| 7762 | rt_runtime = tg->rt_bandwidth.rt_runtime; | 7741 | rt_runtime = tg->rt_bandwidth.rt_runtime; |
| 7763 | 7742 | ||
| 7764 | if (rt_period == 0) | ||
| 7765 | return -EINVAL; | ||
| 7766 | |||
| 7767 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 7743 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
| 7768 | } | 7744 | } |
| 7769 | 7745 | ||
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 539ca3ce071b..c6acb07466bb 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c | |||
| @@ -107,7 +107,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, | |||
| 107 | int best_cpu = -1; | 107 | int best_cpu = -1; |
| 108 | const struct sched_dl_entity *dl_se = &p->dl; | 108 | const struct sched_dl_entity *dl_se = &p->dl; |
| 109 | 109 | ||
| 110 | if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) { | 110 | if (later_mask && |
| 111 | cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { | ||
| 111 | best_cpu = cpumask_any(later_mask); | 112 | best_cpu = cpumask_any(later_mask); |
| 112 | goto out; | 113 | goto out; |
| 113 | } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && | 114 | } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && |
| @@ -186,6 +187,26 @@ out: | |||
| 186 | } | 187 | } |
| 187 | 188 | ||
| 188 | /* | 189 | /* |
| 190 | * cpudl_set_freecpu - Set the cpudl.free_cpus | ||
| 191 | * @cp: the cpudl max-heap context | ||
| 192 | * @cpu: rd attached cpu | ||
| 193 | */ | ||
| 194 | void cpudl_set_freecpu(struct cpudl *cp, int cpu) | ||
| 195 | { | ||
| 196 | cpumask_set_cpu(cpu, cp->free_cpus); | ||
| 197 | } | ||
| 198 | |||
| 199 | /* | ||
| 200 | * cpudl_clear_freecpu - Clear the cpudl.free_cpus | ||
| 201 | * @cp: the cpudl max-heap context | ||
| 202 | * @cpu: rd attached cpu | ||
| 203 | */ | ||
| 204 | void cpudl_clear_freecpu(struct cpudl *cp, int cpu) | ||
| 205 | { | ||
| 206 | cpumask_clear_cpu(cpu, cp->free_cpus); | ||
| 207 | } | ||
| 208 | |||
| 209 | /* | ||
| 189 | * cpudl_init - initialize the cpudl structure | 210 | * cpudl_init - initialize the cpudl structure |
| 190 | * @cp: the cpudl max-heap context | 211 | * @cp: the cpudl max-heap context |
| 191 | */ | 212 | */ |
| @@ -203,7 +224,7 @@ int cpudl_init(struct cpudl *cp) | |||
| 203 | if (!cp->elements) | 224 | if (!cp->elements) |
| 204 | return -ENOMEM; | 225 | return -ENOMEM; |
| 205 | 226 | ||
| 206 | if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { | 227 | if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { |
| 207 | kfree(cp->elements); | 228 | kfree(cp->elements); |
| 208 | return -ENOMEM; | 229 | return -ENOMEM; |
| 209 | } | 230 | } |
| @@ -211,8 +232,6 @@ int cpudl_init(struct cpudl *cp) | |||
| 211 | for_each_possible_cpu(i) | 232 | for_each_possible_cpu(i) |
| 212 | cp->elements[i].idx = IDX_INVALID; | 233 | cp->elements[i].idx = IDX_INVALID; |
| 213 | 234 | ||
| 214 | cpumask_setall(cp->free_cpus); | ||
| 215 | |||
| 216 | return 0; | 235 | return 0; |
| 217 | } | 236 | } |
| 218 | 237 | ||
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index 020039bd1326..1a0a6ef2fbe1 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h | |||
| @@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, | |||
| 24 | struct cpumask *later_mask); | 24 | struct cpumask *later_mask); |
| 25 | void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid); | 25 | void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid); |
| 26 | int cpudl_init(struct cpudl *cp); | 26 | int cpudl_init(struct cpudl *cp); |
| 27 | void cpudl_set_freecpu(struct cpudl *cp, int cpu); | ||
| 28 | void cpudl_clear_freecpu(struct cpudl *cp, int cpu); | ||
| 27 | void cpudl_cleanup(struct cpudl *cp); | 29 | void cpudl_cleanup(struct cpudl *cp); |
| 28 | #endif /* CONFIG_SMP */ | 30 | #endif /* CONFIG_SMP */ |
| 29 | 31 | ||
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 726470d47f87..3fa8fa6d9403 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
| @@ -350,6 +350,11 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, | |||
| 350 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | 350 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
| 351 | dl_se->runtime = pi_se->dl_runtime; | 351 | dl_se->runtime = pi_se->dl_runtime; |
| 352 | } | 352 | } |
| 353 | |||
| 354 | if (dl_se->dl_yielded) | ||
| 355 | dl_se->dl_yielded = 0; | ||
| 356 | if (dl_se->dl_throttled) | ||
| 357 | dl_se->dl_throttled = 0; | ||
| 353 | } | 358 | } |
| 354 | 359 | ||
| 355 | /* | 360 | /* |
| @@ -506,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |||
| 506 | struct sched_dl_entity, | 511 | struct sched_dl_entity, |
| 507 | dl_timer); | 512 | dl_timer); |
| 508 | struct task_struct *p = dl_task_of(dl_se); | 513 | struct task_struct *p = dl_task_of(dl_se); |
| 514 | unsigned long flags; | ||
| 509 | struct rq *rq; | 515 | struct rq *rq; |
| 510 | again: | ||
| 511 | rq = task_rq(p); | ||
| 512 | raw_spin_lock(&rq->lock); | ||
| 513 | 516 | ||
| 514 | if (rq != task_rq(p)) { | 517 | rq = task_rq_lock(current, &flags); |
| 515 | /* Task was moved, retrying. */ | ||
| 516 | raw_spin_unlock(&rq->lock); | ||
| 517 | goto again; | ||
| 518 | } | ||
| 519 | 518 | ||
| 520 | /* | 519 | /* |
| 521 | * We need to take care of several possible races here: | 520 | * We need to take care of several possible races here: |
| @@ -536,25 +535,41 @@ again: | |||
| 536 | 535 | ||
| 537 | sched_clock_tick(); | 536 | sched_clock_tick(); |
| 538 | update_rq_clock(rq); | 537 | update_rq_clock(rq); |
| 539 | dl_se->dl_throttled = 0; | 538 | |
| 540 | dl_se->dl_yielded = 0; | 539 | /* |
| 541 | if (task_on_rq_queued(p)) { | 540 | * If the throttle happened during sched-out; like: |
| 542 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | 541 | * |
| 543 | if (dl_task(rq->curr)) | 542 | * schedule() |
| 544 | check_preempt_curr_dl(rq, p, 0); | 543 | * deactivate_task() |
| 545 | else | 544 | * dequeue_task_dl() |
| 546 | resched_curr(rq); | 545 | * update_curr_dl() |
| 546 | * start_dl_timer() | ||
| 547 | * __dequeue_task_dl() | ||
| 548 | * prev->on_rq = 0; | ||
| 549 | * | ||
| 550 | * We can be both throttled and !queued. Replenish the counter | ||
| 551 | * but do not enqueue -- wait for our wakeup to do that. | ||
| 552 | */ | ||
| 553 | if (!task_on_rq_queued(p)) { | ||
| 554 | replenish_dl_entity(dl_se, dl_se); | ||
| 555 | goto unlock; | ||
| 556 | } | ||
| 557 | |||
| 558 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | ||
| 559 | if (dl_task(rq->curr)) | ||
| 560 | check_preempt_curr_dl(rq, p, 0); | ||
| 561 | else | ||
| 562 | resched_curr(rq); | ||
| 547 | #ifdef CONFIG_SMP | 563 | #ifdef CONFIG_SMP |
| 548 | /* | 564 | /* |
| 549 | * Queueing this task back might have overloaded rq, | 565 | * Queueing this task back might have overloaded rq, |
| 550 | * check if we need to kick someone away. | 566 | * check if we need to kick someone away. |
| 551 | */ | 567 | */ |
| 552 | if (has_pushable_dl_tasks(rq)) | 568 | if (has_pushable_dl_tasks(rq)) |
| 553 | push_dl_task(rq); | 569 | push_dl_task(rq); |
| 554 | #endif | 570 | #endif |
| 555 | } | ||
| 556 | unlock: | 571 | unlock: |
| 557 | raw_spin_unlock(&rq->lock); | 572 | task_rq_unlock(rq, current, &flags); |
| 558 | 573 | ||
| 559 | return HRTIMER_NORESTART; | 574 | return HRTIMER_NORESTART; |
| 560 | } | 575 | } |
| @@ -613,10 +628,9 @@ static void update_curr_dl(struct rq *rq) | |||
| 613 | 628 | ||
| 614 | dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec; | 629 | dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec; |
| 615 | if (dl_runtime_exceeded(rq, dl_se)) { | 630 | if (dl_runtime_exceeded(rq, dl_se)) { |
| 631 | dl_se->dl_throttled = 1; | ||
| 616 | __dequeue_task_dl(rq, curr, 0); | 632 | __dequeue_task_dl(rq, curr, 0); |
| 617 | if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) | 633 | if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted))) |
| 618 | dl_se->dl_throttled = 1; | ||
| 619 | else | ||
| 620 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); | 634 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); |
| 621 | 635 | ||
| 622 | if (!is_leftmost(curr, &rq->dl)) | 636 | if (!is_leftmost(curr, &rq->dl)) |
| @@ -853,7 +867,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |||
| 853 | * its rq, the bandwidth timer callback (which clearly has not | 867 | * its rq, the bandwidth timer callback (which clearly has not |
| 854 | * run yet) will take care of this. | 868 | * run yet) will take care of this. |
| 855 | */ | 869 | */ |
| 856 | if (p->dl.dl_throttled) | 870 | if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) |
| 857 | return; | 871 | return; |
| 858 | 872 | ||
| 859 | enqueue_dl_entity(&p->dl, pi_se, flags); | 873 | enqueue_dl_entity(&p->dl, pi_se, flags); |
| @@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq) | |||
| 898 | rq->curr->dl.dl_yielded = 1; | 912 | rq->curr->dl.dl_yielded = 1; |
| 899 | p->dl.runtime = 0; | 913 | p->dl.runtime = 0; |
| 900 | } | 914 | } |
| 915 | update_rq_clock(rq); | ||
| 901 | update_curr_dl(rq); | 916 | update_curr_dl(rq); |
| 902 | } | 917 | } |
| 903 | 918 | ||
| @@ -1073,7 +1088,13 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) | |||
| 1073 | { | 1088 | { |
| 1074 | update_curr_dl(rq); | 1089 | update_curr_dl(rq); |
| 1075 | 1090 | ||
| 1076 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) | 1091 | /* |
| 1092 | * Even when we have runtime, update_curr_dl() might have resulted in us | ||
| 1093 | * not being the leftmost task anymore. In that case NEED_RESCHED will | ||
| 1094 | * be set and schedule() will start a new hrtick for the next task. | ||
| 1095 | */ | ||
| 1096 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 && | ||
| 1097 | is_leftmost(p, &rq->dl)) | ||
| 1077 | start_hrtick_dl(rq, p); | 1098 | start_hrtick_dl(rq, p); |
| 1078 | } | 1099 | } |
| 1079 | 1100 | ||
| @@ -1166,9 +1187,6 @@ static int find_later_rq(struct task_struct *task) | |||
| 1166 | * We have to consider system topology and task affinity | 1187 | * We have to consider system topology and task affinity |
| 1167 | * first, then we can look for a suitable cpu. | 1188 | * first, then we can look for a suitable cpu. |
| 1168 | */ | 1189 | */ |
| 1169 | cpumask_copy(later_mask, task_rq(task)->rd->span); | ||
| 1170 | cpumask_and(later_mask, later_mask, cpu_active_mask); | ||
| 1171 | cpumask_and(later_mask, later_mask, &task->cpus_allowed); | ||
| 1172 | best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, | 1190 | best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, |
| 1173 | task, later_mask); | 1191 | task, later_mask); |
| 1174 | if (best_cpu == -1) | 1192 | if (best_cpu == -1) |
| @@ -1563,6 +1581,7 @@ static void rq_online_dl(struct rq *rq) | |||
| 1563 | if (rq->dl.overloaded) | 1581 | if (rq->dl.overloaded) |
| 1564 | dl_set_overload(rq); | 1582 | dl_set_overload(rq); |
| 1565 | 1583 | ||
| 1584 | cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu); | ||
| 1566 | if (rq->dl.dl_nr_running > 0) | 1585 | if (rq->dl.dl_nr_running > 0) |
| 1567 | cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); | 1586 | cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); |
| 1568 | } | 1587 | } |
| @@ -1574,6 +1593,7 @@ static void rq_offline_dl(struct rq *rq) | |||
| 1574 | dl_clear_overload(rq); | 1593 | dl_clear_overload(rq); |
| 1575 | 1594 | ||
| 1576 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); | 1595 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); |
| 1596 | cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu); | ||
| 1577 | } | 1597 | } |
| 1578 | 1598 | ||
| 1579 | void init_sched_dl_class(void) | 1599 | void init_sched_dl_class(void) |
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 92cc52001e74..8baaf858d25c 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
| @@ -305,6 +305,7 @@ do { \ | |||
| 305 | PN(next_balance); | 305 | PN(next_balance); |
| 306 | SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); | 306 | SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); |
| 307 | PN(clock); | 307 | PN(clock); |
| 308 | PN(clock_task); | ||
| 308 | P(cpu_load[0]); | 309 | P(cpu_load[0]); |
| 309 | P(cpu_load[1]); | 310 | P(cpu_load[1]); |
| 310 | P(cpu_load[2]); | 311 | P(cpu_load[2]); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fe331fc391f5..7ce18f3c097a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -676,7 +676,6 @@ void init_task_runnable_average(struct task_struct *p) | |||
| 676 | { | 676 | { |
| 677 | u32 slice; | 677 | u32 slice; |
| 678 | 678 | ||
| 679 | p->se.avg.decay_count = 0; | ||
| 680 | slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; | 679 | slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; |
| 681 | p->se.avg.runnable_avg_sum = slice; | 680 | p->se.avg.runnable_avg_sum = slice; |
| 682 | p->se.avg.runnable_avg_period = slice; | 681 | p->se.avg.runnable_avg_period = slice; |
| @@ -2574,11 +2573,11 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se) | |||
| 2574 | u64 decays = atomic64_read(&cfs_rq->decay_counter); | 2573 | u64 decays = atomic64_read(&cfs_rq->decay_counter); |
| 2575 | 2574 | ||
| 2576 | decays -= se->avg.decay_count; | 2575 | decays -= se->avg.decay_count; |
| 2576 | se->avg.decay_count = 0; | ||
| 2577 | if (!decays) | 2577 | if (!decays) |
| 2578 | return 0; | 2578 | return 0; |
| 2579 | 2579 | ||
| 2580 | se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); | 2580 | se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); |
| 2581 | se->avg.decay_count = 0; | ||
| 2582 | 2581 | ||
| 2583 | return decays; | 2582 | return decays; |
| 2584 | } | 2583 | } |
| @@ -5157,7 +5156,7 @@ static void yield_task_fair(struct rq *rq) | |||
| 5157 | * so we don't do microscopic update in schedule() | 5156 | * so we don't do microscopic update in schedule() |
| 5158 | * and double the fastpath cost. | 5157 | * and double the fastpath cost. |
| 5159 | */ | 5158 | */ |
| 5160 | rq->skip_clock_update = 1; | 5159 | rq_clock_skip_update(rq, true); |
| 5161 | } | 5160 | } |
| 5162 | 5161 | ||
| 5163 | set_skip_buddy(se); | 5162 | set_skip_buddy(se); |
| @@ -5949,8 +5948,8 @@ static unsigned long scale_rt_capacity(int cpu) | |||
| 5949 | */ | 5948 | */ |
| 5950 | age_stamp = ACCESS_ONCE(rq->age_stamp); | 5949 | age_stamp = ACCESS_ONCE(rq->age_stamp); |
| 5951 | avg = ACCESS_ONCE(rq->rt_avg); | 5950 | avg = ACCESS_ONCE(rq->rt_avg); |
| 5951 | delta = __rq_clock_broken(rq) - age_stamp; | ||
| 5952 | 5952 | ||
| 5953 | delta = rq_clock(rq) - age_stamp; | ||
| 5954 | if (unlikely(delta < 0)) | 5953 | if (unlikely(delta < 0)) |
| 5955 | delta = 0; | 5954 | delta = 0; |
| 5956 | 5955 | ||
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index c47fce75e666..94b2d7b88a27 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/tick.h> | 7 | #include <linux/tick.h> |
| 8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
| 9 | #include <linux/stackprotector.h> | 9 | #include <linux/stackprotector.h> |
| 10 | #include <linux/suspend.h> | ||
| 10 | 11 | ||
| 11 | #include <asm/tlb.h> | 12 | #include <asm/tlb.h> |
| 12 | 13 | ||
| @@ -47,7 +48,8 @@ static inline int cpu_idle_poll(void) | |||
| 47 | rcu_idle_enter(); | 48 | rcu_idle_enter(); |
| 48 | trace_cpu_idle_rcuidle(0, smp_processor_id()); | 49 | trace_cpu_idle_rcuidle(0, smp_processor_id()); |
| 49 | local_irq_enable(); | 50 | local_irq_enable(); |
| 50 | while (!tif_need_resched()) | 51 | while (!tif_need_resched() && |
| 52 | (cpu_idle_force_poll || tick_check_broadcast_expired())) | ||
| 51 | cpu_relax(); | 53 | cpu_relax(); |
| 52 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); | 54 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
| 53 | rcu_idle_exit(); | 55 | rcu_idle_exit(); |
| @@ -104,6 +106,21 @@ static void cpuidle_idle_call(void) | |||
| 104 | rcu_idle_enter(); | 106 | rcu_idle_enter(); |
| 105 | 107 | ||
| 106 | /* | 108 | /* |
| 109 | * Suspend-to-idle ("freeze") is a system state in which all user space | ||
| 110 | * has been frozen, all I/O devices have been suspended and the only | ||
| 111 | * activity happens here and in iterrupts (if any). In that case bypass | ||
| 112 | * the cpuidle governor and go stratight for the deepest idle state | ||
| 113 | * available. Possibly also suspend the local tick and the entire | ||
| 114 | * timekeeping to prevent timer interrupts from kicking us out of idle | ||
| 115 | * until a proper wakeup interrupt happens. | ||
| 116 | */ | ||
| 117 | if (idle_should_freeze()) { | ||
| 118 | cpuidle_enter_freeze(); | ||
| 119 | local_irq_enable(); | ||
| 120 | goto exit_idle; | ||
| 121 | } | ||
| 122 | |||
| 123 | /* | ||
| 107 | * Ask the cpuidle framework to choose a convenient idle state. | 124 | * Ask the cpuidle framework to choose a convenient idle state. |
| 108 | * Fall back to the default arch idle method on errors. | 125 | * Fall back to the default arch idle method on errors. |
| 109 | */ | 126 | */ |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ee15f5a0d1c1..f4d4b077eba0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
| @@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
| 831 | enqueue = 1; | 831 | enqueue = 1; |
| 832 | 832 | ||
| 833 | /* | 833 | /* |
| 834 | * Force a clock update if the CPU was idle, | 834 | * When we're idle and a woken (rt) task is |
| 835 | * lest wakeup -> unthrottle time accumulate. | 835 | * throttled check_preempt_curr() will set |
| 836 | * skip_update and the time between the wakeup | ||
| 837 | * and this unthrottle will get accounted as | ||
| 838 | * 'runtime'. | ||
| 836 | */ | 839 | */ |
| 837 | if (rt_rq->rt_nr_running && rq->curr == rq->idle) | 840 | if (rt_rq->rt_nr_running && rq->curr == rq->idle) |
| 838 | rq->skip_clock_update = -1; | 841 | rq_clock_skip_update(rq, false); |
| 839 | } | 842 | } |
| 840 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | 843 | if (rt_rq->rt_time || rt_rq->rt_nr_running) |
| 841 | idle = 0; | 844 | idle = 0; |
| @@ -1337,7 +1340,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
| 1337 | curr->prio <= p->prio)) { | 1340 | curr->prio <= p->prio)) { |
| 1338 | int target = find_lowest_rq(p); | 1341 | int target = find_lowest_rq(p); |
| 1339 | 1342 | ||
| 1340 | if (target != -1) | 1343 | /* |
| 1344 | * Don't bother moving it if the destination CPU is | ||
| 1345 | * not running a lower priority task. | ||
| 1346 | */ | ||
| 1347 | if (target != -1 && | ||
| 1348 | p->prio < cpu_rq(target)->rt.highest_prio.curr) | ||
| 1341 | cpu = target; | 1349 | cpu = target; |
| 1342 | } | 1350 | } |
| 1343 | rcu_read_unlock(); | 1351 | rcu_read_unlock(); |
| @@ -1614,6 +1622,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
| 1614 | 1622 | ||
| 1615 | lowest_rq = cpu_rq(cpu); | 1623 | lowest_rq = cpu_rq(cpu); |
| 1616 | 1624 | ||
| 1625 | if (lowest_rq->rt.highest_prio.curr <= task->prio) { | ||
| 1626 | /* | ||
| 1627 | * Target rq has tasks of equal or higher priority, | ||
| 1628 | * retrying does not release any lock and is unlikely | ||
| 1629 | * to yield a different result. | ||
| 1630 | */ | ||
| 1631 | lowest_rq = NULL; | ||
| 1632 | break; | ||
| 1633 | } | ||
| 1634 | |||
| 1617 | /* if the prio of this runqueue changed, try again */ | 1635 | /* if the prio of this runqueue changed, try again */ |
| 1618 | if (double_lock_balance(rq, lowest_rq)) { | 1636 | if (double_lock_balance(rq, lowest_rq)) { |
| 1619 | /* | 1637 | /* |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9a2a45c970e7..dc0f435a2779 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -558,8 +558,6 @@ struct rq { | |||
| 558 | #ifdef CONFIG_NO_HZ_FULL | 558 | #ifdef CONFIG_NO_HZ_FULL |
| 559 | unsigned long last_sched_tick; | 559 | unsigned long last_sched_tick; |
| 560 | #endif | 560 | #endif |
| 561 | int skip_clock_update; | ||
| 562 | |||
| 563 | /* capture load from *all* tasks on this cpu: */ | 561 | /* capture load from *all* tasks on this cpu: */ |
| 564 | struct load_weight load; | 562 | struct load_weight load; |
| 565 | unsigned long nr_load_updates; | 563 | unsigned long nr_load_updates; |
| @@ -588,6 +586,7 @@ struct rq { | |||
| 588 | unsigned long next_balance; | 586 | unsigned long next_balance; |
| 589 | struct mm_struct *prev_mm; | 587 | struct mm_struct *prev_mm; |
| 590 | 588 | ||
| 589 | unsigned int clock_skip_update; | ||
| 591 | u64 clock; | 590 | u64 clock; |
| 592 | u64 clock_task; | 591 | u64 clock_task; |
| 593 | 592 | ||
| @@ -687,16 +686,35 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | |||
| 687 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 686 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
| 688 | #define raw_rq() raw_cpu_ptr(&runqueues) | 687 | #define raw_rq() raw_cpu_ptr(&runqueues) |
| 689 | 688 | ||
| 689 | static inline u64 __rq_clock_broken(struct rq *rq) | ||
| 690 | { | ||
| 691 | return ACCESS_ONCE(rq->clock); | ||
| 692 | } | ||
| 693 | |||
| 690 | static inline u64 rq_clock(struct rq *rq) | 694 | static inline u64 rq_clock(struct rq *rq) |
| 691 | { | 695 | { |
| 696 | lockdep_assert_held(&rq->lock); | ||
| 692 | return rq->clock; | 697 | return rq->clock; |
| 693 | } | 698 | } |
| 694 | 699 | ||
| 695 | static inline u64 rq_clock_task(struct rq *rq) | 700 | static inline u64 rq_clock_task(struct rq *rq) |
| 696 | { | 701 | { |
| 702 | lockdep_assert_held(&rq->lock); | ||
| 697 | return rq->clock_task; | 703 | return rq->clock_task; |
| 698 | } | 704 | } |
| 699 | 705 | ||
| 706 | #define RQCF_REQ_SKIP 0x01 | ||
| 707 | #define RQCF_ACT_SKIP 0x02 | ||
| 708 | |||
| 709 | static inline void rq_clock_skip_update(struct rq *rq, bool skip) | ||
| 710 | { | ||
| 711 | lockdep_assert_held(&rq->lock); | ||
| 712 | if (skip) | ||
| 713 | rq->clock_skip_update |= RQCF_REQ_SKIP; | ||
| 714 | else | ||
| 715 | rq->clock_skip_update &= ~RQCF_REQ_SKIP; | ||
| 716 | } | ||
| 717 | |||
| 700 | #ifdef CONFIG_NUMA | 718 | #ifdef CONFIG_NUMA |
| 701 | enum numa_topology_type { | 719 | enum numa_topology_type { |
| 702 | NUMA_DIRECT, | 720 | NUMA_DIRECT, |
| @@ -1362,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { } | |||
| 1362 | 1380 | ||
| 1363 | extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); | 1381 | extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); |
| 1364 | 1382 | ||
| 1383 | /* | ||
| 1384 | * __task_rq_lock - lock the rq @p resides on. | ||
| 1385 | */ | ||
| 1386 | static inline struct rq *__task_rq_lock(struct task_struct *p) | ||
| 1387 | __acquires(rq->lock) | ||
| 1388 | { | ||
| 1389 | struct rq *rq; | ||
| 1390 | |||
| 1391 | lockdep_assert_held(&p->pi_lock); | ||
| 1392 | |||
| 1393 | for (;;) { | ||
| 1394 | rq = task_rq(p); | ||
| 1395 | raw_spin_lock(&rq->lock); | ||
| 1396 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 1397 | return rq; | ||
| 1398 | raw_spin_unlock(&rq->lock); | ||
| 1399 | |||
| 1400 | while (unlikely(task_on_rq_migrating(p))) | ||
| 1401 | cpu_relax(); | ||
| 1402 | } | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | /* | ||
| 1406 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. | ||
| 1407 | */ | ||
| 1408 | static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | ||
| 1409 | __acquires(p->pi_lock) | ||
| 1410 | __acquires(rq->lock) | ||
| 1411 | { | ||
| 1412 | struct rq *rq; | ||
| 1413 | |||
| 1414 | for (;;) { | ||
| 1415 | raw_spin_lock_irqsave(&p->pi_lock, *flags); | ||
| 1416 | rq = task_rq(p); | ||
| 1417 | raw_spin_lock(&rq->lock); | ||
| 1418 | /* | ||
| 1419 | * move_queued_task() task_rq_lock() | ||
| 1420 | * | ||
| 1421 | * ACQUIRE (rq->lock) | ||
| 1422 | * [S] ->on_rq = MIGRATING [L] rq = task_rq() | ||
| 1423 | * WMB (__set_task_cpu()) ACQUIRE (rq->lock); | ||
| 1424 | * [S] ->cpu = new_cpu [L] task_rq() | ||
| 1425 | * [L] ->on_rq | ||
| 1426 | * RELEASE (rq->lock) | ||
| 1427 | * | ||
| 1428 | * If we observe the old cpu in task_rq_lock, the acquire of | ||
| 1429 | * the old rq->lock will fully serialize against the stores. | ||
| 1430 | * | ||
| 1431 | * If we observe the new cpu in task_rq_lock, the acquire will | ||
| 1432 | * pair with the WMB to ensure we must then also see migrating. | ||
| 1433 | */ | ||
| 1434 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 1435 | return rq; | ||
| 1436 | raw_spin_unlock(&rq->lock); | ||
| 1437 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 1438 | |||
| 1439 | while (unlikely(task_on_rq_migrating(p))) | ||
| 1440 | cpu_relax(); | ||
| 1441 | } | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | static inline void __task_rq_unlock(struct rq *rq) | ||
| 1445 | __releases(rq->lock) | ||
| 1446 | { | ||
| 1447 | raw_spin_unlock(&rq->lock); | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | static inline void | ||
| 1451 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
| 1452 | __releases(rq->lock) | ||
| 1453 | __releases(p->pi_lock) | ||
| 1454 | { | ||
| 1455 | raw_spin_unlock(&rq->lock); | ||
| 1456 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 1457 | } | ||
| 1458 | |||
| 1365 | #ifdef CONFIG_SMP | 1459 | #ifdef CONFIG_SMP |
| 1366 | #ifdef CONFIG_PREEMPT | 1460 | #ifdef CONFIG_PREEMPT |
| 1367 | 1461 | ||
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index a476bea17fbc..87e2c9f0c33e 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c | |||
| @@ -15,11 +15,6 @@ | |||
| 15 | static int show_schedstat(struct seq_file *seq, void *v) | 15 | static int show_schedstat(struct seq_file *seq, void *v) |
| 16 | { | 16 | { |
| 17 | int cpu; | 17 | int cpu; |
| 18 | int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; | ||
| 19 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); | ||
| 20 | |||
| 21 | if (mask_str == NULL) | ||
| 22 | return -ENOMEM; | ||
| 23 | 18 | ||
| 24 | if (v == (void *)1) { | 19 | if (v == (void *)1) { |
| 25 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | 20 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); |
| @@ -50,9 +45,8 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
| 50 | for_each_domain(cpu, sd) { | 45 | for_each_domain(cpu, sd) { |
| 51 | enum cpu_idle_type itype; | 46 | enum cpu_idle_type itype; |
| 52 | 47 | ||
| 53 | cpumask_scnprintf(mask_str, mask_len, | 48 | seq_printf(seq, "domain%d %*pb", dcount++, |
| 54 | sched_domain_span(sd)); | 49 | cpumask_pr_args(sched_domain_span(sd))); |
| 55 | seq_printf(seq, "domain%d %s", dcount++, mask_str); | ||
| 56 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | 50 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; |
| 57 | itype++) { | 51 | itype++) { |
| 58 | seq_printf(seq, " %u %u %u %u %u %u %u %u", | 52 | seq_printf(seq, " %u %u %u %u %u %u %u %u", |
| @@ -76,7 +70,6 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
| 76 | rcu_read_unlock(); | 70 | rcu_read_unlock(); |
| 77 | #endif | 71 | #endif |
| 78 | } | 72 | } |
| 79 | kfree(mask_str); | ||
| 80 | return 0; | 73 | return 0; |
| 81 | } | 74 | } |
| 82 | 75 | ||
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4ef9687ac115..4f44028943e6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -629,7 +629,9 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) | |||
| 629 | 629 | ||
| 630 | switch (action) { | 630 | switch (action) { |
| 631 | case SECCOMP_RET_ERRNO: | 631 | case SECCOMP_RET_ERRNO: |
| 632 | /* Set the low-order 16-bits as a errno. */ | 632 | /* Set low-order bits as an errno, capped at MAX_ERRNO. */ |
| 633 | if (data > MAX_ERRNO) | ||
| 634 | data = MAX_ERRNO; | ||
| 633 | syscall_set_return_value(current, task_pt_regs(current), | 635 | syscall_set_return_value(current, task_pt_regs(current), |
| 634 | -data, 0); | 636 | -data, 0); |
| 635 | goto skip; | 637 | goto skip; |
diff --git a/kernel/signal.c b/kernel/signal.c index 16a305295256..a390499943e4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -2501,7 +2501,7 @@ EXPORT_SYMBOL(unblock_all_signals); | |||
| 2501 | */ | 2501 | */ |
| 2502 | SYSCALL_DEFINE0(restart_syscall) | 2502 | SYSCALL_DEFINE0(restart_syscall) |
| 2503 | { | 2503 | { |
| 2504 | struct restart_block *restart = ¤t_thread_info()->restart_block; | 2504 | struct restart_block *restart = ¤t->restart_block; |
| 2505 | return restart->fn(restart); | 2505 | return restart->fn(restart); |
| 2506 | } | 2506 | } |
| 2507 | 2507 | ||
| @@ -3550,7 +3550,7 @@ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) | |||
| 3550 | SYSCALL_DEFINE0(pause) | 3550 | SYSCALL_DEFINE0(pause) |
| 3551 | { | 3551 | { |
| 3552 | while (!signal_pending(current)) { | 3552 | while (!signal_pending(current)) { |
| 3553 | current->state = TASK_INTERRUPTIBLE; | 3553 | __set_current_state(TASK_INTERRUPTIBLE); |
| 3554 | schedule(); | 3554 | schedule(); |
| 3555 | } | 3555 | } |
| 3556 | return -ERESTARTNOHAND; | 3556 | return -ERESTARTNOHAND; |
| @@ -3563,7 +3563,7 @@ int sigsuspend(sigset_t *set) | |||
| 3563 | current->saved_sigmask = current->blocked; | 3563 | current->saved_sigmask = current->blocked; |
| 3564 | set_current_blocked(set); | 3564 | set_current_blocked(set); |
| 3565 | 3565 | ||
| 3566 | current->state = TASK_INTERRUPTIBLE; | 3566 | __set_current_state(TASK_INTERRUPTIBLE); |
| 3567 | schedule(); | 3567 | schedule(); |
| 3568 | set_restore_sigmask(); | 3568 | set_restore_sigmask(); |
| 3569 | return -ERESTARTNOHAND; | 3569 | return -ERESTARTNOHAND; |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 501baa9ac1be..479e4436f787 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -114,8 +114,12 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |||
| 114 | trace_softirqs_off(ip); | 114 | trace_softirqs_off(ip); |
| 115 | raw_local_irq_restore(flags); | 115 | raw_local_irq_restore(flags); |
| 116 | 116 | ||
| 117 | if (preempt_count() == cnt) | 117 | if (preempt_count() == cnt) { |
| 118 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 119 | current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1); | ||
| 120 | #endif | ||
| 118 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | 121 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); |
| 122 | } | ||
| 119 | } | 123 | } |
| 120 | EXPORT_SYMBOL(__local_bh_disable_ip); | 124 | EXPORT_SYMBOL(__local_bh_disable_ip); |
| 121 | #endif /* CONFIG_TRACE_IRQFLAGS */ | 125 | #endif /* CONFIG_TRACE_IRQFLAGS */ |
| @@ -656,9 +660,8 @@ static void run_ksoftirqd(unsigned int cpu) | |||
| 656 | * in the task stack here. | 660 | * in the task stack here. |
| 657 | */ | 661 | */ |
| 658 | __do_softirq(); | 662 | __do_softirq(); |
| 659 | rcu_note_context_switch(); | ||
| 660 | local_irq_enable(); | 663 | local_irq_enable(); |
| 661 | cond_resched(); | 664 | cond_resched_rcu_qs(); |
| 662 | return; | 665 | return; |
| 663 | } | 666 | } |
| 664 | local_irq_enable(); | 667 | local_irq_enable(); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 137c7f69b264..88ea2d6e0031 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -1248,7 +1248,6 @@ static struct ctl_table vm_table[] = { | |||
| 1248 | .maxlen = sizeof(unsigned long), | 1248 | .maxlen = sizeof(unsigned long), |
| 1249 | .mode = 0644, | 1249 | .mode = 0644, |
| 1250 | .proc_handler = hugetlb_sysctl_handler, | 1250 | .proc_handler = hugetlb_sysctl_handler, |
| 1251 | .extra1 = &zero, | ||
| 1252 | }, | 1251 | }, |
| 1253 | #ifdef CONFIG_NUMA | 1252 | #ifdef CONFIG_NUMA |
| 1254 | { | 1253 | { |
| @@ -1257,7 +1256,6 @@ static struct ctl_table vm_table[] = { | |||
| 1257 | .maxlen = sizeof(unsigned long), | 1256 | .maxlen = sizeof(unsigned long), |
| 1258 | .mode = 0644, | 1257 | .mode = 0644, |
| 1259 | .proc_handler = &hugetlb_mempolicy_sysctl_handler, | 1258 | .proc_handler = &hugetlb_mempolicy_sysctl_handler, |
| 1260 | .extra1 = &zero, | ||
| 1261 | }, | 1259 | }, |
| 1262 | #endif | 1260 | #endif |
| 1263 | { | 1261 | { |
| @@ -1280,7 +1278,6 @@ static struct ctl_table vm_table[] = { | |||
| 1280 | .maxlen = sizeof(unsigned long), | 1278 | .maxlen = sizeof(unsigned long), |
| 1281 | .mode = 0644, | 1279 | .mode = 0644, |
| 1282 | .proc_handler = hugetlb_overcommit_handler, | 1280 | .proc_handler = hugetlb_overcommit_handler, |
| 1283 | .extra1 = &zero, | ||
| 1284 | }, | 1281 | }, |
| 1285 | #endif | 1282 | #endif |
| 1286 | { | 1283 | { |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 670fff88a961..21f82c29c914 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -111,13 +111,8 @@ static int send_reply(struct sk_buff *skb, struct genl_info *info) | |||
| 111 | { | 111 | { |
| 112 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); | 112 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); |
| 113 | void *reply = genlmsg_data(genlhdr); | 113 | void *reply = genlmsg_data(genlhdr); |
| 114 | int rc; | ||
| 115 | 114 | ||
| 116 | rc = genlmsg_end(skb, reply); | 115 | genlmsg_end(skb, reply); |
| 117 | if (rc < 0) { | ||
| 118 | nlmsg_free(skb); | ||
| 119 | return rc; | ||
| 120 | } | ||
| 121 | 116 | ||
| 122 | return genlmsg_reply(skb, info); | 117 | return genlmsg_reply(skb, info); |
| 123 | } | 118 | } |
| @@ -134,11 +129,7 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
| 134 | void *reply = genlmsg_data(genlhdr); | 129 | void *reply = genlmsg_data(genlhdr); |
| 135 | int rc, delcount = 0; | 130 | int rc, delcount = 0; |
| 136 | 131 | ||
| 137 | rc = genlmsg_end(skb, reply); | 132 | genlmsg_end(skb, reply); |
| 138 | if (rc < 0) { | ||
| 139 | nlmsg_free(skb); | ||
| 140 | return; | ||
| 141 | } | ||
| 142 | 133 | ||
| 143 | rc = 0; | 134 | rc = 0; |
| 144 | down_read(&listeners->sem); | 135 | down_read(&listeners->sem); |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index f622cf28628a..c09c07817d7a 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o | 1 | obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o |
| 2 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o | 2 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o |
| 3 | obj-y += timeconv.o posix-clock.o alarmtimer.o | 3 | obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o | 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o |
| 6 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | 6 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o |
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index a7077d3ae52f..1b001ed1edb9 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
| @@ -788,7 +788,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, | |||
| 788 | goto out; | 788 | goto out; |
| 789 | } | 789 | } |
| 790 | 790 | ||
| 791 | restart = ¤t_thread_info()->restart_block; | 791 | restart = ¤t->restart_block; |
| 792 | restart->fn = alarm_timer_nsleep_restart; | 792 | restart->fn = alarm_timer_nsleep_restart; |
| 793 | restart->nanosleep.clockid = type; | 793 | restart->nanosleep.clockid = type; |
| 794 | restart->nanosleep.expires = exp.tv64; | 794 | restart->nanosleep.expires = exp.tv64; |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b79f39bda7e1..4892352f0e49 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -34,82 +34,6 @@ | |||
| 34 | #include "tick-internal.h" | 34 | #include "tick-internal.h" |
| 35 | #include "timekeeping_internal.h" | 35 | #include "timekeeping_internal.h" |
| 36 | 36 | ||
| 37 | void timecounter_init(struct timecounter *tc, | ||
| 38 | const struct cyclecounter *cc, | ||
| 39 | u64 start_tstamp) | ||
| 40 | { | ||
| 41 | tc->cc = cc; | ||
| 42 | tc->cycle_last = cc->read(cc); | ||
| 43 | tc->nsec = start_tstamp; | ||
| 44 | } | ||
| 45 | EXPORT_SYMBOL_GPL(timecounter_init); | ||
| 46 | |||
| 47 | /** | ||
| 48 | * timecounter_read_delta - get nanoseconds since last call of this function | ||
| 49 | * @tc: Pointer to time counter | ||
| 50 | * | ||
| 51 | * When the underlying cycle counter runs over, this will be handled | ||
| 52 | * correctly as long as it does not run over more than once between | ||
| 53 | * calls. | ||
| 54 | * | ||
| 55 | * The first call to this function for a new time counter initializes | ||
| 56 | * the time tracking and returns an undefined result. | ||
| 57 | */ | ||
| 58 | static u64 timecounter_read_delta(struct timecounter *tc) | ||
| 59 | { | ||
| 60 | cycle_t cycle_now, cycle_delta; | ||
| 61 | u64 ns_offset; | ||
| 62 | |||
| 63 | /* read cycle counter: */ | ||
| 64 | cycle_now = tc->cc->read(tc->cc); | ||
| 65 | |||
| 66 | /* calculate the delta since the last timecounter_read_delta(): */ | ||
| 67 | cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask; | ||
| 68 | |||
| 69 | /* convert to nanoseconds: */ | ||
| 70 | ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta); | ||
| 71 | |||
| 72 | /* update time stamp of timecounter_read_delta() call: */ | ||
| 73 | tc->cycle_last = cycle_now; | ||
| 74 | |||
| 75 | return ns_offset; | ||
| 76 | } | ||
| 77 | |||
| 78 | u64 timecounter_read(struct timecounter *tc) | ||
| 79 | { | ||
| 80 | u64 nsec; | ||
| 81 | |||
| 82 | /* increment time by nanoseconds since last call */ | ||
| 83 | nsec = timecounter_read_delta(tc); | ||
| 84 | nsec += tc->nsec; | ||
| 85 | tc->nsec = nsec; | ||
| 86 | |||
| 87 | return nsec; | ||
| 88 | } | ||
| 89 | EXPORT_SYMBOL_GPL(timecounter_read); | ||
| 90 | |||
| 91 | u64 timecounter_cyc2time(struct timecounter *tc, | ||
| 92 | cycle_t cycle_tstamp) | ||
| 93 | { | ||
| 94 | u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask; | ||
| 95 | u64 nsec; | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Instead of always treating cycle_tstamp as more recent | ||
| 99 | * than tc->cycle_last, detect when it is too far in the | ||
| 100 | * future and treat it as old time stamp instead. | ||
| 101 | */ | ||
| 102 | if (cycle_delta > tc->cc->mask / 2) { | ||
| 103 | cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask; | ||
| 104 | nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta); | ||
| 105 | } else { | ||
| 106 | nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec; | ||
| 107 | } | ||
| 108 | |||
| 109 | return nsec; | ||
| 110 | } | ||
| 111 | EXPORT_SYMBOL_GPL(timecounter_cyc2time); | ||
| 112 | |||
| 113 | /** | 37 | /** |
| 114 | * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks | 38 | * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks |
| 115 | * @mult: pointer to mult variable | 39 | * @mult: pointer to mult variable |
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d8c724cda37b..bee0c1f78091 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c | |||
| @@ -266,7 +266,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | |||
| 266 | /* | 266 | /* |
| 267 | * Divide a ktime value by a nanosecond value | 267 | * Divide a ktime value by a nanosecond value |
| 268 | */ | 268 | */ |
| 269 | u64 ktime_divns(const ktime_t kt, s64 div) | 269 | u64 __ktime_divns(const ktime_t kt, s64 div) |
| 270 | { | 270 | { |
| 271 | u64 dclc; | 271 | u64 dclc; |
| 272 | int sft = 0; | 272 | int sft = 0; |
| @@ -282,7 +282,7 @@ u64 ktime_divns(const ktime_t kt, s64 div) | |||
| 282 | 282 | ||
| 283 | return dclc; | 283 | return dclc; |
| 284 | } | 284 | } |
| 285 | EXPORT_SYMBOL_GPL(ktime_divns); | 285 | EXPORT_SYMBOL_GPL(__ktime_divns); |
| 286 | #endif /* BITS_PER_LONG >= 64 */ | 286 | #endif /* BITS_PER_LONG >= 64 */ |
| 287 | 287 | ||
| 288 | /* | 288 | /* |
| @@ -440,6 +440,37 @@ static inline void debug_deactivate(struct hrtimer *timer) | |||
| 440 | trace_hrtimer_cancel(timer); | 440 | trace_hrtimer_cancel(timer); |
| 441 | } | 441 | } |
| 442 | 442 | ||
| 443 | #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) | ||
| 444 | static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) | ||
| 445 | { | ||
| 446 | struct hrtimer_clock_base *base = cpu_base->clock_base; | ||
| 447 | ktime_t expires, expires_next = { .tv64 = KTIME_MAX }; | ||
| 448 | int i; | ||
| 449 | |||
| 450 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | ||
| 451 | struct timerqueue_node *next; | ||
| 452 | struct hrtimer *timer; | ||
| 453 | |||
| 454 | next = timerqueue_getnext(&base->active); | ||
| 455 | if (!next) | ||
| 456 | continue; | ||
| 457 | |||
| 458 | timer = container_of(next, struct hrtimer, node); | ||
| 459 | expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | ||
| 460 | if (expires.tv64 < expires_next.tv64) | ||
| 461 | expires_next = expires; | ||
| 462 | } | ||
| 463 | /* | ||
| 464 | * clock_was_set() might have changed base->offset of any of | ||
| 465 | * the clock bases so the result might be negative. Fix it up | ||
| 466 | * to prevent a false positive in clockevents_program_event(). | ||
| 467 | */ | ||
| 468 | if (expires_next.tv64 < 0) | ||
| 469 | expires_next.tv64 = 0; | ||
| 470 | return expires_next; | ||
| 471 | } | ||
| 472 | #endif | ||
| 473 | |||
| 443 | /* High resolution timer related functions */ | 474 | /* High resolution timer related functions */ |
| 444 | #ifdef CONFIG_HIGH_RES_TIMERS | 475 | #ifdef CONFIG_HIGH_RES_TIMERS |
| 445 | 476 | ||
| @@ -488,32 +519,7 @@ static inline int hrtimer_hres_active(void) | |||
| 488 | static void | 519 | static void |
| 489 | hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) | 520 | hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) |
| 490 | { | 521 | { |
| 491 | int i; | 522 | ktime_t expires_next = __hrtimer_get_next_event(cpu_base); |
| 492 | struct hrtimer_clock_base *base = cpu_base->clock_base; | ||
| 493 | ktime_t expires, expires_next; | ||
| 494 | |||
| 495 | expires_next.tv64 = KTIME_MAX; | ||
| 496 | |||
| 497 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | ||
| 498 | struct hrtimer *timer; | ||
| 499 | struct timerqueue_node *next; | ||
| 500 | |||
| 501 | next = timerqueue_getnext(&base->active); | ||
| 502 | if (!next) | ||
| 503 | continue; | ||
| 504 | timer = container_of(next, struct hrtimer, node); | ||
| 505 | |||
| 506 | expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | ||
| 507 | /* | ||
| 508 | * clock_was_set() has changed base->offset so the | ||
| 509 | * result might be negative. Fix it up to prevent a | ||
| 510 | * false positive in clockevents_program_event() | ||
| 511 | */ | ||
| 512 | if (expires.tv64 < 0) | ||
| 513 | expires.tv64 = 0; | ||
| 514 | if (expires.tv64 < expires_next.tv64) | ||
| 515 | expires_next = expires; | ||
| 516 | } | ||
| 517 | 523 | ||
| 518 | if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64) | 524 | if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64) |
| 519 | return; | 525 | return; |
| @@ -587,6 +593,15 @@ static int hrtimer_reprogram(struct hrtimer *timer, | |||
| 587 | return 0; | 593 | return 0; |
| 588 | 594 | ||
| 589 | /* | 595 | /* |
| 596 | * When the target cpu of the timer is currently executing | ||
| 597 | * hrtimer_interrupt(), then we do not touch the clock event | ||
| 598 | * device. hrtimer_interrupt() will reevaluate all clock bases | ||
| 599 | * before reprogramming the device. | ||
| 600 | */ | ||
| 601 | if (cpu_base->in_hrtirq) | ||
| 602 | return 0; | ||
| 603 | |||
| 604 | /* | ||
| 590 | * If a hang was detected in the last timer interrupt then we | 605 | * If a hang was detected in the last timer interrupt then we |
| 591 | * do not schedule a timer which is earlier than the expiry | 606 | * do not schedule a timer which is earlier than the expiry |
| 592 | * which we enforced in the hang detection. We want the system | 607 | * which we enforced in the hang detection. We want the system |
| @@ -1104,29 +1119,14 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | |||
| 1104 | ktime_t hrtimer_get_next_event(void) | 1119 | ktime_t hrtimer_get_next_event(void) |
| 1105 | { | 1120 | { |
| 1106 | struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); | 1121 | struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); |
| 1107 | struct hrtimer_clock_base *base = cpu_base->clock_base; | 1122 | ktime_t mindelta = { .tv64 = KTIME_MAX }; |
| 1108 | ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; | ||
| 1109 | unsigned long flags; | 1123 | unsigned long flags; |
| 1110 | int i; | ||
| 1111 | 1124 | ||
| 1112 | raw_spin_lock_irqsave(&cpu_base->lock, flags); | 1125 | raw_spin_lock_irqsave(&cpu_base->lock, flags); |
| 1113 | 1126 | ||
| 1114 | if (!hrtimer_hres_active()) { | 1127 | if (!hrtimer_hres_active()) |
| 1115 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | 1128 | mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base), |
| 1116 | struct hrtimer *timer; | 1129 | ktime_get()); |
| 1117 | struct timerqueue_node *next; | ||
| 1118 | |||
| 1119 | next = timerqueue_getnext(&base->active); | ||
| 1120 | if (!next) | ||
| 1121 | continue; | ||
| 1122 | |||
| 1123 | timer = container_of(next, struct hrtimer, node); | ||
| 1124 | delta.tv64 = hrtimer_get_expires_tv64(timer); | ||
| 1125 | delta = ktime_sub(delta, base->get_time()); | ||
| 1126 | if (delta.tv64 < mindelta.tv64) | ||
| 1127 | mindelta.tv64 = delta.tv64; | ||
| 1128 | } | ||
| 1129 | } | ||
| 1130 | 1130 | ||
| 1131 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | 1131 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); |
| 1132 | 1132 | ||
| @@ -1253,7 +1253,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
| 1253 | raw_spin_lock(&cpu_base->lock); | 1253 | raw_spin_lock(&cpu_base->lock); |
| 1254 | entry_time = now = hrtimer_update_base(cpu_base); | 1254 | entry_time = now = hrtimer_update_base(cpu_base); |
| 1255 | retry: | 1255 | retry: |
| 1256 | expires_next.tv64 = KTIME_MAX; | 1256 | cpu_base->in_hrtirq = 1; |
| 1257 | /* | 1257 | /* |
| 1258 | * We set expires_next to KTIME_MAX here with cpu_base->lock | 1258 | * We set expires_next to KTIME_MAX here with cpu_base->lock |
| 1259 | * held to prevent that a timer is enqueued in our queue via | 1259 | * held to prevent that a timer is enqueued in our queue via |
| @@ -1291,28 +1291,20 @@ retry: | |||
| 1291 | * are right-of a not yet expired timer, because that | 1291 | * are right-of a not yet expired timer, because that |
| 1292 | * timer will have to trigger a wakeup anyway. | 1292 | * timer will have to trigger a wakeup anyway. |
| 1293 | */ | 1293 | */ |
| 1294 | 1294 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) | |
| 1295 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { | ||
| 1296 | ktime_t expires; | ||
| 1297 | |||
| 1298 | expires = ktime_sub(hrtimer_get_expires(timer), | ||
| 1299 | base->offset); | ||
| 1300 | if (expires.tv64 < 0) | ||
| 1301 | expires.tv64 = KTIME_MAX; | ||
| 1302 | if (expires.tv64 < expires_next.tv64) | ||
| 1303 | expires_next = expires; | ||
| 1304 | break; | 1295 | break; |
| 1305 | } | ||
| 1306 | 1296 | ||
| 1307 | __run_hrtimer(timer, &basenow); | 1297 | __run_hrtimer(timer, &basenow); |
| 1308 | } | 1298 | } |
| 1309 | } | 1299 | } |
| 1310 | 1300 | /* Reevaluate the clock bases for the next expiry */ | |
| 1301 | expires_next = __hrtimer_get_next_event(cpu_base); | ||
| 1311 | /* | 1302 | /* |
| 1312 | * Store the new expiry value so the migration code can verify | 1303 | * Store the new expiry value so the migration code can verify |
| 1313 | * against it. | 1304 | * against it. |
| 1314 | */ | 1305 | */ |
| 1315 | cpu_base->expires_next = expires_next; | 1306 | cpu_base->expires_next = expires_next; |
| 1307 | cpu_base->in_hrtirq = 0; | ||
| 1316 | raw_spin_unlock(&cpu_base->lock); | 1308 | raw_spin_unlock(&cpu_base->lock); |
| 1317 | 1309 | ||
| 1318 | /* Reprogramming necessary ? */ | 1310 | /* Reprogramming necessary ? */ |
| @@ -1591,7 +1583,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
| 1591 | goto out; | 1583 | goto out; |
| 1592 | } | 1584 | } |
| 1593 | 1585 | ||
| 1594 | restart = ¤t_thread_info()->restart_block; | 1586 | restart = ¤t->restart_block; |
| 1595 | restart->fn = hrtimer_nanosleep_restart; | 1587 | restart->fn = hrtimer_nanosleep_restart; |
| 1596 | restart->nanosleep.clockid = t.timer.base->clockid; | 1588 | restart->nanosleep.clockid = t.timer.base->clockid; |
| 1597 | restart->nanosleep.rmtp = rmtp; | 1589 | restart->nanosleep.rmtp = rmtp; |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 28bf91c60a0b..0f60b08a4f07 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -488,13 +488,13 @@ static void sync_cmos_clock(struct work_struct *work) | |||
| 488 | 488 | ||
| 489 | getnstimeofday64(&now); | 489 | getnstimeofday64(&now); |
| 490 | if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { | 490 | if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { |
| 491 | struct timespec adjust = timespec64_to_timespec(now); | 491 | struct timespec64 adjust = now; |
| 492 | 492 | ||
| 493 | fail = -ENODEV; | 493 | fail = -ENODEV; |
| 494 | if (persistent_clock_is_local) | 494 | if (persistent_clock_is_local) |
| 495 | adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); | 495 | adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); |
| 496 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | 496 | #ifdef CONFIG_GENERIC_CMOS_UPDATE |
| 497 | fail = update_persistent_clock(adjust); | 497 | fail = update_persistent_clock(timespec64_to_timespec(adjust)); |
| 498 | #endif | 498 | #endif |
| 499 | #ifdef CONFIG_RTC_SYSTOHC | 499 | #ifdef CONFIG_RTC_SYSTOHC |
| 500 | if (fail == -ENODEV) | 500 | if (fail == -ENODEV) |
| @@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc) | |||
| 633 | if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) | 633 | if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) |
| 634 | return -EPERM; | 634 | return -EPERM; |
| 635 | 635 | ||
| 636 | if (txc->modes & ADJ_FREQUENCY) { | 636 | /* |
| 637 | if (LONG_MIN / PPM_SCALE > txc->freq) | 637 | * Check for potential multiplication overflows that can |
| 638 | * only happen on 64-bit systems: | ||
| 639 | */ | ||
| 640 | if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { | ||
| 641 | if (LLONG_MIN / PPM_SCALE > txc->freq) | ||
| 638 | return -EINVAL; | 642 | return -EINVAL; |
| 639 | if (LONG_MAX / PPM_SCALE < txc->freq) | 643 | if (LLONG_MAX / PPM_SCALE < txc->freq) |
| 640 | return -EINVAL; | 644 | return -EINVAL; |
| 641 | } | 645 | } |
| 642 | 646 | ||
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a16b67859e2a..0075da74abf0 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
| @@ -1334,8 +1334,7 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block); | |||
| 1334 | static int posix_cpu_nsleep(const clockid_t which_clock, int flags, | 1334 | static int posix_cpu_nsleep(const clockid_t which_clock, int flags, |
| 1335 | struct timespec *rqtp, struct timespec __user *rmtp) | 1335 | struct timespec *rqtp, struct timespec __user *rmtp) |
| 1336 | { | 1336 | { |
| 1337 | struct restart_block *restart_block = | 1337 | struct restart_block *restart_block = ¤t->restart_block; |
| 1338 | ¤t_thread_info()->restart_block; | ||
| 1339 | struct itimerspec it; | 1338 | struct itimerspec it; |
| 1340 | int error; | 1339 | int error; |
| 1341 | 1340 | ||
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7efeedf53ebd..f7c515595b42 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
| @@ -394,6 +394,56 @@ void tick_resume(void) | |||
| 394 | } | 394 | } |
| 395 | } | 395 | } |
| 396 | 396 | ||
| 397 | static DEFINE_RAW_SPINLOCK(tick_freeze_lock); | ||
| 398 | static unsigned int tick_freeze_depth; | ||
| 399 | |||
| 400 | /** | ||
| 401 | * tick_freeze - Suspend the local tick and (possibly) timekeeping. | ||
| 402 | * | ||
| 403 | * Check if this is the last online CPU executing the function and if so, | ||
| 404 | * suspend timekeeping. Otherwise suspend the local tick. | ||
| 405 | * | ||
| 406 | * Call with interrupts disabled. Must be balanced with %tick_unfreeze(). | ||
| 407 | * Interrupts must not be enabled before the subsequent %tick_unfreeze(). | ||
| 408 | */ | ||
| 409 | void tick_freeze(void) | ||
| 410 | { | ||
| 411 | raw_spin_lock(&tick_freeze_lock); | ||
| 412 | |||
| 413 | tick_freeze_depth++; | ||
| 414 | if (tick_freeze_depth == num_online_cpus()) { | ||
| 415 | timekeeping_suspend(); | ||
| 416 | } else { | ||
| 417 | tick_suspend(); | ||
| 418 | tick_suspend_broadcast(); | ||
| 419 | } | ||
| 420 | |||
| 421 | raw_spin_unlock(&tick_freeze_lock); | ||
| 422 | } | ||
| 423 | |||
| 424 | /** | ||
| 425 | * tick_unfreeze - Resume the local tick and (possibly) timekeeping. | ||
| 426 | * | ||
| 427 | * Check if this is the first CPU executing the function and if so, resume | ||
| 428 | * timekeeping. Otherwise resume the local tick. | ||
| 429 | * | ||
| 430 | * Call with interrupts disabled. Must be balanced with %tick_freeze(). | ||
| 431 | * Interrupts must not be enabled after the preceding %tick_freeze(). | ||
| 432 | */ | ||
| 433 | void tick_unfreeze(void) | ||
| 434 | { | ||
| 435 | raw_spin_lock(&tick_freeze_lock); | ||
| 436 | |||
| 437 | if (tick_freeze_depth == num_online_cpus()) | ||
| 438 | timekeeping_resume(); | ||
| 439 | else | ||
| 440 | tick_resume(); | ||
| 441 | |||
| 442 | tick_freeze_depth--; | ||
| 443 | |||
| 444 | raw_spin_unlock(&tick_freeze_lock); | ||
| 445 | } | ||
| 446 | |||
| 397 | /** | 447 | /** |
| 398 | * tick_init - initialize the tick control | 448 | * tick_init - initialize the tick control |
| 399 | */ | 449 | */ |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1363d58f07e9..a4c4edac4528 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -326,13 +326,6 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, | |||
| 326 | return NOTIFY_OK; | 326 | return NOTIFY_OK; |
| 327 | } | 327 | } |
| 328 | 328 | ||
| 329 | /* | ||
| 330 | * Worst case string length in chunks of CPU range seems 2 steps | ||
| 331 | * separations: 0,2,4,6,... | ||
| 332 | * This is NR_CPUS + sizeof('\0') | ||
| 333 | */ | ||
| 334 | static char __initdata nohz_full_buf[NR_CPUS + 1]; | ||
| 335 | |||
| 336 | static int tick_nohz_init_all(void) | 329 | static int tick_nohz_init_all(void) |
| 337 | { | 330 | { |
| 338 | int err = -1; | 331 | int err = -1; |
| @@ -393,8 +386,8 @@ void __init tick_nohz_init(void) | |||
| 393 | context_tracking_cpu_set(cpu); | 386 | context_tracking_cpu_set(cpu); |
| 394 | 387 | ||
| 395 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 388 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
| 396 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); | 389 | pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", |
| 397 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 390 | cpumask_pr_args(tick_nohz_full_mask)); |
| 398 | } | 391 | } |
| 399 | #endif | 392 | #endif |
| 400 | 393 | ||
diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c new file mode 100644 index 000000000000..4687b3104bae --- /dev/null +++ b/kernel/time/timecounter.c | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/timecounter.c | ||
| 3 | * | ||
| 4 | * based on code that migrated away from | ||
| 5 | * linux/kernel/time/clocksource.c | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; either version 2 of the License, or | ||
| 10 | * (at your option) any later version. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/export.h> | ||
| 19 | #include <linux/timecounter.h> | ||
| 20 | |||
| 21 | void timecounter_init(struct timecounter *tc, | ||
| 22 | const struct cyclecounter *cc, | ||
| 23 | u64 start_tstamp) | ||
| 24 | { | ||
| 25 | tc->cc = cc; | ||
| 26 | tc->cycle_last = cc->read(cc); | ||
| 27 | tc->nsec = start_tstamp; | ||
| 28 | tc->mask = (1ULL << cc->shift) - 1; | ||
| 29 | tc->frac = 0; | ||
| 30 | } | ||
| 31 | EXPORT_SYMBOL_GPL(timecounter_init); | ||
| 32 | |||
| 33 | /** | ||
| 34 | * timecounter_read_delta - get nanoseconds since last call of this function | ||
| 35 | * @tc: Pointer to time counter | ||
| 36 | * | ||
| 37 | * When the underlying cycle counter runs over, this will be handled | ||
| 38 | * correctly as long as it does not run over more than once between | ||
| 39 | * calls. | ||
| 40 | * | ||
| 41 | * The first call to this function for a new time counter initializes | ||
| 42 | * the time tracking and returns an undefined result. | ||
| 43 | */ | ||
| 44 | static u64 timecounter_read_delta(struct timecounter *tc) | ||
| 45 | { | ||
| 46 | cycle_t cycle_now, cycle_delta; | ||
| 47 | u64 ns_offset; | ||
| 48 | |||
| 49 | /* read cycle counter: */ | ||
| 50 | cycle_now = tc->cc->read(tc->cc); | ||
| 51 | |||
| 52 | /* calculate the delta since the last timecounter_read_delta(): */ | ||
| 53 | cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask; | ||
| 54 | |||
| 55 | /* convert to nanoseconds: */ | ||
| 56 | ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta, | ||
| 57 | tc->mask, &tc->frac); | ||
| 58 | |||
| 59 | /* update time stamp of timecounter_read_delta() call: */ | ||
| 60 | tc->cycle_last = cycle_now; | ||
| 61 | |||
| 62 | return ns_offset; | ||
| 63 | } | ||
| 64 | |||
| 65 | u64 timecounter_read(struct timecounter *tc) | ||
| 66 | { | ||
| 67 | u64 nsec; | ||
| 68 | |||
| 69 | /* increment time by nanoseconds since last call */ | ||
| 70 | nsec = timecounter_read_delta(tc); | ||
| 71 | nsec += tc->nsec; | ||
| 72 | tc->nsec = nsec; | ||
| 73 | |||
| 74 | return nsec; | ||
| 75 | } | ||
| 76 | EXPORT_SYMBOL_GPL(timecounter_read); | ||
| 77 | |||
| 78 | /* | ||
| 79 | * This is like cyclecounter_cyc2ns(), but it is used for computing a | ||
| 80 | * time previous to the time stored in the cycle counter. | ||
| 81 | */ | ||
| 82 | static u64 cc_cyc2ns_backwards(const struct cyclecounter *cc, | ||
| 83 | cycle_t cycles, u64 mask, u64 frac) | ||
| 84 | { | ||
| 85 | u64 ns = (u64) cycles; | ||
| 86 | |||
| 87 | ns = ((ns * cc->mult) - frac) >> cc->shift; | ||
| 88 | |||
| 89 | return ns; | ||
| 90 | } | ||
| 91 | |||
| 92 | u64 timecounter_cyc2time(struct timecounter *tc, | ||
| 93 | cycle_t cycle_tstamp) | ||
| 94 | { | ||
| 95 | u64 delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask; | ||
| 96 | u64 nsec = tc->nsec, frac = tc->frac; | ||
| 97 | |||
| 98 | /* | ||
| 99 | * Instead of always treating cycle_tstamp as more recent | ||
| 100 | * than tc->cycle_last, detect when it is too far in the | ||
| 101 | * future and treat it as old time stamp instead. | ||
| 102 | */ | ||
| 103 | if (delta > tc->cc->mask / 2) { | ||
| 104 | delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask; | ||
| 105 | nsec -= cc_cyc2ns_backwards(tc->cc, delta, tc->mask, frac); | ||
| 106 | } else { | ||
| 107 | nsec += cyclecounter_cyc2ns(tc->cc, delta, tc->mask, &frac); | ||
| 108 | } | ||
| 109 | |||
| 110 | return nsec; | ||
| 111 | } | ||
| 112 | EXPORT_SYMBOL_GPL(timecounter_cyc2time); | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6a931852082f..91db94136c10 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -230,9 +230,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
| 230 | 230 | ||
| 231 | /** | 231 | /** |
| 232 | * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. | 232 | * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. |
| 233 | * @tk: The timekeeper from which we take the update | 233 | * @tkr: Timekeeping readout base from which we take the update |
| 234 | * @tkf: The fast timekeeper to update | ||
| 235 | * @tbase: The time base for the fast timekeeper (mono/raw) | ||
| 236 | * | 234 | * |
| 237 | * We want to use this from any context including NMI and tracing / | 235 | * We want to use this from any context including NMI and tracing / |
| 238 | * instrumenting the timekeeping code itself. | 236 | * instrumenting the timekeeping code itself. |
| @@ -244,11 +242,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
| 244 | * smp_wmb(); <- Ensure that the last base[1] update is visible | 242 | * smp_wmb(); <- Ensure that the last base[1] update is visible |
| 245 | * tkf->seq++; | 243 | * tkf->seq++; |
| 246 | * smp_wmb(); <- Ensure that the seqcount update is visible | 244 | * smp_wmb(); <- Ensure that the seqcount update is visible |
| 247 | * update(tkf->base[0], tk); | 245 | * update(tkf->base[0], tkr); |
| 248 | * smp_wmb(); <- Ensure that the base[0] update is visible | 246 | * smp_wmb(); <- Ensure that the base[0] update is visible |
| 249 | * tkf->seq++; | 247 | * tkf->seq++; |
| 250 | * smp_wmb(); <- Ensure that the seqcount update is visible | 248 | * smp_wmb(); <- Ensure that the seqcount update is visible |
| 251 | * update(tkf->base[1], tk); | 249 | * update(tkf->base[1], tkr); |
| 252 | * | 250 | * |
| 253 | * The reader side does: | 251 | * The reader side does: |
| 254 | * | 252 | * |
| @@ -269,7 +267,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
| 269 | * slightly wrong timestamp (a few nanoseconds). See | 267 | * slightly wrong timestamp (a few nanoseconds). See |
| 270 | * @ktime_get_mono_fast_ns. | 268 | * @ktime_get_mono_fast_ns. |
| 271 | */ | 269 | */ |
| 272 | static void update_fast_timekeeper(struct timekeeper *tk) | 270 | static void update_fast_timekeeper(struct tk_read_base *tkr) |
| 273 | { | 271 | { |
| 274 | struct tk_read_base *base = tk_fast_mono.base; | 272 | struct tk_read_base *base = tk_fast_mono.base; |
| 275 | 273 | ||
| @@ -277,7 +275,7 @@ static void update_fast_timekeeper(struct timekeeper *tk) | |||
| 277 | raw_write_seqcount_latch(&tk_fast_mono.seq); | 275 | raw_write_seqcount_latch(&tk_fast_mono.seq); |
| 278 | 276 | ||
| 279 | /* Update base[0] */ | 277 | /* Update base[0] */ |
| 280 | memcpy(base, &tk->tkr, sizeof(*base)); | 278 | memcpy(base, tkr, sizeof(*base)); |
| 281 | 279 | ||
| 282 | /* Force readers back to base[0] */ | 280 | /* Force readers back to base[0] */ |
| 283 | raw_write_seqcount_latch(&tk_fast_mono.seq); | 281 | raw_write_seqcount_latch(&tk_fast_mono.seq); |
| @@ -334,6 +332,35 @@ u64 notrace ktime_get_mono_fast_ns(void) | |||
| 334 | } | 332 | } |
| 335 | EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); | 333 | EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); |
| 336 | 334 | ||
| 335 | /* Suspend-time cycles value for halted fast timekeeper. */ | ||
| 336 | static cycle_t cycles_at_suspend; | ||
| 337 | |||
| 338 | static cycle_t dummy_clock_read(struct clocksource *cs) | ||
| 339 | { | ||
| 340 | return cycles_at_suspend; | ||
| 341 | } | ||
| 342 | |||
| 343 | /** | ||
| 344 | * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. | ||
| 345 | * @tk: Timekeeper to snapshot. | ||
| 346 | * | ||
| 347 | * It generally is unsafe to access the clocksource after timekeeping has been | ||
| 348 | * suspended, so take a snapshot of the readout base of @tk and use it as the | ||
| 349 | * fast timekeeper's readout base while suspended. It will return the same | ||
| 350 | * number of cycles every time until timekeeping is resumed at which time the | ||
| 351 | * proper readout base for the fast timekeeper will be restored automatically. | ||
| 352 | */ | ||
| 353 | static void halt_fast_timekeeper(struct timekeeper *tk) | ||
| 354 | { | ||
| 355 | static struct tk_read_base tkr_dummy; | ||
| 356 | struct tk_read_base *tkr = &tk->tkr; | ||
| 357 | |||
| 358 | memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); | ||
| 359 | cycles_at_suspend = tkr->read(tkr->clock); | ||
| 360 | tkr_dummy.read = dummy_clock_read; | ||
| 361 | update_fast_timekeeper(&tkr_dummy); | ||
| 362 | } | ||
| 363 | |||
| 337 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD | 364 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD |
| 338 | 365 | ||
| 339 | static inline void update_vsyscall(struct timekeeper *tk) | 366 | static inline void update_vsyscall(struct timekeeper *tk) |
| @@ -462,7 +489,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) | |||
| 462 | memcpy(&shadow_timekeeper, &tk_core.timekeeper, | 489 | memcpy(&shadow_timekeeper, &tk_core.timekeeper, |
| 463 | sizeof(tk_core.timekeeper)); | 490 | sizeof(tk_core.timekeeper)); |
| 464 | 491 | ||
| 465 | update_fast_timekeeper(tk); | 492 | update_fast_timekeeper(&tk->tkr); |
| 466 | } | 493 | } |
| 467 | 494 | ||
| 468 | /** | 495 | /** |
| @@ -1170,7 +1197,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) | |||
| 1170 | * xtime/wall_to_monotonic/jiffies/etc are | 1197 | * xtime/wall_to_monotonic/jiffies/etc are |
| 1171 | * still managed by arch specific suspend/resume code. | 1198 | * still managed by arch specific suspend/resume code. |
| 1172 | */ | 1199 | */ |
| 1173 | static void timekeeping_resume(void) | 1200 | void timekeeping_resume(void) |
| 1174 | { | 1201 | { |
| 1175 | struct timekeeper *tk = &tk_core.timekeeper; | 1202 | struct timekeeper *tk = &tk_core.timekeeper; |
| 1176 | struct clocksource *clock = tk->tkr.clock; | 1203 | struct clocksource *clock = tk->tkr.clock; |
| @@ -1251,7 +1278,7 @@ static void timekeeping_resume(void) | |||
| 1251 | hrtimers_resume(); | 1278 | hrtimers_resume(); |
| 1252 | } | 1279 | } |
| 1253 | 1280 | ||
| 1254 | static int timekeeping_suspend(void) | 1281 | int timekeeping_suspend(void) |
| 1255 | { | 1282 | { |
| 1256 | struct timekeeper *tk = &tk_core.timekeeper; | 1283 | struct timekeeper *tk = &tk_core.timekeeper; |
| 1257 | unsigned long flags; | 1284 | unsigned long flags; |
| @@ -1296,6 +1323,7 @@ static int timekeeping_suspend(void) | |||
| 1296 | } | 1323 | } |
| 1297 | 1324 | ||
| 1298 | timekeeping_update(tk, TK_MIRROR); | 1325 | timekeeping_update(tk, TK_MIRROR); |
| 1326 | halt_fast_timekeeper(tk); | ||
| 1299 | write_seqcount_end(&tk_core.seq); | 1327 | write_seqcount_end(&tk_core.seq); |
| 1300 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 1328 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| 1301 | 1329 | ||
| @@ -1659,24 +1687,24 @@ out: | |||
| 1659 | } | 1687 | } |
| 1660 | 1688 | ||
| 1661 | /** | 1689 | /** |
| 1662 | * getboottime - Return the real time of system boot. | 1690 | * getboottime64 - Return the real time of system boot. |
| 1663 | * @ts: pointer to the timespec to be set | 1691 | * @ts: pointer to the timespec64 to be set |
| 1664 | * | 1692 | * |
| 1665 | * Returns the wall-time of boot in a timespec. | 1693 | * Returns the wall-time of boot in a timespec64. |
| 1666 | * | 1694 | * |
| 1667 | * This is based on the wall_to_monotonic offset and the total suspend | 1695 | * This is based on the wall_to_monotonic offset and the total suspend |
| 1668 | * time. Calls to settimeofday will affect the value returned (which | 1696 | * time. Calls to settimeofday will affect the value returned (which |
| 1669 | * basically means that however wrong your real time clock is at boot time, | 1697 | * basically means that however wrong your real time clock is at boot time, |
| 1670 | * you get the right time here). | 1698 | * you get the right time here). |
| 1671 | */ | 1699 | */ |
| 1672 | void getboottime(struct timespec *ts) | 1700 | void getboottime64(struct timespec64 *ts) |
| 1673 | { | 1701 | { |
| 1674 | struct timekeeper *tk = &tk_core.timekeeper; | 1702 | struct timekeeper *tk = &tk_core.timekeeper; |
| 1675 | ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); | 1703 | ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); |
| 1676 | 1704 | ||
| 1677 | *ts = ktime_to_timespec(t); | 1705 | *ts = ktime_to_timespec64(t); |
| 1678 | } | 1706 | } |
| 1679 | EXPORT_SYMBOL_GPL(getboottime); | 1707 | EXPORT_SYMBOL_GPL(getboottime64); |
| 1680 | 1708 | ||
| 1681 | unsigned long get_seconds(void) | 1709 | unsigned long get_seconds(void) |
| 1682 | { | 1710 | { |
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index adc1fc98bde3..1d91416055d5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h | |||
| @@ -16,5 +16,7 @@ extern int timekeeping_inject_offset(struct timespec *ts); | |||
| 16 | extern s32 timekeeping_get_tai_offset(void); | 16 | extern s32 timekeeping_get_tai_offset(void); |
| 17 | extern void timekeeping_set_tai_offset(s32 tai_offset); | 17 | extern void timekeeping_set_tai_offset(s32 tai_offset); |
| 18 | extern void timekeeping_clocktai(struct timespec *ts); | 18 | extern void timekeeping_clocktai(struct timespec *ts); |
| 19 | extern int timekeeping_suspend(void); | ||
| 20 | extern void timekeeping_resume(void); | ||
| 19 | 21 | ||
| 20 | #endif | 22 | #endif |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 979ccde26720..98f26588255e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
| @@ -3,11 +3,11 @@ | |||
| 3 | 3 | ||
| 4 | ifdef CONFIG_FUNCTION_TRACER | 4 | ifdef CONFIG_FUNCTION_TRACER |
| 5 | ORIG_CFLAGS := $(KBUILD_CFLAGS) | 5 | ORIG_CFLAGS := $(KBUILD_CFLAGS) |
| 6 | KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) | 6 | KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) |
| 7 | 7 | ||
| 8 | ifdef CONFIG_FTRACE_SELFTEST | 8 | ifdef CONFIG_FTRACE_SELFTEST |
| 9 | # selftest needs instrumentation | 9 | # selftest needs instrumentation |
| 10 | CFLAGS_trace_selftest_dynamic.o = -pg | 10 | CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE) |
| 11 | obj-y += trace_selftest_dynamic.o | 11 | obj-y += trace_selftest_dynamic.o |
| 12 | endif | 12 | endif |
| 13 | endif | 13 | endif |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 224e768bdc73..45e5cb143d17 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -5456,7 +5456,7 @@ static __init int ftrace_init_debugfs(void) | |||
| 5456 | struct dentry *d_tracer; | 5456 | struct dentry *d_tracer; |
| 5457 | 5457 | ||
| 5458 | d_tracer = tracing_init_dentry(); | 5458 | d_tracer = tracing_init_dentry(); |
| 5459 | if (!d_tracer) | 5459 | if (IS_ERR(d_tracer)) |
| 5460 | return 0; | 5460 | return 0; |
| 5461 | 5461 | ||
| 5462 | ftrace_init_dyn_debugfs(d_tracer); | 5462 | ftrace_init_dyn_debugfs(d_tracer); |
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 1c71382b283d..eb4220a132ec 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c | |||
| @@ -13,5 +13,6 @@ | |||
| 13 | #define CREATE_TRACE_POINTS | 13 | #define CREATE_TRACE_POINTS |
| 14 | #include <trace/events/power.h> | 14 | #include <trace/events/power.h> |
| 15 | 15 | ||
| 16 | EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); | ||
| 16 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); | 17 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); |
| 17 | 18 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7a4104cb95cb..5040d44fe5a3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include <linux/trace_seq.h> | 9 | #include <linux/trace_seq.h> |
| 10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
| 11 | #include <linux/irq_work.h> | 11 | #include <linux/irq_work.h> |
| 12 | #include <linux/debugfs.h> | ||
| 13 | #include <linux/uaccess.h> | 12 | #include <linux/uaccess.h> |
| 14 | #include <linux/hardirq.h> | 13 | #include <linux/hardirq.h> |
| 15 | #include <linux/kthread.h> /* for self test */ | 14 | #include <linux/kthread.h> /* for self test */ |
| @@ -23,7 +22,6 @@ | |||
| 23 | #include <linux/hash.h> | 22 | #include <linux/hash.h> |
| 24 | #include <linux/list.h> | 23 | #include <linux/list.h> |
| 25 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
| 26 | #include <linux/fs.h> | ||
| 27 | 25 | ||
| 28 | #include <asm/local.h> | 26 | #include <asm/local.h> |
| 29 | 27 | ||
| @@ -447,7 +445,10 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
| 447 | struct rb_irq_work { | 445 | struct rb_irq_work { |
| 448 | struct irq_work work; | 446 | struct irq_work work; |
| 449 | wait_queue_head_t waiters; | 447 | wait_queue_head_t waiters; |
| 448 | wait_queue_head_t full_waiters; | ||
| 450 | bool waiters_pending; | 449 | bool waiters_pending; |
| 450 | bool full_waiters_pending; | ||
| 451 | bool wakeup_full; | ||
| 451 | }; | 452 | }; |
| 452 | 453 | ||
| 453 | /* | 454 | /* |
| @@ -529,6 +530,10 @@ static void rb_wake_up_waiters(struct irq_work *work) | |||
| 529 | struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); | 530 | struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); |
| 530 | 531 | ||
| 531 | wake_up_all(&rbwork->waiters); | 532 | wake_up_all(&rbwork->waiters); |
| 533 | if (rbwork->wakeup_full) { | ||
| 534 | rbwork->wakeup_full = false; | ||
| 535 | wake_up_all(&rbwork->full_waiters); | ||
| 536 | } | ||
| 532 | } | 537 | } |
| 533 | 538 | ||
| 534 | /** | 539 | /** |
| @@ -553,9 +558,11 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) | |||
| 553 | * data in any cpu buffer, or a specific buffer, put the | 558 | * data in any cpu buffer, or a specific buffer, put the |
| 554 | * caller on the appropriate wait queue. | 559 | * caller on the appropriate wait queue. |
| 555 | */ | 560 | */ |
| 556 | if (cpu == RING_BUFFER_ALL_CPUS) | 561 | if (cpu == RING_BUFFER_ALL_CPUS) { |
| 557 | work = &buffer->irq_work; | 562 | work = &buffer->irq_work; |
| 558 | else { | 563 | /* Full only makes sense on per cpu reads */ |
| 564 | full = false; | ||
| 565 | } else { | ||
| 559 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 566 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 560 | return -ENODEV; | 567 | return -ENODEV; |
| 561 | cpu_buffer = buffer->buffers[cpu]; | 568 | cpu_buffer = buffer->buffers[cpu]; |
| @@ -564,7 +571,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) | |||
| 564 | 571 | ||
| 565 | 572 | ||
| 566 | while (true) { | 573 | while (true) { |
| 567 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | 574 | if (full) |
| 575 | prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); | ||
| 576 | else | ||
| 577 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | ||
| 568 | 578 | ||
| 569 | /* | 579 | /* |
| 570 | * The events can happen in critical sections where | 580 | * The events can happen in critical sections where |
| @@ -586,7 +596,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) | |||
| 586 | * that is necessary is that the wake up happens after | 596 | * that is necessary is that the wake up happens after |
| 587 | * a task has been queued. It's OK for spurious wake ups. | 597 | * a task has been queued. It's OK for spurious wake ups. |
| 588 | */ | 598 | */ |
| 589 | work->waiters_pending = true; | 599 | if (full) |
| 600 | work->full_waiters_pending = true; | ||
| 601 | else | ||
| 602 | work->waiters_pending = true; | ||
| 590 | 603 | ||
| 591 | if (signal_pending(current)) { | 604 | if (signal_pending(current)) { |
| 592 | ret = -EINTR; | 605 | ret = -EINTR; |
| @@ -615,7 +628,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) | |||
| 615 | schedule(); | 628 | schedule(); |
| 616 | } | 629 | } |
| 617 | 630 | ||
| 618 | finish_wait(&work->waiters, &wait); | 631 | if (full) |
| 632 | finish_wait(&work->full_waiters, &wait); | ||
| 633 | else | ||
| 634 | finish_wait(&work->waiters, &wait); | ||
| 619 | 635 | ||
| 620 | return ret; | 636 | return ret; |
| 621 | } | 637 | } |
| @@ -1230,6 +1246,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) | |||
| 1230 | init_completion(&cpu_buffer->update_done); | 1246 | init_completion(&cpu_buffer->update_done); |
| 1231 | init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); | 1247 | init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); |
| 1232 | init_waitqueue_head(&cpu_buffer->irq_work.waiters); | 1248 | init_waitqueue_head(&cpu_buffer->irq_work.waiters); |
| 1249 | init_waitqueue_head(&cpu_buffer->irq_work.full_waiters); | ||
| 1233 | 1250 | ||
| 1234 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1251 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
| 1235 | GFP_KERNEL, cpu_to_node(cpu)); | 1252 | GFP_KERNEL, cpu_to_node(cpu)); |
| @@ -2801,6 +2818,8 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 2801 | static __always_inline void | 2818 | static __always_inline void |
| 2802 | rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) | 2819 | rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) |
| 2803 | { | 2820 | { |
| 2821 | bool pagebusy; | ||
| 2822 | |||
| 2804 | if (buffer->irq_work.waiters_pending) { | 2823 | if (buffer->irq_work.waiters_pending) { |
| 2805 | buffer->irq_work.waiters_pending = false; | 2824 | buffer->irq_work.waiters_pending = false; |
| 2806 | /* irq_work_queue() supplies it's own memory barriers */ | 2825 | /* irq_work_queue() supplies it's own memory barriers */ |
| @@ -2812,6 +2831,15 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2812 | /* irq_work_queue() supplies it's own memory barriers */ | 2831 | /* irq_work_queue() supplies it's own memory barriers */ |
| 2813 | irq_work_queue(&cpu_buffer->irq_work.work); | 2832 | irq_work_queue(&cpu_buffer->irq_work.work); |
| 2814 | } | 2833 | } |
| 2834 | |||
| 2835 | pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; | ||
| 2836 | |||
| 2837 | if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) { | ||
| 2838 | cpu_buffer->irq_work.wakeup_full = true; | ||
| 2839 | cpu_buffer->irq_work.full_waiters_pending = false; | ||
| 2840 | /* irq_work_queue() supplies it's own memory barriers */ | ||
| 2841 | irq_work_queue(&cpu_buffer->irq_work.work); | ||
| 2842 | } | ||
| 2815 | } | 2843 | } |
| 2816 | 2844 | ||
| 2817 | /** | 2845 | /** |
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 3f9e328c30b5..13d945c0d03f 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <linux/completion.h> | 7 | #include <linux/completion.h> |
| 8 | #include <linux/kthread.h> | 8 | #include <linux/kthread.h> |
| 9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
| 10 | #include <linux/time.h> | 10 | #include <linux/ktime.h> |
| 11 | #include <asm/local.h> | 11 | #include <asm/local.h> |
| 12 | 12 | ||
| 13 | struct rb_page { | 13 | struct rb_page { |
| @@ -17,7 +17,7 @@ struct rb_page { | |||
| 17 | }; | 17 | }; |
| 18 | 18 | ||
| 19 | /* run time and sleep time in seconds */ | 19 | /* run time and sleep time in seconds */ |
| 20 | #define RUN_TIME 10 | 20 | #define RUN_TIME 10ULL |
| 21 | #define SLEEP_TIME 10 | 21 | #define SLEEP_TIME 10 |
| 22 | 22 | ||
| 23 | /* number of events for writer to wake up the reader */ | 23 | /* number of events for writer to wake up the reader */ |
| @@ -212,8 +212,7 @@ static void ring_buffer_consumer(void) | |||
| 212 | 212 | ||
| 213 | static void ring_buffer_producer(void) | 213 | static void ring_buffer_producer(void) |
| 214 | { | 214 | { |
| 215 | struct timeval start_tv; | 215 | ktime_t start_time, end_time, timeout; |
| 216 | struct timeval end_tv; | ||
| 217 | unsigned long long time; | 216 | unsigned long long time; |
| 218 | unsigned long long entries; | 217 | unsigned long long entries; |
| 219 | unsigned long long overruns; | 218 | unsigned long long overruns; |
| @@ -227,7 +226,8 @@ static void ring_buffer_producer(void) | |||
| 227 | * make the system stall) | 226 | * make the system stall) |
| 228 | */ | 227 | */ |
| 229 | trace_printk("Starting ring buffer hammer\n"); | 228 | trace_printk("Starting ring buffer hammer\n"); |
| 230 | do_gettimeofday(&start_tv); | 229 | start_time = ktime_get(); |
| 230 | timeout = ktime_add_ns(start_time, RUN_TIME * NSEC_PER_SEC); | ||
| 231 | do { | 231 | do { |
| 232 | struct ring_buffer_event *event; | 232 | struct ring_buffer_event *event; |
| 233 | int *entry; | 233 | int *entry; |
| @@ -244,7 +244,7 @@ static void ring_buffer_producer(void) | |||
| 244 | ring_buffer_unlock_commit(buffer, event); | 244 | ring_buffer_unlock_commit(buffer, event); |
| 245 | } | 245 | } |
| 246 | } | 246 | } |
| 247 | do_gettimeofday(&end_tv); | 247 | end_time = ktime_get(); |
| 248 | 248 | ||
| 249 | cnt++; | 249 | cnt++; |
| 250 | if (consumer && !(cnt % wakeup_interval)) | 250 | if (consumer && !(cnt % wakeup_interval)) |
| @@ -264,7 +264,7 @@ static void ring_buffer_producer(void) | |||
| 264 | cond_resched(); | 264 | cond_resched(); |
| 265 | #endif | 265 | #endif |
| 266 | 266 | ||
| 267 | } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); | 267 | } while (ktime_before(end_time, timeout) && !kill_test); |
| 268 | trace_printk("End ring buffer hammer\n"); | 268 | trace_printk("End ring buffer hammer\n"); |
| 269 | 269 | ||
| 270 | if (consumer) { | 270 | if (consumer) { |
| @@ -280,9 +280,7 @@ static void ring_buffer_producer(void) | |||
| 280 | wait_for_completion(&read_done); | 280 | wait_for_completion(&read_done); |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | time = end_tv.tv_sec - start_tv.tv_sec; | 283 | time = ktime_us_delta(end_time, start_time); |
| 284 | time *= USEC_PER_SEC; | ||
| 285 | time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec); | ||
| 286 | 284 | ||
| 287 | entries = ring_buffer_entries(buffer); | 285 | entries = ring_buffer_entries(buffer); |
| 288 | overruns = ring_buffer_overruns(buffer); | 286 | overruns = ring_buffer_overruns(buffer); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a9079b9f082..62c6506d663f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -2036,7 +2036,8 @@ void trace_printk_init_buffers(void) | |||
| 2036 | 2036 | ||
| 2037 | /* trace_printk() is for debug use only. Don't use it in production. */ | 2037 | /* trace_printk() is for debug use only. Don't use it in production. */ |
| 2038 | 2038 | ||
| 2039 | pr_warning("\n**********************************************************\n"); | 2039 | pr_warning("\n"); |
| 2040 | pr_warning("**********************************************************\n"); | ||
| 2040 | pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); | 2041 | pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); |
| 2041 | pr_warning("** **\n"); | 2042 | pr_warning("** **\n"); |
| 2042 | pr_warning("** trace_printk() being used. Allocating extra memory. **\n"); | 2043 | pr_warning("** trace_printk() being used. Allocating extra memory. **\n"); |
| @@ -3352,12 +3353,12 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, | |||
| 3352 | 3353 | ||
| 3353 | mutex_lock(&tracing_cpumask_update_lock); | 3354 | mutex_lock(&tracing_cpumask_update_lock); |
| 3354 | 3355 | ||
| 3355 | len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask); | 3356 | len = snprintf(mask_str, count, "%*pb\n", |
| 3356 | if (count - len < 2) { | 3357 | cpumask_pr_args(tr->tracing_cpumask)); |
| 3358 | if (len >= count) { | ||
| 3357 | count = -EINVAL; | 3359 | count = -EINVAL; |
| 3358 | goto out_err; | 3360 | goto out_err; |
| 3359 | } | 3361 | } |
| 3360 | len += sprintf(mask_str + len, "\n"); | ||
| 3361 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); | 3362 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); |
| 3362 | 3363 | ||
| 3363 | out_err: | 3364 | out_err: |
| @@ -4140,6 +4141,12 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) | |||
| 4140 | goto out; | 4141 | goto out; |
| 4141 | } | 4142 | } |
| 4142 | 4143 | ||
| 4144 | /* If trace pipe files are being read, we can't change the tracer */ | ||
| 4145 | if (tr->current_trace->ref) { | ||
| 4146 | ret = -EBUSY; | ||
| 4147 | goto out; | ||
| 4148 | } | ||
| 4149 | |||
| 4143 | trace_branch_disable(); | 4150 | trace_branch_disable(); |
| 4144 | 4151 | ||
| 4145 | tr->current_trace->enabled--; | 4152 | tr->current_trace->enabled--; |
| @@ -4326,17 +4333,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
| 4326 | } | 4333 | } |
| 4327 | 4334 | ||
| 4328 | trace_seq_init(&iter->seq); | 4335 | trace_seq_init(&iter->seq); |
| 4329 | 4336 | iter->trace = tr->current_trace; | |
| 4330 | /* | ||
| 4331 | * We make a copy of the current tracer to avoid concurrent | ||
| 4332 | * changes on it while we are reading. | ||
| 4333 | */ | ||
| 4334 | iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); | ||
| 4335 | if (!iter->trace) { | ||
| 4336 | ret = -ENOMEM; | ||
| 4337 | goto fail; | ||
| 4338 | } | ||
| 4339 | *iter->trace = *tr->current_trace; | ||
| 4340 | 4337 | ||
| 4341 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { | 4338 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { |
| 4342 | ret = -ENOMEM; | 4339 | ret = -ENOMEM; |
| @@ -4363,6 +4360,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
| 4363 | iter->trace->pipe_open(iter); | 4360 | iter->trace->pipe_open(iter); |
| 4364 | 4361 | ||
| 4365 | nonseekable_open(inode, filp); | 4362 | nonseekable_open(inode, filp); |
| 4363 | |||
| 4364 | tr->current_trace->ref++; | ||
| 4366 | out: | 4365 | out: |
| 4367 | mutex_unlock(&trace_types_lock); | 4366 | mutex_unlock(&trace_types_lock); |
| 4368 | return ret; | 4367 | return ret; |
| @@ -4382,6 +4381,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
| 4382 | 4381 | ||
| 4383 | mutex_lock(&trace_types_lock); | 4382 | mutex_lock(&trace_types_lock); |
| 4384 | 4383 | ||
| 4384 | tr->current_trace->ref--; | ||
| 4385 | |||
| 4385 | if (iter->trace->pipe_close) | 4386 | if (iter->trace->pipe_close) |
| 4386 | iter->trace->pipe_close(iter); | 4387 | iter->trace->pipe_close(iter); |
| 4387 | 4388 | ||
| @@ -4389,7 +4390,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
| 4389 | 4390 | ||
| 4390 | free_cpumask_var(iter->started); | 4391 | free_cpumask_var(iter->started); |
| 4391 | mutex_destroy(&iter->mutex); | 4392 | mutex_destroy(&iter->mutex); |
| 4392 | kfree(iter->trace); | ||
| 4393 | kfree(iter); | 4393 | kfree(iter); |
| 4394 | 4394 | ||
| 4395 | trace_array_put(tr); | 4395 | trace_array_put(tr); |
| @@ -4422,7 +4422,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) | |||
| 4422 | return trace_poll(iter, filp, poll_table); | 4422 | return trace_poll(iter, filp, poll_table); |
| 4423 | } | 4423 | } |
| 4424 | 4424 | ||
| 4425 | /* Must be called with trace_types_lock mutex held. */ | 4425 | /* Must be called with iter->mutex held. */ |
| 4426 | static int tracing_wait_pipe(struct file *filp) | 4426 | static int tracing_wait_pipe(struct file *filp) |
| 4427 | { | 4427 | { |
| 4428 | struct trace_iterator *iter = filp->private_data; | 4428 | struct trace_iterator *iter = filp->private_data; |
| @@ -4467,7 +4467,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, | |||
| 4467 | size_t cnt, loff_t *ppos) | 4467 | size_t cnt, loff_t *ppos) |
| 4468 | { | 4468 | { |
| 4469 | struct trace_iterator *iter = filp->private_data; | 4469 | struct trace_iterator *iter = filp->private_data; |
| 4470 | struct trace_array *tr = iter->tr; | ||
| 4471 | ssize_t sret; | 4470 | ssize_t sret; |
| 4472 | 4471 | ||
| 4473 | /* return any leftover data */ | 4472 | /* return any leftover data */ |
| @@ -4477,12 +4476,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, | |||
| 4477 | 4476 | ||
| 4478 | trace_seq_init(&iter->seq); | 4477 | trace_seq_init(&iter->seq); |
| 4479 | 4478 | ||
| 4480 | /* copy the tracer to avoid using a global lock all around */ | ||
| 4481 | mutex_lock(&trace_types_lock); | ||
| 4482 | if (unlikely(iter->trace->name != tr->current_trace->name)) | ||
| 4483 | *iter->trace = *tr->current_trace; | ||
| 4484 | mutex_unlock(&trace_types_lock); | ||
| 4485 | |||
| 4486 | /* | 4479 | /* |
| 4487 | * Avoid more than one consumer on a single file descriptor | 4480 | * Avoid more than one consumer on a single file descriptor |
| 4488 | * This is just a matter of traces coherency, the ring buffer itself | 4481 | * This is just a matter of traces coherency, the ring buffer itself |
| @@ -4642,7 +4635,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
| 4642 | .ops = &tracing_pipe_buf_ops, | 4635 | .ops = &tracing_pipe_buf_ops, |
| 4643 | .spd_release = tracing_spd_release_pipe, | 4636 | .spd_release = tracing_spd_release_pipe, |
| 4644 | }; | 4637 | }; |
| 4645 | struct trace_array *tr = iter->tr; | ||
| 4646 | ssize_t ret; | 4638 | ssize_t ret; |
| 4647 | size_t rem; | 4639 | size_t rem; |
| 4648 | unsigned int i; | 4640 | unsigned int i; |
| @@ -4650,12 +4642,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
| 4650 | if (splice_grow_spd(pipe, &spd)) | 4642 | if (splice_grow_spd(pipe, &spd)) |
| 4651 | return -ENOMEM; | 4643 | return -ENOMEM; |
| 4652 | 4644 | ||
| 4653 | /* copy the tracer to avoid using a global lock all around */ | ||
| 4654 | mutex_lock(&trace_types_lock); | ||
| 4655 | if (unlikely(iter->trace->name != tr->current_trace->name)) | ||
| 4656 | *iter->trace = *tr->current_trace; | ||
| 4657 | mutex_unlock(&trace_types_lock); | ||
| 4658 | |||
| 4659 | mutex_lock(&iter->mutex); | 4645 | mutex_lock(&iter->mutex); |
| 4660 | 4646 | ||
| 4661 | if (iter->trace->splice_read) { | 4647 | if (iter->trace->splice_read) { |
| @@ -4942,7 +4928,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
| 4942 | *fpos += written; | 4928 | *fpos += written; |
| 4943 | 4929 | ||
| 4944 | out_unlock: | 4930 | out_unlock: |
| 4945 | for (i = 0; i < nr_pages; i++){ | 4931 | for (i = nr_pages - 1; i >= 0; i--) { |
| 4946 | kunmap_atomic(map_page[i]); | 4932 | kunmap_atomic(map_page[i]); |
| 4947 | put_page(pages[i]); | 4933 | put_page(pages[i]); |
| 4948 | } | 4934 | } |
| @@ -5331,6 +5317,8 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
| 5331 | 5317 | ||
| 5332 | filp->private_data = info; | 5318 | filp->private_data = info; |
| 5333 | 5319 | ||
| 5320 | tr->current_trace->ref++; | ||
| 5321 | |||
| 5334 | mutex_unlock(&trace_types_lock); | 5322 | mutex_unlock(&trace_types_lock); |
| 5335 | 5323 | ||
| 5336 | ret = nonseekable_open(inode, filp); | 5324 | ret = nonseekable_open(inode, filp); |
| @@ -5361,21 +5349,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 5361 | if (!count) | 5349 | if (!count) |
| 5362 | return 0; | 5350 | return 0; |
| 5363 | 5351 | ||
| 5364 | mutex_lock(&trace_types_lock); | ||
| 5365 | |||
| 5366 | #ifdef CONFIG_TRACER_MAX_TRACE | 5352 | #ifdef CONFIG_TRACER_MAX_TRACE |
| 5367 | if (iter->snapshot && iter->tr->current_trace->use_max_tr) { | 5353 | if (iter->snapshot && iter->tr->current_trace->use_max_tr) |
| 5368 | size = -EBUSY; | 5354 | return -EBUSY; |
| 5369 | goto out_unlock; | ||
| 5370 | } | ||
| 5371 | #endif | 5355 | #endif |
| 5372 | 5356 | ||
| 5373 | if (!info->spare) | 5357 | if (!info->spare) |
| 5374 | info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, | 5358 | info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, |
| 5375 | iter->cpu_file); | 5359 | iter->cpu_file); |
| 5376 | size = -ENOMEM; | ||
| 5377 | if (!info->spare) | 5360 | if (!info->spare) |
| 5378 | goto out_unlock; | 5361 | return -ENOMEM; |
| 5379 | 5362 | ||
| 5380 | /* Do we have previous read data to read? */ | 5363 | /* Do we have previous read data to read? */ |
| 5381 | if (info->read < PAGE_SIZE) | 5364 | if (info->read < PAGE_SIZE) |
| @@ -5391,21 +5374,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 5391 | 5374 | ||
| 5392 | if (ret < 0) { | 5375 | if (ret < 0) { |
| 5393 | if (trace_empty(iter)) { | 5376 | if (trace_empty(iter)) { |
| 5394 | if ((filp->f_flags & O_NONBLOCK)) { | 5377 | if ((filp->f_flags & O_NONBLOCK)) |
| 5395 | size = -EAGAIN; | 5378 | return -EAGAIN; |
| 5396 | goto out_unlock; | 5379 | |
| 5397 | } | ||
| 5398 | mutex_unlock(&trace_types_lock); | ||
| 5399 | ret = wait_on_pipe(iter, false); | 5380 | ret = wait_on_pipe(iter, false); |
| 5400 | mutex_lock(&trace_types_lock); | 5381 | if (ret) |
| 5401 | if (ret) { | 5382 | return ret; |
| 5402 | size = ret; | 5383 | |
| 5403 | goto out_unlock; | ||
| 5404 | } | ||
| 5405 | goto again; | 5384 | goto again; |
| 5406 | } | 5385 | } |
| 5407 | size = 0; | 5386 | return 0; |
| 5408 | goto out_unlock; | ||
| 5409 | } | 5387 | } |
| 5410 | 5388 | ||
| 5411 | info->read = 0; | 5389 | info->read = 0; |
| @@ -5415,18 +5393,14 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 5415 | size = count; | 5393 | size = count; |
| 5416 | 5394 | ||
| 5417 | ret = copy_to_user(ubuf, info->spare + info->read, size); | 5395 | ret = copy_to_user(ubuf, info->spare + info->read, size); |
| 5418 | if (ret == size) { | 5396 | if (ret == size) |
| 5419 | size = -EFAULT; | 5397 | return -EFAULT; |
| 5420 | goto out_unlock; | 5398 | |
| 5421 | } | ||
| 5422 | size -= ret; | 5399 | size -= ret; |
| 5423 | 5400 | ||
| 5424 | *ppos += size; | 5401 | *ppos += size; |
| 5425 | info->read += size; | 5402 | info->read += size; |
| 5426 | 5403 | ||
| 5427 | out_unlock: | ||
| 5428 | mutex_unlock(&trace_types_lock); | ||
| 5429 | |||
| 5430 | return size; | 5404 | return size; |
| 5431 | } | 5405 | } |
| 5432 | 5406 | ||
| @@ -5437,6 +5411,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) | |||
| 5437 | 5411 | ||
| 5438 | mutex_lock(&trace_types_lock); | 5412 | mutex_lock(&trace_types_lock); |
| 5439 | 5413 | ||
| 5414 | iter->tr->current_trace->ref--; | ||
| 5415 | |||
| 5440 | __trace_array_put(iter->tr); | 5416 | __trace_array_put(iter->tr); |
| 5441 | 5417 | ||
| 5442 | if (info->spare) | 5418 | if (info->spare) |
| @@ -5522,30 +5498,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 5522 | int entries, size, i; | 5498 | int entries, size, i; |
| 5523 | ssize_t ret = 0; | 5499 | ssize_t ret = 0; |
| 5524 | 5500 | ||
| 5525 | mutex_lock(&trace_types_lock); | ||
| 5526 | |||
| 5527 | #ifdef CONFIG_TRACER_MAX_TRACE | 5501 | #ifdef CONFIG_TRACER_MAX_TRACE |
| 5528 | if (iter->snapshot && iter->tr->current_trace->use_max_tr) { | 5502 | if (iter->snapshot && iter->tr->current_trace->use_max_tr) |
| 5529 | ret = -EBUSY; | 5503 | return -EBUSY; |
| 5530 | goto out; | ||
| 5531 | } | ||
| 5532 | #endif | 5504 | #endif |
| 5533 | 5505 | ||
| 5534 | if (splice_grow_spd(pipe, &spd)) { | 5506 | if (splice_grow_spd(pipe, &spd)) |
| 5535 | ret = -ENOMEM; | 5507 | return -ENOMEM; |
| 5536 | goto out; | ||
| 5537 | } | ||
| 5538 | 5508 | ||
| 5539 | if (*ppos & (PAGE_SIZE - 1)) { | 5509 | if (*ppos & (PAGE_SIZE - 1)) |
| 5540 | ret = -EINVAL; | 5510 | return -EINVAL; |
| 5541 | goto out; | ||
| 5542 | } | ||
| 5543 | 5511 | ||
| 5544 | if (len & (PAGE_SIZE - 1)) { | 5512 | if (len & (PAGE_SIZE - 1)) { |
| 5545 | if (len < PAGE_SIZE) { | 5513 | if (len < PAGE_SIZE) |
| 5546 | ret = -EINVAL; | 5514 | return -EINVAL; |
| 5547 | goto out; | ||
| 5548 | } | ||
| 5549 | len &= PAGE_MASK; | 5515 | len &= PAGE_MASK; |
| 5550 | } | 5516 | } |
| 5551 | 5517 | ||
| @@ -5606,25 +5572,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 5606 | /* did we read anything? */ | 5572 | /* did we read anything? */ |
| 5607 | if (!spd.nr_pages) { | 5573 | if (!spd.nr_pages) { |
| 5608 | if (ret) | 5574 | if (ret) |
| 5609 | goto out; | 5575 | return ret; |
| 5576 | |||
| 5577 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) | ||
| 5578 | return -EAGAIN; | ||
| 5610 | 5579 | ||
| 5611 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { | ||
| 5612 | ret = -EAGAIN; | ||
| 5613 | goto out; | ||
| 5614 | } | ||
| 5615 | mutex_unlock(&trace_types_lock); | ||
| 5616 | ret = wait_on_pipe(iter, true); | 5580 | ret = wait_on_pipe(iter, true); |
| 5617 | mutex_lock(&trace_types_lock); | ||
| 5618 | if (ret) | 5581 | if (ret) |
| 5619 | goto out; | 5582 | return ret; |
| 5620 | 5583 | ||
| 5621 | goto again; | 5584 | goto again; |
| 5622 | } | 5585 | } |
| 5623 | 5586 | ||
| 5624 | ret = splice_to_pipe(pipe, &spd); | 5587 | ret = splice_to_pipe(pipe, &spd); |
| 5625 | splice_shrink_spd(&spd); | 5588 | splice_shrink_spd(&spd); |
| 5626 | out: | ||
| 5627 | mutex_unlock(&trace_types_lock); | ||
| 5628 | 5589 | ||
| 5629 | return ret; | 5590 | return ret; |
| 5630 | } | 5591 | } |
| @@ -5854,28 +5815,11 @@ static __init int register_snapshot_cmd(void) | |||
| 5854 | static inline __init int register_snapshot_cmd(void) { return 0; } | 5815 | static inline __init int register_snapshot_cmd(void) { return 0; } |
| 5855 | #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ | 5816 | #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ |
| 5856 | 5817 | ||
| 5857 | struct dentry *tracing_init_dentry_tr(struct trace_array *tr) | 5818 | static struct dentry *tracing_get_dentry(struct trace_array *tr) |
| 5858 | { | 5819 | { |
| 5859 | if (tr->dir) | ||
| 5860 | return tr->dir; | ||
| 5861 | |||
| 5862 | if (!debugfs_initialized()) | ||
| 5863 | return NULL; | ||
| 5864 | |||
| 5865 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) | ||
| 5866 | tr->dir = debugfs_create_dir("tracing", NULL); | ||
| 5867 | |||
| 5868 | if (!tr->dir) | ||
| 5869 | pr_warn_once("Could not create debugfs directory 'tracing'\n"); | ||
| 5870 | |||
| 5871 | return tr->dir; | 5820 | return tr->dir; |
| 5872 | } | 5821 | } |
| 5873 | 5822 | ||
| 5874 | struct dentry *tracing_init_dentry(void) | ||
| 5875 | { | ||
| 5876 | return tracing_init_dentry_tr(&global_trace); | ||
| 5877 | } | ||
| 5878 | |||
| 5879 | static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) | 5823 | static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) |
| 5880 | { | 5824 | { |
| 5881 | struct dentry *d_tracer; | 5825 | struct dentry *d_tracer; |
| @@ -5883,8 +5827,8 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) | |||
| 5883 | if (tr->percpu_dir) | 5827 | if (tr->percpu_dir) |
| 5884 | return tr->percpu_dir; | 5828 | return tr->percpu_dir; |
| 5885 | 5829 | ||
| 5886 | d_tracer = tracing_init_dentry_tr(tr); | 5830 | d_tracer = tracing_get_dentry(tr); |
| 5887 | if (!d_tracer) | 5831 | if (IS_ERR(d_tracer)) |
| 5888 | return NULL; | 5832 | return NULL; |
| 5889 | 5833 | ||
| 5890 | tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); | 5834 | tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); |
| @@ -6086,8 +6030,8 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr) | |||
| 6086 | if (tr->options) | 6030 | if (tr->options) |
| 6087 | return tr->options; | 6031 | return tr->options; |
| 6088 | 6032 | ||
| 6089 | d_tracer = tracing_init_dentry_tr(tr); | 6033 | d_tracer = tracing_get_dentry(tr); |
| 6090 | if (!d_tracer) | 6034 | if (IS_ERR(d_tracer)) |
| 6091 | return NULL; | 6035 | return NULL; |
| 6092 | 6036 | ||
| 6093 | tr->options = debugfs_create_dir("options", d_tracer); | 6037 | tr->options = debugfs_create_dir("options", d_tracer); |
| @@ -6416,7 +6360,7 @@ static int instance_delete(const char *name) | |||
| 6416 | goto out_unlock; | 6360 | goto out_unlock; |
| 6417 | 6361 | ||
| 6418 | ret = -EBUSY; | 6362 | ret = -EBUSY; |
| 6419 | if (tr->ref) | 6363 | if (tr->ref || (tr->current_trace && tr->current_trace->ref)) |
| 6420 | goto out_unlock; | 6364 | goto out_unlock; |
| 6421 | 6365 | ||
| 6422 | list_del(&tr->list); | 6366 | list_del(&tr->list); |
| @@ -6571,6 +6515,33 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) | |||
| 6571 | 6515 | ||
| 6572 | } | 6516 | } |
| 6573 | 6517 | ||
| 6518 | /** | ||
| 6519 | * tracing_init_dentry - initialize top level trace array | ||
| 6520 | * | ||
| 6521 | * This is called when creating files or directories in the tracing | ||
| 6522 | * directory. It is called via fs_initcall() by any of the boot up code | ||
| 6523 | * and expects to return the dentry of the top level tracing directory. | ||
| 6524 | */ | ||
| 6525 | struct dentry *tracing_init_dentry(void) | ||
| 6526 | { | ||
| 6527 | struct trace_array *tr = &global_trace; | ||
| 6528 | |||
| 6529 | if (tr->dir) | ||
| 6530 | return tr->dir; | ||
| 6531 | |||
| 6532 | if (WARN_ON(!debugfs_initialized())) | ||
| 6533 | return ERR_PTR(-ENODEV); | ||
| 6534 | |||
| 6535 | tr->dir = debugfs_create_dir("tracing", NULL); | ||
| 6536 | |||
| 6537 | if (!tr->dir) { | ||
| 6538 | pr_warn_once("Could not create debugfs directory 'tracing'\n"); | ||
| 6539 | return ERR_PTR(-ENOMEM); | ||
| 6540 | } | ||
| 6541 | |||
| 6542 | return tr->dir; | ||
| 6543 | } | ||
| 6544 | |||
| 6574 | static __init int tracer_init_debugfs(void) | 6545 | static __init int tracer_init_debugfs(void) |
| 6575 | { | 6546 | { |
| 6576 | struct dentry *d_tracer; | 6547 | struct dentry *d_tracer; |
| @@ -6578,7 +6549,7 @@ static __init int tracer_init_debugfs(void) | |||
| 6578 | trace_access_lock_init(); | 6549 | trace_access_lock_init(); |
| 6579 | 6550 | ||
| 6580 | d_tracer = tracing_init_dentry(); | 6551 | d_tracer = tracing_init_dentry(); |
| 6581 | if (!d_tracer) | 6552 | if (IS_ERR(d_tracer)) |
| 6582 | return 0; | 6553 | return 0; |
| 6583 | 6554 | ||
| 6584 | init_tracer_debugfs(&global_trace, d_tracer); | 6555 | init_tracer_debugfs(&global_trace, d_tracer); |
| @@ -6811,7 +6782,6 @@ __init static int tracer_alloc_buffers(void) | |||
| 6811 | int ring_buf_size; | 6782 | int ring_buf_size; |
| 6812 | int ret = -ENOMEM; | 6783 | int ret = -ENOMEM; |
| 6813 | 6784 | ||
| 6814 | |||
| 6815 | if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) | 6785 | if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) |
| 6816 | goto out; | 6786 | goto out; |
| 6817 | 6787 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8de48bac1ce2..dd8205a35760 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -388,6 +388,7 @@ struct tracer { | |||
| 388 | struct tracer *next; | 388 | struct tracer *next; |
| 389 | struct tracer_flags *flags; | 389 | struct tracer_flags *flags; |
| 390 | int enabled; | 390 | int enabled; |
| 391 | int ref; | ||
| 391 | bool print_max; | 392 | bool print_max; |
| 392 | bool allow_instances; | 393 | bool allow_instances; |
| 393 | #ifdef CONFIG_TRACER_MAX_TRACE | 394 | #ifdef CONFIG_TRACER_MAX_TRACE |
| @@ -541,7 +542,6 @@ struct dentry *trace_create_file(const char *name, | |||
| 541 | void *data, | 542 | void *data, |
| 542 | const struct file_operations *fops); | 543 | const struct file_operations *fops); |
| 543 | 544 | ||
| 544 | struct dentry *tracing_init_dentry_tr(struct trace_array *tr); | ||
| 545 | struct dentry *tracing_init_dentry(void); | 545 | struct dentry *tracing_init_dentry(void); |
| 546 | 546 | ||
| 547 | struct ring_buffer_event; | 547 | struct ring_buffer_event; |
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 7d6e2afde669..57cbf1efdd44 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | #include <linux/seq_file.h> | 7 | #include <linux/seq_file.h> |
| 8 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
| 9 | #include <linux/irqflags.h> | 9 | #include <linux/irqflags.h> |
| 10 | #include <linux/debugfs.h> | ||
| 11 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
| 12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 13 | #include <linux/ftrace.h> | 12 | #include <linux/ftrace.h> |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 4b9c114ee9de..6fa484de2ba1 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -261,7 +261,7 @@ void perf_trace_del(struct perf_event *p_event, int flags) | |||
| 261 | } | 261 | } |
| 262 | 262 | ||
| 263 | void *perf_trace_buf_prepare(int size, unsigned short type, | 263 | void *perf_trace_buf_prepare(int size, unsigned short type, |
| 264 | struct pt_regs *regs, int *rctxp) | 264 | struct pt_regs **regs, int *rctxp) |
| 265 | { | 265 | { |
| 266 | struct trace_entry *entry; | 266 | struct trace_entry *entry; |
| 267 | unsigned long flags; | 267 | unsigned long flags; |
| @@ -280,6 +280,8 @@ void *perf_trace_buf_prepare(int size, unsigned short type, | |||
| 280 | if (*rctxp < 0) | 280 | if (*rctxp < 0) |
| 281 | return NULL; | 281 | return NULL; |
| 282 | 282 | ||
| 283 | if (regs) | ||
| 284 | *regs = this_cpu_ptr(&__perf_regs[*rctxp]); | ||
| 283 | raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); | 285 | raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); |
| 284 | 286 | ||
| 285 | /* zero the dead bytes from align to not leak stack to user */ | 287 | /* zero the dead bytes from align to not leak stack to user */ |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b03a0ea77b99..db54dda10ccc 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -2531,7 +2531,7 @@ static __init int event_trace_init(void) | |||
| 2531 | return -ENODEV; | 2531 | return -ENODEV; |
| 2532 | 2532 | ||
| 2533 | d_tracer = tracing_init_dentry(); | 2533 | d_tracer = tracing_init_dentry(); |
| 2534 | if (!d_tracer) | 2534 | if (IS_ERR(d_tracer)) |
| 2535 | return 0; | 2535 | return 0; |
| 2536 | 2536 | ||
| 2537 | entry = debugfs_create_file("available_events", 0444, d_tracer, | 2537 | entry = debugfs_create_file("available_events", 0444, d_tracer, |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d4ddde28a81a..12e2b99be862 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
| @@ -6,12 +6,10 @@ | |||
| 6 | #include <linux/stringify.h> | 6 | #include <linux/stringify.h> |
| 7 | #include <linux/kallsyms.h> | 7 | #include <linux/kallsyms.h> |
| 8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
| 9 | #include <linux/debugfs.h> | ||
| 10 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
| 11 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
| 12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 13 | #include <linux/init.h> | 12 | #include <linux/init.h> |
| 14 | #include <linux/fs.h> | ||
| 15 | 13 | ||
| 16 | #include "trace_output.h" | 14 | #include "trace_output.h" |
| 17 | 15 | ||
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ba476009e5de..2d25ad1526bb 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -1437,7 +1437,7 @@ static __init int init_graph_debugfs(void) | |||
| 1437 | struct dentry *d_tracer; | 1437 | struct dentry *d_tracer; |
| 1438 | 1438 | ||
| 1439 | d_tracer = tracing_init_dentry(); | 1439 | d_tracer = tracing_init_dentry(); |
| 1440 | if (!d_tracer) | 1440 | if (IS_ERR(d_tracer)) |
| 1441 | return 0; | 1441 | return 0; |
| 1442 | 1442 | ||
| 1443 | trace_create_file("max_graph_depth", 0644, d_tracer, | 1443 | trace_create_file("max_graph_depth", 0644, d_tracer, |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 9bb104f748d0..8523ea345f2b 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
| @@ -10,11 +10,9 @@ | |||
| 10 | * Copyright (C) 2004 Nadia Yvette Chambers | 10 | * Copyright (C) 2004 Nadia Yvette Chambers |
| 11 | */ | 11 | */ |
| 12 | #include <linux/kallsyms.h> | 12 | #include <linux/kallsyms.h> |
| 13 | #include <linux/debugfs.h> | ||
| 14 | #include <linux/uaccess.h> | 13 | #include <linux/uaccess.h> |
| 15 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 16 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
| 17 | #include <linux/fs.h> | ||
| 18 | 16 | ||
| 19 | #include "trace.h" | 17 | #include "trace.h" |
| 20 | 18 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5edb518be345..d73f565b4e06 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -1148,7 +1148,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) | |||
| 1148 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1148 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
| 1149 | size -= sizeof(u32); | 1149 | size -= sizeof(u32); |
| 1150 | 1150 | ||
| 1151 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1151 | entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); |
| 1152 | if (!entry) | 1152 | if (!entry) |
| 1153 | return; | 1153 | return; |
| 1154 | 1154 | ||
| @@ -1179,7 +1179,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, | |||
| 1179 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1179 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
| 1180 | size -= sizeof(u32); | 1180 | size -= sizeof(u32); |
| 1181 | 1181 | ||
| 1182 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1182 | entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); |
| 1183 | if (!entry) | 1183 | if (!entry) |
| 1184 | return; | 1184 | return; |
| 1185 | 1185 | ||
| @@ -1320,7 +1320,7 @@ static __init int init_kprobe_trace(void) | |||
| 1320 | return -EINVAL; | 1320 | return -EINVAL; |
| 1321 | 1321 | ||
| 1322 | d_tracer = tracing_init_dentry(); | 1322 | d_tracer = tracing_init_dentry(); |
| 1323 | if (!d_tracer) | 1323 | if (IS_ERR(d_tracer)) |
| 1324 | return 0; | 1324 | return 0; |
| 1325 | 1325 | ||
| 1326 | entry = debugfs_create_file("kprobe_events", 0644, d_tracer, | 1326 | entry = debugfs_create_file("kprobe_events", 0644, d_tracer, |
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index fcf0a9e48916..8bb2071474dd 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c | |||
| @@ -6,8 +6,6 @@ | |||
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 9 | #include <linux/fs.h> | ||
| 10 | #include <linux/debugfs.h> | ||
| 11 | #include <linux/ftrace.h> | 9 | #include <linux/ftrace.h> |
| 12 | 10 | ||
| 13 | #include "trace.h" | 11 | #include "trace.h" |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b77b9a697619..692bf7184c8c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -177,6 +177,50 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) | |||
| 177 | } | 177 | } |
| 178 | EXPORT_SYMBOL(ftrace_print_hex_seq); | 178 | EXPORT_SYMBOL(ftrace_print_hex_seq); |
| 179 | 179 | ||
| 180 | const char * | ||
| 181 | ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, | ||
| 182 | size_t el_size) | ||
| 183 | { | ||
| 184 | const char *ret = trace_seq_buffer_ptr(p); | ||
| 185 | const char *prefix = ""; | ||
| 186 | void *ptr = (void *)buf; | ||
| 187 | |||
| 188 | trace_seq_putc(p, '{'); | ||
| 189 | |||
| 190 | while (ptr < buf + buf_len) { | ||
| 191 | switch (el_size) { | ||
| 192 | case 1: | ||
| 193 | trace_seq_printf(p, "%s0x%x", prefix, | ||
| 194 | *(u8 *)ptr); | ||
| 195 | break; | ||
| 196 | case 2: | ||
| 197 | trace_seq_printf(p, "%s0x%x", prefix, | ||
| 198 | *(u16 *)ptr); | ||
| 199 | break; | ||
| 200 | case 4: | ||
| 201 | trace_seq_printf(p, "%s0x%x", prefix, | ||
| 202 | *(u32 *)ptr); | ||
| 203 | break; | ||
| 204 | case 8: | ||
| 205 | trace_seq_printf(p, "%s0x%llx", prefix, | ||
| 206 | *(u64 *)ptr); | ||
| 207 | break; | ||
| 208 | default: | ||
| 209 | trace_seq_printf(p, "BAD SIZE:%zu 0x%x", el_size, | ||
| 210 | *(u8 *)ptr); | ||
| 211 | el_size = 1; | ||
| 212 | } | ||
| 213 | prefix = ","; | ||
| 214 | ptr += el_size; | ||
| 215 | } | ||
| 216 | |||
| 217 | trace_seq_putc(p, '}'); | ||
| 218 | trace_seq_putc(p, 0); | ||
| 219 | |||
| 220 | return ret; | ||
| 221 | } | ||
| 222 | EXPORT_SYMBOL(ftrace_print_array_seq); | ||
| 223 | |||
| 180 | int ftrace_raw_output_prep(struct trace_iterator *iter, | 224 | int ftrace_raw_output_prep(struct trace_iterator *iter, |
| 181 | struct trace_event *trace_event) | 225 | struct trace_event *trace_event) |
| 182 | { | 226 | { |
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index c4e70b6bd7fa..36c1455b7567 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | * | 5 | * |
| 6 | */ | 6 | */ |
| 7 | #include <linux/seq_file.h> | 7 | #include <linux/seq_file.h> |
| 8 | #include <linux/debugfs.h> | ||
| 9 | #include <linux/uaccess.h> | 8 | #include <linux/uaccess.h> |
| 10 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
| 11 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
| @@ -15,7 +14,6 @@ | |||
| 15 | #include <linux/ctype.h> | 14 | #include <linux/ctype.h> |
| 16 | #include <linux/list.h> | 15 | #include <linux/list.h> |
| 17 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 18 | #include <linux/fs.h> | ||
| 19 | 17 | ||
| 20 | #include "trace.h" | 18 | #include "trace.h" |
| 21 | 19 | ||
| @@ -349,7 +347,7 @@ static __init int init_trace_printk_function_export(void) | |||
| 349 | struct dentry *d_tracer; | 347 | struct dentry *d_tracer; |
| 350 | 348 | ||
| 351 | d_tracer = tracing_init_dentry(); | 349 | d_tracer = tracing_init_dentry(); |
| 352 | if (!d_tracer) | 350 | if (IS_ERR(d_tracer)) |
| 353 | return 0; | 351 | return 0; |
| 354 | 352 | ||
| 355 | trace_create_file("printk_formats", 0444, d_tracer, | 353 | trace_create_file("printk_formats", 0444, d_tracer, |
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 2e293beb186e..419ca37e72c9 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | * | 5 | * |
| 6 | */ | 6 | */ |
| 7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
| 8 | #include <linux/fs.h> | ||
| 9 | #include <linux/debugfs.h> | ||
| 10 | #include <linux/kallsyms.h> | 8 | #include <linux/kallsyms.h> |
| 11 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
| 12 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 8fb84b362816..d6e1003724e9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -10,8 +10,6 @@ | |||
| 10 | * Copyright (C) 2004 Nadia Yvette Chambers | 10 | * Copyright (C) 2004 Nadia Yvette Chambers |
| 11 | */ | 11 | */ |
| 12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 13 | #include <linux/fs.h> | ||
| 14 | #include <linux/debugfs.h> | ||
| 15 | #include <linux/kallsyms.h> | 13 | #include <linux/kallsyms.h> |
| 16 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
| 17 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index f8b45d8792f9..e694c9f9efa4 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c | |||
| @@ -120,7 +120,7 @@ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, | |||
| 120 | 120 | ||
| 121 | __trace_seq_init(s); | 121 | __trace_seq_init(s); |
| 122 | 122 | ||
| 123 | seq_buf_bitmask(&s->seq, maskp, nmaskbits); | 123 | seq_buf_printf(&s->seq, "%*pb", nmaskbits, maskp); |
| 124 | 124 | ||
| 125 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { | 125 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { |
| 126 | s->seq.len = save_len; | 126 | s->seq.len = save_len; |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 16eddb308c33..c3e4fcfddd45 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
| @@ -7,12 +7,10 @@ | |||
| 7 | #include <linux/seq_file.h> | 7 | #include <linux/seq_file.h> |
| 8 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
| 9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
| 10 | #include <linux/debugfs.h> | ||
| 11 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
| 12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 13 | #include <linux/sysctl.h> | 12 | #include <linux/sysctl.h> |
| 14 | #include <linux/init.h> | 13 | #include <linux/init.h> |
| 15 | #include <linux/fs.h> | ||
| 16 | 14 | ||
| 17 | #include <asm/setup.h> | 15 | #include <asm/setup.h> |
| 18 | 16 | ||
| @@ -462,7 +460,7 @@ static __init int stack_trace_init(void) | |||
| 462 | struct dentry *d_tracer; | 460 | struct dentry *d_tracer; |
| 463 | 461 | ||
| 464 | d_tracer = tracing_init_dentry(); | 462 | d_tracer = tracing_init_dentry(); |
| 465 | if (!d_tracer) | 463 | if (IS_ERR(d_tracer)) |
| 466 | return 0; | 464 | return 0; |
| 467 | 465 | ||
| 468 | trace_create_file("stack_max_size", 0644, d_tracer, | 466 | trace_create_file("stack_max_size", 0644, d_tracer, |
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 7af67360b330..75e19e86c954 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c | |||
| @@ -276,7 +276,7 @@ static int tracing_stat_init(void) | |||
| 276 | struct dentry *d_tracing; | 276 | struct dentry *d_tracing; |
| 277 | 277 | ||
| 278 | d_tracing = tracing_init_dentry(); | 278 | d_tracing = tracing_init_dentry(); |
| 279 | if (!d_tracing) | 279 | if (IS_ERR(d_tracing)) |
| 280 | return 0; | 280 | return 0; |
| 281 | 281 | ||
| 282 | stat_dir = debugfs_create_dir("trace_stat", d_tracing); | 282 | stat_dir = debugfs_create_dir("trace_stat", d_tracing); |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6ee36fcbf90..f97f6e3a676c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -574,7 +574,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
| 574 | size -= sizeof(u32); | 574 | size -= sizeof(u32); |
| 575 | 575 | ||
| 576 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | 576 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, |
| 577 | sys_data->enter_event->event.type, regs, &rctx); | 577 | sys_data->enter_event->event.type, NULL, &rctx); |
| 578 | if (!rec) | 578 | if (!rec) |
| 579 | return; | 579 | return; |
| 580 | 580 | ||
| @@ -647,7 +647,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
| 647 | size -= sizeof(u32); | 647 | size -= sizeof(u32); |
| 648 | 648 | ||
| 649 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | 649 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, |
| 650 | sys_data->exit_event->event.type, regs, &rctx); | 650 | sys_data->exit_event->event.type, NULL, &rctx); |
| 651 | if (!rec) | 651 | if (!rec) |
| 652 | return; | 652 | return; |
| 653 | 653 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8520acc34b18..7dc1c8abecd6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -1111,7 +1111,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, | |||
| 1111 | if (hlist_empty(head)) | 1111 | if (hlist_empty(head)) |
| 1112 | goto out; | 1112 | goto out; |
| 1113 | 1113 | ||
| 1114 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1114 | entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); |
| 1115 | if (!entry) | 1115 | if (!entry) |
| 1116 | goto out; | 1116 | goto out; |
| 1117 | 1117 | ||
| @@ -1321,7 +1321,7 @@ static __init int init_uprobe_trace(void) | |||
| 1321 | struct dentry *d_tracer; | 1321 | struct dentry *d_tracer; |
| 1322 | 1322 | ||
| 1323 | d_tracer = tracing_init_dentry(); | 1323 | d_tracer = tracing_init_dentry(); |
| 1324 | if (!d_tracer) | 1324 | if (IS_ERR(d_tracer)) |
| 1325 | return 0; | 1325 | return 0; |
| 1326 | 1326 | ||
| 1327 | trace_create_file("uprobe_events", 0644, d_tracer, | 1327 | trace_create_file("uprobe_events", 0644, d_tracer, |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 70bf11815f84..3174bf8e3538 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -154,7 +154,7 @@ static int get_softlockup_thresh(void) | |||
| 154 | */ | 154 | */ |
| 155 | static unsigned long get_timestamp(void) | 155 | static unsigned long get_timestamp(void) |
| 156 | { | 156 | { |
| 157 | return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ | 157 | return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | static void set_sample_period(void) | 160 | static void set_sample_period(void) |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index beeeac9e0e3e..f28849394791 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -3083,10 +3083,9 @@ static ssize_t wq_cpumask_show(struct device *dev, | |||
| 3083 | int written; | 3083 | int written; |
| 3084 | 3084 | ||
| 3085 | mutex_lock(&wq->mutex); | 3085 | mutex_lock(&wq->mutex); |
| 3086 | written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask); | 3086 | written = scnprintf(buf, PAGE_SIZE, "%*pb\n", |
| 3087 | cpumask_pr_args(wq->unbound_attrs->cpumask)); | ||
| 3087 | mutex_unlock(&wq->mutex); | 3088 | mutex_unlock(&wq->mutex); |
| 3088 | |||
| 3089 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | ||
| 3090 | return written; | 3089 | return written; |
| 3091 | } | 3090 | } |
| 3092 | 3091 | ||
