Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpu.c              |  2
-rw-r--r--  kernel/events/core.c      | 23
-rw-r--r--  kernel/events/uprobes.c   |  6
-rw-r--r--  kernel/irq/manage.c       |  1
-rw-r--r--  kernel/kcov.c             |  9
-rw-r--r--  kernel/power/suspend.c    |  4
-rw-r--r--  kernel/printk/printk.c    |  4
-rw-r--r--  kernel/ptrace.c           | 16
-rw-r--r--  kernel/sched/core.c       | 16
-rw-r--r--  kernel/sched/fair.c       | 23
-rw-r--r--  kernel/sched/wait.c       | 10
-rw-r--r--  kernel/softirq.c          |  2
-rw-r--r--  kernel/time/alarmtimer.c  |  2
-rw-r--r--  kernel/time/timer.c       | 74
14 files changed, 124 insertions, 68 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5df20d6d1520..29de1a9352c0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -228,7 +228,7 @@ static struct {
 	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
 	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = {.name = "cpu_hotplug.lock" },
+	.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
 #endif
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6e47e97b33f..0e292132efac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1960,6 +1960,12 @@ void perf_event_disable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_disable);
 
+void perf_event_disable_inatomic(struct perf_event *event)
+{
+	event->pending_disable = 1;
+	irq_work_queue(&event->pending);
+}
+
 static void perf_set_shadow_time(struct perf_event *event,
 				 struct perf_event_context *ctx,
 				 u64 tstamp)
@@ -7075,8 +7081,8 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (events && atomic_dec_and_test(&event->event_limit)) {
 		ret = 1;
 		event->pending_kill = POLL_HUP;
-		event->pending_disable = 1;
-		irq_work_queue(&event->pending);
+
+		perf_event_disable_inatomic(event);
 	}
 
 	READ_ONCE(event->overflow_handler)(event, data, regs);
@@ -8855,7 +8861,10 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
+	int remove_device;
+
 	mutex_lock(&pmus_lock);
+	remove_device = pmu_bus_running;
 	list_del_rcu(&pmu->entry);
 	mutex_unlock(&pmus_lock);
 
@@ -8869,10 +8878,12 @@ void perf_pmu_unregister(struct pmu *pmu)
 	free_percpu(pmu->pmu_disable_count);
 	if (pmu->type >= PERF_TYPE_MAX)
 		idr_remove(&pmu_idr, pmu->type);
-	if (pmu->nr_addr_filters)
-		device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
-	device_del(pmu->dev);
-	put_device(pmu->dev);
+	if (remove_device) {
+		if (pmu->nr_addr_filters)
+			device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
+		device_del(pmu->dev);
+		put_device(pmu->dev);
+	}
 	free_pmu_context(pmu);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d4129bb05e5d..f9ec9add2164 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -300,7 +300,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 
 retry:
 	/* Read the page with vaddr into memory */
-	ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
+	ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
+			&vma);
 	if (ret <= 0)
 		return ret;
 
@@ -1710,7 +1711,8 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	 * but we treat this as a 'remote' access since it is
 	 * essentially a kernel access to the memory.
 	 */
-	result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
+	result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
+			NULL);
 	if (result < 0)
 		return result;
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0c5f1a5db654..9c4d30483264 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -721,6 +721,7 @@ int irq_set_parent(int irq, int parent_irq)
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(irq_set_parent);
 #endif
 
 /*
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 8d44b3fea9d0..30e6d05aa5a9 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -53,8 +53,15 @@ void notrace __sanitizer_cov_trace_pc(void)
 	/*
 	 * We are interested in code coverage as a function of a syscall inputs,
 	 * so we ignore code executed in interrupts.
+	 * The checks for whether we are in an interrupt are open-coded, because
+	 * 1. We can't use in_interrupt() here, since it also returns true
+	 *    when we are inside local_bh_disable() section.
+	 * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()),
+	 *    since that leads to slower generated code (three separate tests,
+	 *    one for each of the flags).
 	 */
-	if (!t || in_interrupt())
+	if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET
+							| NMI_MASK)))
 		return;
 	mode = READ_ONCE(t->kcov_mode);
 	if (mode == KCOV_MODE_TRACE) {
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 1e7f5da648d9..6ccb08f57fcb 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -498,9 +498,9 @@ static int enter_state(suspend_state_t state)
 
 #ifndef CONFIG_SUSPEND_SKIP_SYNC
 	trace_suspend_resume(TPS("sync_filesystems"), 0, true);
-	printk(KERN_INFO "PM: Syncing filesystems ... ");
+	pr_info("PM: Syncing filesystems ... ");
 	sys_sync();
-	printk("done.\n");
+	pr_cont("done.\n");
 	trace_suspend_resume(TPS("sync_filesystems"), 0, false);
 #endif
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d5e397315473..de08fc90baaf 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1769,6 +1769,10 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c
 		cont_flush();
 	}
 
+	/* Skip empty continuation lines that couldn't be added - they just flush */
+	if (!text_len && (lflags & LOG_CONT))
+		return 0;
+
 	/* If it doesn't end in a newline, try to buffer the current line */
 	if (!(lflags & LOG_NEWLINE)) {
 		if (cont_add(facility, level, lflags, text, text_len))
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 2a99027312a6..e6474f7272ec 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -537,7 +537,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
 		int this_len, retval;
 
 		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
-		retval = access_process_vm(tsk, src, buf, this_len, 0);
+		retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
 		if (!retval) {
 			if (copied)
 				break;
@@ -564,7 +564,8 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
 		if (copy_from_user(buf, src, this_len))
 			return -EFAULT;
-		retval = access_process_vm(tsk, dst, buf, this_len, 1);
+		retval = access_process_vm(tsk, dst, buf, this_len,
+				FOLL_FORCE | FOLL_WRITE);
 		if (!retval) {
 			if (copied)
 				break;
@@ -1127,7 +1128,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
 	unsigned long tmp;
 	int copied;
 
-	copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
+	copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
 	if (copied != sizeof(tmp))
 		return -EIO;
 	return put_user(tmp, (unsigned long __user *)data);
@@ -1138,7 +1139,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
 {
 	int copied;
 
-	copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
+	copied = access_process_vm(tsk, addr, &data, sizeof(data),
+			FOLL_FORCE | FOLL_WRITE);
 	return (copied == sizeof(data)) ? 0 : -EIO;
 }
 
@@ -1155,7 +1157,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 	switch (request) {
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
-		ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+		ret = access_process_vm(child, addr, &word, sizeof(word),
+				FOLL_FORCE);
 		if (ret != sizeof(word))
 			ret = -EIO;
 		else
@@ -1164,7 +1167,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
 	case PTRACE_POKETEXT:
 	case PTRACE_POKEDATA:
-		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+		ret = access_process_vm(child, addr, &data, sizeof(data),
+				FOLL_FORCE | FOLL_WRITE);
 		ret = (ret != sizeof(data) ? -EIO : 0);
 		break;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..42d4027f9e26 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7515,11 +7515,27 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
 void __init sched_init(void)
 {
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
+	for (i = 0; i < WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(bit_wait_table + i);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d4ad72f8f3c..c242944f5cbd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -690,7 +690,14 @@ void init_entity_runnable_average(struct sched_entity *se)
 	 * will definitely be update (after enqueue).
 	 */
 	sa->period_contrib = 1023;
-	sa->load_avg = scale_load_down(se->load.weight);
+	/*
+	 * Tasks are intialized with full load to be seen as heavy tasks until
+	 * they get a chance to stabilize to their real load level.
+	 * Group entities are intialized with zero load to reflect the fact that
+	 * nothing has been attached to the task group yet.
+	 */
+	if (entity_is_task(se))
+		sa->load_avg = scale_load_down(se->load.weight);
 	sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
 	/*
 	 * At this point, util_avg won't be used in select_task_rq_fair anyway
@@ -5471,13 +5478,18 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
  */
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-	u64 avg_idle = this_rq()->avg_idle;
-	u64 avg_cost = this_sd->avg_scan_cost;
+	struct sched_domain *this_sd;
+	u64 avg_cost, avg_idle = this_rq()->avg_idle;
 	u64 time, cost;
 	s64 delta;
 	int cpu, wrap;
 
+	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+	if (!this_sd)
+		return -1;
+
+	avg_cost = this_sd->avg_scan_cost;
+
 	/*
 	 * Due to large variance we need a large fuzz factor; hackbench in
 	 * particularly is sensitive here.
@@ -8827,7 +8839,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct sched_entity *se;
 	struct cfs_rq *cfs_rq;
-	struct rq *rq;
 	int i;
 
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8842,8 +8853,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
 	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 4f7053579fe3..9453efe9b25a 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -480,16 +480,6 @@ void wake_up_bit(void *word, int bit)
 }
 EXPORT_SYMBOL(wake_up_bit);
 
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-	const struct zone *zone = page_zone(virt_to_page(word));
-	unsigned long val = (unsigned long)word << shift | bit;
-
-	return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
 /*
  * Manipulate the atomic_t address to produce a better bit waitqueue table hash
  * index (we're keying off bit -1, but that would produce a horrible hash
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 1bf81ef91375..744fa611cae0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
-	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
+	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
 	"TASKLET", "SCHED", "HRTIMER", "RCU"
 };
 
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index c3aad685bbc0..12dd190634ab 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -542,7 +542,6 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
 static int alarm_timer_create(struct k_itimer *new_timer)
 {
 	enum alarmtimer_type type;
-	struct alarm_base *base;
 
 	if (!alarmtimer_get_rtcdev())
 		return -ENOTSUPP;
@@ -551,7 +550,6 @@ static int alarm_timer_create(struct k_itimer *new_timer)
 		return -EPERM;
 
 	type = clock2alarm(new_timer->it_clock);
-	base = &alarm_bases[type];
 	alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
 	return 0;
 }
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d47980a1bc4..c611c47de884 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 
 #ifdef CONFIG_NO_HZ_COMMON
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 #ifdef CONFIG_SMP
 	if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@
 
 static inline void forward_timer_base(struct timer_base *base)
 {
+	unsigned long jnow = READ_ONCE(jiffies);
+
 	/*
 	 * We only forward the base when it's idle and we have a delta between
 	 * base clock and jiffies.
 	 */
-	if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+	if (!base->is_idle || (long) (jnow - base->clk) < 2)
 		return;
 
 	/*
 	 * If the next expiry value is > jiffies, then we fast forward to
 	 * jiffies otherwise we forward to the next expiry value.
 	 */
-	if (time_after(base->next_expiry, jiffies))
-		base->clk = jiffies;
+	if (time_after(base->next_expiry, jnow))
+		base->clk = jnow;
 	else
 		base->clk = base->next_expiry;
 }
 #else
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 	return get_timer_this_cpu_base(tflags);
 }
@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 static inline void forward_timer_base(struct timer_base *base) { }
 #endif
 
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-	struct timer_base *target = __get_target_base(base, tflags);
-
-	forward_timer_base(target);
-	return target;
-}
 
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 {
 	for (;;) {
 		struct timer_base *base;
-		u32 tf = timer->flags;
+		u32 tf;
+
+		/*
+		 * We need to use READ_ONCE() here, otherwise the compiler
+		 * might re-read @tf between the check for TIMER_MIGRATING
+		 * and spin_lock().
+		 */
+		tf = READ_ONCE(timer->flags);
 
 		if (!(tf & TIMER_MIGRATING)) {
 			base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	unsigned long clk = 0, flags;
 	int ret = 0;
 
+	BUG_ON(!timer->function);
+
 	/*
 	 * This is a common optimization triggered by the networking code - if
 	 * the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	if (timer_pending(timer)) {
 		if (timer->expires == expires)
 			return 1;
+
 		/*
-		 * Take the current timer_jiffies of base, but without holding
-		 * the lock!
+		 * We lock timer base and calculate the bucket index right
+		 * here. If the timer ends up in the same bucket, then we
+		 * just update the expiry time and avoid the whole
+		 * dequeue/enqueue dance.
		 */
-		base = get_timer_base(timer->flags);
-		clk = base->clk;
+		base = lock_timer_base(timer, &flags);
 
+		clk = base->clk;
 		idx = calc_wheel_index(expires, clk);
 
 		/*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 */
 		if (idx == timer_get_idx(timer)) {
 			timer->expires = expires;
-			return 1;
+			ret = 1;
+			goto out_unlock;
 		}
+	} else {
+		base = lock_timer_base(timer, &flags);
 	}
 
 	timer_stats_timer_set_start_info(timer);
-	BUG_ON(!timer->function);
-
-	base = lock_timer_base(timer, &flags);
 
 	ret = detach_if_pending(timer, base, false);
 	if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		}
 	}
 
+	/* Try to forward a stale timer base clock */
+	forward_timer_base(base);
+
 	timer->expires = expires;
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
-	 * between calculating 'idx' and taking the lock, only enqueue_timer()
-	 * and trigger_dyntick_cpu() is required. Otherwise we need to
-	 * (re)calculate the wheel index via internal_add_timer().
+	 * between calculating 'idx' and possibly switching the base, only
+	 * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
+	 * we need to (re)calculate the wheel index via
+	 * internal_add_timer().
	 */
 	if (idx != UINT_MAX && clk == base->clk) {
 		enqueue_timer(base, timer, idx);
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
 	base->next_expiry = nextevt;
 	/*
-	 * We have a fresh next event. Check whether we can forward the base:
+	 * We have a fresh next event. Check whether we can forward the
+	 * base. We can only do that when @basej is past base->clk
+	 * otherwise we might rewind base->clk.
	 */
-	if (time_after(nextevt, jiffies))
-		base->clk = jiffies;
-	else if (time_after(nextevt, base->clk))
-		base->clk = nextevt;
+	if (time_after(basej, base->clk)) {
+		if (time_after(nextevt, basej))
+			base->clk = basej;
+		else if (time_after(nextevt, base->clk))
+			base->clk = nextevt;
+	}
 
 	if (time_before_eq(nextevt, basej)) {
 		expires = basem;