Diffstat (limited to 'kernel')
77 files changed, 3062 insertions, 1503 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index bc010ee272b6..f2a8b6246ce9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -18,11 +18,13 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_irq_work.o = -pg
 endif
 
+# cond_syscall is currently not LTO compatible
+CFLAGS_sys_ni.o = $(DISABLE_LTO)
+
 obj-y += sched/
 obj-y += locking/
 obj-y += power/
 obj-y += printk/
-obj-y += cpu/
 obj-y += irq/
 obj-y += rcu/
 
@@ -93,6 +95,7 @@ obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
+obj-$(CONFIG_TORTURE_TEST) += torture.o
 
 $(obj)/configs.o: $(obj)/config_data.h
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 34c5a2310fbf..95a20f3f52f1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -182,7 +182,7 @@ struct audit_buffer {
 
 struct audit_reply {
 	__u32 portid;
-	pid_t pid;
+	struct net *net;
 	struct sk_buff *skb;
 };
 
@@ -500,7 +500,7 @@ int audit_send_list(void *_dest)
 {
 	struct audit_netlink_list *dest = _dest;
 	struct sk_buff *skb;
-	struct net *net = get_net_ns_by_pid(dest->pid);
+	struct net *net = dest->net;
 	struct audit_net *aunet = net_generic(net, audit_net_id);
 
 	/* wait for parent to finish and send an ACK */
@@ -510,6 +510,7 @@ int audit_send_list(void *_dest)
 	while ((skb = __skb_dequeue(&dest->q)) != NULL)
 		netlink_unicast(aunet->nlsk, skb, dest->portid, 0);
 
+	put_net(net);
 	kfree(dest);
 
 	return 0;
@@ -543,7 +544,7 @@ out_kfree_skb:
 static int audit_send_reply_thread(void *arg)
 {
 	struct audit_reply *reply = (struct audit_reply *)arg;
-	struct net *net = get_net_ns_by_pid(reply->pid);
+	struct net *net = reply->net;
 	struct audit_net *aunet = net_generic(net, audit_net_id);
 
 	mutex_lock(&audit_cmd_mutex);
@@ -552,12 +553,13 @@ static int audit_send_reply_thread(void *arg)
 	/* Ignore failure. It'll only happen if the sender goes away,
 	   because our timeout is set to infinite. */
 	netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0);
+	put_net(net);
 	kfree(reply);
 	return 0;
 }
 /**
  * audit_send_reply - send an audit reply message via netlink
- * @portid: netlink port to which to send reply
+ * @request_skb: skb of request we are replying to (used to target the reply)
  * @seq: sequence number
  * @type: audit message type
  * @done: done (last) flag
@@ -568,9 +570,11 @@ static int audit_send_reply_thread(void *arg)
  * Allocates an skb, builds the netlink message, and sends it to the port id.
  * No failure notifications.
  */
-static void audit_send_reply(__u32 portid, int seq, int type, int done,
+static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done,
 			     int multi, const void *payload, int size)
 {
+	u32 portid = NETLINK_CB(request_skb).portid;
+	struct net *net = sock_net(NETLINK_CB(request_skb).sk);
 	struct sk_buff *skb;
 	struct task_struct *tsk;
 	struct audit_reply *reply = kmalloc(sizeof(struct audit_reply),
@@ -583,8 +587,8 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done,
 	if (!skb)
 		goto out;
 
+	reply->net = get_net(net);
 	reply->portid = portid;
-	reply->pid = task_pid_vnr(current);
 	reply->skb = skb;
 
 	tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply");
@@ -604,9 +608,19 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	int err = 0;
 
 	/* Only support the initial namespaces for now. */
+	/*
+	 * We return ECONNREFUSED because it tricks userspace into thinking
+	 * that audit was not configured into the kernel. Lots of users
+	 * configure their PAM stack (because that's what the distro does)
+	 * to reject login if unable to send messages to audit. If we return
+	 * ECONNREFUSED the PAM stack thinks the kernel does not have audit
+	 * configured in and will let login proceed. If we return EPERM
+	 * userspace will reject all logins. This should be removed when we
+	 * support non init namespaces!!
+	 */
 	if ((current_user_ns() != &init_user_ns) ||
 	    (task_active_pid_ns(current) != &init_pid_ns))
-		return -EPERM;
+		return -ECONNREFUSED;
 
 	switch (msg_type) {
 	case AUDIT_LIST:
@@ -673,8 +687,7 @@ static int audit_get_feature(struct sk_buff *skb)
 
 	seq = nlmsg_hdr(skb)->nlmsg_seq;
 
-	audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
-			 &af, sizeof(af));
+	audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af));
 
 	return 0;
 }
@@ -794,8 +807,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		s.backlog = skb_queue_len(&audit_skb_queue);
 		s.version = AUDIT_VERSION_LATEST;
 		s.backlog_wait_time = audit_backlog_wait_time;
-		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
-				 &s, sizeof(s));
+		audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
 	case AUDIT_SET: {
@@ -905,7 +917,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				  seq, data, nlmsg_len(nlh));
 		break;
 	case AUDIT_LIST_RULES:
-		err = audit_list_rules_send(NETLINK_CB(skb).portid, seq);
+		err = audit_list_rules_send(skb, seq);
 		break;
 	case AUDIT_TRIM:
 		audit_trim_trees();
@@ -970,8 +982,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			memcpy(sig_data->ctx, ctx, len);
 			security_release_secctx(ctx, len);
 		}
-		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO,
-				 0, 0, sig_data, sizeof(*sig_data) + len);
+		audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0,
+				 sig_data, sizeof(*sig_data) + len);
 		kfree(sig_data);
 		break;
 	case AUDIT_TTY_GET: {
@@ -983,8 +995,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		s.log_passwd = tsk->signal->audit_tty_log_passwd;
 		spin_unlock(&tsk->sighand->siglock);
 
-		audit_send_reply(NETLINK_CB(skb).portid, seq,
-				 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
+		audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
 	case AUDIT_TTY_SET: {
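The audit hunks above stop resolving a network namespace from a pid at reply time and instead capture the portid and namespace from the request skb when the reply is queued. A minimal illustrative sketch of that pattern follows; the struct and function names are hypothetical, and only the NETLINK_CB()/sock_net()/get_net()/put_net() calls mirror the patch.

struct example_reply {			/* plays the role of struct audit_reply */
	__u32 portid;
	struct net *net;
	struct sk_buff *skb;
};

static void example_prepare_reply(struct sk_buff *request_skb, struct example_reply *r)
{
	r->portid = NETLINK_CB(request_skb).portid;		/* sender's port id */
	r->net = get_net(sock_net(NETLINK_CB(request_skb).sk));	/* pin sender's netns */
	/* a worker thread later calls netlink_unicast() on the per-namespace
	 * socket with r->skb and r->portid, then drops the reference with
	 * put_net(r->net) */
}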
diff --git a/kernel/audit.h b/kernel/audit.h
index 57cc64d67718..8df132214606 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -247,7 +247,7 @@ extern void audit_panic(const char *message);
 
 struct audit_netlink_list {
 	__u32 portid;
-	pid_t pid;
+	struct net *net;
 	struct sk_buff_head q;
 };
 
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 67ccf0e7cca9..135944a7b28a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -916,7 +916,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
 				   struct fsnotify_mark *inode_mark,
 				   struct fsnotify_mark *vfsmount_mark,
 				   u32 mask, void *data, int data_type,
-				   const unsigned char *file_name)
+				   const unsigned char *file_name, u32 cookie)
 {
 	return 0;
 }
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 2596fac5dcb4..70b4554d2fbe 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -471,7 +471,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
 				    struct fsnotify_mark *inode_mark,
 				    struct fsnotify_mark *vfsmount_mark,
 				    u32 mask, void *data, int data_type,
-				    const unsigned char *dname)
+				    const unsigned char *dname, u32 cookie)
 {
 	struct inode *inode;
 	struct audit_parent *parent;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 14a78cca384e..92062fd6cc8c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -29,6 +29,8 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
 #include "audit.h"
 
 /*
@@ -1065,11 +1067,13 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data,
 
 /**
  * audit_list_rules_send - list the audit rules
- * @portid: target portid for netlink audit messages
+ * @request_skb: skb of request we are replying to (used to target the reply)
  * @seq: netlink audit message sequence (serial) number
 */
-int audit_list_rules_send(__u32 portid, int seq)
+int audit_list_rules_send(struct sk_buff *request_skb, int seq)
 {
+	u32 portid = NETLINK_CB(request_skb).portid;
+	struct net *net = sock_net(NETLINK_CB(request_skb).sk);
 	struct task_struct *tsk;
 	struct audit_netlink_list *dest;
 	int err = 0;
@@ -1083,8 +1087,8 @@ int audit_list_rules_send(__u32 portid, int seq)
 	dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);
 	if (!dest)
 		return -ENOMEM;
+	dest->net = get_net(net);
 	dest->portid = portid;
-	dest->pid = task_pid_vnr(current);
 	skb_queue_head_init(&dest->q);
 
 	mutex_lock(&audit_filter_mutex);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e2f46ba37f72..0c753ddd223b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -886,7 +886,9 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * per-subsystem and moved to css->id so that lookups are
 		 * successful until the target css is released.
 		 */
+		mutex_lock(&cgroup_mutex);
 		idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+		mutex_unlock(&cgroup_mutex);
 		cgrp->id = -1;
 
 		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
@@ -1566,10 +1568,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	mutex_lock(&cgroup_mutex);
 	mutex_lock(&cgroup_root_mutex);
 
-	root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp,
-				  0, 1, GFP_KERNEL);
-	if (root_cgrp->id < 0)
+	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
+	if (ret < 0)
 		goto unlock_drop;
+	root_cgrp->id = ret;
 
 	/* Check for name clashes with existing mounts */
 	ret = -EBUSY;
@@ -2763,10 +2765,7 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 	 */
 	update_before = cgroup_serial_nr_next;
 
-	mutex_unlock(&cgroup_mutex);
-
 	/* add/rm files for all cgroups created before */
-	rcu_read_lock();
 	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
 		struct cgroup *cgrp = css->cgroup;
 
@@ -2775,23 +2774,19 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 
 		inode = cgrp->dentry->d_inode;
 		dget(cgrp->dentry);
-		rcu_read_unlock();
-
 		dput(prev);
 		prev = cgrp->dentry;
 
+		mutex_unlock(&cgroup_mutex);
 		mutex_lock(&inode->i_mutex);
 		mutex_lock(&cgroup_mutex);
 		if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
 			ret = cgroup_addrm_files(cgrp, cfts, is_add);
-		mutex_unlock(&cgroup_mutex);
 		mutex_unlock(&inode->i_mutex);
-
-		rcu_read_lock();
 		if (ret)
 			break;
 	}
-	rcu_read_unlock();
+	mutex_unlock(&cgroup_mutex);
 	dput(prev);
 	deactivate_super(sb);
 	return ret;
@@ -2910,9 +2905,14 @@ static void cgroup_enable_task_cg_lists(void)
 		 * We should check if the process is exiting, otherwise
 		 * it will race with cgroup_exit() in that the list
 		 * entry won't be deleted though the process has exited.
+		 * Do it while holding siglock so that we don't end up
+		 * racing against cgroup_exit().
 		 */
+		spin_lock_irq(&p->sighand->siglock);
 		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
 			list_add(&p->cg_list, &task_css_set(p)->tasks);
+		spin_unlock_irq(&p->sighand->siglock);
+
 		task_unlock(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
@@ -4112,17 +4112,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 
 	err = percpu_ref_init(&css->refcnt, css_release);
 	if (err)
-		goto err_free;
+		goto err_free_css;
 
 	init_css(css, ss, cgrp);
 
 	err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id);
 	if (err)
-		goto err_free;
+		goto err_free_percpu_ref;
 
 	err = online_css(css);
 	if (err)
-		goto err_free;
+		goto err_clear_dir;
 
 	dget(cgrp->dentry);
 	css_get(css->parent);
@@ -4138,8 +4138,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 
 	return 0;
 
-err_free:
+err_clear_dir:
+	cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);
+err_free_percpu_ref:
 	percpu_ref_cancel_init(&css->refcnt);
+err_free_css:
 	ss->css_free(css);
 	return err;
 }
@@ -4158,7 +4161,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	struct cgroup *cgrp;
 	struct cgroup_name *name;
 	struct cgroupfs_root *root = parent->root;
-	int ssid, err = 0;
+	int ssid, err;
 	struct cgroup_subsys *ss;
 	struct super_block *sb = root->sb;
 
@@ -4168,19 +4171,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 		return -ENOMEM;
 
 	name = cgroup_alloc_name(dentry);
-	if (!name)
+	if (!name) {
+		err = -ENOMEM;
 		goto err_free_cgrp;
+	}
 	rcu_assign_pointer(cgrp->name, name);
 
 	/*
-	 * Temporarily set the pointer to NULL, so idr_find() won't return
-	 * a half-baked cgroup.
-	 */
-	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
-	if (cgrp->id < 0)
-		goto err_free_name;
-
-	/*
 	 * Only live parents can have children. Note that the liveliness
 	 * check isn't strictly necessary because cgroup_mkdir() and
 	 * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
@@ -4189,7 +4186,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	 */
 	if (!cgroup_lock_live_group(parent)) {
 		err = -ENODEV;
-		goto err_free_id;
+		goto err_free_name;
+	}
+
+	/*
+	 * Temporarily set the pointer to NULL, so idr_find() won't return
+	 * a half-baked cgroup.
+	 */
+	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+	if (cgrp->id < 0) {
+		err = -ENOMEM;
+		goto err_unlock;
 	}
 
 	/* Grab a reference on the superblock so the hierarchy doesn't
@@ -4221,7 +4228,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	 */
 	err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
 	if (err < 0)
-		goto err_unlock;
+		goto err_free_id;
 	lockdep_assert_held(&dentry->d_inode->i_mutex);
 
 	cgrp->serial_nr = cgroup_serial_nr_next++;
@@ -4257,12 +4264,12 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	return 0;
 
-err_unlock:
-	mutex_unlock(&cgroup_mutex);
-	/* Release the reference count that we took on the superblock */
-	deactivate_super(sb);
 err_free_id:
 	idr_remove(&root->cgroup_idr, cgrp->id);
+	/* Release the reference count that we took on the superblock */
+	deactivate_super(sb);
+err_unlock:
+	mutex_unlock(&cgroup_mutex);
 err_free_name:
 	kfree(rcu_dereference_raw(cgrp->name));
 err_free_cgrp:
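One detail worth noting from the cgroup_mount() hunk above: idr_alloc() returns either a new id or a negative errno, so the result is captured in the signed ret first and only committed to root_cgrp->id after the sign check. Restated as a small sketch with brief comments (same calls as in the patch, comments added here):

	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
	if (ret < 0)			/* -ENOMEM or -ENOSPC */
		goto unlock_drop;	/* 'ret' now reaches the caller as the error */
	root_cgrp->id = ret;		/* commit the id only once it is known valid */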
diff --git a/kernel/compat.c b/kernel/compat.c
index 0a09e481b70b..488ff8c4cf48 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -110,8 +110,8 @@ static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
 	return 0;
 }
 
-asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
-		struct timezone __user *tz)
+COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
+		       struct timezone __user *, tz)
 {
 	if (tv) {
 		struct timeval ktv;
@@ -127,8 +127,8 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
 	return 0;
 }
 
-asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
-		struct timezone __user *tz)
+COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv,
+		       struct timezone __user *, tz)
 {
 	struct timespec kts;
 	struct timezone ktz;
@@ -236,8 +236,8 @@ static long compat_nanosleep_restart(struct restart_block *restart)
 	return ret;
 }
 
-asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
-		struct compat_timespec __user *rmtp)
+COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
+		       struct compat_timespec __user *, rmtp)
 {
 	struct timespec tu, rmt;
 	mm_segment_t oldfs;
@@ -328,7 +328,7 @@ static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
 	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
 }
 
-asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
+COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
 {
 	if (tbuf) {
 		struct tms tms;
@@ -354,7 +354,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 * types that can be passed to put_user()/get_user().
 */
 
-asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set)
+COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set)
 {
 	old_sigset_t s;
 	long ret;
@@ -424,8 +424,8 @@ COMPAT_SYSCALL_DEFINE3(sigprocmask, int, how,
 
 #endif
 
-asmlinkage long compat_sys_setrlimit(unsigned int resource,
-		struct compat_rlimit __user *rlim)
+COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource,
+		       struct compat_rlimit __user *, rlim)
 {
 	struct rlimit r;
 
@@ -443,8 +443,8 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
 
 #ifdef COMPAT_RLIM_OLD_INFINITY
 
-asmlinkage long compat_sys_old_getrlimit(unsigned int resource,
-		struct compat_rlimit __user *rlim)
+COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
+		       struct compat_rlimit __user *, rlim)
 {
 	struct rlimit r;
 	int ret;
@@ -470,8 +470,8 @@ asmlinkage long compat_sys_old_getrlimit(unsigned int resource,
 
 #endif
 
-asmlinkage long compat_sys_getrlimit(unsigned int resource,
-		struct compat_rlimit __user *rlim)
+COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
+		       struct compat_rlimit __user *, rlim)
 {
 	struct rlimit r;
 	int ret;
@@ -596,9 +596,9 @@ static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr,
 	return compat_get_bitmap(k, user_mask_ptr, len * 8);
 }
 
-asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
-					     unsigned int len,
-					     compat_ulong_t __user *user_mask_ptr)
+COMPAT_SYSCALL_DEFINE3(sched_setaffinity, compat_pid_t, pid,
+		       unsigned int, len,
+		       compat_ulong_t __user *, user_mask_ptr)
 {
 	cpumask_var_t new_mask;
 	int retval;
@@ -616,8 +616,8 @@ out:
 	return retval;
 }
 
-asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
-					     compat_ulong_t __user *user_mask_ptr)
+COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len,
+		       compat_ulong_t __user *, user_mask_ptr)
 {
 	int ret;
 	cpumask_var_t mask;
@@ -662,9 +662,9 @@ int put_compat_itimerspec(struct compat_itimerspec __user *dst,
 	return 0;
 }
 
-long compat_sys_timer_create(clockid_t which_clock,
-			struct compat_sigevent __user *timer_event_spec,
-			timer_t __user *created_timer_id)
+COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
+		       struct compat_sigevent __user *, timer_event_spec,
+		       timer_t __user *, created_timer_id)
 {
 	struct sigevent __user *event = NULL;
 
@@ -680,9 +680,9 @@ long compat_sys_timer_create(clockid_t which_clock,
 	return sys_timer_create(which_clock, event, created_timer_id);
 }
 
-long compat_sys_timer_settime(timer_t timer_id, int flags,
-			struct compat_itimerspec __user *new,
-			struct compat_itimerspec __user *old)
+COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
+		       struct compat_itimerspec __user *, new,
+		       struct compat_itimerspec __user *, old)
 {
 	long err;
 	mm_segment_t oldfs;
@@ -703,8 +703,8 @@ long compat_sys_timer_settime(timer_t timer_id, int flags,
 	return err;
 }
 
-long compat_sys_timer_gettime(timer_t timer_id,
-		struct compat_itimerspec __user *setting)
+COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
+		       struct compat_itimerspec __user *, setting)
 {
 	long err;
 	mm_segment_t oldfs;
@@ -720,8 +720,8 @@ long compat_sys_timer_gettime(timer_t timer_id,
 	return err;
 }
 
-long compat_sys_clock_settime(clockid_t which_clock,
-		struct compat_timespec __user *tp)
+COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
+		       struct compat_timespec __user *, tp)
 {
 	long err;
 	mm_segment_t oldfs;
@@ -737,8 +737,8 @@ long compat_sys_clock_settime(clockid_t which_clock,
 	return err;
 }
 
-long compat_sys_clock_gettime(clockid_t which_clock,
-		struct compat_timespec __user *tp)
+COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
+		       struct compat_timespec __user *, tp)
 {
 	long err;
 	mm_segment_t oldfs;
@@ -754,8 +754,8 @@ long compat_sys_clock_gettime(clockid_t which_clock,
 	return err;
 }
 
-long compat_sys_clock_adjtime(clockid_t which_clock,
-		struct compat_timex __user *utp)
+COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
+		       struct compat_timex __user *, utp)
 {
 	struct timex txc;
 	mm_segment_t oldfs;
@@ -777,8 +777,8 @@ long compat_sys_clock_adjtime(clockid_t which_clock,
 	return ret;
 }
 
-long compat_sys_clock_getres(clockid_t which_clock,
-		struct compat_timespec __user *tp)
+COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
+		       struct compat_timespec __user *, tp)
 {
 	long err;
 	mm_segment_t oldfs;
@@ -818,9 +818,9 @@ static long compat_clock_nanosleep_restart(struct restart_block *restart)
 	return err;
 }
 
-long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
-			struct compat_timespec __user *rqtp,
-			struct compat_timespec __user *rmtp)
+COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
+		       struct compat_timespec __user *, rqtp,
+		       struct compat_timespec __user *, rmtp)
 {
 	long err;
 	mm_segment_t oldfs;
@@ -1010,7 +1010,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
 
-asmlinkage long compat_sys_time(compat_time_t __user * tloc)
+COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
 {
 	compat_time_t i;
 	struct timeval tv;
@@ -1026,7 +1026,7 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc)
 	return i;
 }
 
-asmlinkage long compat_sys_stime(compat_time_t __user *tptr)
+COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
 {
 	struct timespec tv;
 	int err;
@@ -1046,7 +1046,7 @@ asmlinkage long compat_sys_stime(compat_time_t __user *tptr)
 
 #endif /* __ARCH_WANT_COMPAT_SYS_TIME */
 
-asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
+COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp)
 {
 	struct timex txc;
 	int err, ret;
@@ -1065,11 +1065,11 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
 }
 
 #ifdef CONFIG_NUMA
-asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
-		compat_uptr_t __user *pages32,
-		const int __user *nodes,
-		int __user *status,
-		int flags)
+COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
+		       compat_uptr_t __user *, pages32,
+		       const int __user *, nodes,
+		       int __user *, status,
+		       int, flags)
 {
 	const void __user * __user *pages;
 	int i;
@@ -1085,10 +1085,10 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
 	return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
 }
 
-asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
-		compat_ulong_t maxnode,
-		const compat_ulong_t __user *old_nodes,
-		const compat_ulong_t __user *new_nodes)
+COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid,
+		       compat_ulong_t, maxnode,
+		       const compat_ulong_t __user *, old_nodes,
+		       const compat_ulong_t __user *, new_nodes)
 {
 	unsigned long __user *old = NULL;
 	unsigned long __user *new = NULL;
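All of the kernel/compat.c changes above are the same mechanical conversion from open-coded asmlinkage prototypes to the COMPAT_SYSCALL_DEFINEn macros. As a rough before/after sketch for a hypothetical two-argument compat syscall (not one taken from this patch):

/* before: hand-written prototype */
asmlinkage long compat_sys_example(unsigned int fd, struct compat_timespec __user *ts);

/* after: the macro takes the name followed by alternating type/name pairs and
 * generates the prototype plus the argument sign/zero-extension glue */
COMPAT_SYSCALL_DEFINE2(example, unsigned int, fd,
		       struct compat_timespec __user *, ts)
{
	/* body unchanged */
	return 0;
}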
diff --git a/kernel/cpu/Makefile b/kernel/cpu/Makefile
deleted file mode 100644
index 59ab052ef7a0..000000000000
--- a/kernel/cpu/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-y = idle.o
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4410ac6a55f1..e6b1b66afe52 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -974,12 +974,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 * Temporarilly set tasks mems_allowed to target nodes of migration,
 * so that the migration code can allocate pages on these nodes.
 *
- * Call holding cpuset_mutex, so current's cpuset won't change
- * during this call, as manage_mutex holds off any cpuset_attach()
- * calls. Therefore we don't need to take task_lock around the
- * call to guarantee_online_mems(), as we know no one is changing
- * our task's cpuset.
- *
 * While the mm_struct we are migrating is typically from some
 * other task, the task_struct mems_allowed that we are hacking
 * is for our current task, which must allocate new pages for that
@@ -996,8 +990,10 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 
 	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
 
+	rcu_read_lock();
 	mems_cs = effective_nodemask_cpuset(task_cs(tsk));
 	guarantee_online_mems(mems_cs, &tsk->mems_allowed);
+	rcu_read_unlock();
 }
 
 /*
@@ -2486,9 +2482,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 
 	task_lock(current);
 	cs = nearest_hardwall_ancestor(task_cs(current));
+	allowed = node_isset(node, cs->mems_allowed);
 	task_unlock(current);
 
-	allowed = node_isset(node, cs->mems_allowed);
 	mutex_unlock(&callback_mutex);
 	return allowed;
 }
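The cpuset_migrate_mm() hunk above puts the task_cs() lookup under rcu_read_lock(), and the softwall hunk moves the node_isset() test back under task_lock(). A minimal sketch of the RCU usage pattern being applied (the consumer function name is hypothetical):

	rcu_read_lock();
	cs = task_cs(tsk);		/* RCU-protected; only valid inside the section */
	use_cpuset_nodemask(cs);	/* hypothetical non-sleeping use of the cpuset */
	rcu_read_unlock();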
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 334b3980ffc1..99982a70ddad 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -1035,7 +1035,7 @@ int dbg_io_get_char(void)
 * otherwise as a quick means to stop program execution and "break" into
 * the debugger.
 */
-void kgdb_breakpoint(void)
+noinline void kgdb_breakpoint(void)
 {
 	atomic_inc(&kgdb_setting_breakpoint);
 	wmb(); /* Sync point before breakpoint */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 56003c6edfd3..661951ab8ae7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -231,11 +231,29 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 #define NR_ACCUMULATED_SAMPLES 128
 static DEFINE_PER_CPU(u64, running_sample_length);
 
-void perf_sample_event_took(u64 sample_len_ns)
+static void perf_duration_warn(struct irq_work *w)
 {
+	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
+
+	local_samples_len = __get_cpu_var(running_sample_length);
+	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
+
+	printk_ratelimited(KERN_WARNING
+			"perf interrupt took too long (%lld > %lld), lowering "
+			"kernel.perf_event_max_sample_rate to %d\n",
+			avg_local_sample_len, allowed_ns >> 1,
+			sysctl_perf_event_sample_rate);
+}
+
+static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
+
+void perf_sample_event_took(u64 sample_len_ns)
+{
 	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
+	u64 avg_local_sample_len;
+	u64 local_samples_len;
 
 	if (allowed_ns == 0)
 		return;
@@ -263,13 +281,14 @@ void perf_sample_event_took(u64 sample_len_ns)
 	sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
-	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %lld), lowering "
-			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len, allowed_ns,
-			sysctl_perf_event_sample_rate);
-
 	update_perf_cpu_limits();
+
+	if (!irq_work_queue(&perf_duration_work)) {
+		early_printk("perf interrupt took too long (%lld > %lld), lowering "
+			     "kernel.perf_event_max_sample_rate to %d\n",
+			     avg_local_sample_len, allowed_ns >> 1,
+			     sysctl_perf_event_sample_rate);
+	}
 }
 
 static atomic64_t perf_event_id;
@@ -1714,7 +1733,7 @@ group_sched_in(struct perf_event *group_event,
 	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group = NULL;
-	struct pmu *pmu = group_event->pmu;
+	struct pmu *pmu = ctx->pmu;
 	u64 now = ctx->time;
 	bool simulate = false;
 
@@ -2563,8 +2582,6 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
 		if (cpuctx->ctx.nr_branch_stack > 0
 		    && pmu->flush_branch_stack) {
 
-			pmu = cpuctx->ctx.pmu;
-
 			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 
 			perf_pmu_disable(pmu);
@@ -6294,7 +6311,7 @@ static int perf_event_idx_default(struct perf_event *event)
 * Ensures all contexts with the same task_ctx_nr have the same
 * pmu_cpu_context too.
 */
-static void *find_pmu_context(int ctxn)
+static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
 {
 	struct pmu *pmu;
 
@@ -7856,14 +7873,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 static void __perf_event_exit_context(void *__info)
 {
 	struct perf_event_context *ctx = __info;
-	struct perf_event *event, *tmp;
+	struct perf_event *event;
 
 	perf_pmu_rotate_stop(ctx->pmu);
 
-	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
-		__perf_remove_from_context(event);
-	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
 		__perf_remove_from_context(event);
+	rcu_read_unlock();
 }
 
 static void perf_event_exit_cpu_context(int cpu)
@@ -7887,11 +7904,11 @@ static void perf_event_exit_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
+	perf_event_exit_cpu_context(cpu);
+
 	mutex_lock(&swhash->hlist_mutex);
 	swevent_hlist_release(swhash);
 	mutex_unlock(&swhash->hlist_mutex);
-
-	perf_event_exit_cpu_context(cpu);
 }
 #else
 static inline void perf_event_exit_cpu(int cpu) { }
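The first two perf hunks above move the "samples too long" warning out of the sampling interrupt and into an irq_work callback, with early_printk() as the fallback when the work cannot be queued. A stripped-down sketch of that deferral pattern, with hypothetical names:

static void example_warn(struct irq_work *w)
{
	printk_ratelimited(KERN_WARNING "deferred diagnostic\n");
}

static DEFINE_IRQ_WORK(example_warn_work, example_warn);

	/* from the hot (possibly NMI) path: the full printk() is deferred, and the
	 * cheaper early_printk() is used only if the work is already pending */
	if (!irq_work_queue(&example_warn_work))
		early_printk("example: diagnostic dropped to early console\n");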
diff --git a/kernel/extable.c b/kernel/extable.c
index 763faf037ec1..d8a6446adbcb 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -36,7 +36,7 @@ extern struct exception_table_entry __start___ex_table[];
 extern struct exception_table_entry __stop___ex_table[];
 
 /* Cleared by build time tools if the table is already sorted. */
-u32 __initdata main_extable_sort_needed = 1;
+u32 __initdata __visible main_extable_sort_needed = 1;
 
 /* Sort the kernel's built-in exception table */
 void __init sort_main_extable(void)
diff --git a/kernel/fork.c b/kernel/fork.c
index a17621c6cd42..332688e5e7b4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -237,6 +237,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	task_numa_free(tsk);
 	security_task_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c index 44a1261cb9ff..67dacaf93e56 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -157,7 +157,9 @@ | |||
| 157 | * enqueue. | 157 | * enqueue. |
| 158 | */ | 158 | */ |
| 159 | 159 | ||
| 160 | #ifndef CONFIG_HAVE_FUTEX_CMPXCHG | ||
| 160 | int __read_mostly futex_cmpxchg_enabled; | 161 | int __read_mostly futex_cmpxchg_enabled; |
| 162 | #endif | ||
| 161 | 163 | ||
| 162 | /* | 164 | /* |
| 163 | * Futex flags used to encode options to functions and preserve them across | 165 | * Futex flags used to encode options to functions and preserve them across |
| @@ -234,6 +236,7 @@ static const struct futex_q futex_q_init = { | |||
| 234 | * waiting on a futex. | 236 | * waiting on a futex. |
| 235 | */ | 237 | */ |
| 236 | struct futex_hash_bucket { | 238 | struct futex_hash_bucket { |
| 239 | atomic_t waiters; | ||
| 237 | spinlock_t lock; | 240 | spinlock_t lock; |
| 238 | struct plist_head chain; | 241 | struct plist_head chain; |
| 239 | } ____cacheline_aligned_in_smp; | 242 | } ____cacheline_aligned_in_smp; |
| @@ -253,22 +256,37 @@ static inline void futex_get_mm(union futex_key *key) | |||
| 253 | smp_mb__after_atomic_inc(); | 256 | smp_mb__after_atomic_inc(); |
| 254 | } | 257 | } |
| 255 | 258 | ||
| 256 | static inline bool hb_waiters_pending(struct futex_hash_bucket *hb) | 259 | /* |
| 260 | * Reflects a new waiter being added to the waitqueue. | ||
| 261 | */ | ||
| 262 | static inline void hb_waiters_inc(struct futex_hash_bucket *hb) | ||
| 257 | { | 263 | { |
| 258 | #ifdef CONFIG_SMP | 264 | #ifdef CONFIG_SMP |
| 265 | atomic_inc(&hb->waiters); | ||
| 259 | /* | 266 | /* |
| 260 | * Tasks trying to enter the critical region are most likely | 267 | * Full barrier (A), see the ordering comment above. |
| 261 | * potential waiters that will be added to the plist. Ensure | ||
| 262 | * that wakers won't miss to-be-slept tasks in the window between | ||
| 263 | * the wait call and the actual plist_add. | ||
| 264 | */ | 268 | */ |
| 265 | if (spin_is_locked(&hb->lock)) | 269 | smp_mb__after_atomic_inc(); |
| 266 | return true; | 270 | #endif |
| 267 | smp_rmb(); /* Make sure we check the lock state first */ | 271 | } |
| 272 | |||
| 273 | /* | ||
| 274 | * Reflects a waiter being removed from the waitqueue by wakeup | ||
| 275 | * paths. | ||
| 276 | */ | ||
| 277 | static inline void hb_waiters_dec(struct futex_hash_bucket *hb) | ||
| 278 | { | ||
| 279 | #ifdef CONFIG_SMP | ||
| 280 | atomic_dec(&hb->waiters); | ||
| 281 | #endif | ||
| 282 | } | ||
| 268 | 283 | ||
| 269 | return !plist_head_empty(&hb->chain); | 284 | static inline int hb_waiters_pending(struct futex_hash_bucket *hb) |
| 285 | { | ||
| 286 | #ifdef CONFIG_SMP | ||
| 287 | return atomic_read(&hb->waiters); | ||
| 270 | #else | 288 | #else |
| 271 | return true; | 289 | return 1; |
| 272 | #endif | 290 | #endif |
| 273 | } | 291 | } |
| 274 | 292 | ||
| @@ -954,6 +972,7 @@ static void __unqueue_futex(struct futex_q *q) | |||
| 954 | 972 | ||
| 955 | hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); | 973 | hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); |
| 956 | plist_del(&q->list, &hb->chain); | 974 | plist_del(&q->list, &hb->chain); |
| 975 | hb_waiters_dec(hb); | ||
| 957 | } | 976 | } |
| 958 | 977 | ||
| 959 | /* | 978 | /* |
| @@ -1257,7 +1276,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
| 1257 | */ | 1276 | */ |
| 1258 | if (likely(&hb1->chain != &hb2->chain)) { | 1277 | if (likely(&hb1->chain != &hb2->chain)) { |
| 1259 | plist_del(&q->list, &hb1->chain); | 1278 | plist_del(&q->list, &hb1->chain); |
| 1279 | hb_waiters_dec(hb1); | ||
| 1260 | plist_add(&q->list, &hb2->chain); | 1280 | plist_add(&q->list, &hb2->chain); |
| 1281 | hb_waiters_inc(hb2); | ||
| 1261 | q->lock_ptr = &hb2->lock; | 1282 | q->lock_ptr = &hb2->lock; |
| 1262 | } | 1283 | } |
| 1263 | get_futex_key_refs(key2); | 1284 | get_futex_key_refs(key2); |
| @@ -1600,6 +1621,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 1600 | struct futex_hash_bucket *hb; | 1621 | struct futex_hash_bucket *hb; |
| 1601 | 1622 | ||
| 1602 | hb = hash_futex(&q->key); | 1623 | hb = hash_futex(&q->key); |
| 1624 | |||
| 1625 | /* | ||
| 1626 | * Increment the counter before taking the lock so that | ||
| 1627 | * a potential waker won't miss a to-be-slept task that is | ||
| 1628 | * waiting for the spinlock. This is safe as all queue_lock() | ||
| 1629 | * users end up calling queue_me(). Similarly, for housekeeping, | ||
| 1630 | * decrement the counter at queue_unlock() when some error has | ||
| 1631 | * occurred and we don't end up adding the task to the list. | ||
| 1632 | */ | ||
| 1633 | hb_waiters_inc(hb); | ||
| 1634 | |||
| 1603 | q->lock_ptr = &hb->lock; | 1635 | q->lock_ptr = &hb->lock; |
| 1604 | 1636 | ||
| 1605 | spin_lock(&hb->lock); /* implies MB (A) */ | 1637 | spin_lock(&hb->lock); /* implies MB (A) */ |
| @@ -1611,6 +1643,7 @@ queue_unlock(struct futex_hash_bucket *hb) | |||
| 1611 | __releases(&hb->lock) | 1643 | __releases(&hb->lock) |
| 1612 | { | 1644 | { |
| 1613 | spin_unlock(&hb->lock); | 1645 | spin_unlock(&hb->lock); |
| 1646 | hb_waiters_dec(hb); | ||
| 1614 | } | 1647 | } |
| 1615 | 1648 | ||
| 1616 | /** | 1649 | /** |
| @@ -2342,6 +2375,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
| 2342 | * Unqueue the futex_q and determine which it was. | 2375 | * Unqueue the futex_q and determine which it was. |
| 2343 | */ | 2376 | */ |
| 2344 | plist_del(&q->list, &hb->chain); | 2377 | plist_del(&q->list, &hb->chain); |
| 2378 | hb_waiters_dec(hb); | ||
| 2345 | 2379 | ||
| 2346 | /* Handle spurious wakeups gracefully */ | 2380 | /* Handle spurious wakeups gracefully */ |
| 2347 | ret = -EWOULDBLOCK; | 2381 | ret = -EWOULDBLOCK; |
| @@ -2843,9 +2877,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, | |||
| 2843 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); | 2877 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |
| 2844 | } | 2878 | } |
| 2845 | 2879 | ||
| 2846 | static int __init futex_init(void) | 2880 | static void __init futex_detect_cmpxchg(void) |
| 2847 | { | 2881 | { |
| 2882 | #ifndef CONFIG_HAVE_FUTEX_CMPXCHG | ||
| 2848 | u32 curval; | 2883 | u32 curval; |
| 2884 | |||
| 2885 | /* | ||
| 2886 | * This will fail and we want it. Some arch implementations do | ||
| 2887 | * runtime detection of the futex_atomic_cmpxchg_inatomic() | ||
| 2888 | * functionality. We want to know that before we call in any | ||
| 2889 | * of the complex code paths. Also we want to prevent | ||
| 2890 | * registration of robust lists in that case. NULL is | ||
| 2891 | * guaranteed to fault and we get -EFAULT on functional | ||
| 2892 | * implementation, the non-functional ones will return | ||
| 2893 | * -ENOSYS. | ||
| 2894 | */ | ||
| 2895 | if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) | ||
| 2896 | futex_cmpxchg_enabled = 1; | ||
| 2897 | #endif | ||
| 2898 | } | ||
| 2899 | |||
| 2900 | static int __init futex_init(void) | ||
| 2901 | { | ||
| 2849 | unsigned int futex_shift; | 2902 | unsigned int futex_shift; |
| 2850 | unsigned long i; | 2903 | unsigned long i; |
| 2851 | 2904 | ||
| @@ -2861,20 +2914,11 @@ static int __init futex_init(void) | |||
| 2861 | &futex_shift, NULL, | 2914 | &futex_shift, NULL, |
| 2862 | futex_hashsize, futex_hashsize); | 2915 | futex_hashsize, futex_hashsize); |
| 2863 | futex_hashsize = 1UL << futex_shift; | 2916 | futex_hashsize = 1UL << futex_shift; |
| 2864 | /* | 2917 | |
| 2865 | * This will fail and we want it. Some arch implementations do | 2918 | futex_detect_cmpxchg(); |
| 2866 | * runtime detection of the futex_atomic_cmpxchg_inatomic() | ||
| 2867 | * functionality. We want to know that before we call in any | ||
| 2868 | * of the complex code paths. Also we want to prevent | ||
| 2869 | * registration of robust lists in that case. NULL is | ||
| 2870 | * guaranteed to fault and we get -EFAULT on functional | ||
| 2871 | * implementation, the non-functional ones will return | ||
| 2872 | * -ENOSYS. | ||
| 2873 | */ | ||
| 2874 | if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) | ||
| 2875 | futex_cmpxchg_enabled = 1; | ||
| 2876 | 2919 | ||
| 2877 | for (i = 0; i < futex_hashsize; i++) { | 2920 | for (i = 0; i < futex_hashsize; i++) { |
| 2921 | atomic_set(&futex_queues[i].waiters, 0); | ||
| 2878 | plist_head_init(&futex_queues[i].chain); | 2922 | plist_head_init(&futex_queues[i].chain); |
| 2879 | spin_lock_init(&futex_queues[i].lock); | 2923 | spin_lock_init(&futex_queues[i].lock); |
| 2880 | } | 2924 | } |
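
The futex.c hunks above pair an atomic waiter count with each hash bucket (initialized to zero in futex_init()) so that the wake path can skip taking hb->lock when nobody is queued, while queue_lock()/queue_unlock() and the requeue paths keep the count in step with the plist. A rough sketch of the pattern, not the patch's exact helpers: the real hb_waiters_*() functions are compiled out on !CONFIG_SMP and use a lighter barrier than the plain smp_mb() shown here, and futex_wake_fastcheck() is a made-up name for the wake-side test.

static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
        atomic_inc(&hb->waiters);
        smp_mb();       /* full barrier so a concurrent waker observes the increment */
}

static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
        atomic_dec(&hb->waiters);
}

static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
        return atomic_read(&hb->waiters);
}

/* Wake side (hypothetical helper name): bail out before touching hb->lock. */
static int futex_wake_fastcheck(struct futex_hash_bucket *hb)
{
        return hb_waiters_pending(hb);  /* 0 means no waiters, so skip the bucket lock */
}
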
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf68bb36fe58..f14033700c25 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/mutex.h> | 10 | #include <linux/mutex.h> |
| 11 | #include <linux/of.h> | 11 | #include <linux/of.h> |
| 12 | #include <linux/of_address.h> | 12 | #include <linux/of_address.h> |
| 13 | #include <linux/of_irq.h> | ||
| 13 | #include <linux/topology.h> | 14 | #include <linux/topology.h> |
| 14 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
| 15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 481a13c43b17..d3bf660cb57f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -802,8 +802,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, | |||
| 802 | 802 | ||
| 803 | static void wake_threads_waitq(struct irq_desc *desc) | 803 | static void wake_threads_waitq(struct irq_desc *desc) |
| 804 | { | 804 | { |
| 805 | if (atomic_dec_and_test(&desc->threads_active) && | 805 | if (atomic_dec_and_test(&desc->threads_active)) |
| 806 | waitqueue_active(&desc->wait_for_threads)) | ||
| 807 | wake_up(&desc->wait_for_threads); | 806 | wake_up(&desc->wait_for_threads); |
| 808 | } | 807 | } |
| 809 | 808 | ||
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 55fcce6065cf..a82170e2fa78 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c | |||
| @@ -61,11 +61,11 @@ void __weak arch_irq_work_raise(void) | |||
| 61 | * | 61 | * |
| 62 | * Can be re-enqueued while the callback is still in progress. | 62 | * Can be re-enqueued while the callback is still in progress. |
| 63 | */ | 63 | */ |
| 64 | void irq_work_queue(struct irq_work *work) | 64 | bool irq_work_queue(struct irq_work *work) |
| 65 | { | 65 | { |
| 66 | /* Only queue if not already pending */ | 66 | /* Only queue if not already pending */ |
| 67 | if (!irq_work_claim(work)) | 67 | if (!irq_work_claim(work)) |
| 68 | return; | 68 | return false; |
| 69 | 69 | ||
| 70 | /* Queue the entry and raise the IPI if needed. */ | 70 | /* Queue the entry and raise the IPI if needed. */ |
| 71 | preempt_disable(); | 71 | preempt_disable(); |
| @@ -83,6 +83,8 @@ void irq_work_queue(struct irq_work *work) | |||
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | preempt_enable(); | 85 | preempt_enable(); |
| 86 | |||
| 87 | return true; | ||
| 86 | } | 88 | } |
| 87 | EXPORT_SYMBOL_GPL(irq_work_queue); | 89 | EXPORT_SYMBOL_GPL(irq_work_queue); |
| 88 | 90 | ||
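
With irq_work_queue() now returning bool, a caller can tell a fresh enqueue apart from work that was already pending. A minimal, hypothetical caller (my_work, my_work_func and kick_my_work are illustration-only names):

static struct irq_work my_work;
static atomic_t my_work_requeues;

static void my_work_func(struct irq_work *work)
{
        /* runs later from the irq_work IPI / tick path */
}

static void kick_my_work(void)
{
        if (irq_work_queue(&my_work))
                atomic_inc(&my_work_requeues);  /* actually queued this time */
        /* false: it was still pending from an earlier call */
}

/* during init: init_irq_work(&my_work, my_work_func); */
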
diff --git a/kernel/kexec.c b/kernel/kexec.c index 60bafbed06ab..45601cf41bee 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -1039,10 +1039,10 @@ void __weak crash_unmap_reserved_pages(void) | |||
| 1039 | {} | 1039 | {} |
| 1040 | 1040 | ||
| 1041 | #ifdef CONFIG_COMPAT | 1041 | #ifdef CONFIG_COMPAT |
| 1042 | asmlinkage long compat_sys_kexec_load(unsigned long entry, | 1042 | COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, |
| 1043 | unsigned long nr_segments, | 1043 | compat_ulong_t, nr_segments, |
| 1044 | struct compat_kexec_segment __user *segments, | 1044 | struct compat_kexec_segment __user *, segments, |
| 1045 | unsigned long flags) | 1045 | compat_ulong_t, flags) |
| 1046 | { | 1046 | { |
| 1047 | struct compat_kexec_segment in; | 1047 | struct compat_kexec_segment in; |
| 1048 | struct kexec_segment out, __user *ksegments; | 1048 | struct kexec_segment out, __user *ksegments; |
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index d945a949760f..e660964086e2 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
| @@ -19,6 +19,8 @@ | |||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/capability.h> | 20 | #include <linux/capability.h> |
| 21 | 21 | ||
| 22 | #include <linux/rcupdate.h> /* rcu_expedited */ | ||
| 23 | |||
| 22 | #define KERNEL_ATTR_RO(_name) \ | 24 | #define KERNEL_ATTR_RO(_name) \ |
| 23 | static struct kobj_attribute _name##_attr = __ATTR_RO(_name) | 25 | static struct kobj_attribute _name##_attr = __ATTR_RO(_name) |
| 24 | 26 | ||
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5e7f66..306a76b51e0f 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | obj-y += mutex.o semaphore.o rwsem.o lglock.o | 2 | obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o |
| 3 | 3 | ||
| 4 | ifdef CONFIG_FUNCTION_TRACER | 4 | ifdef CONFIG_FUNCTION_TRACER |
| 5 | CFLAGS_REMOVE_lockdep.o = -pg | 5 | CFLAGS_REMOVE_lockdep.o = -pg |
| @@ -23,3 +23,4 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |||
| 23 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 23 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| 24 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | 24 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o |
| 25 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | 25 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o |
| 26 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | ||
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index eb8a54783fa0..b0e9467922e1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
| @@ -1936,12 +1936,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
| 1936 | 1936 | ||
| 1937 | for (;;) { | 1937 | for (;;) { |
| 1938 | int distance = curr->lockdep_depth - depth + 1; | 1938 | int distance = curr->lockdep_depth - depth + 1; |
| 1939 | hlock = curr->held_locks + depth-1; | 1939 | hlock = curr->held_locks + depth - 1; |
| 1940 | /* | 1940 | /* |
| 1941 | * Only non-recursive-read entries get new dependencies | 1941 | * Only non-recursive-read entries get new dependencies |
| 1942 | * added: | 1942 | * added: |
| 1943 | */ | 1943 | */ |
| 1944 | if (hlock->read != 2) { | 1944 | if (hlock->read != 2 && hlock->check) { |
| 1945 | if (!check_prev_add(curr, hlock, next, | 1945 | if (!check_prev_add(curr, hlock, next, |
| 1946 | distance, trylock_loop)) | 1946 | distance, trylock_loop)) |
| 1947 | return 0; | 1947 | return 0; |
| @@ -2098,7 +2098,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
| 2098 | * (If lookup_chain_cache() returns with 1 it acquires | 2098 | * (If lookup_chain_cache() returns with 1 it acquires |
| 2099 | * graph_lock for us) | 2099 | * graph_lock for us) |
| 2100 | */ | 2100 | */ |
| 2101 | if (!hlock->trylock && (hlock->check == 2) && | 2101 | if (!hlock->trylock && hlock->check && |
| 2102 | lookup_chain_cache(curr, hlock, chain_key)) { | 2102 | lookup_chain_cache(curr, hlock, chain_key)) { |
| 2103 | /* | 2103 | /* |
| 2104 | * Check whether last held lock: | 2104 | * Check whether last held lock: |
| @@ -2517,7 +2517,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) | |||
| 2517 | 2517 | ||
| 2518 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); | 2518 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); |
| 2519 | 2519 | ||
| 2520 | if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) | 2520 | if (!hlock->check) |
| 2521 | continue; | 2521 | continue; |
| 2522 | 2522 | ||
| 2523 | if (!mark_lock(curr, hlock, usage_bit)) | 2523 | if (!mark_lock(curr, hlock, usage_bit)) |
| @@ -2557,7 +2557,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip) | |||
| 2557 | debug_atomic_inc(hardirqs_on_events); | 2557 | debug_atomic_inc(hardirqs_on_events); |
| 2558 | } | 2558 | } |
| 2559 | 2559 | ||
| 2560 | void trace_hardirqs_on_caller(unsigned long ip) | 2560 | __visible void trace_hardirqs_on_caller(unsigned long ip) |
| 2561 | { | 2561 | { |
| 2562 | time_hardirqs_on(CALLER_ADDR0, ip); | 2562 | time_hardirqs_on(CALLER_ADDR0, ip); |
| 2563 | 2563 | ||
| @@ -2610,7 +2610,7 @@ EXPORT_SYMBOL(trace_hardirqs_on); | |||
| 2610 | /* | 2610 | /* |
| 2611 | * Hardirqs were disabled: | 2611 | * Hardirqs were disabled: |
| 2612 | */ | 2612 | */ |
| 2613 | void trace_hardirqs_off_caller(unsigned long ip) | 2613 | __visible void trace_hardirqs_off_caller(unsigned long ip) |
| 2614 | { | 2614 | { |
| 2615 | struct task_struct *curr = current; | 2615 | struct task_struct *curr = current; |
| 2616 | 2616 | ||
| @@ -3055,9 +3055,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3055 | int class_idx; | 3055 | int class_idx; |
| 3056 | u64 chain_key; | 3056 | u64 chain_key; |
| 3057 | 3057 | ||
| 3058 | if (!prove_locking) | ||
| 3059 | check = 1; | ||
| 3060 | |||
| 3061 | if (unlikely(!debug_locks)) | 3058 | if (unlikely(!debug_locks)) |
| 3062 | return 0; | 3059 | return 0; |
| 3063 | 3060 | ||
| @@ -3069,8 +3066,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3069 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 3066 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 3070 | return 0; | 3067 | return 0; |
| 3071 | 3068 | ||
| 3072 | if (lock->key == &__lockdep_no_validate__) | 3069 | if (!prove_locking || lock->key == &__lockdep_no_validate__) |
| 3073 | check = 1; | 3070 | check = 0; |
| 3074 | 3071 | ||
| 3075 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) | 3072 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
| 3076 | class = lock->class_cache[subclass]; | 3073 | class = lock->class_cache[subclass]; |
| @@ -3138,7 +3135,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3138 | hlock->holdtime_stamp = lockstat_clock(); | 3135 | hlock->holdtime_stamp = lockstat_clock(); |
| 3139 | #endif | 3136 | #endif |
| 3140 | 3137 | ||
| 3141 | if (check == 2 && !mark_irqflags(curr, hlock)) | 3138 | if (check && !mark_irqflags(curr, hlock)) |
| 3142 | return 0; | 3139 | return 0; |
| 3143 | 3140 | ||
| 3144 | /* mark it as used: */ | 3141 | /* mark it as used: */ |
| @@ -4191,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) | |||
| 4191 | } | 4188 | } |
| 4192 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
| 4193 | 4190 | ||
| 4194 | void lockdep_sys_exit(void) | 4191 | asmlinkage void lockdep_sys_exit(void) |
| 4195 | { | 4192 | { |
| 4196 | struct task_struct *curr = current; | 4193 | struct task_struct *curr = current; |
| 4197 | 4194 | ||
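
The lockdep changes collapse the old 0/1/2 "check" level into a boolean: a lock is either fully validated (IRQ-state marking plus dependency chains) or skipped entirely, with !prove_locking and __lockdep_no_validate__ both folded into the skip case. A hypothetical helper summarizing the new test; lockdep itself open-codes this inside __lock_acquire():

static inline int lockdep_should_validate(struct lockdep_map *lock, int check)
{
        if (!prove_locking || lock->key == &__lockdep_no_validate__)
                return 0;       /* no mark_irqflags(), no chain validation */
        return check;           /* plain 0/1 now; the "2 == full check" level is gone */
}
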
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c new file mode 100644 index 000000000000..f26b1a18e34e --- /dev/null +++ b/kernel/locking/locktorture.c | |||
| @@ -0,0 +1,452 @@ | |||
| 1 | /* | ||
| 2 | * Module-based torture test facility for locking | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, you can access it online at | ||
| 16 | * http://www.gnu.org/licenses/gpl-2.0.html. | ||
| 17 | * | ||
| 18 | * Copyright (C) IBM Corporation, 2014 | ||
| 19 | * | ||
| 20 | * Author: Paul E. McKenney <paulmck@us.ibm.com> | ||
| 21 | * Based on kernel/rcu/torture.c. | ||
| 22 | */ | ||
| 23 | #include <linux/types.h> | ||
| 24 | #include <linux/kernel.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/module.h> | ||
| 27 | #include <linux/kthread.h> | ||
| 28 | #include <linux/err.h> | ||
| 29 | #include <linux/spinlock.h> | ||
| 30 | #include <linux/smp.h> | ||
| 31 | #include <linux/interrupt.h> | ||
| 32 | #include <linux/sched.h> | ||
| 33 | #include <linux/atomic.h> | ||
| 34 | #include <linux/bitops.h> | ||
| 35 | #include <linux/completion.h> | ||
| 36 | #include <linux/moduleparam.h> | ||
| 37 | #include <linux/percpu.h> | ||
| 38 | #include <linux/notifier.h> | ||
| 39 | #include <linux/reboot.h> | ||
| 40 | #include <linux/freezer.h> | ||
| 41 | #include <linux/cpu.h> | ||
| 42 | #include <linux/delay.h> | ||
| 43 | #include <linux/stat.h> | ||
| 44 | #include <linux/slab.h> | ||
| 45 | #include <linux/trace_clock.h> | ||
| 46 | #include <asm/byteorder.h> | ||
| 47 | #include <linux/torture.h> | ||
| 48 | |||
| 49 | MODULE_LICENSE("GPL"); | ||
| 50 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>"); | ||
| 51 | |||
| 52 | torture_param(int, nwriters_stress, -1, | ||
| 53 | "Number of write-locking stress-test threads"); | ||
| 54 | torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); | ||
| 55 | torture_param(int, onoff_interval, 0, | ||
| 56 | "Time between CPU hotplugs (s), 0=disable"); | ||
| 57 | torture_param(int, shuffle_interval, 3, | ||
| 58 | "Number of jiffies between shuffles, 0=disable"); | ||
| 59 | torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable."); | ||
| 60 | torture_param(int, stat_interval, 60, | ||
| 61 | "Number of seconds between stats printk()s"); | ||
| 62 | torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); | ||
| 63 | torture_param(bool, verbose, true, | ||
| 64 | "Enable verbose debugging printk()s"); | ||
| 65 | |||
| 66 | static char *torture_type = "spin_lock"; | ||
| 67 | module_param(torture_type, charp, 0444); | ||
| 68 | MODULE_PARM_DESC(torture_type, | ||
| 69 | "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); | ||
| 70 | |||
| 71 | static atomic_t n_lock_torture_errors; | ||
| 72 | |||
| 73 | static struct task_struct *stats_task; | ||
| 74 | static struct task_struct **writer_tasks; | ||
| 75 | |||
| 76 | static int nrealwriters_stress; | ||
| 77 | static bool lock_is_write_held; | ||
| 78 | |||
| 79 | struct lock_writer_stress_stats { | ||
| 80 | long n_write_lock_fail; | ||
| 81 | long n_write_lock_acquired; | ||
| 82 | }; | ||
| 83 | static struct lock_writer_stress_stats *lwsa; | ||
| 84 | |||
| 85 | #if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) | ||
| 86 | #define LOCKTORTURE_RUNNABLE_INIT 1 | ||
| 87 | #else | ||
| 88 | #define LOCKTORTURE_RUNNABLE_INIT 0 | ||
| 89 | #endif | ||
| 90 | int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; | ||
| 91 | module_param(locktorture_runnable, int, 0444); | ||
| 92 | MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); | ||
| 93 | |||
| 94 | /* Forward reference. */ | ||
| 95 | static void lock_torture_cleanup(void); | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Operations vector for selecting different types of tests. | ||
| 99 | */ | ||
| 100 | struct lock_torture_ops { | ||
| 101 | void (*init)(void); | ||
| 102 | int (*writelock)(void); | ||
| 103 | void (*write_delay)(struct torture_random_state *trsp); | ||
| 104 | void (*writeunlock)(void); | ||
| 105 | unsigned long flags; | ||
| 106 | const char *name; | ||
| 107 | }; | ||
| 108 | |||
| 109 | static struct lock_torture_ops *cur_ops; | ||
| 110 | |||
| 111 | /* | ||
| 112 | * Definitions for lock torture testing. | ||
| 113 | */ | ||
| 114 | |||
| 115 | static int torture_lock_busted_write_lock(void) | ||
| 116 | { | ||
| 117 | return 0; /* BUGGY, do not use in real life!!! */ | ||
| 118 | } | ||
| 119 | |||
| 120 | static void torture_lock_busted_write_delay(struct torture_random_state *trsp) | ||
| 121 | { | ||
| 122 | const unsigned long longdelay_us = 100; | ||
| 123 | |||
| 124 | /* We want a long delay occasionally to force massive contention. */ | ||
| 125 | if (!(torture_random(trsp) % | ||
| 126 | (nrealwriters_stress * 2000 * longdelay_us))) | ||
| 127 | mdelay(longdelay_us); | ||
| 128 | #ifdef CONFIG_PREEMPT | ||
| 129 | if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) | ||
| 130 | preempt_schedule(); /* Allow test to be preempted. */ | ||
| 131 | #endif | ||
| 132 | } | ||
| 133 | |||
| 134 | static void torture_lock_busted_write_unlock(void) | ||
| 135 | { | ||
| 136 | /* BUGGY, do not use in real life!!! */ | ||
| 137 | } | ||
| 138 | |||
| 139 | static struct lock_torture_ops lock_busted_ops = { | ||
| 140 | .writelock = torture_lock_busted_write_lock, | ||
| 141 | .write_delay = torture_lock_busted_write_delay, | ||
| 142 | .writeunlock = torture_lock_busted_write_unlock, | ||
| 143 | .name = "lock_busted" | ||
| 144 | }; | ||
| 145 | |||
| 146 | static DEFINE_SPINLOCK(torture_spinlock); | ||
| 147 | |||
| 148 | static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock) | ||
| 149 | { | ||
| 150 | spin_lock(&torture_spinlock); | ||
| 151 | return 0; | ||
| 152 | } | ||
| 153 | |||
| 154 | static void torture_spin_lock_write_delay(struct torture_random_state *trsp) | ||
| 155 | { | ||
| 156 | const unsigned long shortdelay_us = 2; | ||
| 157 | const unsigned long longdelay_us = 100; | ||
| 158 | |||
| 159 | /* We want a short delay mostly to emulate likely code, and | ||
| 160 | * we want a long delay occasionally to force massive contention. | ||
| 161 | */ | ||
| 162 | if (!(torture_random(trsp) % | ||
| 163 | (nrealwriters_stress * 2000 * longdelay_us))) | ||
| 164 | mdelay(longdelay_us); | ||
| 165 | if (!(torture_random(trsp) % | ||
| 166 | (nrealwriters_stress * 2 * shortdelay_us))) | ||
| 167 | udelay(shortdelay_us); | ||
| 168 | #ifdef CONFIG_PREEMPT | ||
| 169 | if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) | ||
| 170 | preempt_schedule(); /* Allow test to be preempted. */ | ||
| 171 | #endif | ||
| 172 | } | ||
| 173 | |||
| 174 | static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) | ||
| 175 | { | ||
| 176 | spin_unlock(&torture_spinlock); | ||
| 177 | } | ||
| 178 | |||
| 179 | static struct lock_torture_ops spin_lock_ops = { | ||
| 180 | .writelock = torture_spin_lock_write_lock, | ||
| 181 | .write_delay = torture_spin_lock_write_delay, | ||
| 182 | .writeunlock = torture_spin_lock_write_unlock, | ||
| 183 | .name = "spin_lock" | ||
| 184 | }; | ||
| 185 | |||
| 186 | static int torture_spin_lock_write_lock_irq(void) | ||
| 187 | __acquires(torture_spinlock_irq) | ||
| 188 | { | ||
| 189 | unsigned long flags; | ||
| 190 | |||
| 191 | spin_lock_irqsave(&torture_spinlock, flags); | ||
| 192 | cur_ops->flags = flags; | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | static void torture_lock_spin_write_unlock_irq(void) | ||
| 197 | __releases(torture_spinlock) | ||
| 198 | { | ||
| 199 | spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); | ||
| 200 | } | ||
| 201 | |||
| 202 | static struct lock_torture_ops spin_lock_irq_ops = { | ||
| 203 | .writelock = torture_spin_lock_write_lock_irq, | ||
| 204 | .write_delay = torture_spin_lock_write_delay, | ||
| 205 | .writeunlock = torture_lock_spin_write_unlock_irq, | ||
| 206 | .name = "spin_lock_irq" | ||
| 207 | }; | ||
| 208 | |||
| 209 | /* | ||
| 210 | * Lock torture writer kthread. Repeatedly acquires and releases | ||
| 211 | * the lock, checking for duplicate acquisitions. | ||
| 212 | */ | ||
| 213 | static int lock_torture_writer(void *arg) | ||
| 214 | { | ||
| 215 | struct lock_writer_stress_stats *lwsp = arg; | ||
| 216 | static DEFINE_TORTURE_RANDOM(rand); | ||
| 217 | |||
| 218 | VERBOSE_TOROUT_STRING("lock_torture_writer task started"); | ||
| 219 | set_user_nice(current, 19); | ||
| 220 | |||
| 221 | do { | ||
| 222 | schedule_timeout_uninterruptible(1); | ||
| 223 | cur_ops->writelock(); | ||
| 224 | if (WARN_ON_ONCE(lock_is_write_held)) | ||
| 225 | lwsp->n_write_lock_fail++; | ||
| 226 | lock_is_write_held = 1; | ||
| 227 | lwsp->n_write_lock_acquired++; | ||
| 228 | cur_ops->write_delay(&rand); | ||
| 229 | lock_is_write_held = 0; | ||
| 230 | cur_ops->writeunlock(); | ||
| 231 | stutter_wait("lock_torture_writer"); | ||
| 232 | } while (!torture_must_stop()); | ||
| 233 | torture_kthread_stopping("lock_torture_writer"); | ||
| 234 | return 0; | ||
| 235 | } | ||
| 236 | |||
| 237 | /* | ||
| 238 | * Create a lock-torture-statistics message in the specified buffer. | ||
| 239 | */ | ||
| 240 | static void lock_torture_printk(char *page) | ||
| 241 | { | ||
| 242 | bool fail = 0; | ||
| 243 | int i; | ||
| 244 | long max = 0; | ||
| 245 | long min = lwsa[0].n_write_lock_acquired; | ||
| 246 | long long sum = 0; | ||
| 247 | |||
| 248 | for (i = 0; i < nrealwriters_stress; i++) { | ||
| 249 | if (lwsa[i].n_write_lock_fail) | ||
| 250 | fail = true; | ||
| 251 | sum += lwsa[i].n_write_lock_acquired; | ||
| 252 | if (max < lwsa[i].n_write_lock_fail) | ||
| 253 | max = lwsa[i].n_write_lock_fail; | ||
| 254 | if (min > lwsa[i].n_write_lock_fail) | ||
| 255 | min = lwsa[i].n_write_lock_fail; | ||
| 256 | } | ||
| 257 | page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); | ||
| 258 | page += sprintf(page, | ||
| 259 | "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", | ||
| 260 | sum, max, min, max / 2 > min ? "???" : "", | ||
| 261 | fail, fail ? "!!!" : ""); | ||
| 262 | if (fail) | ||
| 263 | atomic_inc(&n_lock_torture_errors); | ||
| 264 | } | ||
| 265 | |||
| 266 | /* | ||
| 267 | * Print torture statistics. Caller must ensure that there is only one | ||
| 268 | * call to this function at a given time!!! This is normally accomplished | ||
| 269 | * by relying on the module system to only have one copy of the module | ||
| 270 | * loaded, and then by giving the lock_torture_stats kthread full control | ||
| 271 | * (or the init/cleanup functions when lock_torture_stats thread is not | ||
| 272 | * running). | ||
| 273 | */ | ||
| 274 | static void lock_torture_stats_print(void) | ||
| 275 | { | ||
| 276 | int size = nrealwriters_stress * 200 + 8192; | ||
| 277 | char *buf; | ||
| 278 | |||
| 279 | buf = kmalloc(size, GFP_KERNEL); | ||
| 280 | if (!buf) { | ||
| 281 | pr_err("lock_torture_stats_print: Out of memory, need: %d", | ||
| 282 | size); | ||
| 283 | return; | ||
| 284 | } | ||
| 285 | lock_torture_printk(buf); | ||
| 286 | pr_alert("%s", buf); | ||
| 287 | kfree(buf); | ||
| 288 | } | ||
| 289 | |||
| 290 | /* | ||
| 291 | * Periodically prints torture statistics, if periodic statistics printing | ||
| 292 | * was specified via the stat_interval module parameter. | ||
| 293 | * | ||
| 294 | * No need to worry about fullstop here, since this one doesn't reference | ||
| 295 | * volatile state or register callbacks. | ||
| 296 | */ | ||
| 297 | static int lock_torture_stats(void *arg) | ||
| 298 | { | ||
| 299 | VERBOSE_TOROUT_STRING("lock_torture_stats task started"); | ||
| 300 | do { | ||
| 301 | schedule_timeout_interruptible(stat_interval * HZ); | ||
| 302 | lock_torture_stats_print(); | ||
| 303 | torture_shutdown_absorb("lock_torture_stats"); | ||
| 304 | } while (!torture_must_stop()); | ||
| 305 | torture_kthread_stopping("lock_torture_stats"); | ||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | |||
| 309 | static inline void | ||
| 310 | lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, | ||
| 311 | const char *tag) | ||
| 312 | { | ||
| 313 | pr_alert("%s" TORTURE_FLAG | ||
| 314 | "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", | ||
| 315 | torture_type, tag, nrealwriters_stress, stat_interval, verbose, | ||
| 316 | shuffle_interval, stutter, shutdown_secs, | ||
| 317 | onoff_interval, onoff_holdoff); | ||
| 318 | } | ||
| 319 | |||
| 320 | static void lock_torture_cleanup(void) | ||
| 321 | { | ||
| 322 | int i; | ||
| 323 | |||
| 324 | if (torture_cleanup()) | ||
| 325 | return; | ||
| 326 | |||
| 327 | if (writer_tasks) { | ||
| 328 | for (i = 0; i < nrealwriters_stress; i++) | ||
| 329 | torture_stop_kthread(lock_torture_writer, | ||
| 330 | writer_tasks[i]); | ||
| 331 | kfree(writer_tasks); | ||
| 332 | writer_tasks = NULL; | ||
| 333 | } | ||
| 334 | |||
| 335 | torture_stop_kthread(lock_torture_stats, stats_task); | ||
| 336 | lock_torture_stats_print(); /* -After- the stats thread is stopped! */ | ||
| 337 | |||
| 338 | if (atomic_read(&n_lock_torture_errors)) | ||
| 339 | lock_torture_print_module_parms(cur_ops, | ||
| 340 | "End of test: FAILURE"); | ||
| 341 | else if (torture_onoff_failures()) | ||
| 342 | lock_torture_print_module_parms(cur_ops, | ||
| 343 | "End of test: LOCK_HOTPLUG"); | ||
| 344 | else | ||
| 345 | lock_torture_print_module_parms(cur_ops, | ||
| 346 | "End of test: SUCCESS"); | ||
| 347 | } | ||
| 348 | |||
| 349 | static int __init lock_torture_init(void) | ||
| 350 | { | ||
| 351 | int i; | ||
| 352 | int firsterr = 0; | ||
| 353 | static struct lock_torture_ops *torture_ops[] = { | ||
| 354 | &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, | ||
| 355 | }; | ||
| 356 | |||
| 357 | torture_init_begin(torture_type, verbose, &locktorture_runnable); | ||
| 358 | |||
| 359 | /* Process args and tell the world that the torturer is on the job. */ | ||
| 360 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { | ||
| 361 | cur_ops = torture_ops[i]; | ||
| 362 | if (strcmp(torture_type, cur_ops->name) == 0) | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | if (i == ARRAY_SIZE(torture_ops)) { | ||
| 366 | pr_alert("lock-torture: invalid torture type: \"%s\"\n", | ||
| 367 | torture_type); | ||
| 368 | pr_alert("lock-torture types:"); | ||
| 369 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) | ||
| 370 | pr_alert(" %s", torture_ops[i]->name); | ||
| 371 | pr_alert("\n"); | ||
| 372 | torture_init_end(); | ||
| 373 | return -EINVAL; | ||
| 374 | } | ||
| 375 | if (cur_ops->init) | ||
| 376 | cur_ops->init(); /* no "goto unwind" prior to this point!!! */ | ||
| 377 | |||
| 378 | if (nwriters_stress >= 0) | ||
| 379 | nrealwriters_stress = nwriters_stress; | ||
| 380 | else | ||
| 381 | nrealwriters_stress = 2 * num_online_cpus(); | ||
| 382 | lock_torture_print_module_parms(cur_ops, "Start of test"); | ||
| 383 | |||
| 384 | /* Initialize the statistics so that each run gets its own numbers. */ | ||
| 385 | |||
| 386 | lock_is_write_held = 0; | ||
| 387 | lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); | ||
| 388 | if (lwsa == NULL) { | ||
| 389 | VERBOSE_TOROUT_STRING("lwsa: Out of memory"); | ||
| 390 | firsterr = -ENOMEM; | ||
| 391 | goto unwind; | ||
| 392 | } | ||
| 393 | for (i = 0; i < nrealwriters_stress; i++) { | ||
| 394 | lwsa[i].n_write_lock_fail = 0; | ||
| 395 | lwsa[i].n_write_lock_acquired = 0; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* Start up the kthreads. */ | ||
| 399 | |||
| 400 | if (onoff_interval > 0) { | ||
| 401 | firsterr = torture_onoff_init(onoff_holdoff * HZ, | ||
| 402 | onoff_interval * HZ); | ||
| 403 | if (firsterr) | ||
| 404 | goto unwind; | ||
| 405 | } | ||
| 406 | if (shuffle_interval > 0) { | ||
| 407 | firsterr = torture_shuffle_init(shuffle_interval); | ||
| 408 | if (firsterr) | ||
| 409 | goto unwind; | ||
| 410 | } | ||
| 411 | if (shutdown_secs > 0) { | ||
| 412 | firsterr = torture_shutdown_init(shutdown_secs, | ||
| 413 | lock_torture_cleanup); | ||
| 414 | if (firsterr) | ||
| 415 | goto unwind; | ||
| 416 | } | ||
| 417 | if (stutter > 0) { | ||
| 418 | firsterr = torture_stutter_init(stutter); | ||
| 419 | if (firsterr) | ||
| 420 | goto unwind; | ||
| 421 | } | ||
| 422 | |||
| 423 | writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), | ||
| 424 | GFP_KERNEL); | ||
| 425 | if (writer_tasks == NULL) { | ||
| 426 | VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); | ||
| 427 | firsterr = -ENOMEM; | ||
| 428 | goto unwind; | ||
| 429 | } | ||
| 430 | for (i = 0; i < nrealwriters_stress; i++) { | ||
| 431 | firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], | ||
| 432 | writer_tasks[i]); | ||
| 433 | if (firsterr) | ||
| 434 | goto unwind; | ||
| 435 | } | ||
| 436 | if (stat_interval > 0) { | ||
| 437 | firsterr = torture_create_kthread(lock_torture_stats, NULL, | ||
| 438 | stats_task); | ||
| 439 | if (firsterr) | ||
| 440 | goto unwind; | ||
| 441 | } | ||
| 442 | torture_init_end(); | ||
| 443 | return 0; | ||
| 444 | |||
| 445 | unwind: | ||
| 446 | torture_init_end(); | ||
| 447 | lock_torture_cleanup(); | ||
| 448 | return firsterr; | ||
| 449 | } | ||
| 450 | |||
| 451 | module_init(lock_torture_init); | ||
| 452 | module_exit(lock_torture_cleanup); | ||
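
lock_torture_init() picks the torture type by matching torture_type against the ops table, so adding a new lock flavor only needs a writelock/write_delay/writeunlock triple plus a table entry. A hypothetical example for a reader-writer lock; none of the torture_rwlock_* symbols below exist in this patch, they only show the shape of an entry, reusing the existing spinlock delay function:

static DEFINE_RWLOCK(torture_rwlock);

static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
{
        write_lock(&torture_rwlock);
        return 0;
}

static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
{
        write_unlock(&torture_rwlock);
}

static struct lock_torture_ops rw_lock_ops = {
        .writelock      = torture_rwlock_write_lock,
        .write_delay    = torture_spin_lock_write_delay,
        .writeunlock    = torture_rwlock_write_unlock,
        .name           = "rw_lock"
};

/* ...then add &rw_lock_ops to the torture_ops[] array in lock_torture_init()
 * and load the module with torture_type=rw_lock. */
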
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c new file mode 100644 index 000000000000..838dc9e00669 --- /dev/null +++ b/kernel/locking/mcs_spinlock.c | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | |||
| 2 | #include <linux/percpu.h> | ||
| 3 | #include <linux/mutex.h> | ||
| 4 | #include <linux/sched.h> | ||
| 5 | #include "mcs_spinlock.h" | ||
| 6 | |||
| 7 | #ifdef CONFIG_SMP | ||
| 8 | |||
| 9 | /* | ||
| 10 | * An MCS like lock especially tailored for optimistic spinning for sleeping | ||
| 11 | * lock implementations (mutex, rwsem, etc). | ||
| 12 | * | ||
| 13 | * Using a single mcs node per CPU is safe because sleeping locks should not be | ||
| 14 | * called from interrupt context and we have preemption disabled while | ||
| 15 | * spinning. | ||
| 16 | */ | ||
| 17 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | ||
| 21 | * Can return NULL in case we were the last queued and we updated @lock instead. | ||
| 22 | */ | ||
| 23 | static inline struct optimistic_spin_queue * | ||
| 24 | osq_wait_next(struct optimistic_spin_queue **lock, | ||
| 25 | struct optimistic_spin_queue *node, | ||
| 26 | struct optimistic_spin_queue *prev) | ||
| 27 | { | ||
| 28 | struct optimistic_spin_queue *next = NULL; | ||
| 29 | |||
| 30 | for (;;) { | ||
| 31 | if (*lock == node && cmpxchg(lock, node, prev) == node) { | ||
| 32 | /* | ||
| 33 | * We were the last queued, we moved @lock back. @prev | ||
| 34 | * will now observe @lock and will complete its | ||
| 35 | * unlock()/unqueue(). | ||
| 36 | */ | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | |||
| 40 | /* | ||
| 41 | * We must xchg() the @node->next value, because if we were to | ||
| 42 | * leave it in, a concurrent unlock()/unqueue() from | ||
| 43 | * @node->next might complete Step-A and think its @prev is | ||
| 44 | * still valid. | ||
| 45 | * | ||
| 46 | * If the concurrent unlock()/unqueue() wins the race, we'll | ||
| 47 | * wait for either @lock to point to us, through its Step-B, or | ||
| 48 | * wait for a new @node->next from its Step-C. | ||
| 49 | */ | ||
| 50 | if (node->next) { | ||
| 51 | next = xchg(&node->next, NULL); | ||
| 52 | if (next) | ||
| 53 | break; | ||
| 54 | } | ||
| 55 | |||
| 56 | arch_mutex_cpu_relax(); | ||
| 57 | } | ||
| 58 | |||
| 59 | return next; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool osq_lock(struct optimistic_spin_queue **lock) | ||
| 63 | { | ||
| 64 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | ||
| 65 | struct optimistic_spin_queue *prev, *next; | ||
| 66 | |||
| 67 | node->locked = 0; | ||
| 68 | node->next = NULL; | ||
| 69 | |||
| 70 | node->prev = prev = xchg(lock, node); | ||
| 71 | if (likely(prev == NULL)) | ||
| 72 | return true; | ||
| 73 | |||
| 74 | ACCESS_ONCE(prev->next) = node; | ||
| 75 | |||
| 76 | /* | ||
| 77 | * Normally @prev is untouchable after the above store; because at that | ||
| 78 | * moment unlock can proceed and wipe the node element from stack. | ||
| 79 | * | ||
| 80 | * However, since our nodes are static per-cpu storage, we're | ||
| 81 | * guaranteed their existence -- this allows us to apply | ||
| 82 | * cmpxchg in an attempt to undo our queueing. | ||
| 83 | */ | ||
| 84 | |||
| 85 | while (!smp_load_acquire(&node->locked)) { | ||
| 86 | /* | ||
| 87 | * If we need to reschedule bail... so we can block. | ||
| 88 | */ | ||
| 89 | if (need_resched()) | ||
| 90 | goto unqueue; | ||
| 91 | |||
| 92 | arch_mutex_cpu_relax(); | ||
| 93 | } | ||
| 94 | return true; | ||
| 95 | |||
| 96 | unqueue: | ||
| 97 | /* | ||
| 98 | * Step - A -- stabilize @prev | ||
| 99 | * | ||
| 100 | * Undo our @prev->next assignment; this will make @prev's | ||
| 101 | * unlock()/unqueue() wait for a next pointer since @lock points to us | ||
| 102 | * (or later). | ||
| 103 | */ | ||
| 104 | |||
| 105 | for (;;) { | ||
| 106 | if (prev->next == node && | ||
| 107 | cmpxchg(&prev->next, node, NULL) == node) | ||
| 108 | break; | ||
| 109 | |||
| 110 | /* | ||
| 111 | * We can only fail the cmpxchg() racing against an unlock(), | ||
| 112 | * in which case we should observe @node->locked becoming | ||
| 113 | * true. | ||
| 114 | */ | ||
| 115 | if (smp_load_acquire(&node->locked)) | ||
| 116 | return true; | ||
| 117 | |||
| 118 | arch_mutex_cpu_relax(); | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Or we race against a concurrent unqueue()'s step-B, in which | ||
| 122 | * case its step-C will write us a new @node->prev pointer. | ||
| 123 | */ | ||
| 124 | prev = ACCESS_ONCE(node->prev); | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Step - B -- stabilize @next | ||
| 129 | * | ||
| 130 | * Similar to unlock(), wait for @node->next or move @lock from @node | ||
| 131 | * back to @prev. | ||
| 132 | */ | ||
| 133 | |||
| 134 | next = osq_wait_next(lock, node, prev); | ||
| 135 | if (!next) | ||
| 136 | return false; | ||
| 137 | |||
| 138 | /* | ||
| 139 | * Step - C -- unlink | ||
| 140 | * | ||
| 141 | * @prev is stable because it's still waiting for a new @prev->next | ||
| 142 | * pointer, @next is stable because our @node->next pointer is NULL and | ||
| 143 | * it will wait in Step-A. | ||
| 144 | */ | ||
| 145 | |||
| 146 | ACCESS_ONCE(next->prev) = prev; | ||
| 147 | ACCESS_ONCE(prev->next) = next; | ||
| 148 | |||
| 149 | return false; | ||
| 150 | } | ||
| 151 | |||
| 152 | void osq_unlock(struct optimistic_spin_queue **lock) | ||
| 153 | { | ||
| 154 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | ||
| 155 | struct optimistic_spin_queue *next; | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Fast path for the uncontended case. | ||
| 159 | */ | ||
| 160 | if (likely(cmpxchg(lock, node, NULL) == node)) | ||
| 161 | return; | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Second most likely case. | ||
| 165 | */ | ||
| 166 | next = xchg(&node->next, NULL); | ||
| 167 | if (next) { | ||
| 168 | ACCESS_ONCE(next->locked) = 1; | ||
| 169 | return; | ||
| 170 | } | ||
| 171 | |||
| 172 | next = osq_wait_next(lock, node, NULL); | ||
| 173 | if (next) | ||
| 174 | ACCESS_ONCE(next->locked) = 1; | ||
| 175 | } | ||
| 176 | |||
| 177 | #endif | ||
| 178 | |||
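
osq_lock() differs from a plain MCS lock in that it can fail: a spinner that sees need_resched() unqueues itself (Steps A-C above) and returns false, so the caller can fall back to sleeping instead of stalling every spinner queued behind it. A sketch of the expected calling pattern; my_lock_osq and my_lock_try_spin are illustrative names, and the mutex conversion later in this diff is the real user:

static struct optimistic_spin_queue *my_lock_osq;       /* one queue tail per lock */

static bool my_lock_try_spin(void)
{
        if (!osq_lock(&my_lock_osq))
                return false;           /* had to bail out (need_resched()), go sleep */

        /* ...spin on the lock owner / try to grab the real lock here... */

        osq_unlock(&my_lock_osq);
        return true;
}
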
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h new file mode 100644 index 000000000000..a2dbac4aca6b --- /dev/null +++ b/kernel/locking/mcs_spinlock.h | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | /* | ||
| 2 | * MCS lock defines | ||
| 3 | * | ||
| 4 | * This file contains the main data structure and API definitions of MCS lock. | ||
| 5 | * | ||
| 6 | * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock | ||
| 7 | * with the desirable properties of being fair, and with each cpu trying | ||
| 8 | * to acquire the lock spinning on a local variable. | ||
| 9 | * It avoids the expensive cache-line bouncing that common test-and-set spin-lock | ||
| 10 | * implementations incur. | ||
| 11 | */ | ||
| 12 | #ifndef __LINUX_MCS_SPINLOCK_H | ||
| 13 | #define __LINUX_MCS_SPINLOCK_H | ||
| 14 | |||
| 15 | #include <asm/mcs_spinlock.h> | ||
| 16 | |||
| 17 | struct mcs_spinlock { | ||
| 18 | struct mcs_spinlock *next; | ||
| 19 | int locked; /* 1 if lock acquired */ | ||
| 20 | }; | ||
| 21 | |||
| 22 | #ifndef arch_mcs_spin_lock_contended | ||
| 23 | /* | ||
| 24 | * Using smp_load_acquire() provides a memory barrier that ensures | ||
| 25 | * subsequent operations happen after the lock is acquired. | ||
| 26 | */ | ||
| 27 | #define arch_mcs_spin_lock_contended(l) \ | ||
| 28 | do { \ | ||
| 29 | while (!(smp_load_acquire(l))) \ | ||
| 30 | arch_mutex_cpu_relax(); \ | ||
| 31 | } while (0) | ||
| 32 | #endif | ||
| 33 | |||
| 34 | #ifndef arch_mcs_spin_unlock_contended | ||
| 35 | /* | ||
| 36 | * smp_store_release() provides a memory barrier to ensure all | ||
| 37 | * operations in the critical section have been completed before | ||
| 38 | * unlocking. | ||
| 39 | */ | ||
| 40 | #define arch_mcs_spin_unlock_contended(l) \ | ||
| 41 | smp_store_release((l), 1) | ||
| 42 | #endif | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Note: the smp_load_acquire/smp_store_release pair is not | ||
| 46 | * sufficient to form a full memory barrier across | ||
| 47 | * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. | ||
| 48 | * For applications that need a full barrier across multiple cpus | ||
| 49 | * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be | ||
| 50 | * used after mcs_lock. | ||
| 51 | */ | ||
| 52 | |||
| 53 | /* | ||
| 54 | * In order to acquire the lock, the caller should declare a local node and | ||
| 55 | * pass a reference of the node to this function in addition to the lock. | ||
| 56 | * If the lock has already been acquired, then this will proceed to spin | ||
| 57 | * on this node->locked until the previous lock holder sets the node->locked | ||
| 58 | * in mcs_spin_unlock(). | ||
| 59 | * | ||
| 60 | * We don't inline mcs_spin_lock() so that perf can correctly account for the | ||
| 61 | * time spent in this lock function. | ||
| 62 | */ | ||
| 63 | static inline | ||
| 64 | void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | ||
| 65 | { | ||
| 66 | struct mcs_spinlock *prev; | ||
| 67 | |||
| 68 | /* Init node */ | ||
| 69 | node->locked = 0; | ||
| 70 | node->next = NULL; | ||
| 71 | |||
| 72 | prev = xchg(lock, node); | ||
| 73 | if (likely(prev == NULL)) { | ||
| 74 | /* | ||
| 75 | * Lock acquired, don't need to set node->locked to 1. Threads | ||
| 76 | * only spin on their own node->locked value for lock acquisition. | ||
| 77 | * However, since this thread can immediately acquire the lock | ||
| 78 | * and does not proceed to spin on its own node->locked, this | ||
| 79 | * value won't be used. If a debug mode is needed to | ||
| 80 | * audit lock status, then set node->locked value here. | ||
| 81 | */ | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | ACCESS_ONCE(prev->next) = node; | ||
| 85 | |||
| 86 | /* Wait until the lock holder passes the lock down. */ | ||
| 87 | arch_mcs_spin_lock_contended(&node->locked); | ||
| 88 | } | ||
| 89 | |||
| 90 | /* | ||
| 91 | * Releases the lock. The caller should pass in the corresponding node that | ||
| 92 | * was used to acquire the lock. | ||
| 93 | */ | ||
| 94 | static inline | ||
| 95 | void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | ||
| 96 | { | ||
| 97 | struct mcs_spinlock *next = ACCESS_ONCE(node->next); | ||
| 98 | |||
| 99 | if (likely(!next)) { | ||
| 100 | /* | ||
| 101 | * Release the lock by setting it to NULL | ||
| 102 | */ | ||
| 103 | if (likely(cmpxchg(lock, node, NULL) == node)) | ||
| 104 | return; | ||
| 105 | /* Wait until the next pointer is set */ | ||
| 106 | while (!(next = ACCESS_ONCE(node->next))) | ||
| 107 | arch_mutex_cpu_relax(); | ||
| 108 | } | ||
| 109 | |||
| 110 | /* Pass lock to next waiter. */ | ||
| 111 | arch_mcs_spin_unlock_contended(&next->locked); | ||
| 112 | } | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Cancellable version of the MCS lock above. | ||
| 116 | * | ||
| 117 | * Intended for adaptive spinning of sleeping locks: | ||
| 118 | * mutex_lock()/rwsem_down_{read,write}() etc. | ||
| 119 | */ | ||
| 120 | |||
| 121 | struct optimistic_spin_queue { | ||
| 122 | struct optimistic_spin_queue *next, *prev; | ||
| 123 | int locked; /* 1 if lock acquired */ | ||
| 124 | }; | ||
| 125 | |||
| 126 | extern bool osq_lock(struct optimistic_spin_queue **lock); | ||
| 127 | extern void osq_unlock(struct optimistic_spin_queue **lock); | ||
| 128 | |||
| 129 | #endif /* __LINUX_MCS_SPINLOCK_H */ | ||
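
The non-cancellable API above takes the queue tail plus a caller-supplied node, typically on the caller's stack, so each waiter spins only on its own cache line. A minimal usage sketch; my_mcs_lock and my_critical_section are made-up names:

static struct mcs_spinlock *my_mcs_lock;        /* NULL when unlocked */

static void my_critical_section(void)
{
        struct mcs_spinlock node;       /* lives on this stack while we hold the lock */

        mcs_spin_lock(&my_mcs_lock, &node);
        /* ...critical section; contended waiters spin on their own node->locked... */
        mcs_spin_unlock(&my_mcs_lock, &node);
}
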
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index faf6f5b53e77..e1191c996c59 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c | |||
| @@ -83,6 +83,12 @@ void debug_mutex_unlock(struct mutex *lock) | |||
| 83 | 83 | ||
| 84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | 84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
| 85 | mutex_clear_owner(lock); | 85 | mutex_clear_owner(lock); |
| 86 | |||
| 87 | /* | ||
| 88 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug | ||
| 89 | * mutexes so that we can do it here after we've verified state. | ||
| 90 | */ | ||
| 91 | atomic_set(&lock->count, 1); | ||
| 86 | } | 92 | } |
| 87 | 93 | ||
| 88 | void debug_mutex_init(struct mutex *lock, const char *name, | 94 | void debug_mutex_init(struct mutex *lock, const char *name, |
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 4dd6e4c219de..bc73d33c6760 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
| 26 | #include <linux/interrupt.h> | 26 | #include <linux/interrupt.h> |
| 27 | #include <linux/debug_locks.h> | 27 | #include <linux/debug_locks.h> |
| 28 | #include "mcs_spinlock.h" | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, | 31 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, |
| @@ -33,6 +34,13 @@ | |||
| 33 | #ifdef CONFIG_DEBUG_MUTEXES | 34 | #ifdef CONFIG_DEBUG_MUTEXES |
| 34 | # include "mutex-debug.h" | 35 | # include "mutex-debug.h" |
| 35 | # include <asm-generic/mutex-null.h> | 36 | # include <asm-generic/mutex-null.h> |
| 37 | /* | ||
| 38 | * Must be 0 for the debug case so we do not do the unlock outside of the | ||
| 39 | * wait_lock region. debug_mutex_unlock() will do the actual unlock in this | ||
| 40 | * case. | ||
| 41 | */ | ||
| 42 | # undef __mutex_slowpath_needs_to_unlock | ||
| 43 | # define __mutex_slowpath_needs_to_unlock() 0 | ||
| 36 | #else | 44 | #else |
| 37 | # include "mutex.h" | 45 | # include "mutex.h" |
| 38 | # include <asm/mutex.h> | 46 | # include <asm/mutex.h> |
| @@ -52,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) | |||
| 52 | INIT_LIST_HEAD(&lock->wait_list); | 60 | INIT_LIST_HEAD(&lock->wait_list); |
| 53 | mutex_clear_owner(lock); | 61 | mutex_clear_owner(lock); |
| 54 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 62 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
| 55 | lock->spin_mlock = NULL; | 63 | lock->osq = NULL; |
| 56 | #endif | 64 | #endif |
| 57 | 65 | ||
| 58 | debug_mutex_init(lock, name, key); | 66 | debug_mutex_init(lock, name, key); |
| @@ -67,8 +75,7 @@ EXPORT_SYMBOL(__mutex_init); | |||
| 67 | * We also put the fastpath first in the kernel image, to make sure the | 75 | * We also put the fastpath first in the kernel image, to make sure the |
| 68 | * branch is predicted by the CPU as default-untaken. | 76 | * branch is predicted by the CPU as default-untaken. |
| 69 | */ | 77 | */ |
| 70 | static __used noinline void __sched | 78 | __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); |
| 71 | __mutex_lock_slowpath(atomic_t *lock_count); | ||
| 72 | 79 | ||
| 73 | /** | 80 | /** |
| 74 | * mutex_lock - acquire the mutex | 81 | * mutex_lock - acquire the mutex |
| @@ -111,54 +118,7 @@ EXPORT_SYMBOL(mutex_lock); | |||
| 111 | * more or less simultaneously, the spinners need to acquire a MCS lock | 118 | * more or less simultaneously, the spinners need to acquire a MCS lock |
| 112 | * first before spinning on the owner field. | 119 | * first before spinning on the owner field. |
| 113 | * | 120 | * |
| 114 | * We don't inline mspin_lock() so that perf can correctly account for the | ||
| 115 | * time spent in this lock function. | ||
| 116 | */ | 121 | */ |
| 117 | struct mspin_node { | ||
| 118 | struct mspin_node *next ; | ||
| 119 | int locked; /* 1 if lock acquired */ | ||
| 120 | }; | ||
| 121 | #define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) | ||
| 122 | |||
| 123 | static noinline | ||
| 124 | void mspin_lock(struct mspin_node **lock, struct mspin_node *node) | ||
| 125 | { | ||
| 126 | struct mspin_node *prev; | ||
| 127 | |||
| 128 | /* Init node */ | ||
| 129 | node->locked = 0; | ||
| 130 | node->next = NULL; | ||
| 131 | |||
| 132 | prev = xchg(lock, node); | ||
| 133 | if (likely(prev == NULL)) { | ||
| 134 | /* Lock acquired */ | ||
| 135 | node->locked = 1; | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | ACCESS_ONCE(prev->next) = node; | ||
| 139 | smp_wmb(); | ||
| 140 | /* Wait until the lock holder passes the lock down */ | ||
| 141 | while (!ACCESS_ONCE(node->locked)) | ||
| 142 | arch_mutex_cpu_relax(); | ||
| 143 | } | ||
| 144 | |||
| 145 | static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) | ||
| 146 | { | ||
| 147 | struct mspin_node *next = ACCESS_ONCE(node->next); | ||
| 148 | |||
| 149 | if (likely(!next)) { | ||
| 150 | /* | ||
| 151 | * Release the lock by setting it to NULL | ||
| 152 | */ | ||
| 153 | if (cmpxchg(lock, node, NULL) == node) | ||
| 154 | return; | ||
| 155 | /* Wait until the next pointer is set */ | ||
| 156 | while (!(next = ACCESS_ONCE(node->next))) | ||
| 157 | arch_mutex_cpu_relax(); | ||
| 158 | } | ||
| 159 | ACCESS_ONCE(next->locked) = 1; | ||
| 160 | smp_wmb(); | ||
| 161 | } | ||
| 162 | 122 | ||
| 163 | /* | 123 | /* |
| 164 | * Mutex spinning code migrated from kernel/sched/core.c | 124 | * Mutex spinning code migrated from kernel/sched/core.c |
| @@ -212,6 +172,9 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) | |||
| 212 | struct task_struct *owner; | 172 | struct task_struct *owner; |
| 213 | int retval = 1; | 173 | int retval = 1; |
| 214 | 174 | ||
| 175 | if (need_resched()) | ||
| 176 | return 0; | ||
| 177 | |||
| 215 | rcu_read_lock(); | 178 | rcu_read_lock(); |
| 216 | owner = ACCESS_ONCE(lock->owner); | 179 | owner = ACCESS_ONCE(lock->owner); |
| 217 | if (owner) | 180 | if (owner) |
| @@ -225,7 +188,8 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) | |||
| 225 | } | 188 | } |
| 226 | #endif | 189 | #endif |
| 227 | 190 | ||
| 228 | static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); | 191 | __visible __used noinline |
| 192 | void __sched __mutex_unlock_slowpath(atomic_t *lock_count); | ||
| 229 | 193 | ||
| 230 | /** | 194 | /** |
| 231 | * mutex_unlock - release the mutex | 195 | * mutex_unlock - release the mutex |
| @@ -446,9 +410,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 446 | if (!mutex_can_spin_on_owner(lock)) | 410 | if (!mutex_can_spin_on_owner(lock)) |
| 447 | goto slowpath; | 411 | goto slowpath; |
| 448 | 412 | ||
| 413 | if (!osq_lock(&lock->osq)) | ||
| 414 | goto slowpath; | ||
| 415 | |||
| 449 | for (;;) { | 416 | for (;;) { |
| 450 | struct task_struct *owner; | 417 | struct task_struct *owner; |
| 451 | struct mspin_node node; | ||
| 452 | 418 | ||
| 453 | if (use_ww_ctx && ww_ctx->acquired > 0) { | 419 | if (use_ww_ctx && ww_ctx->acquired > 0) { |
| 454 | struct ww_mutex *ww; | 420 | struct ww_mutex *ww; |
| @@ -463,19 +429,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 463 | * performed the optimistic spinning cannot be done. | 429 | * performed the optimistic spinning cannot be done. |
| 464 | */ | 430 | */ |
| 465 | if (ACCESS_ONCE(ww->ctx)) | 431 | if (ACCESS_ONCE(ww->ctx)) |
| 466 | goto slowpath; | 432 | break; |
| 467 | } | 433 | } |
| 468 | 434 | ||
| 469 | /* | 435 | /* |
| 470 | * If there's an owner, wait for it to either | 436 | * If there's an owner, wait for it to either |
| 471 | * release the lock or go to sleep. | 437 | * release the lock or go to sleep. |
| 472 | */ | 438 | */ |
| 473 | mspin_lock(MLOCK(lock), &node); | ||
| 474 | owner = ACCESS_ONCE(lock->owner); | 439 | owner = ACCESS_ONCE(lock->owner); |
| 475 | if (owner && !mutex_spin_on_owner(lock, owner)) { | 440 | if (owner && !mutex_spin_on_owner(lock, owner)) |
| 476 | mspin_unlock(MLOCK(lock), &node); | 441 | break; |
| 477 | goto slowpath; | ||
| 478 | } | ||
| 479 | 442 | ||
| 480 | if ((atomic_read(&lock->count) == 1) && | 443 | if ((atomic_read(&lock->count) == 1) && |
| 481 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { | 444 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { |
| @@ -488,11 +451,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 488 | } | 451 | } |
| 489 | 452 | ||
| 490 | mutex_set_owner(lock); | 453 | mutex_set_owner(lock); |
| 491 | mspin_unlock(MLOCK(lock), &node); | 454 | osq_unlock(&lock->osq); |
| 492 | preempt_enable(); | 455 | preempt_enable(); |
| 493 | return 0; | 456 | return 0; |
| 494 | } | 457 | } |
| 495 | mspin_unlock(MLOCK(lock), &node); | ||
| 496 | 458 | ||
| 497 | /* | 459 | /* |
| 498 | * When there's no owner, we might have preempted between the | 460 | * When there's no owner, we might have preempted between the |
| @@ -501,7 +463,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 501 | * the owner complete. | 463 | * the owner complete. |
| 502 | */ | 464 | */ |
| 503 | if (!owner && (need_resched() || rt_task(task))) | 465 | if (!owner && (need_resched() || rt_task(task))) |
| 504 | goto slowpath; | 466 | break; |
| 505 | 467 | ||
| 506 | /* | 468 | /* |
| 507 | * The cpu_relax() call is a compiler barrier which forces | 469 | * The cpu_relax() call is a compiler barrier which forces |
| @@ -511,7 +473,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 511 | */ | 473 | */ |
| 512 | arch_mutex_cpu_relax(); | 474 | arch_mutex_cpu_relax(); |
| 513 | } | 475 | } |
| 476 | osq_unlock(&lock->osq); | ||
| 514 | slowpath: | 477 | slowpath: |
| 478 | /* | ||
| 479 | * If we fell out of the spin path because of need_resched(), | ||
| 480 | * reschedule now, before we try-lock the mutex. This avoids getting | ||
| 481 | * scheduled out right after we obtained the mutex. | ||
| 482 | */ | ||
| 483 | if (need_resched()) | ||
| 484 | schedule_preempt_disabled(); | ||
| 515 | #endif | 485 | #endif |
| 516 | spin_lock_mutex(&lock->wait_lock, flags); | 486 | spin_lock_mutex(&lock->wait_lock, flags); |
| 517 | 487 | ||
| @@ -717,10 +687,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 717 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 687 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
| 718 | unsigned long flags; | 688 | unsigned long flags; |
| 719 | 689 | ||
| 720 | spin_lock_mutex(&lock->wait_lock, flags); | ||
| 721 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
| 722 | debug_mutex_unlock(lock); | ||
| 723 | |||
| 724 | /* | 690 | /* |
| 725 | * some architectures leave the lock unlocked in the fastpath failure | 691 | * some architectures leave the lock unlocked in the fastpath failure |
| 726 | * case, others need to leave it locked. In the later case we have to | 692 | * case, others need to leave it locked. In the later case we have to |
| @@ -729,6 +695,10 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 729 | if (__mutex_slowpath_needs_to_unlock()) | 695 | if (__mutex_slowpath_needs_to_unlock()) |
| 730 | atomic_set(&lock->count, 1); | 696 | atomic_set(&lock->count, 1); |
| 731 | 697 | ||
| 698 | spin_lock_mutex(&lock->wait_lock, flags); | ||
| 699 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
| 700 | debug_mutex_unlock(lock); | ||
| 701 | |||
| 732 | if (!list_empty(&lock->wait_list)) { | 702 | if (!list_empty(&lock->wait_list)) { |
| 733 | /* get the first entry from the wait-list: */ | 703 | /* get the first entry from the wait-list: */ |
| 734 | struct mutex_waiter *waiter = | 704 | struct mutex_waiter *waiter = |
| @@ -746,7 +716,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 746 | /* | 716 | /* |
| 747 | * Release the lock, slowpath: | 717 | * Release the lock, slowpath: |
| 748 | */ | 718 | */ |
| 749 | static __used noinline void | 719 | __visible void |
| 750 | __mutex_unlock_slowpath(atomic_t *lock_count) | 720 | __mutex_unlock_slowpath(atomic_t *lock_count) |
| 751 | { | 721 | { |
| 752 | __mutex_unlock_common_slowpath(lock_count, 1); | 722 | __mutex_unlock_common_slowpath(lock_count, 1); |
| @@ -803,7 +773,7 @@ int __sched mutex_lock_killable(struct mutex *lock) | |||
| 803 | } | 773 | } |
| 804 | EXPORT_SYMBOL(mutex_lock_killable); | 774 | EXPORT_SYMBOL(mutex_lock_killable); |
| 805 | 775 | ||
| 806 | static __used noinline void __sched | 776 | __visible void __sched |
| 807 | __mutex_lock_slowpath(atomic_t *lock_count) | 777 | __mutex_lock_slowpath(atomic_t *lock_count) |
| 808 | { | 778 | { |
| 809 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 779 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 2e960a2bab81..aa4dff04b594 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
| @@ -213,6 +213,18 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) | |||
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | /* | 215 | /* |
| 216 | * Called by sched_setscheduler() to check whether the priority change | ||
| 217 | * is overruled by a possible priority boosting. | ||
| 218 | */ | ||
| 219 | int rt_mutex_check_prio(struct task_struct *task, int newprio) | ||
| 220 | { | ||
| 221 | if (!task_has_pi_waiters(task)) | ||
| 222 | return 0; | ||
| 223 | |||
| 224 | return task_top_pi_waiter(task)->task->prio <= newprio; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 216 | * Adjust the priority of a task, after its pi_waiters got modified. | 228 | * Adjust the priority of a task, after its pi_waiters got modified. |
| 217 | * | 229 | * |
| 218 | * This can be both boosting and unboosting. task->pi_lock must be held. | 230 | * This can be both boosting and unboosting. task->pi_lock must be held. |
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 19c5fa95e0b4..1d66e08e897d 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -143,6 +143,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) | |||
| 143 | /* | 143 | /* |
| 144 | * wait for the read lock to be granted | 144 | * wait for the read lock to be granted |
| 145 | */ | 145 | */ |
| 146 | __visible | ||
| 146 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | 147 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) |
| 147 | { | 148 | { |
| 148 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | 149 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; |
| @@ -190,6 +191,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
| 190 | /* | 191 | /* |
| 191 | * wait until we successfully acquire the write lock | 192 | * wait until we successfully acquire the write lock |
| 192 | */ | 193 | */ |
| 194 | __visible | ||
| 193 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | 195 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) |
| 194 | { | 196 | { |
| 195 | long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; | 197 | long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; |
| @@ -252,6 +254,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | |||
| 252 | * handle waking up a waiter on the semaphore | 254 | * handle waking up a waiter on the semaphore |
| 253 | * - up_read/up_write has decremented the active part of count if we come here | 255 | * - up_read/up_write has decremented the active part of count if we come here |
| 254 | */ | 256 | */ |
| 257 | __visible | ||
| 255 | struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) | 258 | struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) |
| 256 | { | 259 | { |
| 257 | unsigned long flags; | 260 | unsigned long flags; |
| @@ -272,6 +275,7 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) | |||
| 272 | * - caller incremented waiting part of count and discovered it still negative | 275 | * - caller incremented waiting part of count and discovered it still negative |
| 273 | * - just wake up any readers at the front of the queue | 276 | * - just wake up any readers at the front of the queue |
| 274 | */ | 277 | */ |
| 278 | __visible | ||
| 275 | struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) | 279 | struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) |
| 276 | { | 280 | { |
| 277 | unsigned long flags; | 281 | unsigned long flags; |
diff --git a/kernel/module.c b/kernel/module.c index d24fcf29cb64..8dc7f5e80dd8 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -1015,7 +1015,7 @@ static size_t module_flags_taint(struct module *mod, char *buf) | |||
| 1015 | buf[l++] = 'C'; | 1015 | buf[l++] = 'C'; |
| 1016 | /* | 1016 | /* |
| 1017 | * TAINT_FORCED_RMMOD: could be added. | 1017 | * TAINT_FORCED_RMMOD: could be added. |
| 1018 | * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't | 1018 | * TAINT_CPU_OUT_OF_SPEC, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't |
| 1019 | * apply to modules. | 1019 | * apply to modules. |
| 1020 | */ | 1020 | */ |
| 1021 | return l; | 1021 | return l; |
| @@ -1948,6 +1948,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) | |||
| 1948 | 1948 | ||
| 1949 | switch (sym[i].st_shndx) { | 1949 | switch (sym[i].st_shndx) { |
| 1950 | case SHN_COMMON: | 1950 | case SHN_COMMON: |
| 1951 | /* Ignore common symbols */ | ||
| 1952 | if (!strncmp(name, "__gnu_lto", 9)) | ||
| 1953 | break; | ||
| 1954 | |||
| 1951 | /* We compiled with -fno-common. These are not | 1955 | /* We compiled with -fno-common. These are not |
| 1952 | supposed to happen. */ | 1956 | supposed to happen. */ |
| 1953 | pr_debug("Common symbol: %s\n", name); | 1957 | pr_debug("Common symbol: %s\n", name); |
diff --git a/kernel/notifier.c b/kernel/notifier.c index 2d5cc4ccff7f..db4c8b08a50c 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
| @@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, | |||
| 309 | * racy then it does not matter what the result of the test | 309 | * racy then it does not matter what the result of the test |
| 310 | * is, we re-check the list after having taken the lock anyway: | 310 | * is, we re-check the list after having taken the lock anyway: |
| 311 | */ | 311 | */ |
| 312 | if (rcu_dereference_raw(nh->head)) { | 312 | if (rcu_access_pointer(nh->head)) { |
| 313 | down_read(&nh->rwsem); | 313 | down_read(&nh->rwsem); |
| 314 | ret = notifier_call_chain(&nh->head, val, v, nr_to_call, | 314 | ret = notifier_call_chain(&nh->head, val, v, nr_to_call, |
| 315 | nr_calls); | 315 | nr_calls); |
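
The switch from rcu_dereference_raw() to rcu_access_pointer() matches how the value is used here: it is only compared against NULL and never dereferenced, so no read-side protection needs to be implied. A minimal illustration of the idiom (the struct and function names are invented for the example):

    #include <linux/rcupdate.h>

    struct example_node;
    struct example_list { struct example_node __rcu *head; };

    /* rcu_access_pointer() is the right primitive when the fetched value
     * is only tested, never followed. */
    static bool example_list_nonempty(struct example_list *l)
    {
            return rcu_access_pointer(l->head) != NULL;
    }
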
diff --git a/kernel/panic.c b/kernel/panic.c index 6d6300375090..cca8a913ae7c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -199,7 +199,7 @@ struct tnt { | |||
| 199 | static const struct tnt tnts[] = { | 199 | static const struct tnt tnts[] = { |
| 200 | { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, | 200 | { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, |
| 201 | { TAINT_FORCED_MODULE, 'F', ' ' }, | 201 | { TAINT_FORCED_MODULE, 'F', ' ' }, |
| 202 | { TAINT_UNSAFE_SMP, 'S', ' ' }, | 202 | { TAINT_CPU_OUT_OF_SPEC, 'S', ' ' }, |
| 203 | { TAINT_FORCED_RMMOD, 'R', ' ' }, | 203 | { TAINT_FORCED_RMMOD, 'R', ' ' }, |
| 204 | { TAINT_MACHINE_CHECK, 'M', ' ' }, | 204 | { TAINT_MACHINE_CHECK, 'M', ' ' }, |
| 205 | { TAINT_BAD_PAGE, 'B', ' ' }, | 205 | { TAINT_BAD_PAGE, 'B', ' ' }, |
| @@ -459,7 +459,7 @@ EXPORT_SYMBOL(warn_slowpath_null); | |||
| 459 | * Called when gcc's -fstack-protector feature is used, and | 459 | * Called when gcc's -fstack-protector feature is used, and |
| 460 | * gcc detects corruption of the on-stack canary value | 460 | * gcc detects corruption of the on-stack canary value |
| 461 | */ | 461 | */ |
| 462 | void __stack_chk_fail(void) | 462 | __visible void __stack_chk_fail(void) |
| 463 | { | 463 | { |
| 464 | panic("stack-protector: Kernel stack is corrupted in: %p\n", | 464 | panic("stack-protector: Kernel stack is corrupted in: %p\n", |
| 465 | __builtin_return_address(0)); | 465 | __builtin_return_address(0)); |
diff --git a/kernel/power/console.c b/kernel/power/console.c index eacb8bd8cab4..aba9c545a0e3 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <linux/kbd_kern.h> | 9 | #include <linux/kbd_kern.h> |
| 10 | #include <linux/vt.h> | 10 | #include <linux/vt.h> |
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 12 | #include <linux/slab.h> | ||
| 12 | #include "power.h" | 13 | #include "power.h" |
| 13 | 14 | ||
| 14 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) | 15 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b1d255f04135..4dae9cbe9259 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
| @@ -1076,7 +1076,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
| 1076 | next_seq = log_next_seq; | 1076 | next_seq = log_next_seq; |
| 1077 | 1077 | ||
| 1078 | len = 0; | 1078 | len = 0; |
| 1079 | prev = 0; | ||
| 1080 | while (len >= 0 && seq < next_seq) { | 1079 | while (len >= 0 && seq < next_seq) { |
| 1081 | struct printk_log *msg = log_from_idx(idx); | 1080 | struct printk_log *msg = log_from_idx(idx); |
| 1082 | int textlen; | 1081 | int textlen; |
| @@ -2788,7 +2787,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, | |||
| 2788 | next_idx = idx; | 2787 | next_idx = idx; |
| 2789 | 2788 | ||
| 2790 | l = 0; | 2789 | l = 0; |
| 2791 | prev = 0; | ||
| 2792 | while (seq < dumper->next_seq) { | 2790 | while (seq < dumper->next_seq) { |
| 2793 | struct printk_log *msg = log_from_idx(idx); | 2791 | struct printk_log *msg = log_from_idx(idx); |
| 2794 | 2792 | ||
diff --git a/kernel/profile.c b/kernel/profile.c index 6631e1ef55ab..ebdd9c1a86b4 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
| @@ -549,14 +549,14 @@ static int create_hash_tables(void) | |||
| 549 | struct page *page; | 549 | struct page *page; |
| 550 | 550 | ||
| 551 | page = alloc_pages_exact_node(node, | 551 | page = alloc_pages_exact_node(node, |
| 552 | GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | 552 | GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, |
| 553 | 0); | 553 | 0); |
| 554 | if (!page) | 554 | if (!page) |
| 555 | goto out_cleanup; | 555 | goto out_cleanup; |
| 556 | per_cpu(cpu_profile_hits, cpu)[1] | 556 | per_cpu(cpu_profile_hits, cpu)[1] |
| 557 | = (struct profile_hit *)page_address(page); | 557 | = (struct profile_hit *)page_address(page); |
| 558 | page = alloc_pages_exact_node(node, | 558 | page = alloc_pages_exact_node(node, |
| 559 | GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | 559 | GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, |
| 560 | 0); | 560 | 0); |
| 561 | if (!page) | 561 | if (!page) |
| 562 | goto out_cleanup; | 562 | goto out_cleanup; |
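
The s/GFP_THISNODE/__GFP_THISNODE/ fix above matters because GFP_THISNODE has historically been a composite mask meant for slab-internal allocations, while __GFP_THISNODE is the single "stay on this node" bit that a caller combining it with GFP_KERNEL actually wants. Approximate historical definitions, for illustration only (exact values are version-dependent):

    #ifdef CONFIG_NUMA
    #define GFP_THISNODE    (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
    #else
    #define GFP_THISNODE    ((__force gfp_t)0)  /* no effect on !NUMA builds */
    #endif
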
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f4bcb3cc21c..adf98622cb32 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -1180,8 +1180,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, | |||
| 1180 | return ret; | 1180 | return ret; |
| 1181 | } | 1181 | } |
| 1182 | 1182 | ||
| 1183 | asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | 1183 | COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, |
| 1184 | compat_long_t addr, compat_long_t data) | 1184 | compat_long_t, addr, compat_long_t, data) |
| 1185 | { | 1185 | { |
| 1186 | struct task_struct *child; | 1186 | struct task_struct *child; |
| 1187 | long ret; | 1187 | long ret; |
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 01e9ec37a3e3..807ccfbf69b3 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | obj-y += update.o srcu.o | 1 | obj-y += update.o srcu.o |
| 2 | obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o | 2 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
| 3 | obj-$(CONFIG_TREE_RCU) += tree.o | 3 | obj-$(CONFIG_TREE_RCU) += tree.o |
| 4 | obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o | 4 | obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o |
| 5 | obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o | 5 | obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o |
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 79c3877e9c5b..bfda2726ca45 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright IBM Corporation, 2011 | 18 | * Copyright IBM Corporation, 2011 |
| 19 | * | 19 | * |
| @@ -23,6 +23,7 @@ | |||
| 23 | #ifndef __LINUX_RCU_H | 23 | #ifndef __LINUX_RCU_H |
| 24 | #define __LINUX_RCU_H | 24 | #define __LINUX_RCU_H |
| 25 | 25 | ||
| 26 | #include <trace/events/rcu.h> | ||
| 26 | #ifdef CONFIG_RCU_TRACE | 27 | #ifdef CONFIG_RCU_TRACE |
| 27 | #define RCU_TRACE(stmt) stmt | 28 | #define RCU_TRACE(stmt) stmt |
| 28 | #else /* #ifdef CONFIG_RCU_TRACE */ | 29 | #else /* #ifdef CONFIG_RCU_TRACE */ |
| @@ -116,8 +117,6 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) | |||
| 116 | } | 117 | } |
| 117 | } | 118 | } |
| 118 | 119 | ||
| 119 | extern int rcu_expedited; | ||
| 120 | |||
| 121 | #ifdef CONFIG_RCU_STALL_COMMON | 120 | #ifdef CONFIG_RCU_STALL_COMMON |
| 122 | 121 | ||
| 123 | extern int rcu_cpu_stall_suppress; | 122 | extern int rcu_cpu_stall_suppress; |
diff --git a/kernel/rcu/torture.c b/kernel/rcu/rcutorture.c index 732f8ae3086a..bd30bc61bc05 100644 --- a/kernel/rcu/torture.c +++ b/kernel/rcu/rcutorture.c | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright (C) IBM Corporation, 2005, 2006 | 18 | * Copyright (C) IBM Corporation, 2005, 2006 |
| 19 | * | 19 | * |
| @@ -48,110 +48,58 @@ | |||
| 48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
| 49 | #include <linux/trace_clock.h> | 49 | #include <linux/trace_clock.h> |
| 50 | #include <asm/byteorder.h> | 50 | #include <asm/byteorder.h> |
| 51 | #include <linux/torture.h> | ||
| 51 | 52 | ||
| 52 | MODULE_LICENSE("GPL"); | 53 | MODULE_LICENSE("GPL"); |
| 53 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); | 54 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); |
| 54 | 55 | ||
| 55 | MODULE_ALIAS("rcutorture"); | 56 | |
| 56 | #ifdef MODULE_PARAM_PREFIX | 57 | torture_param(int, fqs_duration, 0, |
| 57 | #undef MODULE_PARAM_PREFIX | 58 | "Duration of fqs bursts (us), 0 to disable"); |
| 58 | #endif | 59 | torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); |
| 59 | #define MODULE_PARAM_PREFIX "rcutorture." | 60 | torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); |
| 60 | 61 | torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); | |
| 61 | static int fqs_duration; | 62 | torture_param(bool, gp_normal, false, |
| 62 | module_param(fqs_duration, int, 0444); | 63 | "Use normal (non-expedited) GP wait primitives"); |
| 63 | MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); | 64 | torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); |
| 64 | static int fqs_holdoff; | 65 | torture_param(int, n_barrier_cbs, 0, |
| 65 | module_param(fqs_holdoff, int, 0444); | 66 | "# of callbacks/kthreads for barrier testing"); |
| 66 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 67 | torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads"); |
| 67 | static int fqs_stutter = 3; | 68 | torture_param(int, nreaders, -1, "Number of RCU reader threads"); |
| 68 | module_param(fqs_stutter, int, 0444); | 69 | torture_param(int, object_debug, 0, |
| 69 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 70 | "Enable debug-object double call_rcu() testing"); |
| 70 | static bool gp_exp; | 71 | torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); |
| 71 | module_param(gp_exp, bool, 0444); | 72 | torture_param(int, onoff_interval, 0, |
| 72 | MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives"); | 73 | "Time between CPU hotplugs (s), 0=disable"); |
| 73 | static bool gp_normal; | 74 | torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles"); |
| 74 | module_param(gp_normal, bool, 0444); | 75 | torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); |
| 75 | MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives"); | 76 | torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); |
| 76 | static int irqreader = 1; | 77 | torture_param(int, stall_cpu_holdoff, 10, |
| 77 | module_param(irqreader, int, 0444); | 78 | "Time to wait before starting stall (s)."); |
| 78 | MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); | 79 | torture_param(int, stat_interval, 60, |
| 79 | static int n_barrier_cbs; | 80 | "Number of seconds between stats printk()s"); |
| 80 | module_param(n_barrier_cbs, int, 0444); | 81 | torture_param(int, stutter, 5, "Number of seconds to run/halt test"); |
| 81 | MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); | 82 | torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); |
| 82 | static int nfakewriters = 4; | 83 | torture_param(int, test_boost_duration, 4, |
| 83 | module_param(nfakewriters, int, 0444); | 84 | "Duration of each boost test, seconds."); |
| 84 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); | 85 | torture_param(int, test_boost_interval, 7, |
| 85 | static int nreaders = -1; | 86 | "Interval between boost tests, seconds."); |
| 86 | module_param(nreaders, int, 0444); | 87 | torture_param(bool, test_no_idle_hz, true, |
| 87 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); | 88 | "Test support for tickless idle CPUs"); |
| 88 | static int object_debug; | 89 | torture_param(bool, verbose, true, |
| 89 | module_param(object_debug, int, 0444); | 90 | "Enable verbose debugging printk()s"); |
| 90 | MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing"); | 91 | |
| 91 | static int onoff_holdoff; | ||
| 92 | module_param(onoff_holdoff, int, 0444); | ||
| 93 | MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)"); | ||
| 94 | static int onoff_interval; | ||
| 95 | module_param(onoff_interval, int, 0444); | ||
| 96 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); | ||
| 97 | static int shuffle_interval = 3; | ||
| 98 | module_param(shuffle_interval, int, 0444); | ||
| 99 | MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); | ||
| 100 | static int shutdown_secs; | ||
| 101 | module_param(shutdown_secs, int, 0444); | ||
| 102 | MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable."); | ||
| 103 | static int stall_cpu; | ||
| 104 | module_param(stall_cpu, int, 0444); | ||
| 105 | MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable."); | ||
| 106 | static int stall_cpu_holdoff = 10; | ||
| 107 | module_param(stall_cpu_holdoff, int, 0444); | ||
| 108 | MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s)."); | ||
| 109 | static int stat_interval = 60; | ||
| 110 | module_param(stat_interval, int, 0644); | ||
| 111 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); | ||
| 112 | static int stutter = 5; | ||
| 113 | module_param(stutter, int, 0444); | ||
| 114 | MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); | ||
| 115 | static int test_boost = 1; | ||
| 116 | module_param(test_boost, int, 0444); | ||
| 117 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); | ||
| 118 | static int test_boost_duration = 4; | ||
| 119 | module_param(test_boost_duration, int, 0444); | ||
| 120 | MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); | ||
| 121 | static int test_boost_interval = 7; | ||
| 122 | module_param(test_boost_interval, int, 0444); | ||
| 123 | MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); | ||
| 124 | static bool test_no_idle_hz = true; | ||
| 125 | module_param(test_no_idle_hz, bool, 0444); | ||
| 126 | MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); | ||
| 127 | static char *torture_type = "rcu"; | 92 | static char *torture_type = "rcu"; |
| 128 | module_param(torture_type, charp, 0444); | 93 | module_param(torture_type, charp, 0444); |
| 129 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); | 94 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); |
| 130 | static bool verbose; | ||
| 131 | module_param(verbose, bool, 0444); | ||
| 132 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); | ||
| 133 | |||
| 134 | #define TORTURE_FLAG "-torture:" | ||
| 135 | #define PRINTK_STRING(s) \ | ||
| 136 | do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) | ||
| 137 | #define VERBOSE_PRINTK_STRING(s) \ | ||
| 138 | do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) | ||
| 139 | #define VERBOSE_PRINTK_ERRSTRING(s) \ | ||
| 140 | do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) | ||
| 141 | 95 | ||
| 142 | static int nrealreaders; | 96 | static int nrealreaders; |
| 143 | static struct task_struct *writer_task; | 97 | static struct task_struct *writer_task; |
| 144 | static struct task_struct **fakewriter_tasks; | 98 | static struct task_struct **fakewriter_tasks; |
| 145 | static struct task_struct **reader_tasks; | 99 | static struct task_struct **reader_tasks; |
| 146 | static struct task_struct *stats_task; | 100 | static struct task_struct *stats_task; |
| 147 | static struct task_struct *shuffler_task; | ||
| 148 | static struct task_struct *stutter_task; | ||
| 149 | static struct task_struct *fqs_task; | 101 | static struct task_struct *fqs_task; |
| 150 | static struct task_struct *boost_tasks[NR_CPUS]; | 102 | static struct task_struct *boost_tasks[NR_CPUS]; |
| 151 | static struct task_struct *shutdown_task; | ||
| 152 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 153 | static struct task_struct *onoff_task; | ||
| 154 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 155 | static struct task_struct *stall_task; | 103 | static struct task_struct *stall_task; |
| 156 | static struct task_struct **barrier_cbs_tasks; | 104 | static struct task_struct **barrier_cbs_tasks; |
| 157 | static struct task_struct *barrier_task; | 105 | static struct task_struct *barrier_task; |
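
The torture_param() conversions above collapse the repeated static-variable / module_param() / MODULE_PARM_DESC() triple into a single declaration per parameter. A sketch of how such a macro can be defined, consistent with the usage shown here (the real definition lives in include/linux/torture.h and may differ in detail):

    /* Sketch: declare a read-only (0444) module parameter together with
     * its description in one statement. */
    #define torture_param(type, name, init, msg) \
            static type name = init; \
            module_param(name, type, 0444); \
            MODULE_PARM_DESC(name, msg)
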
| @@ -170,10 +118,10 @@ static struct rcu_torture __rcu *rcu_torture_current; | |||
| 170 | static unsigned long rcu_torture_current_version; | 118 | static unsigned long rcu_torture_current_version; |
| 171 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; | 119 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; |
| 172 | static DEFINE_SPINLOCK(rcu_torture_lock); | 120 | static DEFINE_SPINLOCK(rcu_torture_lock); |
| 173 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = | 121 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], |
| 174 | { 0 }; | 122 | rcu_torture_count) = { 0 }; |
| 175 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = | 123 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], |
| 176 | { 0 }; | 124 | rcu_torture_batch) = { 0 }; |
| 177 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; | 125 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; |
| 178 | static atomic_t n_rcu_torture_alloc; | 126 | static atomic_t n_rcu_torture_alloc; |
| 179 | static atomic_t n_rcu_torture_alloc_fail; | 127 | static atomic_t n_rcu_torture_alloc_fail; |
| @@ -186,22 +134,9 @@ static long n_rcu_torture_boost_rterror; | |||
| 186 | static long n_rcu_torture_boost_failure; | 134 | static long n_rcu_torture_boost_failure; |
| 187 | static long n_rcu_torture_boosts; | 135 | static long n_rcu_torture_boosts; |
| 188 | static long n_rcu_torture_timers; | 136 | static long n_rcu_torture_timers; |
| 189 | static long n_offline_attempts; | ||
| 190 | static long n_offline_successes; | ||
| 191 | static unsigned long sum_offline; | ||
| 192 | static int min_offline = -1; | ||
| 193 | static int max_offline; | ||
| 194 | static long n_online_attempts; | ||
| 195 | static long n_online_successes; | ||
| 196 | static unsigned long sum_online; | ||
| 197 | static int min_online = -1; | ||
| 198 | static int max_online; | ||
| 199 | static long n_barrier_attempts; | 137 | static long n_barrier_attempts; |
| 200 | static long n_barrier_successes; | 138 | static long n_barrier_successes; |
| 201 | static struct list_head rcu_torture_removed; | 139 | static struct list_head rcu_torture_removed; |
| 202 | static cpumask_var_t shuffle_tmp_mask; | ||
| 203 | |||
| 204 | static int stutter_pause_test; | ||
| 205 | 140 | ||
| 206 | #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) | 141 | #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) |
| 207 | #define RCUTORTURE_RUNNABLE_INIT 1 | 142 | #define RCUTORTURE_RUNNABLE_INIT 1 |
| @@ -232,7 +167,6 @@ static u64 notrace rcu_trace_clock_local(void) | |||
| 232 | } | 167 | } |
| 233 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 168 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
| 234 | 169 | ||
| 235 | static unsigned long shutdown_time; /* jiffies to system shutdown. */ | ||
| 236 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | 170 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ |
| 237 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | 171 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ |
| 238 | /* and boost task create/destroy. */ | 172 | /* and boost task create/destroy. */ |
| @@ -242,51 +176,6 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ | |||
| 242 | static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ | 176 | static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ |
| 243 | static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); | 177 | static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); |
| 244 | 178 | ||
| 245 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ | ||
| 246 | |||
| 247 | #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ | ||
| 248 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ | ||
| 249 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ | ||
| 250 | static int fullstop = FULLSTOP_RMMOD; | ||
| 251 | /* | ||
| 252 | * Protect fullstop transitions and spawning of kthreads. | ||
| 253 | */ | ||
| 254 | static DEFINE_MUTEX(fullstop_mutex); | ||
| 255 | |||
| 256 | /* Forward reference. */ | ||
| 257 | static void rcu_torture_cleanup(void); | ||
| 258 | |||
| 259 | /* | ||
| 260 | * Detect and respond to a system shutdown. | ||
| 261 | */ | ||
| 262 | static int | ||
| 263 | rcutorture_shutdown_notify(struct notifier_block *unused1, | ||
| 264 | unsigned long unused2, void *unused3) | ||
| 265 | { | ||
| 266 | mutex_lock(&fullstop_mutex); | ||
| 267 | if (fullstop == FULLSTOP_DONTSTOP) | ||
| 268 | fullstop = FULLSTOP_SHUTDOWN; | ||
| 269 | else | ||
| 270 | pr_warn(/* but going down anyway, so... */ | ||
| 271 | "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); | ||
| 272 | mutex_unlock(&fullstop_mutex); | ||
| 273 | return NOTIFY_DONE; | ||
| 274 | } | ||
| 275 | |||
| 276 | /* | ||
| 277 | * Absorb kthreads into a kernel function that won't return, so that | ||
| 278 | * they won't ever access module text or data again. | ||
| 279 | */ | ||
| 280 | static void rcutorture_shutdown_absorb(const char *title) | ||
| 281 | { | ||
| 282 | if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { | ||
| 283 | pr_notice( | ||
| 284 | "rcutorture thread %s parking due to system shutdown\n", | ||
| 285 | title); | ||
| 286 | schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | /* | 179 | /* |
| 291 | * Allocate an element from the rcu_tortures pool. | 180 | * Allocate an element from the rcu_tortures pool. |
| 292 | */ | 181 | */ |
| @@ -320,44 +209,6 @@ rcu_torture_free(struct rcu_torture *p) | |||
| 320 | spin_unlock_bh(&rcu_torture_lock); | 209 | spin_unlock_bh(&rcu_torture_lock); |
| 321 | } | 210 | } |
| 322 | 211 | ||
| 323 | struct rcu_random_state { | ||
| 324 | unsigned long rrs_state; | ||
| 325 | long rrs_count; | ||
| 326 | }; | ||
| 327 | |||
| 328 | #define RCU_RANDOM_MULT 39916801 /* prime */ | ||
| 329 | #define RCU_RANDOM_ADD 479001701 /* prime */ | ||
| 330 | #define RCU_RANDOM_REFRESH 10000 | ||
| 331 | |||
| 332 | #define DEFINE_RCU_RANDOM(name) struct rcu_random_state name = { 0, 0 } | ||
| 333 | |||
| 334 | /* | ||
| 335 | * Crude but fast random-number generator. Uses a linear congruential | ||
| 336 | * generator, with occasional help from cpu_clock(). | ||
| 337 | */ | ||
| 338 | static unsigned long | ||
| 339 | rcu_random(struct rcu_random_state *rrsp) | ||
| 340 | { | ||
| 341 | if (--rrsp->rrs_count < 0) { | ||
| 342 | rrsp->rrs_state += (unsigned long)local_clock(); | ||
| 343 | rrsp->rrs_count = RCU_RANDOM_REFRESH; | ||
| 344 | } | ||
| 345 | rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD; | ||
| 346 | return swahw32(rrsp->rrs_state); | ||
| 347 | } | ||
| 348 | |||
| 349 | static void | ||
| 350 | rcu_stutter_wait(const char *title) | ||
| 351 | { | ||
| 352 | while (stutter_pause_test || !rcutorture_runnable) { | ||
| 353 | if (rcutorture_runnable) | ||
| 354 | schedule_timeout_interruptible(1); | ||
| 355 | else | ||
| 356 | schedule_timeout_interruptible(round_jiffies_relative(HZ)); | ||
| 357 | rcutorture_shutdown_absorb(title); | ||
| 358 | } | ||
| 359 | } | ||
| 360 | |||
| 361 | /* | 212 | /* |
| 362 | * Operations vector for selecting different types of tests. | 213 | * Operations vector for selecting different types of tests. |
| 363 | */ | 214 | */ |
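
The rcu_random() helper and its per-caller state deleted above reappear in the shared torture infrastructure as torture_random() and struct torture_random_state, used through DEFINE_TORTURE_RANDOM() in the hunks below. Presumably the replacement mirrors the removed generator; a sketch under that assumption (field names and constants are modeled on the deleted code, not taken from kernel/torture.c):

    /* Assumed shape of the shared generator: a linear congruential
     * generator reseeded occasionally from the local clock. */
    struct torture_random_state {
            unsigned long trs_state;
            long trs_count;
    };

    unsigned long torture_random(struct torture_random_state *trsp)
    {
            if (--trsp->trs_count < 0) {
                    trsp->trs_state += (unsigned long)local_clock();
                    trsp->trs_count = 10000;        /* refresh interval */
            }
            trsp->trs_state = trsp->trs_state * 39916801 + 479001701;
            return swahw32(trsp->trs_state);
    }
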
| @@ -365,7 +216,7 @@ rcu_stutter_wait(const char *title) | |||
| 365 | struct rcu_torture_ops { | 216 | struct rcu_torture_ops { |
| 366 | void (*init)(void); | 217 | void (*init)(void); |
| 367 | int (*readlock)(void); | 218 | int (*readlock)(void); |
| 368 | void (*read_delay)(struct rcu_random_state *rrsp); | 219 | void (*read_delay)(struct torture_random_state *rrsp); |
| 369 | void (*readunlock)(int idx); | 220 | void (*readunlock)(int idx); |
| 370 | int (*completed)(void); | 221 | int (*completed)(void); |
| 371 | void (*deferred_free)(struct rcu_torture *p); | 222 | void (*deferred_free)(struct rcu_torture *p); |
| @@ -392,7 +243,7 @@ static int rcu_torture_read_lock(void) __acquires(RCU) | |||
| 392 | return 0; | 243 | return 0; |
| 393 | } | 244 | } |
| 394 | 245 | ||
| 395 | static void rcu_read_delay(struct rcu_random_state *rrsp) | 246 | static void rcu_read_delay(struct torture_random_state *rrsp) |
| 396 | { | 247 | { |
| 397 | const unsigned long shortdelay_us = 200; | 248 | const unsigned long shortdelay_us = 200; |
| 398 | const unsigned long longdelay_ms = 50; | 249 | const unsigned long longdelay_ms = 50; |
| @@ -401,12 +252,13 @@ static void rcu_read_delay(struct rcu_random_state *rrsp) | |||
| 401 | * period, and we want a long delay occasionally to trigger | 252 | * period, and we want a long delay occasionally to trigger |
| 402 | * force_quiescent_state. */ | 253 | * force_quiescent_state. */ |
| 403 | 254 | ||
| 404 | if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) | 255 | if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) |
| 405 | mdelay(longdelay_ms); | 256 | mdelay(longdelay_ms); |
| 406 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) | 257 | if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) |
| 407 | udelay(shortdelay_us); | 258 | udelay(shortdelay_us); |
| 408 | #ifdef CONFIG_PREEMPT | 259 | #ifdef CONFIG_PREEMPT |
| 409 | if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) | 260 | if (!preempt_count() && |
| 261 | !(torture_random(rrsp) % (nrealreaders * 20000))) | ||
| 410 | preempt_schedule(); /* No QS if preempt_disable() in effect */ | 262 | preempt_schedule(); /* No QS if preempt_disable() in effect */ |
| 411 | #endif | 263 | #endif |
| 412 | } | 264 | } |
| @@ -427,7 +279,7 @@ rcu_torture_cb(struct rcu_head *p) | |||
| 427 | int i; | 279 | int i; |
| 428 | struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); | 280 | struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); |
| 429 | 281 | ||
| 430 | if (fullstop != FULLSTOP_DONTSTOP) { | 282 | if (torture_must_stop_irq()) { |
| 431 | /* Test is ending, just drop callbacks on the floor. */ | 283 | /* Test is ending, just drop callbacks on the floor. */ |
| 432 | /* The next initialization will pick up the pieces. */ | 284 | /* The next initialization will pick up the pieces. */ |
| 433 | return; | 285 | return; |
| @@ -520,6 +372,48 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
| 520 | }; | 372 | }; |
| 521 | 373 | ||
| 522 | /* | 374 | /* |
| 375 | * Don't even think about trying any of these in real life!!! | ||
| 376 | * The names include "busted", and they really mean it! | ||
| 377 | * The only purpose of these functions is to provide a buggy RCU | ||
| 378 | * implementation to make sure that rcutorture correctly emits | ||
| 379 | * buggy-RCU error messages. | ||
| 380 | */ | ||
| 381 | static void rcu_busted_torture_deferred_free(struct rcu_torture *p) | ||
| 382 | { | ||
| 383 | /* This is a deliberate bug for testing purposes only! */ | ||
| 384 | rcu_torture_cb(&p->rtort_rcu); | ||
| 385 | } | ||
| 386 | |||
| 387 | static void synchronize_rcu_busted(void) | ||
| 388 | { | ||
| 389 | /* This is a deliberate bug for testing purposes only! */ | ||
| 390 | } | ||
| 391 | |||
| 392 | static void | ||
| 393 | call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
| 394 | { | ||
| 395 | /* This is a deliberate bug for testing purposes only! */ | ||
| 396 | func(head); | ||
| 397 | } | ||
| 398 | |||
| 399 | static struct rcu_torture_ops rcu_busted_ops = { | ||
| 400 | .init = rcu_sync_torture_init, | ||
| 401 | .readlock = rcu_torture_read_lock, | ||
| 402 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | ||
| 403 | .readunlock = rcu_torture_read_unlock, | ||
| 404 | .completed = rcu_no_completed, | ||
| 405 | .deferred_free = rcu_busted_torture_deferred_free, | ||
| 406 | .sync = synchronize_rcu_busted, | ||
| 407 | .exp_sync = synchronize_rcu_busted, | ||
| 408 | .call = call_rcu_busted, | ||
| 409 | .cb_barrier = NULL, | ||
| 410 | .fqs = NULL, | ||
| 411 | .stats = NULL, | ||
| 412 | .irq_capable = 1, | ||
| 413 | .name = "rcu_busted" | ||
| 414 | }; | ||
| 415 | |||
| 416 | /* | ||
| 523 | * Definitions for srcu torture testing. | 417 | * Definitions for srcu torture testing. |
| 524 | */ | 418 | */ |
| 525 | 419 | ||
| @@ -530,7 +424,7 @@ static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) | |||
| 530 | return srcu_read_lock(&srcu_ctl); | 424 | return srcu_read_lock(&srcu_ctl); |
| 531 | } | 425 | } |
| 532 | 426 | ||
| 533 | static void srcu_read_delay(struct rcu_random_state *rrsp) | 427 | static void srcu_read_delay(struct torture_random_state *rrsp) |
| 534 | { | 428 | { |
| 535 | long delay; | 429 | long delay; |
| 536 | const long uspertick = 1000000 / HZ; | 430 | const long uspertick = 1000000 / HZ; |
| @@ -538,7 +432,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) | |||
| 538 | 432 | ||
| 539 | /* We want there to be long-running readers, but not all the time. */ | 433 | /* We want there to be long-running readers, but not all the time. */ |
| 540 | 434 | ||
| 541 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); | 435 | delay = torture_random(rrsp) % |
| 436 | (nrealreaders * 2 * longdelay * uspertick); | ||
| 542 | if (!delay) | 437 | if (!delay) |
| 543 | schedule_timeout_interruptible(longdelay); | 438 | schedule_timeout_interruptible(longdelay); |
| 544 | else | 439 | else |
| @@ -677,12 +572,12 @@ static int rcu_torture_boost(void *arg) | |||
| 677 | struct rcu_boost_inflight rbi = { .inflight = 0 }; | 572 | struct rcu_boost_inflight rbi = { .inflight = 0 }; |
| 678 | struct sched_param sp; | 573 | struct sched_param sp; |
| 679 | 574 | ||
| 680 | VERBOSE_PRINTK_STRING("rcu_torture_boost started"); | 575 | VERBOSE_TOROUT_STRING("rcu_torture_boost started"); |
| 681 | 576 | ||
| 682 | /* Set real-time priority. */ | 577 | /* Set real-time priority. */ |
| 683 | sp.sched_priority = 1; | 578 | sp.sched_priority = 1; |
| 684 | if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { | 579 | if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { |
| 685 | VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); | 580 | VERBOSE_TOROUT_STRING("rcu_torture_boost RT prio failed!"); |
| 686 | n_rcu_torture_boost_rterror++; | 581 | n_rcu_torture_boost_rterror++; |
| 687 | } | 582 | } |
| 688 | 583 | ||
| @@ -693,9 +588,8 @@ static int rcu_torture_boost(void *arg) | |||
| 693 | oldstarttime = boost_starttime; | 588 | oldstarttime = boost_starttime; |
| 694 | while (ULONG_CMP_LT(jiffies, oldstarttime)) { | 589 | while (ULONG_CMP_LT(jiffies, oldstarttime)) { |
| 695 | schedule_timeout_interruptible(oldstarttime - jiffies); | 590 | schedule_timeout_interruptible(oldstarttime - jiffies); |
| 696 | rcu_stutter_wait("rcu_torture_boost"); | 591 | stutter_wait("rcu_torture_boost"); |
| 697 | if (kthread_should_stop() || | 592 | if (torture_must_stop()) |
| 698 | fullstop != FULLSTOP_DONTSTOP) | ||
| 699 | goto checkwait; | 593 | goto checkwait; |
| 700 | } | 594 | } |
| 701 | 595 | ||
| @@ -710,15 +604,14 @@ static int rcu_torture_boost(void *arg) | |||
| 710 | call_rcu(&rbi.rcu, rcu_torture_boost_cb); | 604 | call_rcu(&rbi.rcu, rcu_torture_boost_cb); |
| 711 | if (jiffies - call_rcu_time > | 605 | if (jiffies - call_rcu_time > |
| 712 | test_boost_duration * HZ - HZ / 2) { | 606 | test_boost_duration * HZ - HZ / 2) { |
| 713 | VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); | 607 | VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed"); |
| 714 | n_rcu_torture_boost_failure++; | 608 | n_rcu_torture_boost_failure++; |
| 715 | } | 609 | } |
| 716 | call_rcu_time = jiffies; | 610 | call_rcu_time = jiffies; |
| 717 | } | 611 | } |
| 718 | cond_resched(); | 612 | cond_resched(); |
| 719 | rcu_stutter_wait("rcu_torture_boost"); | 613 | stutter_wait("rcu_torture_boost"); |
| 720 | if (kthread_should_stop() || | 614 | if (torture_must_stop()) |
| 721 | fullstop != FULLSTOP_DONTSTOP) | ||
| 722 | goto checkwait; | 615 | goto checkwait; |
| 723 | } | 616 | } |
| 724 | 617 | ||
| @@ -742,16 +635,17 @@ static int rcu_torture_boost(void *arg) | |||
| 742 | } | 635 | } |
| 743 | 636 | ||
| 744 | /* Go do the stutter. */ | 637 | /* Go do the stutter. */ |
| 745 | checkwait: rcu_stutter_wait("rcu_torture_boost"); | 638 | checkwait: stutter_wait("rcu_torture_boost"); |
| 746 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 639 | } while (!torture_must_stop()); |
| 747 | 640 | ||
| 748 | /* Clean up and exit. */ | 641 | /* Clean up and exit. */ |
| 749 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | 642 | while (!kthread_should_stop() || rbi.inflight) { |
| 750 | rcutorture_shutdown_absorb("rcu_torture_boost"); | 643 | torture_shutdown_absorb("rcu_torture_boost"); |
| 751 | while (!kthread_should_stop() || rbi.inflight) | ||
| 752 | schedule_timeout_uninterruptible(1); | 644 | schedule_timeout_uninterruptible(1); |
| 645 | } | ||
| 753 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | 646 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ |
| 754 | destroy_rcu_head_on_stack(&rbi.rcu); | 647 | destroy_rcu_head_on_stack(&rbi.rcu); |
| 648 | torture_kthread_stopping("rcu_torture_boost"); | ||
| 755 | return 0; | 649 | return 0; |
| 756 | } | 650 | } |
| 757 | 651 | ||
| @@ -766,7 +660,7 @@ rcu_torture_fqs(void *arg) | |||
| 766 | unsigned long fqs_resume_time; | 660 | unsigned long fqs_resume_time; |
| 767 | int fqs_burst_remaining; | 661 | int fqs_burst_remaining; |
| 768 | 662 | ||
| 769 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); | 663 | VERBOSE_TOROUT_STRING("rcu_torture_fqs task started"); |
| 770 | do { | 664 | do { |
| 771 | fqs_resume_time = jiffies + fqs_stutter * HZ; | 665 | fqs_resume_time = jiffies + fqs_stutter * HZ; |
| 772 | while (ULONG_CMP_LT(jiffies, fqs_resume_time) && | 666 | while (ULONG_CMP_LT(jiffies, fqs_resume_time) && |
| @@ -780,12 +674,9 @@ rcu_torture_fqs(void *arg) | |||
| 780 | udelay(fqs_holdoff); | 674 | udelay(fqs_holdoff); |
| 781 | fqs_burst_remaining -= fqs_holdoff; | 675 | fqs_burst_remaining -= fqs_holdoff; |
| 782 | } | 676 | } |
| 783 | rcu_stutter_wait("rcu_torture_fqs"); | 677 | stutter_wait("rcu_torture_fqs"); |
| 784 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 678 | } while (!torture_must_stop()); |
| 785 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); | 679 | torture_kthread_stopping("rcu_torture_fqs"); |
| 786 | rcutorture_shutdown_absorb("rcu_torture_fqs"); | ||
| 787 | while (!kthread_should_stop()) | ||
| 788 | schedule_timeout_uninterruptible(1); | ||
| 789 | return 0; | 680 | return 0; |
| 790 | } | 681 | } |
| 791 | 682 | ||
| @@ -802,10 +693,10 @@ rcu_torture_writer(void *arg) | |||
| 802 | struct rcu_torture *rp; | 693 | struct rcu_torture *rp; |
| 803 | struct rcu_torture *rp1; | 694 | struct rcu_torture *rp1; |
| 804 | struct rcu_torture *old_rp; | 695 | struct rcu_torture *old_rp; |
| 805 | static DEFINE_RCU_RANDOM(rand); | 696 | static DEFINE_TORTURE_RANDOM(rand); |
| 806 | 697 | ||
| 807 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); | 698 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); |
| 808 | set_user_nice(current, 19); | 699 | set_user_nice(current, MAX_NICE); |
| 809 | 700 | ||
| 810 | do { | 701 | do { |
| 811 | schedule_timeout_uninterruptible(1); | 702 | schedule_timeout_uninterruptible(1); |
| @@ -813,7 +704,7 @@ rcu_torture_writer(void *arg) | |||
| 813 | if (rp == NULL) | 704 | if (rp == NULL) |
| 814 | continue; | 705 | continue; |
| 815 | rp->rtort_pipe_count = 0; | 706 | rp->rtort_pipe_count = 0; |
| 816 | udelay(rcu_random(&rand) & 0x3ff); | 707 | udelay(torture_random(&rand) & 0x3ff); |
| 817 | old_rp = rcu_dereference_check(rcu_torture_current, | 708 | old_rp = rcu_dereference_check(rcu_torture_current, |
| 818 | current == writer_task); | 709 | current == writer_task); |
| 819 | rp->rtort_mbtest = 1; | 710 | rp->rtort_mbtest = 1; |
| @@ -826,7 +717,7 @@ rcu_torture_writer(void *arg) | |||
| 826 | atomic_inc(&rcu_torture_wcount[i]); | 717 | atomic_inc(&rcu_torture_wcount[i]); |
| 827 | old_rp->rtort_pipe_count++; | 718 | old_rp->rtort_pipe_count++; |
| 828 | if (gp_normal == gp_exp) | 719 | if (gp_normal == gp_exp) |
| 829 | exp = !!(rcu_random(&rand) & 0x80); | 720 | exp = !!(torture_random(&rand) & 0x80); |
| 830 | else | 721 | else |
| 831 | exp = gp_exp; | 722 | exp = gp_exp; |
| 832 | if (!exp) { | 723 | if (!exp) { |
| @@ -852,12 +743,9 @@ rcu_torture_writer(void *arg) | |||
| 852 | } | 743 | } |
| 853 | } | 744 | } |
| 854 | rcutorture_record_progress(++rcu_torture_current_version); | 745 | rcutorture_record_progress(++rcu_torture_current_version); |
| 855 | rcu_stutter_wait("rcu_torture_writer"); | 746 | stutter_wait("rcu_torture_writer"); |
| 856 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 747 | } while (!torture_must_stop()); |
| 857 | VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); | 748 | torture_kthread_stopping("rcu_torture_writer"); |
| 858 | rcutorture_shutdown_absorb("rcu_torture_writer"); | ||
| 859 | while (!kthread_should_stop()) | ||
| 860 | schedule_timeout_uninterruptible(1); | ||
| 861 | return 0; | 749 | return 0; |
| 862 | } | 750 | } |
| 863 | 751 | ||
| @@ -868,19 +756,19 @@ rcu_torture_writer(void *arg) | |||
| 868 | static int | 756 | static int |
| 869 | rcu_torture_fakewriter(void *arg) | 757 | rcu_torture_fakewriter(void *arg) |
| 870 | { | 758 | { |
| 871 | DEFINE_RCU_RANDOM(rand); | 759 | DEFINE_TORTURE_RANDOM(rand); |
| 872 | 760 | ||
| 873 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); | 761 | VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started"); |
| 874 | set_user_nice(current, 19); | 762 | set_user_nice(current, MAX_NICE); |
| 875 | 763 | ||
| 876 | do { | 764 | do { |
| 877 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); | 765 | schedule_timeout_uninterruptible(1 + torture_random(&rand)%10); |
| 878 | udelay(rcu_random(&rand) & 0x3ff); | 766 | udelay(torture_random(&rand) & 0x3ff); |
| 879 | if (cur_ops->cb_barrier != NULL && | 767 | if (cur_ops->cb_barrier != NULL && |
| 880 | rcu_random(&rand) % (nfakewriters * 8) == 0) { | 768 | torture_random(&rand) % (nfakewriters * 8) == 0) { |
| 881 | cur_ops->cb_barrier(); | 769 | cur_ops->cb_barrier(); |
| 882 | } else if (gp_normal == gp_exp) { | 770 | } else if (gp_normal == gp_exp) { |
| 883 | if (rcu_random(&rand) & 0x80) | 771 | if (torture_random(&rand) & 0x80) |
| 884 | cur_ops->sync(); | 772 | cur_ops->sync(); |
| 885 | else | 773 | else |
| 886 | cur_ops->exp_sync(); | 774 | cur_ops->exp_sync(); |
| @@ -889,13 +777,10 @@ rcu_torture_fakewriter(void *arg) | |||
| 889 | } else { | 777 | } else { |
| 890 | cur_ops->exp_sync(); | 778 | cur_ops->exp_sync(); |
| 891 | } | 779 | } |
| 892 | rcu_stutter_wait("rcu_torture_fakewriter"); | 780 | stutter_wait("rcu_torture_fakewriter"); |
| 893 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 781 | } while (!torture_must_stop()); |
| 894 | 782 | ||
| 895 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); | 783 | torture_kthread_stopping("rcu_torture_fakewriter"); |
| 896 | rcutorture_shutdown_absorb("rcu_torture_fakewriter"); | ||
| 897 | while (!kthread_should_stop()) | ||
| 898 | schedule_timeout_uninterruptible(1); | ||
| 899 | return 0; | 784 | return 0; |
| 900 | } | 785 | } |
| 901 | 786 | ||
| @@ -921,7 +806,7 @@ static void rcu_torture_timer(unsigned long unused) | |||
| 921 | int idx; | 806 | int idx; |
| 922 | int completed; | 807 | int completed; |
| 923 | int completed_end; | 808 | int completed_end; |
| 924 | static DEFINE_RCU_RANDOM(rand); | 809 | static DEFINE_TORTURE_RANDOM(rand); |
| 925 | static DEFINE_SPINLOCK(rand_lock); | 810 | static DEFINE_SPINLOCK(rand_lock); |
| 926 | struct rcu_torture *p; | 811 | struct rcu_torture *p; |
| 927 | int pipe_count; | 812 | int pipe_count; |
| @@ -980,14 +865,14 @@ rcu_torture_reader(void *arg) | |||
| 980 | int completed; | 865 | int completed; |
| 981 | int completed_end; | 866 | int completed_end; |
| 982 | int idx; | 867 | int idx; |
| 983 | DEFINE_RCU_RANDOM(rand); | 868 | DEFINE_TORTURE_RANDOM(rand); |
| 984 | struct rcu_torture *p; | 869 | struct rcu_torture *p; |
| 985 | int pipe_count; | 870 | int pipe_count; |
| 986 | struct timer_list t; | 871 | struct timer_list t; |
| 987 | unsigned long long ts; | 872 | unsigned long long ts; |
| 988 | 873 | ||
| 989 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); | 874 | VERBOSE_TOROUT_STRING("rcu_torture_reader task started"); |
| 990 | set_user_nice(current, 19); | 875 | set_user_nice(current, MAX_NICE); |
| 991 | if (irqreader && cur_ops->irq_capable) | 876 | if (irqreader && cur_ops->irq_capable) |
| 992 | setup_timer_on_stack(&t, rcu_torture_timer, 0); | 877 | setup_timer_on_stack(&t, rcu_torture_timer, 0); |
| 993 | 878 | ||
| @@ -1034,14 +919,11 @@ rcu_torture_reader(void *arg) | |||
| 1034 | preempt_enable(); | 919 | preempt_enable(); |
| 1035 | cur_ops->readunlock(idx); | 920 | cur_ops->readunlock(idx); |
| 1036 | schedule(); | 921 | schedule(); |
| 1037 | rcu_stutter_wait("rcu_torture_reader"); | 922 | stutter_wait("rcu_torture_reader"); |
| 1038 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 923 | } while (!torture_must_stop()); |
| 1039 | VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); | ||
| 1040 | rcutorture_shutdown_absorb("rcu_torture_reader"); | ||
| 1041 | if (irqreader && cur_ops->irq_capable) | 924 | if (irqreader && cur_ops->irq_capable) |
| 1042 | del_timer_sync(&t); | 925 | del_timer_sync(&t); |
| 1043 | while (!kthread_should_stop()) | 926 | torture_kthread_stopping("rcu_torture_reader"); |
| 1044 | schedule_timeout_uninterruptible(1); | ||
| 1045 | return 0; | 927 | return 0; |
| 1046 | } | 928 | } |
| 1047 | 929 | ||
| @@ -1083,13 +965,7 @@ rcu_torture_printk(char *page) | |||
| 1083 | n_rcu_torture_boost_failure, | 965 | n_rcu_torture_boost_failure, |
| 1084 | n_rcu_torture_boosts, | 966 | n_rcu_torture_boosts, |
| 1085 | n_rcu_torture_timers); | 967 | n_rcu_torture_timers); |
| 1086 | page += sprintf(page, | 968 | page = torture_onoff_stats(page); |
| 1087 | "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", | ||
| 1088 | n_online_successes, n_online_attempts, | ||
| 1089 | n_offline_successes, n_offline_attempts, | ||
| 1090 | min_online, max_online, | ||
| 1091 | min_offline, max_offline, | ||
| 1092 | sum_online, sum_offline, HZ); | ||
| 1093 | page += sprintf(page, "barrier: %ld/%ld:%ld", | 969 | page += sprintf(page, "barrier: %ld/%ld:%ld", |
| 1094 | n_barrier_successes, | 970 | n_barrier_successes, |
| 1095 | n_barrier_attempts, | 971 | n_barrier_attempts, |
| @@ -1150,123 +1026,17 @@ rcu_torture_stats_print(void) | |||
| 1150 | /* | 1026 | /* |
| 1151 | * Periodically prints torture statistics, if periodic statistics printing | 1027 | * Periodically prints torture statistics, if periodic statistics printing |
| 1152 | * was specified via the stat_interval module parameter. | 1028 | * was specified via the stat_interval module parameter. |
| 1153 | * | ||
| 1154 | * No need to worry about fullstop here, since this one doesn't reference | ||
| 1155 | * volatile state or register callbacks. | ||
| 1156 | */ | 1029 | */ |
| 1157 | static int | 1030 | static int |
| 1158 | rcu_torture_stats(void *arg) | 1031 | rcu_torture_stats(void *arg) |
| 1159 | { | 1032 | { |
| 1160 | VERBOSE_PRINTK_STRING("rcu_torture_stats task started"); | 1033 | VERBOSE_TOROUT_STRING("rcu_torture_stats task started"); |
| 1161 | do { | 1034 | do { |
| 1162 | schedule_timeout_interruptible(stat_interval * HZ); | 1035 | schedule_timeout_interruptible(stat_interval * HZ); |
| 1163 | rcu_torture_stats_print(); | 1036 | rcu_torture_stats_print(); |
| 1164 | rcutorture_shutdown_absorb("rcu_torture_stats"); | 1037 | torture_shutdown_absorb("rcu_torture_stats"); |
| 1165 | } while (!kthread_should_stop()); | 1038 | } while (!torture_must_stop()); |
| 1166 | VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); | 1039 | torture_kthread_stopping("rcu_torture_stats"); |
| 1167 | return 0; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ | ||
| 1171 | |||
| 1172 | /* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case | ||
| 1173 | * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. | ||
| 1174 | */ | ||
| 1175 | static void rcu_torture_shuffle_tasks(void) | ||
| 1176 | { | ||
| 1177 | int i; | ||
| 1178 | |||
| 1179 | cpumask_setall(shuffle_tmp_mask); | ||
| 1180 | get_online_cpus(); | ||
| 1181 | |||
| 1182 | /* No point in shuffling if there is only one online CPU (ex: UP) */ | ||
| 1183 | if (num_online_cpus() == 1) { | ||
| 1184 | put_online_cpus(); | ||
| 1185 | return; | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | if (rcu_idle_cpu != -1) | ||
| 1189 | cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask); | ||
| 1190 | |||
| 1191 | set_cpus_allowed_ptr(current, shuffle_tmp_mask); | ||
| 1192 | |||
| 1193 | if (reader_tasks) { | ||
| 1194 | for (i = 0; i < nrealreaders; i++) | ||
| 1195 | if (reader_tasks[i]) | ||
| 1196 | set_cpus_allowed_ptr(reader_tasks[i], | ||
| 1197 | shuffle_tmp_mask); | ||
| 1198 | } | ||
| 1199 | if (fakewriter_tasks) { | ||
| 1200 | for (i = 0; i < nfakewriters; i++) | ||
| 1201 | if (fakewriter_tasks[i]) | ||
| 1202 | set_cpus_allowed_ptr(fakewriter_tasks[i], | ||
| 1203 | shuffle_tmp_mask); | ||
| 1204 | } | ||
| 1205 | if (writer_task) | ||
| 1206 | set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); | ||
| 1207 | if (stats_task) | ||
| 1208 | set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); | ||
| 1209 | if (stutter_task) | ||
| 1210 | set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask); | ||
| 1211 | if (fqs_task) | ||
| 1212 | set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask); | ||
| 1213 | if (shutdown_task) | ||
| 1214 | set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask); | ||
| 1215 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1216 | if (onoff_task) | ||
| 1217 | set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask); | ||
| 1218 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1219 | if (stall_task) | ||
| 1220 | set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask); | ||
| 1221 | if (barrier_cbs_tasks) | ||
| 1222 | for (i = 0; i < n_barrier_cbs; i++) | ||
| 1223 | if (barrier_cbs_tasks[i]) | ||
| 1224 | set_cpus_allowed_ptr(barrier_cbs_tasks[i], | ||
| 1225 | shuffle_tmp_mask); | ||
| 1226 | if (barrier_task) | ||
| 1227 | set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask); | ||
| 1228 | |||
| 1229 | if (rcu_idle_cpu == -1) | ||
| 1230 | rcu_idle_cpu = num_online_cpus() - 1; | ||
| 1231 | else | ||
| 1232 | rcu_idle_cpu--; | ||
| 1233 | |||
| 1234 | put_online_cpus(); | ||
| 1235 | } | ||
| 1236 | |||
| 1237 | /* Shuffle tasks across CPUs, with the intent of allowing each CPU in the | ||
| 1238 | * system to become idle at a time and cut off its timer ticks. This is meant | ||
| 1239 | * to test the support for such tickless idle CPU in RCU. | ||
| 1240 | */ | ||
| 1241 | static int | ||
| 1242 | rcu_torture_shuffle(void *arg) | ||
| 1243 | { | ||
| 1244 | VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started"); | ||
| 1245 | do { | ||
| 1246 | schedule_timeout_interruptible(shuffle_interval * HZ); | ||
| 1247 | rcu_torture_shuffle_tasks(); | ||
| 1248 | rcutorture_shutdown_absorb("rcu_torture_shuffle"); | ||
| 1249 | } while (!kthread_should_stop()); | ||
| 1250 | VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); | ||
| 1251 | return 0; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | /* Cause the rcutorture test to "stutter", starting and stopping all | ||
| 1255 | * threads periodically. | ||
| 1256 | */ | ||
| 1257 | static int | ||
| 1258 | rcu_torture_stutter(void *arg) | ||
| 1259 | { | ||
| 1260 | VERBOSE_PRINTK_STRING("rcu_torture_stutter task started"); | ||
| 1261 | do { | ||
| 1262 | schedule_timeout_interruptible(stutter * HZ); | ||
| 1263 | stutter_pause_test = 1; | ||
| 1264 | if (!kthread_should_stop()) | ||
| 1265 | schedule_timeout_interruptible(stutter * HZ); | ||
| 1266 | stutter_pause_test = 0; | ||
| 1267 | rcutorture_shutdown_absorb("rcu_torture_stutter"); | ||
| 1268 | } while (!kthread_should_stop()); | ||
| 1269 | VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); | ||
| 1270 | return 0; | 1040 | return 0; |
| 1271 | } | 1041 | } |
| 1272 | 1042 | ||
| @@ -1293,10 +1063,6 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) | |||
| 1293 | onoff_interval, onoff_holdoff); | 1063 | onoff_interval, onoff_holdoff); |
| 1294 | } | 1064 | } |
| 1295 | 1065 | ||
| 1296 | static struct notifier_block rcutorture_shutdown_nb = { | ||
| 1297 | .notifier_call = rcutorture_shutdown_notify, | ||
| 1298 | }; | ||
| 1299 | |||
| 1300 | static void rcutorture_booster_cleanup(int cpu) | 1066 | static void rcutorture_booster_cleanup(int cpu) |
| 1301 | { | 1067 | { |
| 1302 | struct task_struct *t; | 1068 | struct task_struct *t; |
| @@ -1304,14 +1070,12 @@ static void rcutorture_booster_cleanup(int cpu) | |||
| 1304 | if (boost_tasks[cpu] == NULL) | 1070 | if (boost_tasks[cpu] == NULL) |
| 1305 | return; | 1071 | return; |
| 1306 | mutex_lock(&boost_mutex); | 1072 | mutex_lock(&boost_mutex); |
| 1307 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); | ||
| 1308 | t = boost_tasks[cpu]; | 1073 | t = boost_tasks[cpu]; |
| 1309 | boost_tasks[cpu] = NULL; | 1074 | boost_tasks[cpu] = NULL; |
| 1310 | mutex_unlock(&boost_mutex); | 1075 | mutex_unlock(&boost_mutex); |
| 1311 | 1076 | ||
| 1312 | /* This must be outside of the mutex, otherwise deadlock! */ | 1077 | /* This must be outside of the mutex, otherwise deadlock! */ |
| 1313 | kthread_stop(t); | 1078 | torture_stop_kthread(rcu_torture_boost, t); |
| 1314 | boost_tasks[cpu] = NULL; | ||
| 1315 | } | 1079 | } |
| 1316 | 1080 | ||
| 1317 | static int rcutorture_booster_init(int cpu) | 1081 | static int rcutorture_booster_init(int cpu) |
| @@ -1323,13 +1087,13 @@ static int rcutorture_booster_init(int cpu) | |||
| 1323 | 1087 | ||
| 1324 | /* Don't allow time recalculation while creating a new task. */ | 1088 | /* Don't allow time recalculation while creating a new task. */ |
| 1325 | mutex_lock(&boost_mutex); | 1089 | mutex_lock(&boost_mutex); |
| 1326 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | 1090 | VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task"); |
| 1327 | boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, | 1091 | boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, |
| 1328 | cpu_to_node(cpu), | 1092 | cpu_to_node(cpu), |
| 1329 | "rcu_torture_boost"); | 1093 | "rcu_torture_boost"); |
| 1330 | if (IS_ERR(boost_tasks[cpu])) { | 1094 | if (IS_ERR(boost_tasks[cpu])) { |
| 1331 | retval = PTR_ERR(boost_tasks[cpu]); | 1095 | retval = PTR_ERR(boost_tasks[cpu]); |
| 1332 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | 1096 | VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed"); |
| 1333 | n_rcu_torture_boost_ktrerror++; | 1097 | n_rcu_torture_boost_ktrerror++; |
| 1334 | boost_tasks[cpu] = NULL; | 1098 | boost_tasks[cpu] = NULL; |
| 1335 | mutex_unlock(&boost_mutex); | 1099 | mutex_unlock(&boost_mutex); |
| @@ -1342,175 +1106,6 @@ static int rcutorture_booster_init(int cpu) | |||
| 1342 | } | 1106 | } |
| 1343 | 1107 | ||
| 1344 | /* | 1108 | /* |
| 1345 | * Cause the rcutorture test to shutdown the system after the test has | ||
| 1346 | * run for the time specified by the shutdown_secs module parameter. | ||
| 1347 | */ | ||
| 1348 | static int | ||
| 1349 | rcu_torture_shutdown(void *arg) | ||
| 1350 | { | ||
| 1351 | long delta; | ||
| 1352 | unsigned long jiffies_snap; | ||
| 1353 | |||
| 1354 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); | ||
| 1355 | jiffies_snap = ACCESS_ONCE(jiffies); | ||
| 1356 | while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && | ||
| 1357 | !kthread_should_stop()) { | ||
| 1358 | delta = shutdown_time - jiffies_snap; | ||
| 1359 | if (verbose) | ||
| 1360 | pr_alert("%s" TORTURE_FLAG | ||
| 1361 | "rcu_torture_shutdown task: %lu jiffies remaining\n", | ||
| 1362 | torture_type, delta); | ||
| 1363 | schedule_timeout_interruptible(delta); | ||
| 1364 | jiffies_snap = ACCESS_ONCE(jiffies); | ||
| 1365 | } | ||
| 1366 | if (kthread_should_stop()) { | ||
| 1367 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); | ||
| 1368 | return 0; | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | /* OK, shut down the system. */ | ||
| 1372 | |||
| 1373 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system"); | ||
| 1374 | shutdown_task = NULL; /* Avoid self-kill deadlock. */ | ||
| 1375 | rcu_torture_cleanup(); /* Get the success/failure message. */ | ||
| 1376 | kernel_power_off(); /* Shut down the system. */ | ||
| 1377 | return 0; | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1381 | |||
| 1382 | /* | ||
| 1383 | * Execute random CPU-hotplug operations at the interval specified | ||
| 1384 | * by the onoff_interval. | ||
| 1385 | */ | ||
| 1386 | static int | ||
| 1387 | rcu_torture_onoff(void *arg) | ||
| 1388 | { | ||
| 1389 | int cpu; | ||
| 1390 | unsigned long delta; | ||
| 1391 | int maxcpu = -1; | ||
| 1392 | DEFINE_RCU_RANDOM(rand); | ||
| 1393 | int ret; | ||
| 1394 | unsigned long starttime; | ||
| 1395 | |||
| 1396 | VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); | ||
| 1397 | for_each_online_cpu(cpu) | ||
| 1398 | maxcpu = cpu; | ||
| 1399 | WARN_ON(maxcpu < 0); | ||
| 1400 | if (onoff_holdoff > 0) { | ||
| 1401 | VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff"); | ||
| 1402 | schedule_timeout_interruptible(onoff_holdoff * HZ); | ||
| 1403 | VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff"); | ||
| 1404 | } | ||
| 1405 | while (!kthread_should_stop()) { | ||
| 1406 | cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); | ||
| 1407 | if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { | ||
| 1408 | if (verbose) | ||
| 1409 | pr_alert("%s" TORTURE_FLAG | ||
| 1410 | "rcu_torture_onoff task: offlining %d\n", | ||
| 1411 | torture_type, cpu); | ||
| 1412 | starttime = jiffies; | ||
| 1413 | n_offline_attempts++; | ||
| 1414 | ret = cpu_down(cpu); | ||
| 1415 | if (ret) { | ||
| 1416 | if (verbose) | ||
| 1417 | pr_alert("%s" TORTURE_FLAG | ||
| 1418 | "rcu_torture_onoff task: offline %d failed: errno %d\n", | ||
| 1419 | torture_type, cpu, ret); | ||
| 1420 | } else { | ||
| 1421 | if (verbose) | ||
| 1422 | pr_alert("%s" TORTURE_FLAG | ||
| 1423 | "rcu_torture_onoff task: offlined %d\n", | ||
| 1424 | torture_type, cpu); | ||
| 1425 | n_offline_successes++; | ||
| 1426 | delta = jiffies - starttime; | ||
| 1427 | sum_offline += delta; | ||
| 1428 | if (min_offline < 0) { | ||
| 1429 | min_offline = delta; | ||
| 1430 | max_offline = delta; | ||
| 1431 | } | ||
| 1432 | if (min_offline > delta) | ||
| 1433 | min_offline = delta; | ||
| 1434 | if (max_offline < delta) | ||
| 1435 | max_offline = delta; | ||
| 1436 | } | ||
| 1437 | } else if (cpu_is_hotpluggable(cpu)) { | ||
| 1438 | if (verbose) | ||
| 1439 | pr_alert("%s" TORTURE_FLAG | ||
| 1440 | "rcu_torture_onoff task: onlining %d\n", | ||
| 1441 | torture_type, cpu); | ||
| 1442 | starttime = jiffies; | ||
| 1443 | n_online_attempts++; | ||
| 1444 | ret = cpu_up(cpu); | ||
| 1445 | if (ret) { | ||
| 1446 | if (verbose) | ||
| 1447 | pr_alert("%s" TORTURE_FLAG | ||
| 1448 | "rcu_torture_onoff task: online %d failed: errno %d\n", | ||
| 1449 | torture_type, cpu, ret); | ||
| 1450 | } else { | ||
| 1451 | if (verbose) | ||
| 1452 | pr_alert("%s" TORTURE_FLAG | ||
| 1453 | "rcu_torture_onoff task: onlined %d\n", | ||
| 1454 | torture_type, cpu); | ||
| 1455 | n_online_successes++; | ||
| 1456 | delta = jiffies - starttime; | ||
| 1457 | sum_online += delta; | ||
| 1458 | if (min_online < 0) { | ||
| 1459 | min_online = delta; | ||
| 1460 | max_online = delta; | ||
| 1461 | } | ||
| 1462 | if (min_online > delta) | ||
| 1463 | min_online = delta; | ||
| 1464 | if (max_online < delta) | ||
| 1465 | max_online = delta; | ||
| 1466 | } | ||
| 1467 | } | ||
| 1468 | schedule_timeout_interruptible(onoff_interval * HZ); | ||
| 1469 | } | ||
| 1470 | VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping"); | ||
| 1471 | return 0; | ||
| 1472 | } | ||
| 1473 | |||
| 1474 | static int | ||
| 1475 | rcu_torture_onoff_init(void) | ||
| 1476 | { | ||
| 1477 | int ret; | ||
| 1478 | |||
| 1479 | if (onoff_interval <= 0) | ||
| 1480 | return 0; | ||
| 1481 | onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); | ||
| 1482 | if (IS_ERR(onoff_task)) { | ||
| 1483 | ret = PTR_ERR(onoff_task); | ||
| 1484 | onoff_task = NULL; | ||
| 1485 | return ret; | ||
| 1486 | } | ||
| 1487 | return 0; | ||
| 1488 | } | ||
| 1489 | |||
| 1490 | static void rcu_torture_onoff_cleanup(void) | ||
| 1491 | { | ||
| 1492 | if (onoff_task == NULL) | ||
| 1493 | return; | ||
| 1494 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); | ||
| 1495 | kthread_stop(onoff_task); | ||
| 1496 | onoff_task = NULL; | ||
| 1497 | } | ||
| 1498 | |||
| 1499 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1500 | |||
| 1501 | static int | ||
| 1502 | rcu_torture_onoff_init(void) | ||
| 1503 | { | ||
| 1504 | return 0; | ||
| 1505 | } | ||
| 1506 | |||
| 1507 | static void rcu_torture_onoff_cleanup(void) | ||
| 1508 | { | ||
| 1509 | } | ||
| 1510 | |||
| 1511 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1512 | |||
| 1513 | /* | ||
| 1514 | * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then | 1109 | * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then |
| 1515 | * induces a CPU stall for the time specified by stall_cpu. | 1110 | * induces a CPU stall for the time specified by stall_cpu. |
| 1516 | */ | 1111 | */ |
| @@ -1518,11 +1113,11 @@ static int rcu_torture_stall(void *args) | |||
| 1518 | { | 1113 | { |
| 1519 | unsigned long stop_at; | 1114 | unsigned long stop_at; |
| 1520 | 1115 | ||
| 1521 | VERBOSE_PRINTK_STRING("rcu_torture_stall task started"); | 1116 | VERBOSE_TOROUT_STRING("rcu_torture_stall task started"); |
| 1522 | if (stall_cpu_holdoff > 0) { | 1117 | if (stall_cpu_holdoff > 0) { |
| 1523 | VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff"); | 1118 | VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff"); |
| 1524 | schedule_timeout_interruptible(stall_cpu_holdoff * HZ); | 1119 | schedule_timeout_interruptible(stall_cpu_holdoff * HZ); |
| 1525 | VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff"); | 1120 | VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff"); |
| 1526 | } | 1121 | } |
| 1527 | if (!kthread_should_stop()) { | 1122 | if (!kthread_should_stop()) { |
| 1528 | stop_at = get_seconds() + stall_cpu; | 1123 | stop_at = get_seconds() + stall_cpu; |
| @@ -1536,7 +1131,7 @@ static int rcu_torture_stall(void *args) | |||
| 1536 | rcu_read_unlock(); | 1131 | rcu_read_unlock(); |
| 1537 | pr_alert("rcu_torture_stall end.\n"); | 1132 | pr_alert("rcu_torture_stall end.\n"); |
| 1538 | } | 1133 | } |
| 1539 | rcutorture_shutdown_absorb("rcu_torture_stall"); | 1134 | torture_shutdown_absorb("rcu_torture_stall"); |
| 1540 | while (!kthread_should_stop()) | 1135 | while (!kthread_should_stop()) |
| 1541 | schedule_timeout_interruptible(10 * HZ); | 1136 | schedule_timeout_interruptible(10 * HZ); |
| 1542 | return 0; | 1137 | return 0; |
| @@ -1545,27 +1140,9 @@ static int rcu_torture_stall(void *args) | |||
| 1545 | /* Spawn CPU-stall kthread, if stall_cpu specified. */ | 1140 | /* Spawn CPU-stall kthread, if stall_cpu specified. */ |
| 1546 | static int __init rcu_torture_stall_init(void) | 1141 | static int __init rcu_torture_stall_init(void) |
| 1547 | { | 1142 | { |
| 1548 | int ret; | ||
| 1549 | |||
| 1550 | if (stall_cpu <= 0) | 1143 | if (stall_cpu <= 0) |
| 1551 | return 0; | 1144 | return 0; |
| 1552 | stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall"); | 1145 | return torture_create_kthread(rcu_torture_stall, NULL, stall_task); |
| 1553 | if (IS_ERR(stall_task)) { | ||
| 1554 | ret = PTR_ERR(stall_task); | ||
| 1555 | stall_task = NULL; | ||
| 1556 | return ret; | ||
| 1557 | } | ||
| 1558 | return 0; | ||
| 1559 | } | ||
| 1560 | |||
| 1561 | /* Clean up after the CPU-stall kthread, if one was spawned. */ | ||
| 1562 | static void rcu_torture_stall_cleanup(void) | ||
| 1563 | { | ||
| 1564 | if (stall_task == NULL) | ||
| 1565 | return; | ||
| 1566 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); | ||
| 1567 | kthread_stop(stall_task); | ||
| 1568 | stall_task = NULL; | ||
| 1569 | } | 1146 | } |
| 1570 | 1147 | ||
| 1571 | /* Callback function for RCU barrier testing. */ | 1148 | /* Callback function for RCU barrier testing. */ |
| @@ -1583,28 +1160,24 @@ static int rcu_torture_barrier_cbs(void *arg) | |||
| 1583 | struct rcu_head rcu; | 1160 | struct rcu_head rcu; |
| 1584 | 1161 | ||
| 1585 | init_rcu_head_on_stack(&rcu); | 1162 | init_rcu_head_on_stack(&rcu); |
| 1586 | VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); | 1163 | VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task started"); |
| 1587 | set_user_nice(current, 19); | 1164 | set_user_nice(current, MAX_NICE); |
| 1588 | do { | 1165 | do { |
| 1589 | wait_event(barrier_cbs_wq[myid], | 1166 | wait_event(barrier_cbs_wq[myid], |
| 1590 | (newphase = | 1167 | (newphase = |
| 1591 | ACCESS_ONCE(barrier_phase)) != lastphase || | 1168 | ACCESS_ONCE(barrier_phase)) != lastphase || |
| 1592 | kthread_should_stop() || | 1169 | torture_must_stop()); |
| 1593 | fullstop != FULLSTOP_DONTSTOP); | ||
| 1594 | lastphase = newphase; | 1170 | lastphase = newphase; |
| 1595 | smp_mb(); /* ensure barrier_phase load before ->call(). */ | 1171 | smp_mb(); /* ensure barrier_phase load before ->call(). */ |
| 1596 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | 1172 | if (torture_must_stop()) |
| 1597 | break; | 1173 | break; |
| 1598 | cur_ops->call(&rcu, rcu_torture_barrier_cbf); | 1174 | cur_ops->call(&rcu, rcu_torture_barrier_cbf); |
| 1599 | if (atomic_dec_and_test(&barrier_cbs_count)) | 1175 | if (atomic_dec_and_test(&barrier_cbs_count)) |
| 1600 | wake_up(&barrier_wq); | 1176 | wake_up(&barrier_wq); |
| 1601 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 1177 | } while (!torture_must_stop()); |
| 1602 | VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); | ||
| 1603 | rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); | ||
| 1604 | while (!kthread_should_stop()) | ||
| 1605 | schedule_timeout_interruptible(1); | ||
| 1606 | cur_ops->cb_barrier(); | 1178 | cur_ops->cb_barrier(); |
| 1607 | destroy_rcu_head_on_stack(&rcu); | 1179 | destroy_rcu_head_on_stack(&rcu); |
| 1180 | torture_kthread_stopping("rcu_torture_barrier_cbs"); | ||
| 1608 | return 0; | 1181 | return 0; |
| 1609 | } | 1182 | } |
| 1610 | 1183 | ||
| @@ -1613,7 +1186,7 @@ static int rcu_torture_barrier(void *arg) | |||
| 1613 | { | 1186 | { |
| 1614 | int i; | 1187 | int i; |
| 1615 | 1188 | ||
| 1616 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); | 1189 | VERBOSE_TOROUT_STRING("rcu_torture_barrier task starting"); |
| 1617 | do { | 1190 | do { |
| 1618 | atomic_set(&barrier_cbs_invoked, 0); | 1191 | atomic_set(&barrier_cbs_invoked, 0); |
| 1619 | atomic_set(&barrier_cbs_count, n_barrier_cbs); | 1192 | atomic_set(&barrier_cbs_count, n_barrier_cbs); |
| @@ -1623,9 +1196,8 @@ static int rcu_torture_barrier(void *arg) | |||
| 1623 | wake_up(&barrier_cbs_wq[i]); | 1196 | wake_up(&barrier_cbs_wq[i]); |
| 1624 | wait_event(barrier_wq, | 1197 | wait_event(barrier_wq, |
| 1625 | atomic_read(&barrier_cbs_count) == 0 || | 1198 | atomic_read(&barrier_cbs_count) == 0 || |
| 1626 | kthread_should_stop() || | 1199 | torture_must_stop()); |
| 1627 | fullstop != FULLSTOP_DONTSTOP); | 1200 | if (torture_must_stop()) |
| 1628 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | ||
| 1629 | break; | 1201 | break; |
| 1630 | n_barrier_attempts++; | 1202 | n_barrier_attempts++; |
| 1631 | cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ | 1203 | cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ |
| @@ -1635,11 +1207,8 @@ static int rcu_torture_barrier(void *arg) | |||
| 1635 | } | 1207 | } |
| 1636 | n_barrier_successes++; | 1208 | n_barrier_successes++; |
| 1637 | schedule_timeout_interruptible(HZ / 10); | 1209 | schedule_timeout_interruptible(HZ / 10); |
| 1638 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 1210 | } while (!torture_must_stop()); |
| 1639 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); | 1211 | torture_kthread_stopping("rcu_torture_barrier"); |
| 1640 | rcutorture_shutdown_absorb("rcu_torture_barrier"); | ||
| 1641 | while (!kthread_should_stop()) | ||
| 1642 | schedule_timeout_interruptible(1); | ||
| 1643 | return 0; | 1212 | return 0; |
| 1644 | } | 1213 | } |
| 1645 | 1214 | ||
| @@ -1672,24 +1241,13 @@ static int rcu_torture_barrier_init(void) | |||
| 1672 | return -ENOMEM; | 1241 | return -ENOMEM; |
| 1673 | for (i = 0; i < n_barrier_cbs; i++) { | 1242 | for (i = 0; i < n_barrier_cbs; i++) { |
| 1674 | init_waitqueue_head(&barrier_cbs_wq[i]); | 1243 | init_waitqueue_head(&barrier_cbs_wq[i]); |
| 1675 | barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, | 1244 | ret = torture_create_kthread(rcu_torture_barrier_cbs, |
| 1676 | (void *)(long)i, | 1245 | (void *)(long)i, |
| 1677 | "rcu_torture_barrier_cbs"); | 1246 | barrier_cbs_tasks[i]); |
| 1678 | if (IS_ERR(barrier_cbs_tasks[i])) { | 1247 | if (ret) |
| 1679 | ret = PTR_ERR(barrier_cbs_tasks[i]); | ||
| 1680 | VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); | ||
| 1681 | barrier_cbs_tasks[i] = NULL; | ||
| 1682 | return ret; | 1248 | return ret; |
| 1683 | } | ||
| 1684 | } | 1249 | } |
| 1685 | barrier_task = kthread_run(rcu_torture_barrier, NULL, | 1250 | return torture_create_kthread(rcu_torture_barrier, NULL, barrier_task); |
| 1686 | "rcu_torture_barrier"); | ||
| 1687 | if (IS_ERR(barrier_task)) { | ||
| 1688 | ret = PTR_ERR(barrier_task); | ||
| 1689 | VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); | ||
| 1690 | barrier_task = NULL; | ||
| 1691 | } | ||
| 1692 | return 0; | ||
| 1693 | } | 1251 | } |
| 1694 | 1252 | ||
| 1695 | /* Clean up after RCU barrier testing. */ | 1253 | /* Clean up after RCU barrier testing. */ |
| @@ -1697,19 +1255,11 @@ static void rcu_torture_barrier_cleanup(void) | |||
| 1697 | { | 1255 | { |
| 1698 | int i; | 1256 | int i; |
| 1699 | 1257 | ||
| 1700 | if (barrier_task != NULL) { | 1258 | torture_stop_kthread(rcu_torture_barrier, barrier_task); |
| 1701 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); | ||
| 1702 | kthread_stop(barrier_task); | ||
| 1703 | barrier_task = NULL; | ||
| 1704 | } | ||
| 1705 | if (barrier_cbs_tasks != NULL) { | 1259 | if (barrier_cbs_tasks != NULL) { |
| 1706 | for (i = 0; i < n_barrier_cbs; i++) { | 1260 | for (i = 0; i < n_barrier_cbs; i++) |
| 1707 | if (barrier_cbs_tasks[i] != NULL) { | 1261 | torture_stop_kthread(rcu_torture_barrier_cbs, |
| 1708 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); | 1262 | barrier_cbs_tasks[i]); |
| 1709 | kthread_stop(barrier_cbs_tasks[i]); | ||
| 1710 | barrier_cbs_tasks[i] = NULL; | ||
| 1711 | } | ||
| 1712 | } | ||
| 1713 | kfree(barrier_cbs_tasks); | 1263 | kfree(barrier_cbs_tasks); |
| 1714 | barrier_cbs_tasks = NULL; | 1264 | barrier_cbs_tasks = NULL; |
| 1715 | } | 1265 | } |
| @@ -1747,90 +1297,42 @@ rcu_torture_cleanup(void) | |||
| 1747 | { | 1297 | { |
| 1748 | int i; | 1298 | int i; |
| 1749 | 1299 | ||
| 1750 | mutex_lock(&fullstop_mutex); | ||
| 1751 | rcutorture_record_test_transition(); | 1300 | rcutorture_record_test_transition(); |
| 1752 | if (fullstop == FULLSTOP_SHUTDOWN) { | 1301 | if (torture_cleanup()) { |
| 1753 | pr_warn(/* but going down anyway, so... */ | ||
| 1754 | "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); | ||
| 1755 | mutex_unlock(&fullstop_mutex); | ||
| 1756 | schedule_timeout_uninterruptible(10); | ||
| 1757 | if (cur_ops->cb_barrier != NULL) | 1302 | if (cur_ops->cb_barrier != NULL) |
| 1758 | cur_ops->cb_barrier(); | 1303 | cur_ops->cb_barrier(); |
| 1759 | return; | 1304 | return; |
| 1760 | } | 1305 | } |
| 1761 | fullstop = FULLSTOP_RMMOD; | ||
| 1762 | mutex_unlock(&fullstop_mutex); | ||
| 1763 | unregister_reboot_notifier(&rcutorture_shutdown_nb); | ||
| 1764 | rcu_torture_barrier_cleanup(); | ||
| 1765 | rcu_torture_stall_cleanup(); | ||
| 1766 | if (stutter_task) { | ||
| 1767 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); | ||
| 1768 | kthread_stop(stutter_task); | ||
| 1769 | } | ||
| 1770 | stutter_task = NULL; | ||
| 1771 | if (shuffler_task) { | ||
| 1772 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); | ||
| 1773 | kthread_stop(shuffler_task); | ||
| 1774 | free_cpumask_var(shuffle_tmp_mask); | ||
| 1775 | } | ||
| 1776 | shuffler_task = NULL; | ||
| 1777 | 1306 | ||
| 1778 | if (writer_task) { | 1307 | rcu_torture_barrier_cleanup(); |
| 1779 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); | 1308 | torture_stop_kthread(rcu_torture_stall, stall_task); |
| 1780 | kthread_stop(writer_task); | 1309 | torture_stop_kthread(rcu_torture_writer, writer_task); |
| 1781 | } | ||
| 1782 | writer_task = NULL; | ||
| 1783 | 1310 | ||
| 1784 | if (reader_tasks) { | 1311 | if (reader_tasks) { |
| 1785 | for (i = 0; i < nrealreaders; i++) { | 1312 | for (i = 0; i < nrealreaders; i++) |
| 1786 | if (reader_tasks[i]) { | 1313 | torture_stop_kthread(rcu_torture_reader, |
| 1787 | VERBOSE_PRINTK_STRING( | 1314 | reader_tasks[i]); |
| 1788 | "Stopping rcu_torture_reader task"); | ||
| 1789 | kthread_stop(reader_tasks[i]); | ||
| 1790 | } | ||
| 1791 | reader_tasks[i] = NULL; | ||
| 1792 | } | ||
| 1793 | kfree(reader_tasks); | 1315 | kfree(reader_tasks); |
| 1794 | reader_tasks = NULL; | ||
| 1795 | } | 1316 | } |
| 1796 | rcu_torture_current = NULL; | 1317 | rcu_torture_current = NULL; |
| 1797 | 1318 | ||
| 1798 | if (fakewriter_tasks) { | 1319 | if (fakewriter_tasks) { |
| 1799 | for (i = 0; i < nfakewriters; i++) { | 1320 | for (i = 0; i < nfakewriters; i++) { |
| 1800 | if (fakewriter_tasks[i]) { | 1321 | torture_stop_kthread(rcu_torture_fakewriter, |
| 1801 | VERBOSE_PRINTK_STRING( | 1322 | fakewriter_tasks[i]); |
| 1802 | "Stopping rcu_torture_fakewriter task"); | ||
| 1803 | kthread_stop(fakewriter_tasks[i]); | ||
| 1804 | } | ||
| 1805 | fakewriter_tasks[i] = NULL; | ||
| 1806 | } | 1323 | } |
| 1807 | kfree(fakewriter_tasks); | 1324 | kfree(fakewriter_tasks); |
| 1808 | fakewriter_tasks = NULL; | 1325 | fakewriter_tasks = NULL; |
| 1809 | } | 1326 | } |
| 1810 | 1327 | ||
| 1811 | if (stats_task) { | 1328 | torture_stop_kthread(rcu_torture_stats, stats_task); |
| 1812 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); | 1329 | torture_stop_kthread(rcu_torture_fqs, fqs_task); |
| 1813 | kthread_stop(stats_task); | ||
| 1814 | } | ||
| 1815 | stats_task = NULL; | ||
| 1816 | |||
| 1817 | if (fqs_task) { | ||
| 1818 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); | ||
| 1819 | kthread_stop(fqs_task); | ||
| 1820 | } | ||
| 1821 | fqs_task = NULL; | ||
| 1822 | if ((test_boost == 1 && cur_ops->can_boost) || | 1330 | if ((test_boost == 1 && cur_ops->can_boost) || |
| 1823 | test_boost == 2) { | 1331 | test_boost == 2) { |
| 1824 | unregister_cpu_notifier(&rcutorture_cpu_nb); | 1332 | unregister_cpu_notifier(&rcutorture_cpu_nb); |
| 1825 | for_each_possible_cpu(i) | 1333 | for_each_possible_cpu(i) |
| 1826 | rcutorture_booster_cleanup(i); | 1334 | rcutorture_booster_cleanup(i); |
| 1827 | } | 1335 | } |
| 1828 | if (shutdown_task != NULL) { | ||
| 1829 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); | ||
| 1830 | kthread_stop(shutdown_task); | ||
| 1831 | } | ||
| 1832 | shutdown_task = NULL; | ||
| 1833 | rcu_torture_onoff_cleanup(); | ||
| 1834 | 1336 | ||
| 1835 | /* Wait for all RCU callbacks to fire. */ | 1337 | /* Wait for all RCU callbacks to fire. */ |
| 1836 | 1338 | ||
| @@ -1841,8 +1343,7 @@ rcu_torture_cleanup(void) | |||
| 1841 | 1343 | ||
| 1842 | if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) | 1344 | if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) |
| 1843 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); | 1345 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); |
| 1844 | else if (n_online_successes != n_online_attempts || | 1346 | else if (torture_onoff_failures()) |
| 1845 | n_offline_successes != n_offline_attempts) | ||
| 1846 | rcu_torture_print_module_parms(cur_ops, | 1347 | rcu_torture_print_module_parms(cur_ops, |
| 1847 | "End of test: RCU_HOTPLUG"); | 1348 | "End of test: RCU_HOTPLUG"); |
| 1848 | else | 1349 | else |
| @@ -1911,12 +1412,11 @@ rcu_torture_init(void) | |||
| 1911 | int i; | 1412 | int i; |
| 1912 | int cpu; | 1413 | int cpu; |
| 1913 | int firsterr = 0; | 1414 | int firsterr = 0; |
| 1914 | int retval; | ||
| 1915 | static struct rcu_torture_ops *torture_ops[] = { | 1415 | static struct rcu_torture_ops *torture_ops[] = { |
| 1916 | &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, | 1416 | &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, |
| 1917 | }; | 1417 | }; |
| 1918 | 1418 | ||
| 1919 | mutex_lock(&fullstop_mutex); | 1419 | torture_init_begin(torture_type, verbose, &rcutorture_runnable); |
| 1920 | 1420 | ||
| 1921 | /* Process args and tell the world that the torturer is on the job. */ | 1421 | /* Process args and tell the world that the torturer is on the job. */ |
| 1922 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { | 1422 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { |
| @@ -1931,7 +1431,7 @@ rcu_torture_init(void) | |||
| 1931 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) | 1431 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) |
| 1932 | pr_alert(" %s", torture_ops[i]->name); | 1432 | pr_alert(" %s", torture_ops[i]->name); |
| 1933 | pr_alert("\n"); | 1433 | pr_alert("\n"); |
| 1934 | mutex_unlock(&fullstop_mutex); | 1434 | torture_init_end(); |
| 1935 | return -EINVAL; | 1435 | return -EINVAL; |
| 1936 | } | 1436 | } |
| 1937 | if (cur_ops->fqs == NULL && fqs_duration != 0) { | 1437 | if (cur_ops->fqs == NULL && fqs_duration != 0) { |
| @@ -1946,7 +1446,6 @@ rcu_torture_init(void) | |||
| 1946 | else | 1446 | else |
| 1947 | nrealreaders = 2 * num_online_cpus(); | 1447 | nrealreaders = 2 * num_online_cpus(); |
| 1948 | rcu_torture_print_module_parms(cur_ops, "Start of test"); | 1448 | rcu_torture_print_module_parms(cur_ops, "Start of test"); |
| 1949 | fullstop = FULLSTOP_DONTSTOP; | ||
| 1950 | 1449 | ||
| 1951 | /* Set up the freelist. */ | 1450 | /* Set up the freelist. */ |
| 1952 | 1451 | ||
| @@ -1982,108 +1481,61 @@ rcu_torture_init(void) | |||
| 1982 | 1481 | ||
| 1983 | /* Start up the kthreads. */ | 1482 | /* Start up the kthreads. */ |
| 1984 | 1483 | ||
| 1985 | VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); | 1484 | firsterr = torture_create_kthread(rcu_torture_writer, NULL, |
| 1986 | writer_task = kthread_create(rcu_torture_writer, NULL, | 1485 | writer_task); |
| 1987 | "rcu_torture_writer"); | 1486 | if (firsterr) |
| 1988 | if (IS_ERR(writer_task)) { | ||
| 1989 | firsterr = PTR_ERR(writer_task); | ||
| 1990 | VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); | ||
| 1991 | writer_task = NULL; | ||
| 1992 | goto unwind; | 1487 | goto unwind; |
| 1993 | } | ||
| 1994 | wake_up_process(writer_task); | ||
| 1995 | fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), | 1488 | fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), |
| 1996 | GFP_KERNEL); | 1489 | GFP_KERNEL); |
| 1997 | if (fakewriter_tasks == NULL) { | 1490 | if (fakewriter_tasks == NULL) { |
| 1998 | VERBOSE_PRINTK_ERRSTRING("out of memory"); | 1491 | VERBOSE_TOROUT_ERRSTRING("out of memory"); |
| 1999 | firsterr = -ENOMEM; | 1492 | firsterr = -ENOMEM; |
| 2000 | goto unwind; | 1493 | goto unwind; |
| 2001 | } | 1494 | } |
| 2002 | for (i = 0; i < nfakewriters; i++) { | 1495 | for (i = 0; i < nfakewriters; i++) { |
| 2003 | VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); | 1496 | firsterr = torture_create_kthread(rcu_torture_fakewriter, |
| 2004 | fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, | 1497 | NULL, fakewriter_tasks[i]); |
| 2005 | "rcu_torture_fakewriter"); | 1498 | if (firsterr) |
| 2006 | if (IS_ERR(fakewriter_tasks[i])) { | ||
| 2007 | firsterr = PTR_ERR(fakewriter_tasks[i]); | ||
| 2008 | VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); | ||
| 2009 | fakewriter_tasks[i] = NULL; | ||
| 2010 | goto unwind; | 1499 | goto unwind; |
| 2011 | } | ||
| 2012 | } | 1500 | } |
| 2013 | reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), | 1501 | reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), |
| 2014 | GFP_KERNEL); | 1502 | GFP_KERNEL); |
| 2015 | if (reader_tasks == NULL) { | 1503 | if (reader_tasks == NULL) { |
| 2016 | VERBOSE_PRINTK_ERRSTRING("out of memory"); | 1504 | VERBOSE_TOROUT_ERRSTRING("out of memory"); |
| 2017 | firsterr = -ENOMEM; | 1505 | firsterr = -ENOMEM; |
| 2018 | goto unwind; | 1506 | goto unwind; |
| 2019 | } | 1507 | } |
| 2020 | for (i = 0; i < nrealreaders; i++) { | 1508 | for (i = 0; i < nrealreaders; i++) { |
| 2021 | VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task"); | 1509 | firsterr = torture_create_kthread(rcu_torture_reader, NULL, |
| 2022 | reader_tasks[i] = kthread_run(rcu_torture_reader, NULL, | 1510 | reader_tasks[i]); |
| 2023 | "rcu_torture_reader"); | 1511 | if (firsterr) |
| 2024 | if (IS_ERR(reader_tasks[i])) { | ||
| 2025 | firsterr = PTR_ERR(reader_tasks[i]); | ||
| 2026 | VERBOSE_PRINTK_ERRSTRING("Failed to create reader"); | ||
| 2027 | reader_tasks[i] = NULL; | ||
| 2028 | goto unwind; | 1512 | goto unwind; |
| 2029 | } | ||
| 2030 | } | 1513 | } |
| 2031 | if (stat_interval > 0) { | 1514 | if (stat_interval > 0) { |
| 2032 | VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task"); | 1515 | firsterr = torture_create_kthread(rcu_torture_stats, NULL, |
| 2033 | stats_task = kthread_run(rcu_torture_stats, NULL, | 1516 | stats_task); |
| 2034 | "rcu_torture_stats"); | 1517 | if (firsterr) |
| 2035 | if (IS_ERR(stats_task)) { | ||
| 2036 | firsterr = PTR_ERR(stats_task); | ||
| 2037 | VERBOSE_PRINTK_ERRSTRING("Failed to create stats"); | ||
| 2038 | stats_task = NULL; | ||
| 2039 | goto unwind; | 1518 | goto unwind; |
| 2040 | } | ||
| 2041 | } | 1519 | } |
| 2042 | if (test_no_idle_hz) { | 1520 | if (test_no_idle_hz) { |
| 2043 | rcu_idle_cpu = num_online_cpus() - 1; | 1521 | firsterr = torture_shuffle_init(shuffle_interval * HZ); |
| 2044 | 1522 | if (firsterr) | |
| 2045 | if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { | ||
| 2046 | firsterr = -ENOMEM; | ||
| 2047 | VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask"); | ||
| 2048 | goto unwind; | ||
| 2049 | } | ||
| 2050 | |||
| 2051 | /* Create the shuffler thread */ | ||
| 2052 | shuffler_task = kthread_run(rcu_torture_shuffle, NULL, | ||
| 2053 | "rcu_torture_shuffle"); | ||
| 2054 | if (IS_ERR(shuffler_task)) { | ||
| 2055 | free_cpumask_var(shuffle_tmp_mask); | ||
| 2056 | firsterr = PTR_ERR(shuffler_task); | ||
| 2057 | VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler"); | ||
| 2058 | shuffler_task = NULL; | ||
| 2059 | goto unwind; | 1523 | goto unwind; |
| 2060 | } | ||
| 2061 | } | 1524 | } |
| 2062 | if (stutter < 0) | 1525 | if (stutter < 0) |
| 2063 | stutter = 0; | 1526 | stutter = 0; |
| 2064 | if (stutter) { | 1527 | if (stutter) { |
| 2065 | /* Create the stutter thread */ | 1528 | firsterr = torture_stutter_init(stutter * HZ); |
| 2066 | stutter_task = kthread_run(rcu_torture_stutter, NULL, | 1529 | if (firsterr) |
| 2067 | "rcu_torture_stutter"); | ||
| 2068 | if (IS_ERR(stutter_task)) { | ||
| 2069 | firsterr = PTR_ERR(stutter_task); | ||
| 2070 | VERBOSE_PRINTK_ERRSTRING("Failed to create stutter"); | ||
| 2071 | stutter_task = NULL; | ||
| 2072 | goto unwind; | 1530 | goto unwind; |
| 2073 | } | ||
| 2074 | } | 1531 | } |
| 2075 | if (fqs_duration < 0) | 1532 | if (fqs_duration < 0) |
| 2076 | fqs_duration = 0; | 1533 | fqs_duration = 0; |
| 2077 | if (fqs_duration) { | 1534 | if (fqs_duration) { |
| 2078 | /* Create the stutter thread */ | 1535 | /* Create the fqs thread */ |
| 2079 | fqs_task = kthread_run(rcu_torture_fqs, NULL, | 1536 | firsterr = torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); |
| 2080 | "rcu_torture_fqs"); | 1537 | if (firsterr) |
| 2081 | if (IS_ERR(fqs_task)) { | ||
| 2082 | firsterr = PTR_ERR(fqs_task); | ||
| 2083 | VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); | ||
| 2084 | fqs_task = NULL; | ||
| 2085 | goto unwind; | 1538 | goto unwind; |
| 2086 | } | ||
| 2087 | } | 1539 | } |
| 2088 | if (test_boost_interval < 1) | 1540 | if (test_boost_interval < 1) |
| 2089 | test_boost_interval = 1; | 1541 | test_boost_interval = 1; |
| @@ -2097,49 +1549,31 @@ rcu_torture_init(void) | |||
| 2097 | for_each_possible_cpu(i) { | 1549 | for_each_possible_cpu(i) { |
| 2098 | if (cpu_is_offline(i)) | 1550 | if (cpu_is_offline(i)) |
| 2099 | continue; /* Heuristic: CPU can go offline. */ | 1551 | continue; /* Heuristic: CPU can go offline. */ |
| 2100 | retval = rcutorture_booster_init(i); | 1552 | firsterr = rcutorture_booster_init(i); |
| 2101 | if (retval < 0) { | 1553 | if (firsterr) |
| 2102 | firsterr = retval; | ||
| 2103 | goto unwind; | 1554 | goto unwind; |
| 2104 | } | ||
| 2105 | } | 1555 | } |
| 2106 | } | 1556 | } |
| 2107 | if (shutdown_secs > 0) { | 1557 | firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); |
| 2108 | shutdown_time = jiffies + shutdown_secs * HZ; | 1558 | if (firsterr) |
| 2109 | shutdown_task = kthread_create(rcu_torture_shutdown, NULL, | ||
| 2110 | "rcu_torture_shutdown"); | ||
| 2111 | if (IS_ERR(shutdown_task)) { | ||
| 2112 | firsterr = PTR_ERR(shutdown_task); | ||
| 2113 | VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); | ||
| 2114 | shutdown_task = NULL; | ||
| 2115 | goto unwind; | ||
| 2116 | } | ||
| 2117 | wake_up_process(shutdown_task); | ||
| 2118 | } | ||
| 2119 | i = rcu_torture_onoff_init(); | ||
| 2120 | if (i != 0) { | ||
| 2121 | firsterr = i; | ||
| 2122 | goto unwind; | 1559 | goto unwind; |
| 2123 | } | 1560 | firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); |
| 2124 | register_reboot_notifier(&rcutorture_shutdown_nb); | 1561 | if (firsterr) |
| 2125 | i = rcu_torture_stall_init(); | ||
| 2126 | if (i != 0) { | ||
| 2127 | firsterr = i; | ||
| 2128 | goto unwind; | 1562 | goto unwind; |
| 2129 | } | 1563 | firsterr = rcu_torture_stall_init(); |
| 2130 | retval = rcu_torture_barrier_init(); | 1564 | if (firsterr) |
| 2131 | if (retval != 0) { | 1565 | goto unwind; |
| 2132 | firsterr = retval; | 1566 | firsterr = rcu_torture_barrier_init(); |
| 1567 | if (firsterr) | ||
| 2133 | goto unwind; | 1568 | goto unwind; |
| 2134 | } | ||
| 2135 | if (object_debug) | 1569 | if (object_debug) |
| 2136 | rcu_test_debug_objects(); | 1570 | rcu_test_debug_objects(); |
| 2137 | rcutorture_record_test_transition(); | 1571 | rcutorture_record_test_transition(); |
| 2138 | mutex_unlock(&fullstop_mutex); | 1572 | torture_init_end(); |
| 2139 | return 0; | 1573 | return 0; |
| 2140 | 1574 | ||
| 2141 | unwind: | 1575 | unwind: |
| 2142 | mutex_unlock(&fullstop_mutex); | 1576 | torture_init_end(); |
| 2143 | rcu_torture_cleanup(); | 1577 | rcu_torture_cleanup(); |
| 2144 | return firsterr; | 1578 | return firsterr; |
| 2145 | } | 1579 | } |
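The rcutorture.c hunks above replace the open-coded kthread bookkeeping (the VERBOSE_PRINTK_STRING calls, paired kthread_run()/kthread_stop() helpers, fullstop_mutex, and the per-test shutdown/onoff/stall/stutter code) with the shared torture-framework calls. A minimal sketch of the resulting client pattern, using only the helpers exactly as they appear in this diff (torture_init_begin/end, torture_create_kthread, torture_stop_kthread, torture_must_stop, torture_kthread_stopping, torture_cleanup); every identifier prefixed demo_ is hypothetical and stands in for a real torture test:

#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/torture.h>

static bool verbose = true;             /* normally module parameters */
static int demo_runnable = 1;
static struct task_struct *demo_task;   /* managed by the torture_* macros */

static int demo_kthread(void *arg)
{
        do {
                schedule_timeout_interruptible(HZ);     /* one pass of test work */
        } while (!torture_must_stop());                 /* covers both rmmod and shutdown */
        torture_kthread_stopping("demo_kthread");       /* replaces the old absorb/spin tail */
        return 0;
}

static void demo_cleanup(void)
{
        if (torture_cleanup())          /* torture core is already handling a shutdown */
                return;
        torture_stop_kthread(demo_kthread, demo_task);
}

static int __init demo_init(void)
{
        int firsterr;

        torture_init_begin("demo", verbose, &demo_runnable);   /* replaces fullstop_mutex */
        firsterr = torture_create_kthread(demo_kthread, NULL, demo_task);
        if (firsterr)
                goto unwind;
        torture_init_end();
        return 0;
unwind:
        torture_init_end();
        demo_cleanup();
        return firsterr;
}

module_init(demo_init);
module_exit(demo_cleanup);
MODULE_LICENSE("GPL");

The payoff of the conversion, as the deleted hunks suggest, is that the create/stop/stutter/shutdown/onoff plumbing now lives once in the shared torture code rather than being duplicated in every test module.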
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 3318d8284384..c639556f3fa0 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright (C) IBM Corporation, 2006 | 18 | * Copyright (C) IBM Corporation, 2006 |
| 19 | * Copyright (C) Fujitsu, 2012 | 19 | * Copyright (C) Fujitsu, 2012 |
| @@ -36,8 +36,6 @@ | |||
| 36 | #include <linux/delay.h> | 36 | #include <linux/delay.h> |
| 37 | #include <linux/srcu.h> | 37 | #include <linux/srcu.h> |
| 38 | 38 | ||
| 39 | #include <trace/events/rcu.h> | ||
| 40 | |||
| 41 | #include "rcu.h" | 39 | #include "rcu.h" |
| 42 | 40 | ||
| 43 | /* | 41 | /* |
| @@ -398,7 +396,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | |||
| 398 | rcu_batch_queue(&sp->batch_queue, head); | 396 | rcu_batch_queue(&sp->batch_queue, head); |
| 399 | if (!sp->running) { | 397 | if (!sp->running) { |
| 400 | sp->running = true; | 398 | sp->running = true; |
| 401 | schedule_delayed_work(&sp->work, 0); | 399 | queue_delayed_work(system_power_efficient_wq, &sp->work, 0); |
| 402 | } | 400 | } |
| 403 | spin_unlock_irqrestore(&sp->queue_lock, flags); | 401 | spin_unlock_irqrestore(&sp->queue_lock, flags); |
| 404 | } | 402 | } |
| @@ -674,7 +672,8 @@ static void srcu_reschedule(struct srcu_struct *sp) | |||
| 674 | } | 672 | } |
| 675 | 673 | ||
| 676 | if (pending) | 674 | if (pending) |
| 677 | schedule_delayed_work(&sp->work, SRCU_INTERVAL); | 675 | queue_delayed_work(system_power_efficient_wq, |
| 676 | &sp->work, SRCU_INTERVAL); | ||
| 678 | } | 677 | } |
| 679 | 678 | ||
| 680 | /* | 679 | /* |
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 1254f312d024..d9efcc13008c 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright IBM Corporation, 2008 | 18 | * Copyright IBM Corporation, 2008 |
| 19 | * | 19 | * |
| @@ -37,10 +37,6 @@ | |||
| 37 | #include <linux/prefetch.h> | 37 | #include <linux/prefetch.h> |
| 38 | #include <linux/ftrace_event.h> | 38 | #include <linux/ftrace_event.h> |
| 39 | 39 | ||
| 40 | #ifdef CONFIG_RCU_TRACE | ||
| 41 | #include <trace/events/rcu.h> | ||
| 42 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
| 43 | |||
| 44 | #include "rcu.h" | 40 | #include "rcu.h" |
| 45 | 41 | ||
| 46 | /* Forward declarations for tiny_plugin.h. */ | 42 | /* Forward declarations for tiny_plugin.h. */ |
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 280d06cae352..431528520562 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | * GNU General Public License for more details. | 14 | * GNU General Public License for more details. |
| 15 | * | 15 | * |
| 16 | * You should have received a copy of the GNU General Public License | 16 | * You should have received a copy of the GNU General Public License |
| 17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, you can access it online at |
| 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 18 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 19 | * | 19 | * |
| 20 | * Copyright (c) 2010 Linaro | 20 | * Copyright (c) 2010 Linaro |
| 21 | * | 21 | * |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b3d116cd072d..0c47e300210a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright IBM Corporation, 2008 | 18 | * Copyright IBM Corporation, 2008 |
| 19 | * | 19 | * |
| @@ -58,8 +58,6 @@ | |||
| 58 | #include <linux/suspend.h> | 58 | #include <linux/suspend.h> |
| 59 | 59 | ||
| 60 | #include "tree.h" | 60 | #include "tree.h" |
| 61 | #include <trace/events/rcu.h> | ||
| 62 | |||
| 63 | #include "rcu.h" | 61 | #include "rcu.h" |
| 64 | 62 | ||
| 65 | MODULE_ALIAS("rcutree"); | 63 | MODULE_ALIAS("rcutree"); |
| @@ -837,7 +835,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, | |||
| 837 | * to the next. Only do this for the primary flavor of RCU. | 835 | * to the next. Only do this for the primary flavor of RCU. |
| 838 | */ | 836 | */ |
| 839 | if (rdp->rsp == rcu_state && | 837 | if (rdp->rsp == rcu_state && |
| 840 | ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) { | 838 | ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { |
| 841 | rdp->rsp->jiffies_resched += 5; | 839 | rdp->rsp->jiffies_resched += 5; |
| 842 | resched_cpu(rdp->cpu); | 840 | resched_cpu(rdp->cpu); |
| 843 | } | 841 | } |
| @@ -847,7 +845,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, | |||
| 847 | 845 | ||
| 848 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 846 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
| 849 | { | 847 | { |
| 850 | unsigned long j = ACCESS_ONCE(jiffies); | 848 | unsigned long j = jiffies; |
| 851 | unsigned long j1; | 849 | unsigned long j1; |
| 852 | 850 | ||
| 853 | rsp->gp_start = j; | 851 | rsp->gp_start = j; |
| @@ -1005,7 +1003,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1005 | 1003 | ||
| 1006 | if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) | 1004 | if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) |
| 1007 | return; | 1005 | return; |
| 1008 | j = ACCESS_ONCE(jiffies); | 1006 | j = jiffies; |
| 1009 | 1007 | ||
| 1010 | /* | 1008 | /* |
| 1011 | * Lots of memory barriers to reject false positives. | 1009 | * Lots of memory barriers to reject false positives. |
| @@ -1423,13 +1421,14 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
| 1423 | 1421 | ||
| 1424 | /* Advance to a new grace period and initialize state. */ | 1422 | /* Advance to a new grace period and initialize state. */ |
| 1425 | record_gp_stall_check_time(rsp); | 1423 | record_gp_stall_check_time(rsp); |
| 1426 | smp_wmb(); /* Record GP times before starting GP. */ | 1424 | /* Record GP times before starting GP, hence smp_store_release(). */ |
| 1427 | rsp->gpnum++; | 1425 | smp_store_release(&rsp->gpnum, rsp->gpnum + 1); |
| 1428 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); | 1426 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); |
| 1429 | raw_spin_unlock_irq(&rnp->lock); | 1427 | raw_spin_unlock_irq(&rnp->lock); |
| 1430 | 1428 | ||
| 1431 | /* Exclude any concurrent CPU-hotplug operations. */ | 1429 | /* Exclude any concurrent CPU-hotplug operations. */ |
| 1432 | mutex_lock(&rsp->onoff_mutex); | 1430 | mutex_lock(&rsp->onoff_mutex); |
| 1431 | smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */ | ||
| 1433 | 1432 | ||
| 1434 | /* | 1433 | /* |
| 1435 | * Set the quiescent-state-needed bits in all the rcu_node | 1434 | * Set the quiescent-state-needed bits in all the rcu_node |
| @@ -1557,10 +1556,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
| 1557 | } | 1556 | } |
| 1558 | rnp = rcu_get_root(rsp); | 1557 | rnp = rcu_get_root(rsp); |
| 1559 | raw_spin_lock_irq(&rnp->lock); | 1558 | raw_spin_lock_irq(&rnp->lock); |
| 1560 | smp_mb__after_unlock_lock(); | 1559 | smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */ |
| 1561 | rcu_nocb_gp_set(rnp, nocb); | 1560 | rcu_nocb_gp_set(rnp, nocb); |
| 1562 | 1561 | ||
| 1563 | rsp->completed = rsp->gpnum; /* Declare grace period done. */ | 1562 | /* Declare grace period done. */ |
| 1563 | ACCESS_ONCE(rsp->completed) = rsp->gpnum; | ||
| 1564 | trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); | 1564 | trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); |
| 1565 | rsp->fqs_state = RCU_GP_IDLE; | 1565 | rsp->fqs_state = RCU_GP_IDLE; |
| 1566 | rdp = this_cpu_ptr(rsp->rda); | 1566 | rdp = this_cpu_ptr(rsp->rda); |
| @@ -2304,7 +2304,7 @@ static void force_quiescent_state(struct rcu_state *rsp) | |||
| 2304 | if (rnp_old != NULL) | 2304 | if (rnp_old != NULL) |
| 2305 | raw_spin_unlock(&rnp_old->fqslock); | 2305 | raw_spin_unlock(&rnp_old->fqslock); |
| 2306 | if (ret) { | 2306 | if (ret) { |
| 2307 | rsp->n_force_qs_lh++; | 2307 | ACCESS_ONCE(rsp->n_force_qs_lh)++; |
| 2308 | return; | 2308 | return; |
| 2309 | } | 2309 | } |
| 2310 | rnp_old = rnp; | 2310 | rnp_old = rnp; |
| @@ -2316,7 +2316,7 @@ static void force_quiescent_state(struct rcu_state *rsp) | |||
| 2316 | smp_mb__after_unlock_lock(); | 2316 | smp_mb__after_unlock_lock(); |
| 2317 | raw_spin_unlock(&rnp_old->fqslock); | 2317 | raw_spin_unlock(&rnp_old->fqslock); |
| 2318 | if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { | 2318 | if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { |
| 2319 | rsp->n_force_qs_lh++; | 2319 | ACCESS_ONCE(rsp->n_force_qs_lh)++; |
| 2320 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); | 2320 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); |
| 2321 | return; /* Someone beat us to it. */ | 2321 | return; /* Someone beat us to it. */ |
| 2322 | } | 2322 | } |
| @@ -2639,6 +2639,58 @@ void synchronize_rcu_bh(void) | |||
| 2639 | } | 2639 | } |
| 2640 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 2640 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
| 2641 | 2641 | ||
| 2642 | /** | ||
| 2643 | * get_state_synchronize_rcu - Snapshot current RCU state | ||
| 2644 | * | ||
| 2645 | * Returns a cookie that is used by a later call to cond_synchronize_rcu() | ||
| 2646 | * to determine whether or not a full grace period has elapsed in the | ||
| 2647 | * meantime. | ||
| 2648 | */ | ||
| 2649 | unsigned long get_state_synchronize_rcu(void) | ||
| 2650 | { | ||
| 2651 | /* | ||
| 2652 | * Any prior manipulation of RCU-protected data must happen | ||
| 2653 | * before the load from ->gpnum. | ||
| 2654 | */ | ||
| 2655 | smp_mb(); /* ^^^ */ | ||
| 2656 | |||
| 2657 | /* | ||
| 2658 | * Make sure this load happens before the purportedly | ||
| 2659 | * time-consuming work between get_state_synchronize_rcu() | ||
| 2660 | * and cond_synchronize_rcu(). | ||
| 2661 | */ | ||
| 2662 | return smp_load_acquire(&rcu_state->gpnum); | ||
| 2663 | } | ||
| 2664 | EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); | ||
| 2665 | |||
| 2666 | /** | ||
| 2667 | * cond_synchronize_rcu - Conditionally wait for an RCU grace period | ||
| 2668 | * | ||
| 2669 | * @oldstate: return value from earlier call to get_state_synchronize_rcu() | ||
| 2670 | * | ||
| 2671 | * If a full RCU grace period has elapsed since the earlier call to | ||
| 2672 | * get_state_synchronize_rcu(), just return. Otherwise, invoke | ||
| 2673 | * synchronize_rcu() to wait for a full grace period. | ||
| 2674 | * | ||
| 2675 | * Yes, this function does not take counter wrap into account. But | ||
| 2676 | * counter wrap is harmless. If the counter wraps, we have waited for | ||
| 2677 | * more than 2 billion grace periods (and way more on a 64-bit system!), | ||
| 2678 | * so waiting for one additional grace period should be just fine. | ||
| 2679 | */ | ||
| 2680 | void cond_synchronize_rcu(unsigned long oldstate) | ||
| 2681 | { | ||
| 2682 | unsigned long newstate; | ||
| 2683 | |||
| 2684 | /* | ||
| 2685 | * Ensure that this load happens before any RCU-destructive | ||
| 2686 | * actions the caller might carry out after we return. | ||
| 2687 | */ | ||
| 2688 | newstate = smp_load_acquire(&rcu_state->completed); | ||
| 2689 | if (ULONG_CMP_GE(oldstate, newstate)) | ||
| 2690 | synchronize_rcu(); | ||
| 2691 | } | ||
| 2692 | EXPORT_SYMBOL_GPL(cond_synchronize_rcu); | ||
| 2693 | |||
| 2642 | static int synchronize_sched_expedited_cpu_stop(void *data) | 2694 | static int synchronize_sched_expedited_cpu_stop(void *data) |
| 2643 | { | 2695 | { |
| 2644 | /* | 2696 | /* |
| @@ -2880,7 +2932,7 @@ static int rcu_pending(int cpu) | |||
| 2880 | * non-NULL, store an indication of whether all callbacks are lazy. | 2932 | * non-NULL, store an indication of whether all callbacks are lazy. |
| 2881 | * (If there are no callbacks, all of them are deemed to be lazy.) | 2933 | * (If there are no callbacks, all of them are deemed to be lazy.) |
| 2882 | */ | 2934 | */ |
| 2883 | static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) | 2935 | static int __maybe_unused rcu_cpu_has_callbacks(int cpu, bool *all_lazy) |
| 2884 | { | 2936 | { |
| 2885 | bool al = true; | 2937 | bool al = true; |
| 2886 | bool hc = false; | 2938 | bool hc = false; |
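The tree.c hunks above also add get_state_synchronize_rcu() and cond_synchronize_rcu(). Per their kernel-doc, a caller can snapshot the grace-period counter, perform unrelated slow work, and then wait only if no full grace period elapsed in the meantime. A minimal caller sketch under those documented semantics; the demo_* names are hypothetical:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_item {
        int payload;
};

/* Hypothetical slow work done between unlinking an item and freeing it. */
static void demo_expensive_teardown(struct demo_item *old)
{
        /* e.g., flush per-CPU statistics that referenced *old. */
}

/* Caller has already unlinked @old from every RCU-readable path. */
static void demo_retire(struct demo_item *old)
{
        unsigned long rcu_cookie;

        rcu_cookie = get_state_synchronize_rcu();   /* snapshot before the slow work */
        demo_expensive_teardown(old);
        cond_synchronize_rcu(rcu_cookie);           /* sleeps only if no GP elapsed meanwhile */
        kfree(old);
}

If the teardown work routinely outlasts a grace period, cond_synchronize_rcu() reduces to a counter comparison and the caller skips the synchronize_rcu() sleep entirely.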
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8c19873f1ac9..75dc3c39a02a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h | |||
| @@ -13,8 +13,8 @@ | |||
| 13 | * GNU General Public License for more details. | 13 | * GNU General Public License for more details. |
| 14 | * | 14 | * |
| 15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
| 16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, you can access it online at |
| 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 17 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 18 | * | 18 | * |
| 19 | * Copyright IBM Corporation, 2008 | 19 | * Copyright IBM Corporation, 2008 |
| 20 | * | 20 | * |
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6e2ef4b2b920..962d1d589929 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | * GNU General Public License for more details. | 14 | * GNU General Public License for more details. |
| 15 | * | 15 | * |
| 16 | * You should have received a copy of the GNU General Public License | 16 | * You should have received a copy of the GNU General Public License |
| 17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, you can access it online at |
| 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 18 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 19 | * | 19 | * |
| 20 | * Copyright Red Hat, 2009 | 20 | * Copyright Red Hat, 2009 |
| 21 | * Copyright IBM Corporation, 2009 | 21 | * Copyright IBM Corporation, 2009 |
| @@ -1586,11 +1586,13 @@ static void rcu_prepare_kthreads(int cpu) | |||
| 1586 | * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs | 1586 | * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs |
| 1587 | * any flavor of RCU. | 1587 | * any flavor of RCU. |
| 1588 | */ | 1588 | */ |
| 1589 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | ||
| 1589 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | 1590 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) |
| 1590 | { | 1591 | { |
| 1591 | *delta_jiffies = ULONG_MAX; | 1592 | *delta_jiffies = ULONG_MAX; |
| 1592 | return rcu_cpu_has_callbacks(cpu, NULL); | 1593 | return rcu_cpu_has_callbacks(cpu, NULL); |
| 1593 | } | 1594 | } |
| 1595 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ | ||
| 1594 | 1596 | ||
| 1595 | /* | 1597 | /* |
| 1596 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | 1598 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up |
| @@ -1656,7 +1658,7 @@ extern int tick_nohz_active; | |||
| 1656 | * only if it has been awhile since the last time we did so. Afterwards, | 1658 | * only if it has been awhile since the last time we did so. Afterwards, |
| 1657 | * if there are any callbacks ready for immediate invocation, return true. | 1659 | * if there are any callbacks ready for immediate invocation, return true. |
| 1658 | */ | 1660 | */ |
| 1659 | static bool rcu_try_advance_all_cbs(void) | 1661 | static bool __maybe_unused rcu_try_advance_all_cbs(void) |
| 1660 | { | 1662 | { |
| 1661 | bool cbs_ready = false; | 1663 | bool cbs_ready = false; |
| 1662 | struct rcu_data *rdp; | 1664 | struct rcu_data *rdp; |
| @@ -1696,6 +1698,7 @@ static bool rcu_try_advance_all_cbs(void) | |||
| 1696 | * | 1698 | * |
| 1697 | * The caller must have disabled interrupts. | 1699 | * The caller must have disabled interrupts. |
| 1698 | */ | 1700 | */ |
| 1701 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | ||
| 1699 | int rcu_needs_cpu(int cpu, unsigned long *dj) | 1702 | int rcu_needs_cpu(int cpu, unsigned long *dj) |
| 1700 | { | 1703 | { |
| 1701 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1704 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
| @@ -1726,6 +1729,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) | |||
| 1726 | } | 1729 | } |
| 1727 | return 0; | 1730 | return 0; |
| 1728 | } | 1731 | } |
| 1732 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ | ||
| 1729 | 1733 | ||
| 1730 | /* | 1734 | /* |
| 1731 | * Prepare a CPU for idle from an RCU perspective. The first major task | 1735 | * Prepare a CPU for idle from an RCU perspective. The first major task |
| @@ -1739,6 +1743,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) | |||
| 1739 | */ | 1743 | */ |
| 1740 | static void rcu_prepare_for_idle(int cpu) | 1744 | static void rcu_prepare_for_idle(int cpu) |
| 1741 | { | 1745 | { |
| 1746 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | ||
| 1742 | struct rcu_data *rdp; | 1747 | struct rcu_data *rdp; |
| 1743 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1748 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
| 1744 | struct rcu_node *rnp; | 1749 | struct rcu_node *rnp; |
| @@ -1790,6 +1795,7 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 1790 | rcu_accelerate_cbs(rsp, rnp, rdp); | 1795 | rcu_accelerate_cbs(rsp, rnp, rdp); |
| 1791 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1796 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 1792 | } | 1797 | } |
| 1798 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ | ||
| 1793 | } | 1799 | } |
| 1794 | 1800 | ||
| 1795 | /* | 1801 | /* |
| @@ -1799,11 +1805,12 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 1799 | */ | 1805 | */ |
| 1800 | static void rcu_cleanup_after_idle(int cpu) | 1806 | static void rcu_cleanup_after_idle(int cpu) |
| 1801 | { | 1807 | { |
| 1802 | 1808 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | |
| 1803 | if (rcu_is_nocb_cpu(cpu)) | 1809 | if (rcu_is_nocb_cpu(cpu)) |
| 1804 | return; | 1810 | return; |
| 1805 | if (rcu_try_advance_all_cbs()) | 1811 | if (rcu_try_advance_all_cbs()) |
| 1806 | invoke_rcu_core(); | 1812 | invoke_rcu_core(); |
| 1813 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ | ||
| 1807 | } | 1814 | } |
| 1808 | 1815 | ||
| 1809 | /* | 1816 | /* |
| @@ -2101,6 +2108,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) | |||
| 2101 | init_waitqueue_head(&rnp->nocb_gp_wq[1]); | 2108 | init_waitqueue_head(&rnp->nocb_gp_wq[1]); |
| 2102 | } | 2109 | } |
| 2103 | 2110 | ||
| 2111 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | ||
| 2104 | /* Is the specified CPU a no-CPUs CPU? */ | 2112 | /* Is the specified CPU a no-CPUs CPU? */ |
| 2105 | bool rcu_is_nocb_cpu(int cpu) | 2113 | bool rcu_is_nocb_cpu(int cpu) |
| 2106 | { | 2114 | { |
| @@ -2108,6 +2116,7 @@ bool rcu_is_nocb_cpu(int cpu) | |||
| 2108 | return cpumask_test_cpu(cpu, rcu_nocb_mask); | 2116 | return cpumask_test_cpu(cpu, rcu_nocb_mask); |
| 2109 | return false; | 2117 | return false; |
| 2110 | } | 2118 | } |
| 2119 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ | ||
| 2111 | 2120 | ||
| 2112 | /* | 2121 | /* |
| 2113 | * Enqueue the specified string of rcu_head structures onto the specified | 2122 | * Enqueue the specified string of rcu_head structures onto the specified |
| @@ -2893,7 +2902,7 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) | |||
| 2893 | * CPU unless the grace period has extended for too long. | 2902 | * CPU unless the grace period has extended for too long. |
| 2894 | * | 2903 | * |
| 2895 | * This code relies on the fact that all NO_HZ_FULL CPUs are also | 2904 | * This code relies on the fact that all NO_HZ_FULL CPUs are also |
| 2896 | * CONFIG_RCU_NOCB_CPUs. | 2905 | * CONFIG_RCU_NOCB_CPU CPUs. |
| 2897 | */ | 2906 | */ |
| 2898 | static bool rcu_nohz_full_cpu(struct rcu_state *rsp) | 2907 | static bool rcu_nohz_full_cpu(struct rcu_state *rsp) |
| 2899 | { | 2908 | { |
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 4def475336d4..5cdc62e1beeb 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright IBM Corporation, 2008 | 18 | * Copyright IBM Corporation, 2008 |
| 19 | * | 19 | * |
| @@ -273,7 +273,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
| 273 | seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", | 273 | seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", |
| 274 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 274 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
| 275 | rsp->n_force_qs - rsp->n_force_qs_ngp, | 275 | rsp->n_force_qs - rsp->n_force_qs_ngp, |
| 276 | rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); | 276 | ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); |
| 277 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { | 277 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { |
| 278 | if (rnp->level != level) { | 278 | if (rnp->level != level) { |
| 279 | seq_puts(m, "\n"); | 279 | seq_puts(m, "\n"); |
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c54609faf233..4c0a9b0af469 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, you can access it online at |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * http://www.gnu.org/licenses/gpl-2.0.html. |
| 17 | * | 17 | * |
| 18 | * Copyright IBM Corporation, 2001 | 18 | * Copyright IBM Corporation, 2001 |
| 19 | * | 19 | * |
| @@ -49,7 +49,6 @@ | |||
| 49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
| 50 | 50 | ||
| 51 | #define CREATE_TRACE_POINTS | 51 | #define CREATE_TRACE_POINTS |
| 52 | #include <trace/events/rcu.h> | ||
| 53 | 52 | ||
| 54 | #include "rcu.h" | 53 | #include "rcu.h" |
| 55 | 54 | ||
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 9a95c8c2af2a..ab32b7b0db5c 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile | |||
| @@ -13,7 +13,7 @@ endif | |||
| 13 | 13 | ||
| 14 | obj-y += core.o proc.o clock.o cputime.o | 14 | obj-y += core.o proc.o clock.o cputime.o |
| 15 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | 15 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o |
| 16 | obj-y += wait.o completion.o | 16 | obj-y += wait.o completion.o idle.o |
| 17 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | 17 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o |
| 18 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | 18 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o |
| 19 | obj-$(CONFIG_SCHEDSTATS) += stats.o | 19 | obj-$(CONFIG_SCHEDSTATS) += stats.o |
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 4a073539c58e..e73efba98301 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
| @@ -203,7 +203,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) | |||
| 203 | struct autogroup *ag; | 203 | struct autogroup *ag; |
| 204 | int err; | 204 | int err; |
| 205 | 205 | ||
| 206 | if (nice < -20 || nice > 19) | 206 | if (nice < MIN_NICE || nice > MAX_NICE) |
| 207 | return -EINVAL; | 207 | return -EINVAL; |
| 208 | 208 | ||
| 209 | err = security_task_setnice(current, nice); | 209 | err = security_task_setnice(current, nice); |
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 43c2bcc35761..b30a2924ef14 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c | |||
| @@ -301,14 +301,14 @@ u64 sched_clock_cpu(int cpu) | |||
| 301 | if (unlikely(!sched_clock_running)) | 301 | if (unlikely(!sched_clock_running)) |
| 302 | return 0ull; | 302 | return 0ull; |
| 303 | 303 | ||
| 304 | preempt_disable(); | 304 | preempt_disable_notrace(); |
| 305 | scd = cpu_sdc(cpu); | 305 | scd = cpu_sdc(cpu); |
| 306 | 306 | ||
| 307 | if (cpu != smp_processor_id()) | 307 | if (cpu != smp_processor_id()) |
| 308 | clock = sched_clock_remote(scd); | 308 | clock = sched_clock_remote(scd); |
| 309 | else | 309 | else |
| 310 | clock = sched_clock_local(scd); | 310 | clock = sched_clock_local(scd); |
| 311 | preempt_enable(); | 311 | preempt_enable_notrace(); |
| 312 | 312 | ||
| 313 | return clock; | 313 | return clock; |
| 314 | } | 314 | } |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131ef6aab..a47902c687ae 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -1745,8 +1745,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
| 1745 | p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; | 1745 | p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; |
| 1746 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; | 1746 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; |
| 1747 | p->numa_work.next = &p->numa_work; | 1747 | p->numa_work.next = &p->numa_work; |
| 1748 | p->numa_faults = NULL; | 1748 | p->numa_faults_memory = NULL; |
| 1749 | p->numa_faults_buffer = NULL; | 1749 | p->numa_faults_buffer_memory = NULL; |
| 1750 | p->last_task_numa_placement = 0; | ||
| 1751 | p->last_sum_exec_runtime = 0; | ||
| 1750 | 1752 | ||
| 1751 | INIT_LIST_HEAD(&p->numa_entry); | 1753 | INIT_LIST_HEAD(&p->numa_entry); |
| 1752 | p->numa_group = NULL; | 1754 | p->numa_group = NULL; |
| @@ -1952,7 +1954,7 @@ static int dl_overflow(struct task_struct *p, int policy, | |||
| 1952 | { | 1954 | { |
| 1953 | 1955 | ||
| 1954 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | 1956 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); |
| 1955 | u64 period = attr->sched_period; | 1957 | u64 period = attr->sched_period ?: attr->sched_deadline; |
| 1956 | u64 runtime = attr->sched_runtime; | 1958 | u64 runtime = attr->sched_runtime; |
| 1957 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; | 1959 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; |
| 1958 | int cpus, err = -1; | 1960 | int cpus, err = -1; |
| @@ -2149,8 +2151,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 2149 | if (mm) | 2151 | if (mm) |
| 2150 | mmdrop(mm); | 2152 | mmdrop(mm); |
| 2151 | if (unlikely(prev_state == TASK_DEAD)) { | 2153 | if (unlikely(prev_state == TASK_DEAD)) { |
| 2152 | task_numa_free(prev); | ||
| 2153 | |||
| 2154 | if (prev->sched_class->task_dead) | 2154 | if (prev->sched_class->task_dead) |
| 2155 | prev->sched_class->task_dead(prev); | 2155 | prev->sched_class->task_dead(prev); |
| 2156 | 2156 | ||
| @@ -2167,13 +2167,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 2167 | 2167 | ||
| 2168 | #ifdef CONFIG_SMP | 2168 | #ifdef CONFIG_SMP |
| 2169 | 2169 | ||
| 2170 | /* assumes rq->lock is held */ | ||
| 2171 | static inline void pre_schedule(struct rq *rq, struct task_struct *prev) | ||
| 2172 | { | ||
| 2173 | if (prev->sched_class->pre_schedule) | ||
| 2174 | prev->sched_class->pre_schedule(rq, prev); | ||
| 2175 | } | ||
| 2176 | |||
| 2177 | /* rq->lock is NOT held, but preemption is disabled */ | 2170 | /* rq->lock is NOT held, but preemption is disabled */ |
| 2178 | static inline void post_schedule(struct rq *rq) | 2171 | static inline void post_schedule(struct rq *rq) |
| 2179 | { | 2172 | { |
| @@ -2191,10 +2184,6 @@ static inline void post_schedule(struct rq *rq) | |||
| 2191 | 2184 | ||
| 2192 | #else | 2185 | #else |
| 2193 | 2186 | ||
| 2194 | static inline void pre_schedule(struct rq *rq, struct task_struct *p) | ||
| 2195 | { | ||
| 2196 | } | ||
| 2197 | |||
| 2198 | static inline void post_schedule(struct rq *rq) | 2187 | static inline void post_schedule(struct rq *rq) |
| 2199 | { | 2188 | { |
| 2200 | } | 2189 | } |
| @@ -2510,8 +2499,13 @@ void __kprobes preempt_count_add(int val) | |||
| 2510 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= | 2499 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= |
| 2511 | PREEMPT_MASK - 10); | 2500 | PREEMPT_MASK - 10); |
| 2512 | #endif | 2501 | #endif |
| 2513 | if (preempt_count() == val) | 2502 | if (preempt_count() == val) { |
| 2514 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | 2503 | unsigned long ip = get_parent_ip(CALLER_ADDR1); |
| 2504 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 2505 | current->preempt_disable_ip = ip; | ||
| 2506 | #endif | ||
| 2507 | trace_preempt_off(CALLER_ADDR0, ip); | ||
| 2508 | } | ||
| 2515 | } | 2509 | } |
| 2516 | EXPORT_SYMBOL(preempt_count_add); | 2510 | EXPORT_SYMBOL(preempt_count_add); |
| 2517 | 2511 | ||
| @@ -2554,6 +2548,13 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
| 2554 | print_modules(); | 2548 | print_modules(); |
| 2555 | if (irqs_disabled()) | 2549 | if (irqs_disabled()) |
| 2556 | print_irqtrace_events(prev); | 2550 | print_irqtrace_events(prev); |
| 2551 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 2552 | if (in_atomic_preempt_off()) { | ||
| 2553 | pr_err("Preemption disabled at:"); | ||
| 2554 | print_ip_sym(current->preempt_disable_ip); | ||
| 2555 | pr_cont("\n"); | ||
| 2556 | } | ||
| 2557 | #endif | ||
| 2557 | dump_stack(); | 2558 | dump_stack(); |
| 2558 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); | 2559 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
| 2559 | } | 2560 | } |
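
The two hunks above add a small debugging aid: with CONFIG_DEBUG_PREEMPT, the instruction pointer of the first preempt_count increment is stashed in the task and later printed by __schedule_bug(). A minimal user-space sketch of the same record-on-first-disable idea (all names here are illustrative stand-ins, not kernel API):

#include <stdio.h>

static int disable_count;
static const char *first_disable_site;   /* analogue of preempt_disable_ip */

static void fake_preempt_disable(const char *site)
{
	if (disable_count++ == 0)        /* record only the outermost disable */
		first_disable_site = site;
}

static void fake_preempt_enable(void)
{
	disable_count--;
}

static void fake_schedule_bug(void)
{
	if (disable_count)               /* "scheduling while atomic" analogue */
		fprintf(stderr, "Preemption disabled at: %s\n",
			first_disable_site);
}

int main(void)
{
	fake_preempt_disable("driver_foo:123");
	fake_preempt_disable("driver_foo:150");  /* nested: site not overwritten */
	fake_schedule_bug();                     /* reports the outermost site */
	fake_preempt_enable();
	fake_preempt_enable();
	return 0;
}
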
| @@ -2577,36 +2578,34 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 2577 | schedstat_inc(this_rq(), sched_count); | 2578 | schedstat_inc(this_rq(), sched_count); |
| 2578 | } | 2579 | } |
| 2579 | 2580 | ||
| 2580 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | ||
| 2581 | { | ||
| 2582 | if (prev->on_rq || rq->skip_clock_update < 0) | ||
| 2583 | update_rq_clock(rq); | ||
| 2584 | prev->sched_class->put_prev_task(rq, prev); | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | /* | 2581 | /* |
| 2588 | * Pick up the highest-prio task: | 2582 | * Pick up the highest-prio task: |
| 2589 | */ | 2583 | */ |
| 2590 | static inline struct task_struct * | 2584 | static inline struct task_struct * |
| 2591 | pick_next_task(struct rq *rq) | 2585 | pick_next_task(struct rq *rq, struct task_struct *prev) |
| 2592 | { | 2586 | { |
| 2593 | const struct sched_class *class; | 2587 | const struct sched_class *class = &fair_sched_class; |
| 2594 | struct task_struct *p; | 2588 | struct task_struct *p; |
| 2595 | 2589 | ||
| 2596 | /* | 2590 | /* |
| 2597 | * Optimization: we know that if all tasks are in | 2591 | * Optimization: we know that if all tasks are in |
| 2598 | * the fair class we can call that function directly: | 2592 | * the fair class we can call that function directly: |
| 2599 | */ | 2593 | */ |
| 2600 | if (likely(rq->nr_running == rq->cfs.h_nr_running)) { | 2594 | if (likely(prev->sched_class == class && |
| 2601 | p = fair_sched_class.pick_next_task(rq); | 2595 | rq->nr_running == rq->cfs.h_nr_running)) { |
| 2602 | if (likely(p)) | 2596 | p = fair_sched_class.pick_next_task(rq, prev); |
| 2597 | if (likely(p && p != RETRY_TASK)) | ||
| 2603 | return p; | 2598 | return p; |
| 2604 | } | 2599 | } |
| 2605 | 2600 | ||
| 2601 | again: | ||
| 2606 | for_each_class(class) { | 2602 | for_each_class(class) { |
| 2607 | p = class->pick_next_task(rq); | 2603 | p = class->pick_next_task(rq, prev); |
| 2608 | if (p) | 2604 | if (p) { |
| 2605 | if (unlikely(p == RETRY_TASK)) | ||
| 2606 | goto again; | ||
| 2609 | return p; | 2607 | return p; |
| 2608 | } | ||
| 2610 | } | 2609 | } |
| 2611 | 2610 | ||
| 2612 | BUG(); /* the idle class will always have a runnable task */ | 2611 | BUG(); /* the idle class will always have a runnable task */ |
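
The rewritten pick_next_task() above keeps the fair-class fast path but now passes prev down and restarts the class walk when a class returns the RETRY_TASK sentinel, signalling that the runqueue may have changed under it (e.g. a pull happened while the lock was dropped). A stripped-down, self-contained sketch of that control flow with stand-in types (not the real sched_class API):

#include <stdio.h>

struct task { const char *name; };
struct rq { int nr_running; int cfs_nr_running; };

#define RETRY_TASK ((struct task *)-1)   /* sentinel: restart the class walk */

static struct task rt_task  = { "rt"  };
static struct task cfs_task = { "cfs" };
static int rt_retry = 1;                 /* first RT pick simulates a pull */

/* Stand-ins for class->pick_next_task(rq, prev). */
static struct task *pick_rt(struct rq *rq)
{
	if (rt_retry--)
		return RETRY_TASK;           /* rq changed, caller must retry */
	return rq->nr_running > rq->cfs_nr_running ? &rt_task : NULL;
}

static struct task *pick_fair(struct rq *rq)
{
	return rq->cfs_nr_running ? &cfs_task : NULL;
}

static struct task *pick_next_task(struct rq *rq)
{
	struct task *p;

	/* Fast path: every runnable task is a fair task. */
	if (rq->nr_running == rq->cfs_nr_running) {
		p = pick_fair(rq);
		if (p && p != RETRY_TASK)
			return p;
	}
again:
	p = pick_rt(rq);                     /* higher classes first */
	if (p == RETRY_TASK)
		goto again;
	if (p)
		return p;
	return pick_fair(rq);
}

int main(void)
{
	struct rq rq = { .nr_running = 2, .cfs_nr_running = 1 };
	printf("picked: %s\n", pick_next_task(&rq)->name);   /* picked: rt */
	return 0;
}
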
| @@ -2700,13 +2699,10 @@ need_resched: | |||
| 2700 | switch_count = &prev->nvcsw; | 2699 | switch_count = &prev->nvcsw; |
| 2701 | } | 2700 | } |
| 2702 | 2701 | ||
| 2703 | pre_schedule(rq, prev); | 2702 | if (prev->on_rq || rq->skip_clock_update < 0) |
| 2704 | 2703 | update_rq_clock(rq); | |
| 2705 | if (unlikely(!rq->nr_running)) | ||
| 2706 | idle_balance(cpu, rq); | ||
| 2707 | 2704 | ||
| 2708 | put_prev_task(rq, prev); | 2705 | next = pick_next_task(rq, prev); |
| 2709 | next = pick_next_task(rq); | ||
| 2710 | clear_tsk_need_resched(prev); | 2706 | clear_tsk_need_resched(prev); |
| 2711 | clear_preempt_need_resched(); | 2707 | clear_preempt_need_resched(); |
| 2712 | rq->skip_clock_update = 0; | 2708 | rq->skip_clock_update = 0; |
| @@ -2908,7 +2904,8 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
| 2908 | * This function changes the 'effective' priority of a task. It does | 2904 | * This function changes the 'effective' priority of a task. It does |
| 2909 | * not touch ->normal_prio like __setscheduler(). | 2905 | * not touch ->normal_prio like __setscheduler(). |
| 2910 | * | 2906 | * |
| 2911 | * Used by the rt_mutex code to implement priority inheritance logic. | 2907 | * Used by the rt_mutex code to implement priority inheritance |
| 2908 | * logic. Call site only calls if the priority of the task changed. | ||
| 2912 | */ | 2909 | */ |
| 2913 | void rt_mutex_setprio(struct task_struct *p, int prio) | 2910 | void rt_mutex_setprio(struct task_struct *p, int prio) |
| 2914 | { | 2911 | { |
| @@ -2998,7 +2995,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 2998 | unsigned long flags; | 2995 | unsigned long flags; |
| 2999 | struct rq *rq; | 2996 | struct rq *rq; |
| 3000 | 2997 | ||
| 3001 | if (TASK_NICE(p) == nice || nice < -20 || nice > 19) | 2998 | if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) |
| 3002 | return; | 2999 | return; |
| 3003 | /* | 3000 | /* |
| 3004 | * We have to be careful, if called from sys_setpriority(), | 3001 | * We have to be careful, if called from sys_setpriority(), |
| @@ -3076,11 +3073,11 @@ SYSCALL_DEFINE1(nice, int, increment) | |||
| 3076 | if (increment > 40) | 3073 | if (increment > 40) |
| 3077 | increment = 40; | 3074 | increment = 40; |
| 3078 | 3075 | ||
| 3079 | nice = TASK_NICE(current) + increment; | 3076 | nice = task_nice(current) + increment; |
| 3080 | if (nice < -20) | 3077 | if (nice < MIN_NICE) |
| 3081 | nice = -20; | 3078 | nice = MIN_NICE; |
| 3082 | if (nice > 19) | 3079 | if (nice > MAX_NICE) |
| 3083 | nice = 19; | 3080 | nice = MAX_NICE; |
| 3084 | 3081 | ||
| 3085 | if (increment < 0 && !can_nice(current, nice)) | 3082 | if (increment < 0 && !can_nice(current, nice)) |
| 3086 | return -EPERM; | 3083 | return -EPERM; |
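
sys_nice() above now clamps against the MIN_NICE/MAX_NICE symbols instead of the bare -20/19 literals. Assuming those macros keep the historical values, the arithmetic works out as in this illustrative mirror of the clamping (not the kernel function itself):

#include <stdio.h>

#define MIN_NICE  (-20)   /* assumed values of the kernel macros */
#define MAX_NICE    19

/* Mirror of the sys_nice() clamping: |increment| is capped at 40 first. */
static int apply_nice(int current_nice, int increment)
{
	if (increment < -40) increment = -40;
	if (increment >  40) increment =  40;

	int nice = current_nice + increment;
	if (nice < MIN_NICE) nice = MIN_NICE;
	if (nice > MAX_NICE) nice = MAX_NICE;
	return nice;
}

int main(void)
{
	/* nice(100) from a task at nice 0 ends up pinned at MAX_NICE. */
	printf("%d\n", apply_nice(0, 100));   /* prints 19 */
	/* nice(-100) would also need CAP_SYS_NICE; the math alone gives MIN_NICE. */
	printf("%d\n", apply_nice(0, -100));  /* prints -20 */
	return 0;
}
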
| @@ -3109,18 +3106,6 @@ int task_prio(const struct task_struct *p) | |||
| 3109 | } | 3106 | } |
| 3110 | 3107 | ||
| 3111 | /** | 3108 | /** |
| 3112 | * task_nice - return the nice value of a given task. | ||
| 3113 | * @p: the task in question. | ||
| 3114 | * | ||
| 3115 | * Return: The nice value [ -20 ... 0 ... 19 ]. | ||
| 3116 | */ | ||
| 3117 | int task_nice(const struct task_struct *p) | ||
| 3118 | { | ||
| 3119 | return TASK_NICE(p); | ||
| 3120 | } | ||
| 3121 | EXPORT_SYMBOL(task_nice); | ||
| 3122 | |||
| 3123 | /** | ||
| 3124 | * idle_cpu - is a given cpu idle currently? | 3109 | * idle_cpu - is a given cpu idle currently? |
| 3125 | * @cpu: the processor in question. | 3110 | * @cpu: the processor in question. |
| 3126 | * | 3111 | * |
| @@ -3189,9 +3174,8 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) | |||
| 3189 | dl_se->dl_new = 1; | 3174 | dl_se->dl_new = 1; |
| 3190 | } | 3175 | } |
| 3191 | 3176 | ||
| 3192 | /* Actually do priority change: must hold pi & rq lock. */ | 3177 | static void __setscheduler_params(struct task_struct *p, |
| 3193 | static void __setscheduler(struct rq *rq, struct task_struct *p, | 3178 | const struct sched_attr *attr) |
| 3194 | const struct sched_attr *attr) | ||
| 3195 | { | 3179 | { |
| 3196 | int policy = attr->sched_policy; | 3180 | int policy = attr->sched_policy; |
| 3197 | 3181 | ||
| @@ -3211,9 +3195,21 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, | |||
| 3211 | * getparam()/getattr() don't report silly values for !rt tasks. | 3195 | * getparam()/getattr() don't report silly values for !rt tasks. |
| 3212 | */ | 3196 | */ |
| 3213 | p->rt_priority = attr->sched_priority; | 3197 | p->rt_priority = attr->sched_priority; |
| 3214 | |||
| 3215 | p->normal_prio = normal_prio(p); | 3198 | p->normal_prio = normal_prio(p); |
| 3216 | p->prio = rt_mutex_getprio(p); | 3199 | set_load_weight(p); |
| 3200 | } | ||
| 3201 | |||
| 3202 | /* Actually do priority change: must hold pi & rq lock. */ | ||
| 3203 | static void __setscheduler(struct rq *rq, struct task_struct *p, | ||
| 3204 | const struct sched_attr *attr) | ||
| 3205 | { | ||
| 3206 | __setscheduler_params(p, attr); | ||
| 3207 | |||
| 3208 | /* | ||
| 3209 | * If we get here, there were no pi waiters boosting the | ||

| 3210 | * task. It is safe to use the normal prio. | ||
| 3211 | */ | ||
| 3212 | p->prio = normal_prio(p); | ||
| 3217 | 3213 | ||
| 3218 | if (dl_prio(p->prio)) | 3214 | if (dl_prio(p->prio)) |
| 3219 | p->sched_class = &dl_sched_class; | 3215 | p->sched_class = &dl_sched_class; |
| @@ -3221,8 +3217,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, | |||
| 3221 | p->sched_class = &rt_sched_class; | 3217 | p->sched_class = &rt_sched_class; |
| 3222 | else | 3218 | else |
| 3223 | p->sched_class = &fair_sched_class; | 3219 | p->sched_class = &fair_sched_class; |
| 3224 | |||
| 3225 | set_load_weight(p); | ||
| 3226 | } | 3220 | } |
| 3227 | 3221 | ||
| 3228 | static void | 3222 | static void |
| @@ -3275,6 +3269,8 @@ static int __sched_setscheduler(struct task_struct *p, | |||
| 3275 | const struct sched_attr *attr, | 3269 | const struct sched_attr *attr, |
| 3276 | bool user) | 3270 | bool user) |
| 3277 | { | 3271 | { |
| 3272 | int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : | ||
| 3273 | MAX_RT_PRIO - 1 - attr->sched_priority; | ||
| 3278 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 3274 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
| 3279 | int policy = attr->sched_policy; | 3275 | int policy = attr->sched_policy; |
| 3280 | unsigned long flags; | 3276 | unsigned long flags; |
| @@ -3319,7 +3315,7 @@ recheck: | |||
| 3319 | */ | 3315 | */ |
| 3320 | if (user && !capable(CAP_SYS_NICE)) { | 3316 | if (user && !capable(CAP_SYS_NICE)) { |
| 3321 | if (fair_policy(policy)) { | 3317 | if (fair_policy(policy)) { |
| 3322 | if (attr->sched_nice < TASK_NICE(p) && | 3318 | if (attr->sched_nice < task_nice(p) && |
| 3323 | !can_nice(p, attr->sched_nice)) | 3319 | !can_nice(p, attr->sched_nice)) |
| 3324 | return -EPERM; | 3320 | return -EPERM; |
| 3325 | } | 3321 | } |
| @@ -3338,12 +3334,21 @@ recheck: | |||
| 3338 | return -EPERM; | 3334 | return -EPERM; |
| 3339 | } | 3335 | } |
| 3340 | 3336 | ||
| 3337 | /* | ||
| 3338 | * Can't set/change SCHED_DEADLINE policy at all for now | ||
| 3339 | * (safest behavior); in the future we would like to allow | ||
| 3340 | * unprivileged DL tasks to increase their relative deadline | ||
| 3341 | * or reduce their runtime (both ways reducing utilization) | ||
| 3342 | */ | ||
| 3343 | if (dl_policy(policy)) | ||
| 3344 | return -EPERM; | ||
| 3345 | |||
| 3341 | /* | 3346 | /* |
| 3342 | * Treat SCHED_IDLE as nice 20. Only allow a switch to | 3347 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
| 3343 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. | 3348 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
| 3344 | */ | 3349 | */ |
| 3345 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { | 3350 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { |
| 3346 | if (!can_nice(p, TASK_NICE(p))) | 3351 | if (!can_nice(p, task_nice(p))) |
| 3347 | return -EPERM; | 3352 | return -EPERM; |
| 3348 | } | 3353 | } |
| 3349 | 3354 | ||
| @@ -3380,16 +3385,18 @@ recheck: | |||
| 3380 | } | 3385 | } |
| 3381 | 3386 | ||
| 3382 | /* | 3387 | /* |
| 3383 | * If not changing anything there's no need to proceed further: | 3388 | * If not changing anything there's no need to proceed further, |
| 3389 | * but store a possible modification of reset_on_fork. | ||
| 3384 | */ | 3390 | */ |
| 3385 | if (unlikely(policy == p->policy)) { | 3391 | if (unlikely(policy == p->policy)) { |
| 3386 | if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p)) | 3392 | if (fair_policy(policy) && attr->sched_nice != task_nice(p)) |
| 3387 | goto change; | 3393 | goto change; |
| 3388 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) | 3394 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) |
| 3389 | goto change; | 3395 | goto change; |
| 3390 | if (dl_policy(policy)) | 3396 | if (dl_policy(policy)) |
| 3391 | goto change; | 3397 | goto change; |
| 3392 | 3398 | ||
| 3399 | p->sched_reset_on_fork = reset_on_fork; | ||
| 3393 | task_rq_unlock(rq, p, &flags); | 3400 | task_rq_unlock(rq, p, &flags); |
| 3394 | return 0; | 3401 | return 0; |
| 3395 | } | 3402 | } |
| @@ -3443,6 +3450,24 @@ change: | |||
| 3443 | return -EBUSY; | 3450 | return -EBUSY; |
| 3444 | } | 3451 | } |
| 3445 | 3452 | ||
| 3453 | p->sched_reset_on_fork = reset_on_fork; | ||
| 3454 | oldprio = p->prio; | ||
| 3455 | |||
| 3456 | /* | ||
| 3457 | * Special case for priority boosted tasks. | ||
| 3458 | * | ||
| 3459 | * If the new priority is lower or equal (user space view) | ||
| 3460 | * than the current (boosted) priority, we just store the new | ||
| 3461 | * normal parameters and do not touch the scheduler class and | ||
| 3462 | * the runqueue. This will be done when the task deboost | ||
| 3463 | * itself. | ||
| 3464 | */ | ||
| 3465 | if (rt_mutex_check_prio(p, newprio)) { | ||
| 3466 | __setscheduler_params(p, attr); | ||
| 3467 | task_rq_unlock(rq, p, &flags); | ||
| 3468 | return 0; | ||
| 3469 | } | ||
| 3470 | |||
| 3446 | on_rq = p->on_rq; | 3471 | on_rq = p->on_rq; |
| 3447 | running = task_current(rq, p); | 3472 | running = task_current(rq, p); |
| 3448 | if (on_rq) | 3473 | if (on_rq) |
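
The block above short-circuits __sched_setscheduler() for a task that is currently PI-boosted: if the requested priority is not better than the boosted one, only the "normal" parameters are stored and the class/runqueue changes are deferred until the task deboosts. A toy model of that decision, using the kernel convention that a numerically lower prio value means a higher priority (names are illustrative, not the rt_mutex API):

#include <stdio.h>
#include <stdbool.h>

struct toy_task {
	int prio;         /* effective prio, possibly PI-boosted (lower = higher) */
	int normal_prio;  /* prio the task would have without boosting */
};

/* Analogue of the rt_mutex_check_prio() test: does the boost beat newprio? */
static bool boost_beats(const struct toy_task *p, int newprio)
{
	return p->prio < newprio;
}

static void toy_setscheduler(struct toy_task *p, int newprio)
{
	if (boost_beats(p, newprio)) {
		/* Defer: remember the new normal prio, keep the boost intact. */
		p->normal_prio = newprio;
		return;
	}
	p->normal_prio = newprio;
	p->prio = newprio;        /* would also requeue/reschedule in the kernel */
}

int main(void)
{
	struct toy_task t = { .prio = 10, .normal_prio = 50 };  /* boosted to 10 */
	toy_setscheduler(&t, 40);   /* 40 is worse than the boost -> deferred */
	printf("prio=%d normal=%d\n", t.prio, t.normal_prio);   /* 10, 40 */
	return 0;
}
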
| @@ -3450,16 +3475,18 @@ change: | |||
| 3450 | if (running) | 3475 | if (running) |
| 3451 | p->sched_class->put_prev_task(rq, p); | 3476 | p->sched_class->put_prev_task(rq, p); |
| 3452 | 3477 | ||
| 3453 | p->sched_reset_on_fork = reset_on_fork; | ||
| 3454 | |||
| 3455 | oldprio = p->prio; | ||
| 3456 | prev_class = p->sched_class; | 3478 | prev_class = p->sched_class; |
| 3457 | __setscheduler(rq, p, attr); | 3479 | __setscheduler(rq, p, attr); |
| 3458 | 3480 | ||
| 3459 | if (running) | 3481 | if (running) |
| 3460 | p->sched_class->set_curr_task(rq); | 3482 | p->sched_class->set_curr_task(rq); |
| 3461 | if (on_rq) | 3483 | if (on_rq) { |
| 3462 | enqueue_task(rq, p, 0); | 3484 | /* |
| 3485 | * We enqueue to tail when the priority of a task is | ||
| 3486 | * increased (user space view). | ||
| 3487 | */ | ||
| 3488 | enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0); | ||
| 3489 | } | ||
| 3463 | 3490 | ||
| 3464 | check_class_changed(rq, p, prev_class, oldprio); | 3491 | check_class_changed(rq, p, prev_class, oldprio); |
| 3465 | task_rq_unlock(rq, p, &flags); | 3492 | task_rq_unlock(rq, p, &flags); |
| @@ -3615,7 +3642,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr, | |||
| 3615 | * XXX: do we want to be lenient like existing syscalls; or do we want | 3642 | * XXX: do we want to be lenient like existing syscalls; or do we want |
| 3616 | * to be strict and return an error on out-of-bounds values? | 3643 | * to be strict and return an error on out-of-bounds values? |
| 3617 | */ | 3644 | */ |
| 3618 | attr->sched_nice = clamp(attr->sched_nice, -20, 19); | 3645 | attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); |
| 3619 | 3646 | ||
| 3620 | out: | 3647 | out: |
| 3621 | return ret; | 3648 | return ret; |
| @@ -3661,13 +3688,14 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) | |||
| 3661 | * @pid: the pid in question. | 3688 | * @pid: the pid in question. |
| 3662 | * @uattr: structure containing the extended parameters. | 3689 | * @uattr: structure containing the extended parameters. |
| 3663 | */ | 3690 | */ |
| 3664 | SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr) | 3691 | SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, |
| 3692 | unsigned int, flags) | ||
| 3665 | { | 3693 | { |
| 3666 | struct sched_attr attr; | 3694 | struct sched_attr attr; |
| 3667 | struct task_struct *p; | 3695 | struct task_struct *p; |
| 3668 | int retval; | 3696 | int retval; |
| 3669 | 3697 | ||
| 3670 | if (!uattr || pid < 0) | 3698 | if (!uattr || pid < 0 || flags) |
| 3671 | return -EINVAL; | 3699 | return -EINVAL; |
| 3672 | 3700 | ||
| 3673 | if (sched_copy_attr(uattr, &attr)) | 3701 | if (sched_copy_attr(uattr, &attr)) |
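
With the hunk above (and the matching sched_getattr change a little further down) both syscalls grow a flags argument that must currently be zero. Since glibc of this era ships no wrappers, user space goes through syscall(2); a minimal sketch, with the sched_attr layout written out by hand as in the syscall documentation (if SYS_sched_setattr is missing from your headers, substitute the raw __NR_ number for your architecture):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

struct sched_attr {                 /* layout per the sched_setattr(2) description */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;        /* SCHED_NORMAL/SCHED_BATCH */
	uint32_t sched_priority;    /* SCHED_FIFO/SCHED_RR */
	uint64_t sched_runtime;     /* SCHED_DEADLINE, in nanoseconds */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy = 0;          /* SCHED_NORMAL */
	attr.sched_nice = 5;

	/* Last argument is the new 'flags'; anything non-zero gets -EINVAL. */
	if (syscall(SYS_sched_setattr, 0, &attr, 0) == -1)
		perror("sched_setattr");

	if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0) == -1)
		perror("sched_getattr");
	else
		printf("policy=%u nice=%d\n", attr.sched_policy, attr.sched_nice);

	return 0;
}
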
| @@ -3786,7 +3814,7 @@ static int sched_read_attr(struct sched_attr __user *uattr, | |||
| 3786 | attr->size = usize; | 3814 | attr->size = usize; |
| 3787 | } | 3815 | } |
| 3788 | 3816 | ||
| 3789 | ret = copy_to_user(uattr, attr, usize); | 3817 | ret = copy_to_user(uattr, attr, attr->size); |
| 3790 | if (ret) | 3818 | if (ret) |
| 3791 | return -EFAULT; | 3819 | return -EFAULT; |
| 3792 | 3820 | ||
| @@ -3804,8 +3832,8 @@ err_size: | |||
| 3804 | * @uattr: structure containing the extended parameters. | 3832 | * @uattr: structure containing the extended parameters. |
| 3805 | * @size: sizeof(attr) for fwd/bwd comp. | 3833 | * @size: sizeof(attr) for fwd/bwd comp. |
| 3806 | */ | 3834 | */ |
| 3807 | SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | 3835 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, |
| 3808 | unsigned int, size) | 3836 | unsigned int, size, unsigned int, flags) |
| 3809 | { | 3837 | { |
| 3810 | struct sched_attr attr = { | 3838 | struct sched_attr attr = { |
| 3811 | .size = sizeof(struct sched_attr), | 3839 | .size = sizeof(struct sched_attr), |
| @@ -3814,7 +3842,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | |||
| 3814 | int retval; | 3842 | int retval; |
| 3815 | 3843 | ||
| 3816 | if (!uattr || pid < 0 || size > PAGE_SIZE || | 3844 | if (!uattr || pid < 0 || size > PAGE_SIZE || |
| 3817 | size < SCHED_ATTR_SIZE_VER0) | 3845 | size < SCHED_ATTR_SIZE_VER0 || flags) |
| 3818 | return -EINVAL; | 3846 | return -EINVAL; |
| 3819 | 3847 | ||
| 3820 | rcu_read_lock(); | 3848 | rcu_read_lock(); |
| @@ -3835,7 +3863,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | |||
| 3835 | else if (task_has_rt_policy(p)) | 3863 | else if (task_has_rt_policy(p)) |
| 3836 | attr.sched_priority = p->rt_priority; | 3864 | attr.sched_priority = p->rt_priority; |
| 3837 | else | 3865 | else |
| 3838 | attr.sched_nice = TASK_NICE(p); | 3866 | attr.sched_nice = task_nice(p); |
| 3839 | 3867 | ||
| 3840 | rcu_read_unlock(); | 3868 | rcu_read_unlock(); |
| 3841 | 3869 | ||
| @@ -4473,6 +4501,7 @@ void init_idle(struct task_struct *idle, int cpu) | |||
| 4473 | rcu_read_unlock(); | 4501 | rcu_read_unlock(); |
| 4474 | 4502 | ||
| 4475 | rq->curr = rq->idle = idle; | 4503 | rq->curr = rq->idle = idle; |
| 4504 | idle->on_rq = 1; | ||
| 4476 | #if defined(CONFIG_SMP) | 4505 | #if defined(CONFIG_SMP) |
| 4477 | idle->on_cpu = 1; | 4506 | idle->on_cpu = 1; |
| 4478 | #endif | 4507 | #endif |
| @@ -4692,8 +4721,10 @@ void idle_task_exit(void) | |||
| 4692 | 4721 | ||
| 4693 | BUG_ON(cpu_online(smp_processor_id())); | 4722 | BUG_ON(cpu_online(smp_processor_id())); |
| 4694 | 4723 | ||
| 4695 | if (mm != &init_mm) | 4724 | if (mm != &init_mm) { |
| 4696 | switch_mm(mm, &init_mm, current); | 4725 | switch_mm(mm, &init_mm, current); |
| 4726 | finish_arch_post_lock_switch(); | ||
| 4727 | } | ||
| 4697 | mmdrop(mm); | 4728 | mmdrop(mm); |
| 4698 | } | 4729 | } |
| 4699 | 4730 | ||
| @@ -4711,6 +4742,22 @@ static void calc_load_migrate(struct rq *rq) | |||
| 4711 | atomic_long_add(delta, &calc_load_tasks); | 4742 | atomic_long_add(delta, &calc_load_tasks); |
| 4712 | } | 4743 | } |
| 4713 | 4744 | ||
| 4745 | static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) | ||
| 4746 | { | ||
| 4747 | } | ||
| 4748 | |||
| 4749 | static const struct sched_class fake_sched_class = { | ||
| 4750 | .put_prev_task = put_prev_task_fake, | ||
| 4751 | }; | ||
| 4752 | |||
| 4753 | static struct task_struct fake_task = { | ||
| 4754 | /* | ||
| 4755 | * Avoid pull_{rt,dl}_task() | ||
| 4756 | */ | ||
| 4757 | .prio = MAX_PRIO + 1, | ||
| 4758 | .sched_class = &fake_sched_class, | ||
| 4759 | }; | ||
| 4760 | |||
| 4714 | /* | 4761 | /* |
| 4715 | * Migrate all tasks from the rq, sleeping tasks will be migrated by | 4762 | * Migrate all tasks from the rq, sleeping tasks will be migrated by |
| 4716 | * try_to_wake_up()->select_task_rq(). | 4763 | * try_to_wake_up()->select_task_rq(). |
| @@ -4751,7 +4798,7 @@ static void migrate_tasks(unsigned int dead_cpu) | |||
| 4751 | if (rq->nr_running == 1) | 4798 | if (rq->nr_running == 1) |
| 4752 | break; | 4799 | break; |
| 4753 | 4800 | ||
| 4754 | next = pick_next_task(rq); | 4801 | next = pick_next_task(rq, &fake_task); |
| 4755 | BUG_ON(!next); | 4802 | BUG_ON(!next); |
| 4756 | next->sched_class->put_prev_task(rq, next); | 4803 | next->sched_class->put_prev_task(rq, next); |
| 4757 | 4804 | ||
| @@ -4841,7 +4888,7 @@ set_table_entry(struct ctl_table *entry, | |||
| 4841 | static struct ctl_table * | 4888 | static struct ctl_table * |
| 4842 | sd_alloc_ctl_domain_table(struct sched_domain *sd) | 4889 | sd_alloc_ctl_domain_table(struct sched_domain *sd) |
| 4843 | { | 4890 | { |
| 4844 | struct ctl_table *table = sd_alloc_ctl_entry(13); | 4891 | struct ctl_table *table = sd_alloc_ctl_entry(14); |
| 4845 | 4892 | ||
| 4846 | if (table == NULL) | 4893 | if (table == NULL) |
| 4847 | return NULL; | 4894 | return NULL; |
| @@ -4869,9 +4916,12 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) | |||
| 4869 | sizeof(int), 0644, proc_dointvec_minmax, false); | 4916 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 4870 | set_table_entry(&table[10], "flags", &sd->flags, | 4917 | set_table_entry(&table[10], "flags", &sd->flags, |
| 4871 | sizeof(int), 0644, proc_dointvec_minmax, false); | 4918 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 4872 | set_table_entry(&table[11], "name", sd->name, | 4919 | set_table_entry(&table[11], "max_newidle_lb_cost", |
| 4920 | &sd->max_newidle_lb_cost, | ||
| 4921 | sizeof(long), 0644, proc_doulongvec_minmax, false); | ||
| 4922 | set_table_entry(&table[12], "name", sd->name, | ||
| 4873 | CORENAME_MAX_SIZE, 0444, proc_dostring, false); | 4923 | CORENAME_MAX_SIZE, 0444, proc_dostring, false); |
| 4874 | /* &table[12] is terminator */ | 4924 | /* &table[13] is terminator */ |
| 4875 | 4925 | ||
| 4876 | return table; | 4926 | return table; |
| 4877 | } | 4927 | } |
| @@ -6848,7 +6898,6 @@ void __init sched_init(void) | |||
| 6848 | 6898 | ||
| 6849 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; | 6899 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; |
| 6850 | #ifdef CONFIG_RT_GROUP_SCHED | 6900 | #ifdef CONFIG_RT_GROUP_SCHED |
| 6851 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | ||
| 6852 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); | 6901 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); |
| 6853 | #endif | 6902 | #endif |
| 6854 | 6903 | ||
| @@ -6937,7 +6986,8 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 6937 | static unsigned long prev_jiffy; /* ratelimiting */ | 6986 | static unsigned long prev_jiffy; /* ratelimiting */ |
| 6938 | 6987 | ||
| 6939 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | 6988 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ |
| 6940 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 6989 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && |
| 6990 | !is_idle_task(current)) || | ||
| 6941 | system_state != SYSTEM_RUNNING || oops_in_progress) | 6991 | system_state != SYSTEM_RUNNING || oops_in_progress) |
| 6942 | return; | 6992 | return; |
| 6943 | if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) | 6993 | if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
| @@ -6955,6 +7005,13 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 6955 | debug_show_held_locks(current); | 7005 | debug_show_held_locks(current); |
| 6956 | if (irqs_disabled()) | 7006 | if (irqs_disabled()) |
| 6957 | print_irqtrace_events(current); | 7007 | print_irqtrace_events(current); |
| 7008 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 7009 | if (!preempt_count_equals(preempt_offset)) { | ||
| 7010 | pr_err("Preemption disabled at:"); | ||
| 7011 | print_ip_sym(current->preempt_disable_ip); | ||
| 7012 | pr_cont("\n"); | ||
| 7013 | } | ||
| 7014 | #endif | ||
| 6958 | dump_stack(); | 7015 | dump_stack(); |
| 6959 | } | 7016 | } |
| 6960 | EXPORT_SYMBOL(__might_sleep); | 7017 | EXPORT_SYMBOL(__might_sleep); |
| @@ -7008,7 +7065,7 @@ void normalize_rt_tasks(void) | |||
| 7008 | * Renice negative nice level userspace | 7065 | * Renice negative nice level userspace |
| 7009 | * tasks back to 0: | 7066 | * tasks back to 0: |
| 7010 | */ | 7067 | */ |
| 7011 | if (TASK_NICE(p) < 0 && p->mm) | 7068 | if (task_nice(p) < 0 && p->mm) |
| 7012 | set_user_nice(p, 0); | 7069 | set_user_nice(p, 0); |
| 7013 | continue; | 7070 | continue; |
| 7014 | } | 7071 | } |
| @@ -7422,6 +7479,7 @@ static int sched_dl_global_constraints(void) | |||
| 7422 | u64 period = global_rt_period(); | 7479 | u64 period = global_rt_period(); |
| 7423 | u64 new_bw = to_ratio(period, runtime); | 7480 | u64 new_bw = to_ratio(period, runtime); |
| 7424 | int cpu, ret = 0; | 7481 | int cpu, ret = 0; |
| 7482 | unsigned long flags; | ||
| 7425 | 7483 | ||
| 7426 | /* | 7484 | /* |
| 7427 | * Here we want to check the bandwidth not being set to some | 7485 | * Here we want to check the bandwidth not being set to some |
| @@ -7435,10 +7493,10 @@ static int sched_dl_global_constraints(void) | |||
| 7435 | for_each_possible_cpu(cpu) { | 7493 | for_each_possible_cpu(cpu) { |
| 7436 | struct dl_bw *dl_b = dl_bw_of(cpu); | 7494 | struct dl_bw *dl_b = dl_bw_of(cpu); |
| 7437 | 7495 | ||
| 7438 | raw_spin_lock(&dl_b->lock); | 7496 | raw_spin_lock_irqsave(&dl_b->lock, flags); |
| 7439 | if (new_bw < dl_b->total_bw) | 7497 | if (new_bw < dl_b->total_bw) |
| 7440 | ret = -EBUSY; | 7498 | ret = -EBUSY; |
| 7441 | raw_spin_unlock(&dl_b->lock); | 7499 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
| 7442 | 7500 | ||
| 7443 | if (ret) | 7501 | if (ret) |
| 7444 | break; | 7502 | break; |
| @@ -7451,6 +7509,7 @@ static void sched_dl_do_global(void) | |||
| 7451 | { | 7509 | { |
| 7452 | u64 new_bw = -1; | 7510 | u64 new_bw = -1; |
| 7453 | int cpu; | 7511 | int cpu; |
| 7512 | unsigned long flags; | ||
| 7454 | 7513 | ||
| 7455 | def_dl_bandwidth.dl_period = global_rt_period(); | 7514 | def_dl_bandwidth.dl_period = global_rt_period(); |
| 7456 | def_dl_bandwidth.dl_runtime = global_rt_runtime(); | 7515 | def_dl_bandwidth.dl_runtime = global_rt_runtime(); |
| @@ -7464,9 +7523,9 @@ static void sched_dl_do_global(void) | |||
| 7464 | for_each_possible_cpu(cpu) { | 7523 | for_each_possible_cpu(cpu) { |
| 7465 | struct dl_bw *dl_b = dl_bw_of(cpu); | 7524 | struct dl_bw *dl_b = dl_bw_of(cpu); |
| 7466 | 7525 | ||
| 7467 | raw_spin_lock(&dl_b->lock); | 7526 | raw_spin_lock_irqsave(&dl_b->lock, flags); |
| 7468 | dl_b->bw = new_bw; | 7527 | dl_b->bw = new_bw; |
| 7469 | raw_spin_unlock(&dl_b->lock); | 7528 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
| 7470 | } | 7529 | } |
| 7471 | } | 7530 | } |
| 7472 | 7531 | ||
| @@ -7475,7 +7534,8 @@ static int sched_rt_global_validate(void) | |||
| 7475 | if (sysctl_sched_rt_period <= 0) | 7534 | if (sysctl_sched_rt_period <= 0) |
| 7476 | return -EINVAL; | 7535 | return -EINVAL; |
| 7477 | 7536 | ||
| 7478 | if (sysctl_sched_rt_runtime > sysctl_sched_rt_period) | 7537 | if ((sysctl_sched_rt_runtime != RUNTIME_INF) && |
| 7538 | (sysctl_sched_rt_runtime > sysctl_sched_rt_period)) | ||
| 7479 | return -EINVAL; | 7539 | return -EINVAL; |
| 7480 | 7540 | ||
| 7481 | return 0; | 7541 | return 0; |
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 045fc74e3f09..5b9bb42b2d47 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c | |||
| @@ -70,7 +70,7 @@ static void cpudl_heapify(struct cpudl *cp, int idx) | |||
| 70 | 70 | ||
| 71 | static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl) | 71 | static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl) |
| 72 | { | 72 | { |
| 73 | WARN_ON(idx > num_present_cpus() || idx == IDX_INVALID); | 73 | WARN_ON(idx == IDX_INVALID || !cpu_present(idx)); |
| 74 | 74 | ||
| 75 | if (dl_time_before(new_dl, cp->elements[idx].dl)) { | 75 | if (dl_time_before(new_dl, cp->elements[idx].dl)) { |
| 76 | cp->elements[idx].dl = new_dl; | 76 | cp->elements[idx].dl = new_dl; |
| @@ -117,7 +117,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, | |||
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | out: | 119 | out: |
| 120 | WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1); | 120 | WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); |
| 121 | 121 | ||
| 122 | return best_cpu; | 122 | return best_cpu; |
| 123 | } | 123 | } |
| @@ -137,7 +137,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
| 137 | int old_idx, new_cpu; | 137 | int old_idx, new_cpu; |
| 138 | unsigned long flags; | 138 | unsigned long flags; |
| 139 | 139 | ||
| 140 | WARN_ON(cpu > num_present_cpus()); | 140 | WARN_ON(!cpu_present(cpu)); |
| 141 | 141 | ||
| 142 | raw_spin_lock_irqsave(&cp->lock, flags); | 142 | raw_spin_lock_irqsave(&cp->lock, flags); |
| 143 | old_idx = cp->cpu_to_idx[cpu]; | 143 | old_idx = cp->cpu_to_idx[cpu]; |
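
The three WARN_ONs above are tightened: comparing a CPU number against num_present_cpus() is both off by one (cpu == count slipped through) and wrong on sparse CPU maps, where the present CPU ids are not 0..count-1. A small user-space analogue of the difference (the mask and helpers are stand-ins, not the kernel cpumask API):

#include <stdio.h>
#include <stdbool.h>

/* Pretend CPUs 0, 2 and 5 are present (sparse numbering after hotplug). */
static const unsigned long present_mask = (1UL << 0) | (1UL << 2) | (1UL << 5);

static int fake_num_present_cpus(void)
{
	return __builtin_popcountl(present_mask);   /* == 3 */
}

static bool fake_cpu_present(int cpu)
{
	return present_mask & (1UL << cpu);
}

int main(void)
{
	int cpu = 5;    /* perfectly valid CPU */

	/* Old check: 5 > 3, so a valid CPU would have triggered the WARN. */
	printf("old check fires on valid cpu: %d\n", cpu > fake_num_present_cpus());

	/* New check: asks the present mask directly. */
	printf("new check fires on valid cpu: %d\n", !fake_cpu_present(cpu));

	cpu = 3;        /* not present, and 3 > 3 is false: old check misses it */
	printf("old misses bad cpu: %d, new catches it: %d\n",
	       cpu > fake_num_present_cpus(), !fake_cpu_present(cpu));
	return 0;
}
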
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 99947919e30b..58624a65f124 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
| @@ -142,7 +142,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
| 142 | p->utimescaled += cputime_scaled; | 142 | p->utimescaled += cputime_scaled; |
| 143 | account_group_user_time(p, cputime); | 143 | account_group_user_time(p, cputime); |
| 144 | 144 | ||
| 145 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | 145 | index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; |
| 146 | 146 | ||
| 147 | /* Add user time to cpustat. */ | 147 | /* Add user time to cpustat. */ |
| 148 | task_group_account_field(p, index, (__force u64) cputime); | 148 | task_group_account_field(p, index, (__force u64) cputime); |
| @@ -169,7 +169,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
| 169 | p->gtime += cputime; | 169 | p->gtime += cputime; |
| 170 | 170 | ||
| 171 | /* Add guest time to cpustat. */ | 171 | /* Add guest time to cpustat. */ |
| 172 | if (TASK_NICE(p) > 0) { | 172 | if (task_nice(p) > 0) { |
| 173 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | 173 | cpustat[CPUTIME_NICE] += (__force u64) cputime; |
| 174 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | 174 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; |
| 175 | } else { | 175 | } else { |
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 0dd5e0971a07..27ef40925525 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
| @@ -121,7 +121,7 @@ static inline void dl_clear_overload(struct rq *rq) | |||
| 121 | 121 | ||
| 122 | static void update_dl_migration(struct dl_rq *dl_rq) | 122 | static void update_dl_migration(struct dl_rq *dl_rq) |
| 123 | { | 123 | { |
| 124 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_total > 1) { | 124 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) { |
| 125 | if (!dl_rq->overloaded) { | 125 | if (!dl_rq->overloaded) { |
| 126 | dl_set_overload(rq_of_dl_rq(dl_rq)); | 126 | dl_set_overload(rq_of_dl_rq(dl_rq)); |
| 127 | dl_rq->overloaded = 1; | 127 | dl_rq->overloaded = 1; |
| @@ -135,9 +135,7 @@ static void update_dl_migration(struct dl_rq *dl_rq) | |||
| 135 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 135 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
| 136 | { | 136 | { |
| 137 | struct task_struct *p = dl_task_of(dl_se); | 137 | struct task_struct *p = dl_task_of(dl_se); |
| 138 | dl_rq = &rq_of_dl_rq(dl_rq)->dl; | ||
| 139 | 138 | ||
| 140 | dl_rq->dl_nr_total++; | ||
| 141 | if (p->nr_cpus_allowed > 1) | 139 | if (p->nr_cpus_allowed > 1) |
| 142 | dl_rq->dl_nr_migratory++; | 140 | dl_rq->dl_nr_migratory++; |
| 143 | 141 | ||
| @@ -147,9 +145,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |||
| 147 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 145 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
| 148 | { | 146 | { |
| 149 | struct task_struct *p = dl_task_of(dl_se); | 147 | struct task_struct *p = dl_task_of(dl_se); |
| 150 | dl_rq = &rq_of_dl_rq(dl_rq)->dl; | ||
| 151 | 148 | ||
| 152 | dl_rq->dl_nr_total--; | ||
| 153 | if (p->nr_cpus_allowed > 1) | 149 | if (p->nr_cpus_allowed > 1) |
| 154 | dl_rq->dl_nr_migratory--; | 150 | dl_rq->dl_nr_migratory--; |
| 155 | 151 | ||
| @@ -214,6 +210,16 @@ static inline int has_pushable_dl_tasks(struct rq *rq) | |||
| 214 | 210 | ||
| 215 | static int push_dl_task(struct rq *rq); | 211 | static int push_dl_task(struct rq *rq); |
| 216 | 212 | ||
| 213 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) | ||
| 214 | { | ||
| 215 | return dl_task(prev); | ||
| 216 | } | ||
| 217 | |||
| 218 | static inline void set_post_schedule(struct rq *rq) | ||
| 219 | { | ||
| 220 | rq->post_schedule = has_pushable_dl_tasks(rq); | ||
| 221 | } | ||
| 222 | |||
| 217 | #else | 223 | #else |
| 218 | 224 | ||
| 219 | static inline | 225 | static inline |
| @@ -236,6 +242,19 @@ void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |||
| 236 | { | 242 | { |
| 237 | } | 243 | } |
| 238 | 244 | ||
| 245 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) | ||
| 246 | { | ||
| 247 | return false; | ||
| 248 | } | ||
| 249 | |||
| 250 | static inline int pull_dl_task(struct rq *rq) | ||
| 251 | { | ||
| 252 | return 0; | ||
| 253 | } | ||
| 254 | |||
| 255 | static inline void set_post_schedule(struct rq *rq) | ||
| 256 | { | ||
| 257 | } | ||
| 239 | #endif /* CONFIG_SMP */ | 258 | #endif /* CONFIG_SMP */ |
| 240 | 259 | ||
| 241 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); | 260 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); |
| @@ -566,6 +585,8 @@ int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) | |||
| 566 | return 1; | 585 | return 1; |
| 567 | } | 586 | } |
| 568 | 587 | ||
| 588 | extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); | ||
| 589 | |||
| 569 | /* | 590 | /* |
| 570 | * Update the current task's runtime statistics (provided it is still | 591 | * Update the current task's runtime statistics (provided it is still |
| 571 | * a -deadline task and has not been removed from the dl_rq). | 592 | * a -deadline task and has not been removed from the dl_rq). |
| @@ -588,8 +609,8 @@ static void update_curr_dl(struct rq *rq) | |||
| 588 | * approach need further study. | 609 | * approach need further study. |
| 589 | */ | 610 | */ |
| 590 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; | 611 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; |
| 591 | if (unlikely((s64)delta_exec < 0)) | 612 | if (unlikely((s64)delta_exec <= 0)) |
| 592 | delta_exec = 0; | 613 | return; |
| 593 | 614 | ||
| 594 | schedstat_set(curr->se.statistics.exec_max, | 615 | schedstat_set(curr->se.statistics.exec_max, |
| 595 | max(curr->se.statistics.exec_max, delta_exec)); | 616 | max(curr->se.statistics.exec_max, delta_exec)); |
| @@ -629,11 +650,13 @@ static void update_curr_dl(struct rq *rq) | |||
| 629 | struct rt_rq *rt_rq = &rq->rt; | 650 | struct rt_rq *rt_rq = &rq->rt; |
| 630 | 651 | ||
| 631 | raw_spin_lock(&rt_rq->rt_runtime_lock); | 652 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
| 632 | rt_rq->rt_time += delta_exec; | ||
| 633 | /* | 653 | /* |
| 634 | * We'll let actual RT tasks worry about the overflow here, we | 654 | * We'll let actual RT tasks worry about the overflow here, we |
| 635 | * have our own CBS to keep us inline -- see above. | 655 | * have our own CBS to keep us inline; only account when RT |
| 656 | * bandwidth is relevant. | ||
| 636 | */ | 657 | */ |
| 658 | if (sched_rt_bandwidth_account(rt_rq)) | ||
| 659 | rt_rq->rt_time += delta_exec; | ||
| 637 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 660 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
| 638 | } | 661 | } |
| 639 | } | 662 | } |
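
update_curr_dl() above now bails out on a non-positive delta and only charges the shared rt_time budget when RT bandwidth accounting is actually in effect (the new sched_rt_bandwidth_account() hook). The general shape of that guarded accounting, reduced to a standalone sketch with placeholder names:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct budget {
	bool     accounting_on;   /* analogue of sched_rt_bandwidth_account() */
	uint64_t consumed_ns;     /* analogue of rt_rq->rt_time */
};

/* Charge the elapsed runtime, mirroring the two guards added above. */
static void charge(struct budget *b, int64_t delta_ns)
{
	if (delta_ns <= 0)        /* clock went backwards or no progress: ignore */
		return;
	if (b->accounting_on)     /* only bill when throttling actually applies */
		b->consumed_ns += (uint64_t)delta_ns;
}

int main(void)
{
	struct budget b = { .accounting_on = true, .consumed_ns = 0 };

	charge(&b, 250000);       /* normal tick: billed */
	charge(&b, -10);          /* bogus negative delta: skipped */
	b.accounting_on = false;
	charge(&b, 250000);       /* RT throttling disabled: not billed */

	printf("consumed = %llu ns\n", (unsigned long long)b.consumed_ns);
	return 0;
}
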
| @@ -717,6 +740,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |||
| 717 | 740 | ||
| 718 | WARN_ON(!dl_prio(prio)); | 741 | WARN_ON(!dl_prio(prio)); |
| 719 | dl_rq->dl_nr_running++; | 742 | dl_rq->dl_nr_running++; |
| 743 | inc_nr_running(rq_of_dl_rq(dl_rq)); | ||
| 720 | 744 | ||
| 721 | inc_dl_deadline(dl_rq, deadline); | 745 | inc_dl_deadline(dl_rq, deadline); |
| 722 | inc_dl_migration(dl_se, dl_rq); | 746 | inc_dl_migration(dl_se, dl_rq); |
| @@ -730,6 +754,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |||
| 730 | WARN_ON(!dl_prio(prio)); | 754 | WARN_ON(!dl_prio(prio)); |
| 731 | WARN_ON(!dl_rq->dl_nr_running); | 755 | WARN_ON(!dl_rq->dl_nr_running); |
| 732 | dl_rq->dl_nr_running--; | 756 | dl_rq->dl_nr_running--; |
| 757 | dec_nr_running(rq_of_dl_rq(dl_rq)); | ||
| 733 | 758 | ||
| 734 | dec_dl_deadline(dl_rq, dl_se->deadline); | 759 | dec_dl_deadline(dl_rq, dl_se->deadline); |
| 735 | dec_dl_migration(dl_se, dl_rq); | 760 | dec_dl_migration(dl_se, dl_rq); |
| @@ -836,8 +861,6 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |||
| 836 | 861 | ||
| 837 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | 862 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) |
| 838 | enqueue_pushable_dl_task(rq, p); | 863 | enqueue_pushable_dl_task(rq, p); |
| 839 | |||
| 840 | inc_nr_running(rq); | ||
| 841 | } | 864 | } |
| 842 | 865 | ||
| 843 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | 866 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) |
| @@ -850,8 +873,6 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |||
| 850 | { | 873 | { |
| 851 | update_curr_dl(rq); | 874 | update_curr_dl(rq); |
| 852 | __dequeue_task_dl(rq, p, flags); | 875 | __dequeue_task_dl(rq, p, flags); |
| 853 | |||
| 854 | dec_nr_running(rq); | ||
| 855 | } | 876 | } |
| 856 | 877 | ||
| 857 | /* | 878 | /* |
| @@ -944,6 +965,8 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | |||
| 944 | resched_task(rq->curr); | 965 | resched_task(rq->curr); |
| 945 | } | 966 | } |
| 946 | 967 | ||
| 968 | static int pull_dl_task(struct rq *this_rq); | ||
| 969 | |||
| 947 | #endif /* CONFIG_SMP */ | 970 | #endif /* CONFIG_SMP */ |
| 948 | 971 | ||
| 949 | /* | 972 | /* |
| @@ -990,7 +1013,7 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, | |||
| 990 | return rb_entry(left, struct sched_dl_entity, rb_node); | 1013 | return rb_entry(left, struct sched_dl_entity, rb_node); |
| 991 | } | 1014 | } |
| 992 | 1015 | ||
| 993 | struct task_struct *pick_next_task_dl(struct rq *rq) | 1016 | struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) |
| 994 | { | 1017 | { |
| 995 | struct sched_dl_entity *dl_se; | 1018 | struct sched_dl_entity *dl_se; |
| 996 | struct task_struct *p; | 1019 | struct task_struct *p; |
| @@ -998,9 +1021,20 @@ struct task_struct *pick_next_task_dl(struct rq *rq) | |||
| 998 | 1021 | ||
| 999 | dl_rq = &rq->dl; | 1022 | dl_rq = &rq->dl; |
| 1000 | 1023 | ||
| 1024 | if (need_pull_dl_task(rq, prev)) | ||
| 1025 | pull_dl_task(rq); | ||
| 1026 | /* | ||
| 1027 | * When prev is DL, we may throttle it in put_prev_task(). | ||
| 1028 | * So, we update time before we check for dl_nr_running. | ||
| 1029 | */ | ||
| 1030 | if (prev->sched_class == &dl_sched_class) | ||
| 1031 | update_curr_dl(rq); | ||
| 1032 | |||
| 1001 | if (unlikely(!dl_rq->dl_nr_running)) | 1033 | if (unlikely(!dl_rq->dl_nr_running)) |
| 1002 | return NULL; | 1034 | return NULL; |
| 1003 | 1035 | ||
| 1036 | put_prev_task(rq, prev); | ||
| 1037 | |||
| 1004 | dl_se = pick_next_dl_entity(rq, dl_rq); | 1038 | dl_se = pick_next_dl_entity(rq, dl_rq); |
| 1005 | BUG_ON(!dl_se); | 1039 | BUG_ON(!dl_se); |
| 1006 | 1040 | ||
| @@ -1015,9 +1049,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq) | |||
| 1015 | start_hrtick_dl(rq, p); | 1049 | start_hrtick_dl(rq, p); |
| 1016 | #endif | 1050 | #endif |
| 1017 | 1051 | ||
| 1018 | #ifdef CONFIG_SMP | 1052 | set_post_schedule(rq); |
| 1019 | rq->post_schedule = has_pushable_dl_tasks(rq); | ||
| 1020 | #endif /* CONFIG_SMP */ | ||
| 1021 | 1053 | ||
| 1022 | return p; | 1054 | return p; |
| 1023 | } | 1055 | } |
| @@ -1426,13 +1458,6 @@ skip: | |||
| 1426 | return ret; | 1458 | return ret; |
| 1427 | } | 1459 | } |
| 1428 | 1460 | ||
| 1429 | static void pre_schedule_dl(struct rq *rq, struct task_struct *prev) | ||
| 1430 | { | ||
| 1431 | /* Try to pull other tasks here */ | ||
| 1432 | if (dl_task(prev)) | ||
| 1433 | pull_dl_task(rq); | ||
| 1434 | } | ||
| 1435 | |||
| 1436 | static void post_schedule_dl(struct rq *rq) | 1461 | static void post_schedule_dl(struct rq *rq) |
| 1437 | { | 1462 | { |
| 1438 | push_dl_tasks(rq); | 1463 | push_dl_tasks(rq); |
| @@ -1560,7 +1585,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) | |||
| 1560 | if (unlikely(p->dl.dl_throttled)) | 1585 | if (unlikely(p->dl.dl_throttled)) |
| 1561 | return; | 1586 | return; |
| 1562 | 1587 | ||
| 1563 | if (p->on_rq || rq->curr != p) { | 1588 | if (p->on_rq && rq->curr != p) { |
| 1564 | #ifdef CONFIG_SMP | 1589 | #ifdef CONFIG_SMP |
| 1565 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) | 1590 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) |
| 1566 | /* Only reschedule if pushing failed */ | 1591 | /* Only reschedule if pushing failed */ |
| @@ -1625,7 +1650,6 @@ const struct sched_class dl_sched_class = { | |||
| 1625 | .set_cpus_allowed = set_cpus_allowed_dl, | 1650 | .set_cpus_allowed = set_cpus_allowed_dl, |
| 1626 | .rq_online = rq_online_dl, | 1651 | .rq_online = rq_online_dl, |
| 1627 | .rq_offline = rq_offline_dl, | 1652 | .rq_offline = rq_offline_dl, |
| 1628 | .pre_schedule = pre_schedule_dl, | ||
| 1629 | .post_schedule = post_schedule_dl, | 1653 | .post_schedule = post_schedule_dl, |
| 1630 | .task_woken = task_woken_dl, | 1654 | .task_woken = task_woken_dl, |
| 1631 | #endif | 1655 | #endif |
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index dd52e7ffb10e..f3344c31632a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
| @@ -321,6 +321,7 @@ do { \ | |||
| 321 | P(sched_goidle); | 321 | P(sched_goidle); |
| 322 | #ifdef CONFIG_SMP | 322 | #ifdef CONFIG_SMP |
| 323 | P64(avg_idle); | 323 | P64(avg_idle); |
| 324 | P64(max_idle_balance_cost); | ||
| 324 | #endif | 325 | #endif |
| 325 | 326 | ||
| 326 | P(ttwu_count); | 327 | P(ttwu_count); |
| @@ -533,15 +534,15 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) | |||
| 533 | unsigned long nr_faults = -1; | 534 | unsigned long nr_faults = -1; |
| 534 | int cpu_current, home_node; | 535 | int cpu_current, home_node; |
| 535 | 536 | ||
| 536 | if (p->numa_faults) | 537 | if (p->numa_faults_memory) |
| 537 | nr_faults = p->numa_faults[2*node + i]; | 538 | nr_faults = p->numa_faults_memory[2*node + i]; |
| 538 | 539 | ||
| 539 | cpu_current = !i ? (task_node(p) == node) : | 540 | cpu_current = !i ? (task_node(p) == node) : |
| 540 | (pol && node_isset(node, pol->v.nodes)); | 541 | (pol && node_isset(node, pol->v.nodes)); |
| 541 | 542 | ||
| 542 | home_node = (p->numa_preferred_nid == node); | 543 | home_node = (p->numa_preferred_nid == node); |
| 543 | 544 | ||
| 544 | SEQ_printf(m, "numa_faults, %d, %d, %d, %d, %ld\n", | 545 | SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n", |
| 545 | i, node, cpu_current, home_node, nr_faults); | 546 | i, node, cpu_current, home_node, nr_faults); |
| 546 | } | 547 | } |
| 547 | } | 548 | } |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 966cc2bfcb77..7e9bd0b1fa9e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -322,13 +322,13 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) | |||
| 322 | list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) | 322 | list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) |
| 323 | 323 | ||
| 324 | /* Do the two (enqueued) entities belong to the same group ? */ | 324 | /* Do the two (enqueued) entities belong to the same group ? */ |
| 325 | static inline int | 325 | static inline struct cfs_rq * |
| 326 | is_same_group(struct sched_entity *se, struct sched_entity *pse) | 326 | is_same_group(struct sched_entity *se, struct sched_entity *pse) |
| 327 | { | 327 | { |
| 328 | if (se->cfs_rq == pse->cfs_rq) | 328 | if (se->cfs_rq == pse->cfs_rq) |
| 329 | return 1; | 329 | return se->cfs_rq; |
| 330 | 330 | ||
| 331 | return 0; | 331 | return NULL; |
| 332 | } | 332 | } |
| 333 | 333 | ||
| 334 | static inline struct sched_entity *parent_entity(struct sched_entity *se) | 334 | static inline struct sched_entity *parent_entity(struct sched_entity *se) |
| @@ -336,17 +336,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
| 336 | return se->parent; | 336 | return se->parent; |
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | /* return depth at which a sched entity is present in the hierarchy */ | ||
| 340 | static inline int depth_se(struct sched_entity *se) | ||
| 341 | { | ||
| 342 | int depth = 0; | ||
| 343 | |||
| 344 | for_each_sched_entity(se) | ||
| 345 | depth++; | ||
| 346 | |||
| 347 | return depth; | ||
| 348 | } | ||
| 349 | |||
| 350 | static void | 339 | static void |
| 351 | find_matching_se(struct sched_entity **se, struct sched_entity **pse) | 340 | find_matching_se(struct sched_entity **se, struct sched_entity **pse) |
| 352 | { | 341 | { |
| @@ -360,8 +349,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) | |||
| 360 | */ | 349 | */ |
| 361 | 350 | ||
| 362 | /* First walk up until both entities are at same depth */ | 351 | /* First walk up until both entities are at same depth */ |
| 363 | se_depth = depth_se(*se); | 352 | se_depth = (*se)->depth; |
| 364 | pse_depth = depth_se(*pse); | 353 | pse_depth = (*pse)->depth; |
| 365 | 354 | ||
| 366 | while (se_depth > pse_depth) { | 355 | while (se_depth > pse_depth) { |
| 367 | se_depth--; | 356 | se_depth--; |
| @@ -426,12 +415,6 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) | |||
| 426 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | 415 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ |
| 427 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) | 416 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) |
| 428 | 417 | ||
| 429 | static inline int | ||
| 430 | is_same_group(struct sched_entity *se, struct sched_entity *pse) | ||
| 431 | { | ||
| 432 | return 1; | ||
| 433 | } | ||
| 434 | |||
| 435 | static inline struct sched_entity *parent_entity(struct sched_entity *se) | 418 | static inline struct sched_entity *parent_entity(struct sched_entity *se) |
| 436 | { | 419 | { |
| 437 | return NULL; | 420 | return NULL; |
| @@ -819,14 +802,6 @@ unsigned int sysctl_numa_balancing_scan_size = 256; | |||
| 819 | /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */ | 802 | /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */ |
| 820 | unsigned int sysctl_numa_balancing_scan_delay = 1000; | 803 | unsigned int sysctl_numa_balancing_scan_delay = 1000; |
| 821 | 804 | ||
| 822 | /* | ||
| 823 | * After skipping a page migration on a shared page, skip N more numa page | ||
| 824 | * migrations unconditionally. This reduces the number of NUMA migrations | ||
| 825 | * in shared memory workloads, and has the effect of pulling tasks towards | ||
| 826 | * where their memory lives, over pulling the memory towards the task. | ||
| 827 | */ | ||
| 828 | unsigned int sysctl_numa_balancing_migrate_deferred = 16; | ||
| 829 | |||
| 830 | static unsigned int task_nr_scan_windows(struct task_struct *p) | 805 | static unsigned int task_nr_scan_windows(struct task_struct *p) |
| 831 | { | 806 | { |
| 832 | unsigned long rss = 0; | 807 | unsigned long rss = 0; |
| @@ -893,10 +868,26 @@ struct numa_group { | |||
| 893 | struct list_head task_list; | 868 | struct list_head task_list; |
| 894 | 869 | ||
| 895 | struct rcu_head rcu; | 870 | struct rcu_head rcu; |
| 871 | nodemask_t active_nodes; | ||
| 896 | unsigned long total_faults; | 872 | unsigned long total_faults; |
| 873 | /* | ||
| 874 | * Faults_cpu is used to decide whether memory should move | ||
| 875 | * towards the CPU. As a consequence, these stats are weighted | ||
| 876 | * more by CPU use than by memory faults. | ||
| 877 | */ | ||
| 878 | unsigned long *faults_cpu; | ||
| 897 | unsigned long faults[0]; | 879 | unsigned long faults[0]; |
| 898 | }; | 880 | }; |
| 899 | 881 | ||
| 882 | /* Shared or private faults. */ | ||
| 883 | #define NR_NUMA_HINT_FAULT_TYPES 2 | ||
| 884 | |||
| 885 | /* Memory and CPU locality */ | ||
| 886 | #define NR_NUMA_HINT_FAULT_STATS (NR_NUMA_HINT_FAULT_TYPES * 2) | ||
| 887 | |||
| 888 | /* Averaged statistics, and temporary buffers. */ | ||
| 889 | #define NR_NUMA_HINT_FAULT_BUCKETS (NR_NUMA_HINT_FAULT_STATS * 2) | ||
| 890 | |||
| 900 | pid_t task_numa_group_id(struct task_struct *p) | 891 | pid_t task_numa_group_id(struct task_struct *p) |
| 901 | { | 892 | { |
| 902 | return p->numa_group ? p->numa_group->gid : 0; | 893 | return p->numa_group ? p->numa_group->gid : 0; |
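
The constants introduced above fix the layout of the per-task and per-group NUMA fault arrays: two fault types (private/shared) per node, doubled once for the memory-vs-CPU split and once more for the averaged-stats-plus-scratch-buffer split. A quick check of the index arithmetic, matching the updated task_faults_idx() just below (nr_nodes is only an example value):

#include <stdio.h>

#define NR_NUMA_HINT_FAULT_TYPES   2                              /* priv, shared */
#define NR_NUMA_HINT_FAULT_STATS   (NR_NUMA_HINT_FAULT_TYPES * 2) /* + cpu copy  */
#define NR_NUMA_HINT_FAULT_BUCKETS (NR_NUMA_HINT_FAULT_STATS * 2) /* + buffers   */

/* Same formula as task_faults_idx() in the next hunk. */
static int faults_idx(int nid, int priv)
{
	return NR_NUMA_HINT_FAULT_TYPES * nid + priv;
}

int main(void)
{
	int nr_nodes = 4;   /* example machine */

	printf("entries per stats array : %d\n",
	       NR_NUMA_HINT_FAULT_STATS * nr_nodes);                  /* 16 */
	printf("entries incl. buffers   : %d\n",
	       NR_NUMA_HINT_FAULT_BUCKETS * nr_nodes);                /* 32 */
	printf("node 2, shared faults at index %d\n", faults_idx(2, 1)); /* 5 */
	return 0;
}
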
| @@ -904,16 +895,16 @@ pid_t task_numa_group_id(struct task_struct *p) | |||
| 904 | 895 | ||
| 905 | static inline int task_faults_idx(int nid, int priv) | 896 | static inline int task_faults_idx(int nid, int priv) |
| 906 | { | 897 | { |
| 907 | return 2 * nid + priv; | 898 | return NR_NUMA_HINT_FAULT_TYPES * nid + priv; |
| 908 | } | 899 | } |
| 909 | 900 | ||
| 910 | static inline unsigned long task_faults(struct task_struct *p, int nid) | 901 | static inline unsigned long task_faults(struct task_struct *p, int nid) |
| 911 | { | 902 | { |
| 912 | if (!p->numa_faults) | 903 | if (!p->numa_faults_memory) |
| 913 | return 0; | 904 | return 0; |
| 914 | 905 | ||
| 915 | return p->numa_faults[task_faults_idx(nid, 0)] + | 906 | return p->numa_faults_memory[task_faults_idx(nid, 0)] + |
| 916 | p->numa_faults[task_faults_idx(nid, 1)]; | 907 | p->numa_faults_memory[task_faults_idx(nid, 1)]; |
| 917 | } | 908 | } |
| 918 | 909 | ||
| 919 | static inline unsigned long group_faults(struct task_struct *p, int nid) | 910 | static inline unsigned long group_faults(struct task_struct *p, int nid) |
| @@ -925,6 +916,12 @@ static inline unsigned long group_faults(struct task_struct *p, int nid) | |||
| 925 | p->numa_group->faults[task_faults_idx(nid, 1)]; | 916 | p->numa_group->faults[task_faults_idx(nid, 1)]; |
| 926 | } | 917 | } |
| 927 | 918 | ||
| 919 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) | ||
| 920 | { | ||
| 921 | return group->faults_cpu[task_faults_idx(nid, 0)] + | ||
| 922 | group->faults_cpu[task_faults_idx(nid, 1)]; | ||
| 923 | } | ||
| 924 | |||
| 928 | /* | 925 | /* |
| 929 | * These return the fraction of accesses done by a particular task, or | 926 | * These return the fraction of accesses done by a particular task, or |
| 930 | * task group, on a particular numa node. The group weight is given a | 927 | * task group, on a particular numa node. The group weight is given a |
| @@ -935,7 +932,7 @@ static inline unsigned long task_weight(struct task_struct *p, int nid) | |||
| 935 | { | 932 | { |
| 936 | unsigned long total_faults; | 933 | unsigned long total_faults; |
| 937 | 934 | ||
| 938 | if (!p->numa_faults) | 935 | if (!p->numa_faults_memory) |
| 939 | return 0; | 936 | return 0; |
| 940 | 937 | ||
| 941 | total_faults = p->total_numa_faults; | 938 | total_faults = p->total_numa_faults; |
| @@ -954,6 +951,69 @@ static inline unsigned long group_weight(struct task_struct *p, int nid) | |||
| 954 | return 1000 * group_faults(p, nid) / p->numa_group->total_faults; | 951 | return 1000 * group_faults(p, nid) / p->numa_group->total_faults; |
| 955 | } | 952 | } |
| 956 | 953 | ||
| 954 | bool should_numa_migrate_memory(struct task_struct *p, struct page * page, | ||
| 955 | int src_nid, int dst_cpu) | ||
| 956 | { | ||
| 957 | struct numa_group *ng = p->numa_group; | ||
| 958 | int dst_nid = cpu_to_node(dst_cpu); | ||
| 959 | int last_cpupid, this_cpupid; | ||
| 960 | |||
| 961 | this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); | ||
| 962 | |||
| 963 | /* | ||
| 964 | * Multi-stage node selection is used in conjunction with a periodic | ||
| 965 | * migration fault to build a temporal task<->page relation. By using | ||
| 966 | * a two-stage filter we remove short/unlikely relations. | ||
| 967 | * | ||
| 968 | * Using P(p) ~ n_p / n_t as per frequentist probability, we can equate | ||
| 969 | * a task's usage of a particular page (n_p) per total usage of this | ||
| 970 | * page (n_t) (in a given time-span) to a probability. | ||
| 971 | * | ||
| 972 | * Our periodic faults will sample this probability and getting the | ||
| 973 | * same result twice in a row, given these samples are fully | ||
| 974 | * independent, is then given by P(n)^2, provided our sample period | ||
| 975 | * is sufficiently short compared to the usage pattern. | ||
| 976 | * | ||
| 977 | * This quadric squishes small probabilities, making it less likely we | ||
| 978 | * act on an unlikely task<->page relation. | ||
| 979 | */ | ||
| 980 | last_cpupid = page_cpupid_xchg_last(page, this_cpupid); | ||
| 981 | if (!cpupid_pid_unset(last_cpupid) && | ||
| 982 | cpupid_to_nid(last_cpupid) != dst_nid) | ||
| 983 | return false; | ||
| 984 | |||
| 985 | /* Always allow migrate on private faults */ | ||
| 986 | if (cpupid_match_pid(p, last_cpupid)) | ||
| 987 | return true; | ||
| 988 | |||
| 989 | /* A shared fault, but p->numa_group has not been set up yet. */ | ||
| 990 | if (!ng) | ||
| 991 | return true; | ||
| 992 | |||
| 993 | /* | ||
| 994 | * Do not migrate if the destination is not a node that | ||
| 995 | * is actively used by this numa group. | ||
| 996 | */ | ||
| 997 | if (!node_isset(dst_nid, ng->active_nodes)) | ||
| 998 | return false; | ||
| 999 | |||
| 1000 | /* | ||
| 1001 | * Source is a node that is not actively used by this | ||
| 1002 | * numa group, while the destination is. Migrate. | ||
| 1003 | */ | ||
| 1004 | if (!node_isset(src_nid, ng->active_nodes)) | ||
| 1005 | return true; | ||
| 1006 | |||
| 1007 | /* | ||
| 1008 | * Both source and destination are nodes in active | ||
| 1009 | * use by this numa group. Maximize memory bandwidth | ||
| 1010 | * by migrating from more heavily used groups, to less | ||
| 1011 | * heavily used ones, spreading the load around. | ||
| 1012 | * Use a 1/4 hysteresis to avoid spurious page movement. | ||
| 1013 | */ | ||
| 1014 | return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4); | ||
| 1015 | } | ||
| 1016 | |||
| 957 | static unsigned long weighted_cpuload(const int cpu); | 1017 | static unsigned long weighted_cpuload(const int cpu); |
| 958 | static unsigned long source_load(int cpu, int type); | 1018 | static unsigned long source_load(int cpu, int type); |
| 959 | static unsigned long target_load(int cpu, int type); | 1019 | static unsigned long target_load(int cpu, int type); |
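The new should_numa_migrate_memory() above stacks three checks: the two-stage cpupid filter (the same task<->page relation must be sampled twice in a row, so the chance of acting on a transient relation is roughly squared), the active-node mask, and finally a 3/4 hysteresis when both nodes are active. The snippet below is a minimal userspace sketch of just that last comparison, with hypothetical fault counts standing in for group_faults(); it is illustrative only, not the kernel implementation.

        /*
         * Minimal userspace sketch of the 3/4 hysteresis applied when both
         * the source and destination node are in the group's active set.
         * The fault counts are stand-ins for group_faults().
         */
        #include <stdbool.h>
        #include <stdio.h>

        static bool migrate_between_active_nodes(unsigned long dst_faults,
                                                 unsigned long src_faults)
        {
                /* Move only if the destination sees under 3/4 of the source's faults. */
                return dst_faults < src_faults * 3 / 4;
        }

        int main(void)
        {
                printf("%d\n", migrate_between_active_nodes(70, 100)); /* 1: migrate   */
                printf("%d\n", migrate_between_active_nodes(80, 100)); /* 0: keep page */
                return 0;
        }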
| @@ -1267,7 +1327,7 @@ static int task_numa_migrate(struct task_struct *p) | |||
| 1267 | static void numa_migrate_preferred(struct task_struct *p) | 1327 | static void numa_migrate_preferred(struct task_struct *p) |
| 1268 | { | 1328 | { |
| 1269 | /* This task has no NUMA fault statistics yet */ | 1329 | /* This task has no NUMA fault statistics yet */ |
| 1270 | if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) | 1330 | if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory)) |
| 1271 | return; | 1331 | return; |
| 1272 | 1332 | ||
| 1273 | /* Periodically retry migrating the task to the preferred node */ | 1333 | /* Periodically retry migrating the task to the preferred node */ |
| @@ -1282,6 +1342,38 @@ static void numa_migrate_preferred(struct task_struct *p) | |||
| 1282 | } | 1342 | } |
| 1283 | 1343 | ||
| 1284 | /* | 1344 | /* |
| 1345 | * Find the nodes on which the workload is actively running. We do this by | ||
| 1346 | * tracking the nodes from which NUMA hinting faults are triggered. This can | ||
| 1347 | * be different from the set of nodes where the workload's memory is currently | ||
| 1348 | * located. | ||
| 1349 | * | ||
| 1350 | * The bitmask is used to make smarter decisions on when to do NUMA page | ||
| 1351 | * migrations. To prevent flip-flopping and excessive page migrations, nodes | ||
| 1352 | * are added when they cause over 6/16 of the maximum number of faults, but | ||
| 1353 | * only removed when they drop below 3/16. | ||
| 1354 | */ | ||
| 1355 | static void update_numa_active_node_mask(struct numa_group *numa_group) | ||
| 1356 | { | ||
| 1357 | unsigned long faults, max_faults = 0; | ||
| 1358 | int nid; | ||
| 1359 | |||
| 1360 | for_each_online_node(nid) { | ||
| 1361 | faults = group_faults_cpu(numa_group, nid); | ||
| 1362 | if (faults > max_faults) | ||
| 1363 | max_faults = faults; | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | for_each_online_node(nid) { | ||
| 1367 | faults = group_faults_cpu(numa_group, nid); | ||
| 1368 | if (!node_isset(nid, numa_group->active_nodes)) { | ||
| 1369 | if (faults > max_faults * 6 / 16) | ||
| 1370 | node_set(nid, numa_group->active_nodes); | ||
| 1371 | } else if (faults < max_faults * 3 / 16) | ||
| 1372 | node_clear(nid, numa_group->active_nodes); | ||
| 1373 | } | ||
| 1374 | } | ||
| 1375 | |||
| 1376 | /* | ||
| 1285 | * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS | 1377 | * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS |
| 1286 | * increments. The more local the fault statistics are, the higher the scan | 1378 | * increments. The more local the fault statistics are, the higher the scan |
| 1287 | * period will be for the next scan window. If local/remote ratio is below | 1379 | * period will be for the next scan window. If local/remote ratio is below |
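update_numa_active_node_mask() above implements the 6/16-add / 3/16-remove hysteresis relative to the busiest node, so a node cannot flap in and out of the active set around a single threshold. Below is a hedged stand-alone sketch of the same loop, with a plain bool array and a fixed node count standing in for nodemask_t and group_faults_cpu().

        /*
         * Userspace sketch of the add/remove hysteresis; NR_NODES and the
         * arrays are illustrative stand-ins, not kernel data structures.
         */
        #include <stdbool.h>
        #include <stddef.h>

        #define NR_NODES 4

        static void update_active_nodes(const unsigned long faults[NR_NODES],
                                        bool active[NR_NODES])
        {
                unsigned long max_faults = 0;
                size_t nid;

                for (nid = 0; nid < NR_NODES; nid++)
                        if (faults[nid] > max_faults)
                                max_faults = faults[nid];

                for (nid = 0; nid < NR_NODES; nid++) {
                        if (!active[nid]) {
                                /* join the active set above 6/16 of the maximum */
                                if (faults[nid] > max_faults * 6 / 16)
                                        active[nid] = true;
                        } else if (faults[nid] < max_faults * 3 / 16) {
                                /* leave only after dropping below 3/16 */
                                active[nid] = false;
                        }
                }
        }

        int main(void)
        {
                unsigned long faults[NR_NODES] = { 100, 50, 20, 10 };
                bool active[NR_NODES] = { false };

                update_active_nodes(faults, active);
                /* 100 and 50 exceed 6/16 of 100 (37 with integer math); 20 and 10 do not */
                return active[0] && active[1] && !active[2] && !active[3] ? 0 : 1;
        }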
| @@ -1355,11 +1447,41 @@ static void update_task_scan_period(struct task_struct *p, | |||
| 1355 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); | 1447 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); |
| 1356 | } | 1448 | } |
| 1357 | 1449 | ||
| 1450 | /* | ||
| 1451 | * Get the fraction of time the task has been running since the last | ||
| 1452 | * NUMA placement cycle. The scheduler keeps similar statistics, but | ||
| 1453 | * decays those on a 32ms period, which is orders of magnitude off | ||
| 1454 | * from the dozens-of-seconds NUMA balancing period. Use the scheduler | ||
| 1455 | * stats only if the task is so new there are no NUMA statistics yet. | ||
| 1456 | */ | ||
| 1457 | static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) | ||
| 1458 | { | ||
| 1459 | u64 runtime, delta, now; | ||
| 1460 | /* Use the start of this time slice to avoid calculations. */ | ||
| 1461 | now = p->se.exec_start; | ||
| 1462 | runtime = p->se.sum_exec_runtime; | ||
| 1463 | |||
| 1464 | if (p->last_task_numa_placement) { | ||
| 1465 | delta = runtime - p->last_sum_exec_runtime; | ||
| 1466 | *period = now - p->last_task_numa_placement; | ||
| 1467 | } else { | ||
| 1468 | delta = p->se.avg.runnable_avg_sum; | ||
| 1469 | *period = p->se.avg.runnable_avg_period; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | p->last_sum_exec_runtime = runtime; | ||
| 1473 | p->last_task_numa_placement = now; | ||
| 1474 | |||
| 1475 | return delta; | ||
| 1476 | } | ||
| 1477 | |||
| 1358 | static void task_numa_placement(struct task_struct *p) | 1478 | static void task_numa_placement(struct task_struct *p) |
| 1359 | { | 1479 | { |
| 1360 | int seq, nid, max_nid = -1, max_group_nid = -1; | 1480 | int seq, nid, max_nid = -1, max_group_nid = -1; |
| 1361 | unsigned long max_faults = 0, max_group_faults = 0; | 1481 | unsigned long max_faults = 0, max_group_faults = 0; |
| 1362 | unsigned long fault_types[2] = { 0, 0 }; | 1482 | unsigned long fault_types[2] = { 0, 0 }; |
| 1483 | unsigned long total_faults; | ||
| 1484 | u64 runtime, period; | ||
| 1363 | spinlock_t *group_lock = NULL; | 1485 | spinlock_t *group_lock = NULL; |
| 1364 | 1486 | ||
| 1365 | seq = ACCESS_ONCE(p->mm->numa_scan_seq); | 1487 | seq = ACCESS_ONCE(p->mm->numa_scan_seq); |
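numa_get_avg_runtime() above answers "how much did this task actually run since the last placement pass, and over how long a wall-clock interval?", falling back to the scheduler's runnable-average numbers for a task that has never been placed. The following is a hedged userspace sketch of the delta bookkeeping; the struct and field names are made up for illustration and the first-call fallback is simplified.

        /*
         * Sketch of the since-last-placement accounting; not the
         * task_struct fields or runnable-average statistics used above.
         */
        #include <stdint.h>

        struct placement_clock {
                uint64_t last_runtime;    /* CPU time consumed at the last pass   */
                uint64_t last_placement;  /* timestamp of the last pass, 0 = never */
        };

        static uint64_t runtime_since_last_pass(struct placement_clock *pc,
                                                uint64_t now, uint64_t runtime,
                                                uint64_t *period)
        {
                uint64_t delta;

                if (pc->last_placement) {
                        delta = runtime - pc->last_runtime;
                        *period = now - pc->last_placement;
                } else {
                        /* first pass: no history yet, report the raw totals */
                        delta = runtime;
                        *period = now;
                }

                pc->last_runtime = runtime;
                pc->last_placement = now;
                return delta;
        }

        int main(void)
        {
                struct placement_clock pc = { 0, 0 };
                uint64_t period;

                runtime_since_last_pass(&pc, 1000, 400, &period); /* first pass */
                return runtime_since_last_pass(&pc, 3000, 900, &period) == 500 &&
                       period == 2000 ? 0 : 1;
        }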
| @@ -1368,6 +1490,10 @@ static void task_numa_placement(struct task_struct *p) | |||
| 1368 | p->numa_scan_seq = seq; | 1490 | p->numa_scan_seq = seq; |
| 1369 | p->numa_scan_period_max = task_scan_max(p); | 1491 | p->numa_scan_period_max = task_scan_max(p); |
| 1370 | 1492 | ||
| 1493 | total_faults = p->numa_faults_locality[0] + | ||
| 1494 | p->numa_faults_locality[1]; | ||
| 1495 | runtime = numa_get_avg_runtime(p, &period); | ||
| 1496 | |||
| 1371 | /* If the task is part of a group prevent parallel updates to group stats */ | 1497 | /* If the task is part of a group prevent parallel updates to group stats */ |
| 1372 | if (p->numa_group) { | 1498 | if (p->numa_group) { |
| 1373 | group_lock = &p->numa_group->lock; | 1499 | group_lock = &p->numa_group->lock; |
| @@ -1379,24 +1505,37 @@ static void task_numa_placement(struct task_struct *p) | |||
| 1379 | unsigned long faults = 0, group_faults = 0; | 1505 | unsigned long faults = 0, group_faults = 0; |
| 1380 | int priv, i; | 1506 | int priv, i; |
| 1381 | 1507 | ||
| 1382 | for (priv = 0; priv < 2; priv++) { | 1508 | for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { |
| 1383 | long diff; | 1509 | long diff, f_diff, f_weight; |
| 1384 | 1510 | ||
| 1385 | i = task_faults_idx(nid, priv); | 1511 | i = task_faults_idx(nid, priv); |
| 1386 | diff = -p->numa_faults[i]; | ||
| 1387 | 1512 | ||
| 1388 | /* Decay existing window, copy faults since last scan */ | 1513 | /* Decay existing window, copy faults since last scan */ |
| 1389 | p->numa_faults[i] >>= 1; | 1514 | diff = p->numa_faults_buffer_memory[i] - p->numa_faults_memory[i] / 2; |
| 1390 | p->numa_faults[i] += p->numa_faults_buffer[i]; | 1515 | fault_types[priv] += p->numa_faults_buffer_memory[i]; |
| 1391 | fault_types[priv] += p->numa_faults_buffer[i]; | 1516 | p->numa_faults_buffer_memory[i] = 0; |
| 1392 | p->numa_faults_buffer[i] = 0; | ||
| 1393 | 1517 | ||
| 1394 | faults += p->numa_faults[i]; | 1518 | /* |
| 1395 | diff += p->numa_faults[i]; | 1519 | * Normalize the faults_from, so all tasks in a group |
| 1520 | * count according to CPU use, instead of by the raw | ||
| 1521 | * number of faults. Tasks with little runtime have | ||
| 1522 | * little over-all impact on throughput, and thus their | ||
| 1523 | * faults are less important. | ||
| 1524 | */ | ||
| 1525 | f_weight = div64_u64(runtime << 16, period + 1); | ||
| 1526 | f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) / | ||
| 1527 | (total_faults + 1); | ||
| 1528 | f_diff = f_weight - p->numa_faults_cpu[i] / 2; | ||
| 1529 | p->numa_faults_buffer_cpu[i] = 0; | ||
| 1530 | |||
| 1531 | p->numa_faults_memory[i] += diff; | ||
| 1532 | p->numa_faults_cpu[i] += f_diff; | ||
| 1533 | faults += p->numa_faults_memory[i]; | ||
| 1396 | p->total_numa_faults += diff; | 1534 | p->total_numa_faults += diff; |
| 1397 | if (p->numa_group) { | 1535 | if (p->numa_group) { |
| 1398 | /* safe because we can only change our own group */ | 1536 | /* safe because we can only change our own group */ |
| 1399 | p->numa_group->faults[i] += diff; | 1537 | p->numa_group->faults[i] += diff; |
| 1538 | p->numa_group->faults_cpu[i] += f_diff; | ||
| 1400 | p->numa_group->total_faults += diff; | 1539 | p->numa_group->total_faults += diff; |
| 1401 | group_faults += p->numa_group->faults[i]; | 1540 | group_faults += p->numa_group->faults[i]; |
| 1402 | } | 1541 | } |
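Two things happen per counter in the loop above: the long-term memory-fault figure is decayed by half and the per-window buffer is folded in, and the window's CPU-side faults are scaled by how much the task actually ran (runtime/period in 16.16 fixed point) and by their share of the window's total faults before the CPU counter is decayed the same way. A hedged arithmetic sketch of those two updates follows; the variable names are illustrative, not the task_struct fields.

        #include <stdint.h>
        #include <stdio.h>

        /* stat = stat - stat/2 + window: halve the old value, add the new window */
        static long decay_and_merge(unsigned long *stat, unsigned long window)
        {
                long diff = (long)window - (long)(*stat / 2);

                *stat += diff;
                return diff;
        }

        /* Scale this window's CPU faults by runtime/period (16.16 fixed point)
         * and by their share of the window's total faults. */
        static unsigned long runtime_weighted_faults(uint64_t runtime, uint64_t period,
                                                     unsigned long cpu_faults,
                                                     unsigned long total_faults)
        {
                uint64_t f_weight = (runtime << 16) / (period + 1);

                return (unsigned long)((f_weight * cpu_faults) / (total_faults + 1));
        }

        int main(void)
        {
                unsigned long stat = 100;

                decay_and_merge(&stat, 40);     /* 100 - 50 + 40 -> stat == 90 */
                printf("%lu %lu\n", stat,
                       runtime_weighted_faults(8, 16, 40, 80));
                return 0;
        }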
| @@ -1416,6 +1555,7 @@ static void task_numa_placement(struct task_struct *p) | |||
| 1416 | update_task_scan_period(p, fault_types[0], fault_types[1]); | 1555 | update_task_scan_period(p, fault_types[0], fault_types[1]); |
| 1417 | 1556 | ||
| 1418 | if (p->numa_group) { | 1557 | if (p->numa_group) { |
| 1558 | update_numa_active_node_mask(p->numa_group); | ||
| 1419 | /* | 1559 | /* |
| 1420 | * If the preferred task and group nids are different, | 1560 | * If the preferred task and group nids are different, |
| 1421 | * iterate over the nodes again to find the best place. | 1561 | * iterate over the nodes again to find the best place. |
| @@ -1465,7 +1605,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
| 1465 | 1605 | ||
| 1466 | if (unlikely(!p->numa_group)) { | 1606 | if (unlikely(!p->numa_group)) { |
| 1467 | unsigned int size = sizeof(struct numa_group) + | 1607 | unsigned int size = sizeof(struct numa_group) + |
| 1468 | 2*nr_node_ids*sizeof(unsigned long); | 1608 | 4*nr_node_ids*sizeof(unsigned long); |
| 1469 | 1609 | ||
| 1470 | grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); | 1610 | grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); |
| 1471 | if (!grp) | 1611 | if (!grp) |
| @@ -1475,9 +1615,14 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
| 1475 | spin_lock_init(&grp->lock); | 1615 | spin_lock_init(&grp->lock); |
| 1476 | INIT_LIST_HEAD(&grp->task_list); | 1616 | INIT_LIST_HEAD(&grp->task_list); |
| 1477 | grp->gid = p->pid; | 1617 | grp->gid = p->pid; |
| 1618 | /* Second half of the array tracks nids where faults happen */ | ||
| 1619 | grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES * | ||
| 1620 | nr_node_ids; | ||
| 1621 | |||
| 1622 | node_set(task_node(current), grp->active_nodes); | ||
| 1478 | 1623 | ||
| 1479 | for (i = 0; i < 2*nr_node_ids; i++) | 1624 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
| 1480 | grp->faults[i] = p->numa_faults[i]; | 1625 | grp->faults[i] = p->numa_faults_memory[i]; |
| 1481 | 1626 | ||
| 1482 | grp->total_faults = p->total_numa_faults; | 1627 | grp->total_faults = p->total_numa_faults; |
| 1483 | 1628 | ||
| @@ -1534,9 +1679,9 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
| 1534 | 1679 | ||
| 1535 | double_lock(&my_grp->lock, &grp->lock); | 1680 | double_lock(&my_grp->lock, &grp->lock); |
| 1536 | 1681 | ||
| 1537 | for (i = 0; i < 2*nr_node_ids; i++) { | 1682 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { |
| 1538 | my_grp->faults[i] -= p->numa_faults[i]; | 1683 | my_grp->faults[i] -= p->numa_faults_memory[i]; |
| 1539 | grp->faults[i] += p->numa_faults[i]; | 1684 | grp->faults[i] += p->numa_faults_memory[i]; |
| 1540 | } | 1685 | } |
| 1541 | my_grp->total_faults -= p->total_numa_faults; | 1686 | my_grp->total_faults -= p->total_numa_faults; |
| 1542 | grp->total_faults += p->total_numa_faults; | 1687 | grp->total_faults += p->total_numa_faults; |
| @@ -1562,12 +1707,12 @@ void task_numa_free(struct task_struct *p) | |||
| 1562 | { | 1707 | { |
| 1563 | struct numa_group *grp = p->numa_group; | 1708 | struct numa_group *grp = p->numa_group; |
| 1564 | int i; | 1709 | int i; |
| 1565 | void *numa_faults = p->numa_faults; | 1710 | void *numa_faults = p->numa_faults_memory; |
| 1566 | 1711 | ||
| 1567 | if (grp) { | 1712 | if (grp) { |
| 1568 | spin_lock(&grp->lock); | 1713 | spin_lock(&grp->lock); |
| 1569 | for (i = 0; i < 2*nr_node_ids; i++) | 1714 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
| 1570 | grp->faults[i] -= p->numa_faults[i]; | 1715 | grp->faults[i] -= p->numa_faults_memory[i]; |
| 1571 | grp->total_faults -= p->total_numa_faults; | 1716 | grp->total_faults -= p->total_numa_faults; |
| 1572 | 1717 | ||
| 1573 | list_del(&p->numa_entry); | 1718 | list_del(&p->numa_entry); |
| @@ -1577,18 +1722,21 @@ void task_numa_free(struct task_struct *p) | |||
| 1577 | put_numa_group(grp); | 1722 | put_numa_group(grp); |
| 1578 | } | 1723 | } |
| 1579 | 1724 | ||
| 1580 | p->numa_faults = NULL; | 1725 | p->numa_faults_memory = NULL; |
| 1581 | p->numa_faults_buffer = NULL; | 1726 | p->numa_faults_buffer_memory = NULL; |
| 1727 | p->numa_faults_cpu = NULL; | ||
| 1728 | p->numa_faults_buffer_cpu = NULL; | ||
| 1582 | kfree(numa_faults); | 1729 | kfree(numa_faults); |
| 1583 | } | 1730 | } |
| 1584 | 1731 | ||
| 1585 | /* | 1732 | /* |
| 1586 | * Got a PROT_NONE fault for a page on @node. | 1733 | * Got a PROT_NONE fault for a page on @node. |
| 1587 | */ | 1734 | */ |
| 1588 | void task_numa_fault(int last_cpupid, int node, int pages, int flags) | 1735 | void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) |
| 1589 | { | 1736 | { |
| 1590 | struct task_struct *p = current; | 1737 | struct task_struct *p = current; |
| 1591 | bool migrated = flags & TNF_MIGRATED; | 1738 | bool migrated = flags & TNF_MIGRATED; |
| 1739 | int cpu_node = task_node(current); | ||
| 1592 | int priv; | 1740 | int priv; |
| 1593 | 1741 | ||
| 1594 | if (!numabalancing_enabled) | 1742 | if (!numabalancing_enabled) |
| @@ -1603,16 +1751,24 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) | |||
| 1603 | return; | 1751 | return; |
| 1604 | 1752 | ||
| 1605 | /* Allocate buffer to track faults on a per-node basis */ | 1753 | /* Allocate buffer to track faults on a per-node basis */ |
| 1606 | if (unlikely(!p->numa_faults)) { | 1754 | if (unlikely(!p->numa_faults_memory)) { |
| 1607 | int size = sizeof(*p->numa_faults) * 2 * nr_node_ids; | 1755 | int size = sizeof(*p->numa_faults_memory) * |
| 1756 | NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; | ||
| 1608 | 1757 | ||
| 1609 | /* numa_faults and numa_faults_buffer share the allocation */ | 1758 | p->numa_faults_memory = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); |
| 1610 | p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); | 1759 | if (!p->numa_faults_memory) |
| 1611 | if (!p->numa_faults) | ||
| 1612 | return; | 1760 | return; |
| 1613 | 1761 | ||
| 1614 | BUG_ON(p->numa_faults_buffer); | 1762 | BUG_ON(p->numa_faults_buffer_memory); |
| 1615 | p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids); | 1763 | /* |
| 1764 | * The averaged statistics, shared & private, memory & cpu, | ||
| 1765 | * occupy the first half of the array. The second half of the | ||
| 1766 | * array is for current counters, which are averaged into the | ||
| 1767 | * first set by task_numa_placement. | ||
| 1768 | */ | ||
| 1769 | p->numa_faults_cpu = p->numa_faults_memory + (2 * nr_node_ids); | ||
| 1770 | p->numa_faults_buffer_memory = p->numa_faults_memory + (4 * nr_node_ids); | ||
| 1771 | p->numa_faults_buffer_cpu = p->numa_faults_memory + (6 * nr_node_ids); | ||
| 1616 | p->total_numa_faults = 0; | 1772 | p->total_numa_faults = 0; |
| 1617 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); | 1773 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); |
| 1618 | } | 1774 | } |
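The per-task fault statistics are now carved out of one allocation of 8 * nr_node_ids unsigned longs: the averaged memory and CPU counters occupy the first half, the per-window buffers the second half, and each quarter holds a shared/private pair per node indexed by task_faults_idx(). Below is a hedged userspace sketch of that carving; nr_node_ids and struct numa_stats are stand-ins for the kernel's per-task fields, not kernel API.

        #include <stdlib.h>

        #define NR_NUMA_HINT_FAULT_TYPES   2    /* shared, private */
        #define NR_NUMA_HINT_FAULT_STATS   (NR_NUMA_HINT_FAULT_TYPES * 2)
        #define NR_NUMA_HINT_FAULT_BUCKETS (NR_NUMA_HINT_FAULT_STATS * 2)

        struct numa_stats {
                unsigned long *faults_memory;           /* averaged, memory node  */
                unsigned long *faults_cpu;              /* averaged, cpu node     */
                unsigned long *faults_buffer_memory;    /* current window, memory */
                unsigned long *faults_buffer_cpu;       /* current window, cpu    */
        };

        static int task_faults_idx(int nid, int priv)
        {
                return NR_NUMA_HINT_FAULT_TYPES * nid + priv;
        }

        static int alloc_numa_stats(struct numa_stats *ns, int nr_node_ids)
        {
                unsigned long *base = calloc(NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids,
                                             sizeof(*base));

                if (!base)
                        return -1;

                /* averaged stats in the first half, per-window buffers in the second */
                ns->faults_memory        = base;
                ns->faults_cpu           = base + 2 * nr_node_ids;
                ns->faults_buffer_memory = base + 4 * nr_node_ids;
                ns->faults_buffer_cpu    = base + 6 * nr_node_ids;
                return 0;
        }

        int main(void)
        {
                struct numa_stats ns;

                if (alloc_numa_stats(&ns, 2))
                        return 1;
                /* record one private fault on node 1 in this window's memory buffer */
                ns.faults_buffer_memory[task_faults_idx(1, 1)]++;
                free(ns.faults_memory);
                return 0;
        }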
| @@ -1641,7 +1797,8 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) | |||
| 1641 | if (migrated) | 1797 | if (migrated) |
| 1642 | p->numa_pages_migrated += pages; | 1798 | p->numa_pages_migrated += pages; |
| 1643 | 1799 | ||
| 1644 | p->numa_faults_buffer[task_faults_idx(node, priv)] += pages; | 1800 | p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages; |
| 1801 | p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages; | ||
| 1645 | p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages; | 1802 | p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages; |
| 1646 | } | 1803 | } |
| 1647 | 1804 | ||
| @@ -1757,6 +1914,8 @@ void task_numa_work(struct callback_head *work) | |||
| 1757 | start = end; | 1914 | start = end; |
| 1758 | if (pages <= 0) | 1915 | if (pages <= 0) |
| 1759 | goto out; | 1916 | goto out; |
| 1917 | |||
| 1918 | cond_resched(); | ||
| 1760 | } while (end != vma->vm_end); | 1919 | } while (end != vma->vm_end); |
| 1761 | } | 1920 | } |
| 1762 | 1921 | ||
| @@ -2217,13 +2376,20 @@ static inline void __update_group_entity_contrib(struct sched_entity *se) | |||
| 2217 | se->avg.load_avg_contrib >>= NICE_0_SHIFT; | 2376 | se->avg.load_avg_contrib >>= NICE_0_SHIFT; |
| 2218 | } | 2377 | } |
| 2219 | } | 2378 | } |
| 2220 | #else | 2379 | |
| 2380 | static inline void update_rq_runnable_avg(struct rq *rq, int runnable) | ||
| 2381 | { | ||
| 2382 | __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable); | ||
| 2383 | __update_tg_runnable_avg(&rq->avg, &rq->cfs); | ||
| 2384 | } | ||
| 2385 | #else /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 2221 | static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, | 2386 | static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, |
| 2222 | int force_update) {} | 2387 | int force_update) {} |
| 2223 | static inline void __update_tg_runnable_avg(struct sched_avg *sa, | 2388 | static inline void __update_tg_runnable_avg(struct sched_avg *sa, |
| 2224 | struct cfs_rq *cfs_rq) {} | 2389 | struct cfs_rq *cfs_rq) {} |
| 2225 | static inline void __update_group_entity_contrib(struct sched_entity *se) {} | 2390 | static inline void __update_group_entity_contrib(struct sched_entity *se) {} |
| 2226 | #endif | 2391 | static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} |
| 2392 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 2227 | 2393 | ||
| 2228 | static inline void __update_task_entity_contrib(struct sched_entity *se) | 2394 | static inline void __update_task_entity_contrib(struct sched_entity *se) |
| 2229 | { | 2395 | { |
| @@ -2321,12 +2487,6 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) | |||
| 2321 | __update_cfs_rq_tg_load_contrib(cfs_rq, force_update); | 2487 | __update_cfs_rq_tg_load_contrib(cfs_rq, force_update); |
| 2322 | } | 2488 | } |
| 2323 | 2489 | ||
| 2324 | static inline void update_rq_runnable_avg(struct rq *rq, int runnable) | ||
| 2325 | { | ||
| 2326 | __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable); | ||
| 2327 | __update_tg_runnable_avg(&rq->avg, &rq->cfs); | ||
| 2328 | } | ||
| 2329 | |||
| 2330 | /* Add the load generated by se into cfs_rq's child load-average */ | 2490 | /* Add the load generated by se into cfs_rq's child load-average */ |
| 2331 | static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, | 2491 | static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, |
| 2332 | struct sched_entity *se, | 2492 | struct sched_entity *se, |
| @@ -2414,7 +2574,10 @@ void idle_exit_fair(struct rq *this_rq) | |||
| 2414 | update_rq_runnable_avg(this_rq, 0); | 2574 | update_rq_runnable_avg(this_rq, 0); |
| 2415 | } | 2575 | } |
| 2416 | 2576 | ||
| 2417 | #else | 2577 | static int idle_balance(struct rq *this_rq); |
| 2578 | |||
| 2579 | #else /* CONFIG_SMP */ | ||
| 2580 | |||
| 2418 | static inline void update_entity_load_avg(struct sched_entity *se, | 2581 | static inline void update_entity_load_avg(struct sched_entity *se, |
| 2419 | int update_cfs_rq) {} | 2582 | int update_cfs_rq) {} |
| 2420 | static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} | 2583 | static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} |
| @@ -2426,7 +2589,13 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, | |||
| 2426 | int sleep) {} | 2589 | int sleep) {} |
| 2427 | static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, | 2590 | static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, |
| 2428 | int force_update) {} | 2591 | int force_update) {} |
| 2429 | #endif | 2592 | |
| 2593 | static inline int idle_balance(struct rq *rq) | ||
| 2594 | { | ||
| 2595 | return 0; | ||
| 2596 | } | ||
| 2597 | |||
| 2598 | #endif /* CONFIG_SMP */ | ||
| 2430 | 2599 | ||
| 2431 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | 2600 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 2432 | { | 2601 | { |
| @@ -2576,10 +2745,10 @@ static void __clear_buddies_last(struct sched_entity *se) | |||
| 2576 | { | 2745 | { |
| 2577 | for_each_sched_entity(se) { | 2746 | for_each_sched_entity(se) { |
| 2578 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 2747 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 2579 | if (cfs_rq->last == se) | 2748 | if (cfs_rq->last != se) |
| 2580 | cfs_rq->last = NULL; | ||
| 2581 | else | ||
| 2582 | break; | 2749 | break; |
| 2750 | |||
| 2751 | cfs_rq->last = NULL; | ||
| 2583 | } | 2752 | } |
| 2584 | } | 2753 | } |
| 2585 | 2754 | ||
| @@ -2587,10 +2756,10 @@ static void __clear_buddies_next(struct sched_entity *se) | |||
| 2587 | { | 2756 | { |
| 2588 | for_each_sched_entity(se) { | 2757 | for_each_sched_entity(se) { |
| 2589 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 2758 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 2590 | if (cfs_rq->next == se) | 2759 | if (cfs_rq->next != se) |
| 2591 | cfs_rq->next = NULL; | ||
| 2592 | else | ||
| 2593 | break; | 2760 | break; |
| 2761 | |||
| 2762 | cfs_rq->next = NULL; | ||
| 2594 | } | 2763 | } |
| 2595 | } | 2764 | } |
| 2596 | 2765 | ||
| @@ -2598,10 +2767,10 @@ static void __clear_buddies_skip(struct sched_entity *se) | |||
| 2598 | { | 2767 | { |
| 2599 | for_each_sched_entity(se) { | 2768 | for_each_sched_entity(se) { |
| 2600 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 2769 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 2601 | if (cfs_rq->skip == se) | 2770 | if (cfs_rq->skip != se) |
| 2602 | cfs_rq->skip = NULL; | ||
| 2603 | else | ||
| 2604 | break; | 2771 | break; |
| 2772 | |||
| 2773 | cfs_rq->skip = NULL; | ||
| 2605 | } | 2774 | } |
| 2606 | } | 2775 | } |
| 2607 | 2776 | ||
| @@ -2744,17 +2913,36 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | |||
| 2744 | * 3) pick the "last" process, for cache locality | 2913 | * 3) pick the "last" process, for cache locality |
| 2745 | * 4) do not run the "skip" process, if something else is available | 2914 | * 4) do not run the "skip" process, if something else is available |
| 2746 | */ | 2915 | */ |
| 2747 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 2916 | static struct sched_entity * |
| 2917 | pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) | ||
| 2748 | { | 2918 | { |
| 2749 | struct sched_entity *se = __pick_first_entity(cfs_rq); | 2919 | struct sched_entity *left = __pick_first_entity(cfs_rq); |
| 2750 | struct sched_entity *left = se; | 2920 | struct sched_entity *se; |
| 2921 | |||
| 2922 | /* | ||
| 2923 | * If curr is set we have to see if its left of the leftmost entity | ||
| 2924 | * still in the tree, provided there was anything in the tree at all. | ||
| 2925 | */ | ||
| 2926 | if (!left || (curr && entity_before(curr, left))) | ||
| 2927 | left = curr; | ||
| 2928 | |||
| 2929 | se = left; /* ideally we run the leftmost entity */ | ||
| 2751 | 2930 | ||
| 2752 | /* | 2931 | /* |
| 2753 | * Avoid running the skip buddy, if running something else can | 2932 | * Avoid running the skip buddy, if running something else can |
| 2754 | * be done without getting too unfair. | 2933 | * be done without getting too unfair. |
| 2755 | */ | 2934 | */ |
| 2756 | if (cfs_rq->skip == se) { | 2935 | if (cfs_rq->skip == se) { |
| 2757 | struct sched_entity *second = __pick_next_entity(se); | 2936 | struct sched_entity *second; |
| 2937 | |||
| 2938 | if (se == curr) { | ||
| 2939 | second = __pick_first_entity(cfs_rq); | ||
| 2940 | } else { | ||
| 2941 | second = __pick_next_entity(se); | ||
| 2942 | if (!second || (curr && entity_before(curr, second))) | ||
| 2943 | second = curr; | ||
| 2944 | } | ||
| 2945 | |||
| 2758 | if (second && wakeup_preempt_entity(second, left) < 1) | 2946 | if (second && wakeup_preempt_entity(second, left) < 1) |
| 2759 | se = second; | 2947 | se = second; |
| 2760 | } | 2948 | } |
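pick_next_entity() now takes the currently running entity explicitly, because the new pick path can call it without having put curr back into the rbtree: the pick is the leftmost queued entity unless curr is still runnable and would run earlier, and the same comparison is repeated for the "second" candidate when the skip buddy was chosen. A hedged sketch of the core selection rule on bare vruntime values follows; the struct reduces an entity to a vruntime and the rbtree to a pre-sorted candidate, which is an illustration, not the scheduler code.

        #include <stddef.h>

        struct entity {
                long long vruntime;
        };

        static int entity_before(const struct entity *a, const struct entity *b)
        {
                return a->vruntime - b->vruntime < 0;
        }

        /* leftmost may be NULL (empty tree); curr may be NULL (already put back). */
        static const struct entity *pick(const struct entity *leftmost,
                                         const struct entity *curr)
        {
                if (!leftmost || (curr && entity_before(curr, leftmost)))
                        return curr;
                return leftmost;
        }

        int main(void)
        {
                struct entity left = { .vruntime = 100 }, curr = { .vruntime = 90 };

                return pick(&left, &curr) == &curr &&      /* curr ran less, keep it   */
                       pick(&left, NULL) == &left &&       /* no curr: take leftmost   */
                       pick(NULL, &curr) == &curr ? 0 : 1; /* empty tree: stay on curr */
        }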
| @@ -2776,7 +2964,7 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | |||
| 2776 | return se; | 2964 | return se; |
| 2777 | } | 2965 | } |
| 2778 | 2966 | ||
| 2779 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq); | 2967 | static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); |
| 2780 | 2968 | ||
| 2781 | static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) | 2969 | static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) |
| 2782 | { | 2970 | { |
| @@ -3431,22 +3619,23 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) | |||
| 3431 | } | 3619 | } |
| 3432 | 3620 | ||
| 3433 | /* conditionally throttle active cfs_rq's from put_prev_entity() */ | 3621 | /* conditionally throttle active cfs_rq's from put_prev_entity() */ |
| 3434 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) | 3622 | static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) |
| 3435 | { | 3623 | { |
| 3436 | if (!cfs_bandwidth_used()) | 3624 | if (!cfs_bandwidth_used()) |
| 3437 | return; | 3625 | return false; |
| 3438 | 3626 | ||
| 3439 | if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)) | 3627 | if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)) |
| 3440 | return; | 3628 | return false; |
| 3441 | 3629 | ||
| 3442 | /* | 3630 | /* |
| 3443 | * it's possible for a throttled entity to be forced into a running | 3631 | * it's possible for a throttled entity to be forced into a running |
| 3444 | * state (e.g. set_curr_task), in this case we're finished. | 3632 | * state (e.g. set_curr_task), in this case we're finished. |
| 3445 | */ | 3633 | */ |
| 3446 | if (cfs_rq_throttled(cfs_rq)) | 3634 | if (cfs_rq_throttled(cfs_rq)) |
| 3447 | return; | 3635 | return true; |
| 3448 | 3636 | ||
| 3449 | throttle_cfs_rq(cfs_rq); | 3637 | throttle_cfs_rq(cfs_rq); |
| 3638 | return true; | ||
| 3450 | } | 3639 | } |
| 3451 | 3640 | ||
| 3452 | static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) | 3641 | static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) |
| @@ -3556,7 +3745,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq) | |||
| 3556 | } | 3745 | } |
| 3557 | 3746 | ||
| 3558 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {} | 3747 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {} |
| 3559 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 3748 | static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; } |
| 3560 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} | 3749 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} |
| 3561 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 3750 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} |
| 3562 | 3751 | ||
| @@ -4211,13 +4400,14 @@ done: | |||
| 4211 | } | 4400 | } |
| 4212 | 4401 | ||
| 4213 | /* | 4402 | /* |
| 4214 | * sched_balance_self: balance the current task (running on cpu) in domains | 4403 | * select_task_rq_fair: Select target runqueue for the waking task in domains |
| 4215 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | 4404 | * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, |
| 4216 | * SD_BALANCE_EXEC. | 4405 | * SD_BALANCE_FORK, or SD_BALANCE_EXEC. |
| 4217 | * | 4406 | * |
| 4218 | * Balance, ie. select the least loaded group. | 4407 | * Balances load by selecting the idlest cpu in the idlest group, or under |
| 4408 | * certain conditions an idle sibling cpu if the domain has SD_WAKE_AFFINE set. | ||
| 4219 | * | 4409 | * |
| 4220 | * Returns the target CPU number, or the same CPU if no balancing is needed. | 4410 | * Returns the target cpu number. |
| 4221 | * | 4411 | * |
| 4222 | * preempt must be disabled. | 4412 | * preempt must be disabled. |
| 4223 | */ | 4413 | */ |
| @@ -4492,26 +4682,124 @@ preempt: | |||
| 4492 | set_last_buddy(se); | 4682 | set_last_buddy(se); |
| 4493 | } | 4683 | } |
| 4494 | 4684 | ||
| 4495 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 4685 | static struct task_struct * |
| 4686 | pick_next_task_fair(struct rq *rq, struct task_struct *prev) | ||
| 4496 | { | 4687 | { |
| 4497 | struct task_struct *p; | ||
| 4498 | struct cfs_rq *cfs_rq = &rq->cfs; | 4688 | struct cfs_rq *cfs_rq = &rq->cfs; |
| 4499 | struct sched_entity *se; | 4689 | struct sched_entity *se; |
| 4690 | struct task_struct *p; | ||
| 4691 | int new_tasks; | ||
| 4500 | 4692 | ||
| 4693 | again: | ||
| 4694 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 4501 | if (!cfs_rq->nr_running) | 4695 | if (!cfs_rq->nr_running) |
| 4502 | return NULL; | 4696 | goto idle; |
| 4697 | |||
| 4698 | if (prev->sched_class != &fair_sched_class) | ||
| 4699 | goto simple; | ||
| 4700 | |||
| 4701 | /* | ||
| 4702 | * Because of the set_next_buddy() in dequeue_task_fair() it is rather | ||
| 4703 | * likely that a next task is from the same cgroup as the current. | ||
| 4704 | * | ||
| 4705 | * Therefore attempt to avoid putting and setting the entire cgroup | ||
| 4706 | * hierarchy, only change the part that actually changes. | ||
| 4707 | */ | ||
| 4503 | 4708 | ||
| 4504 | do { | 4709 | do { |
| 4505 | se = pick_next_entity(cfs_rq); | 4710 | struct sched_entity *curr = cfs_rq->curr; |
| 4711 | |||
| 4712 | /* | ||
| 4713 | * Since we got here without doing put_prev_entity() we also | ||
| 4714 | * have to consider cfs_rq->curr. If it is still a runnable | ||
| 4715 | * entity, update_curr() will update its vruntime, otherwise | ||
| 4716 | * forget we've ever seen it. | ||
| 4717 | */ | ||
| 4718 | if (curr && curr->on_rq) | ||
| 4719 | update_curr(cfs_rq); | ||
| 4720 | else | ||
| 4721 | curr = NULL; | ||
| 4722 | |||
| 4723 | /* | ||
| 4724 | * This call to check_cfs_rq_runtime() will do the throttle and | ||
| 4725 | * dequeue its entity in the parent(s). Therefore the 'simple' | ||
| 4726 | * nr_running test will indeed be correct. | ||
| 4727 | */ | ||
| 4728 | if (unlikely(check_cfs_rq_runtime(cfs_rq))) | ||
| 4729 | goto simple; | ||
| 4730 | |||
| 4731 | se = pick_next_entity(cfs_rq, curr); | ||
| 4732 | cfs_rq = group_cfs_rq(se); | ||
| 4733 | } while (cfs_rq); | ||
| 4734 | |||
| 4735 | p = task_of(se); | ||
| 4736 | |||
| 4737 | /* | ||
| 4738 | * Since we haven't yet done put_prev_entity and if the selected task | ||
| 4739 | * is a different task than we started out with, try and touch the | ||
| 4740 | * least amount of cfs_rqs. | ||
| 4741 | */ | ||
| 4742 | if (prev != p) { | ||
| 4743 | struct sched_entity *pse = &prev->se; | ||
| 4744 | |||
| 4745 | while (!(cfs_rq = is_same_group(se, pse))) { | ||
| 4746 | int se_depth = se->depth; | ||
| 4747 | int pse_depth = pse->depth; | ||
| 4748 | |||
| 4749 | if (se_depth <= pse_depth) { | ||
| 4750 | put_prev_entity(cfs_rq_of(pse), pse); | ||
| 4751 | pse = parent_entity(pse); | ||
| 4752 | } | ||
| 4753 | if (se_depth >= pse_depth) { | ||
| 4754 | set_next_entity(cfs_rq_of(se), se); | ||
| 4755 | se = parent_entity(se); | ||
| 4756 | } | ||
| 4757 | } | ||
| 4758 | |||
| 4759 | put_prev_entity(cfs_rq, pse); | ||
| 4760 | set_next_entity(cfs_rq, se); | ||
| 4761 | } | ||
| 4762 | |||
| 4763 | if (hrtick_enabled(rq)) | ||
| 4764 | hrtick_start_fair(rq, p); | ||
| 4765 | |||
| 4766 | return p; | ||
| 4767 | simple: | ||
| 4768 | cfs_rq = &rq->cfs; | ||
| 4769 | #endif | ||
| 4770 | |||
| 4771 | if (!cfs_rq->nr_running) | ||
| 4772 | goto idle; | ||
| 4773 | |||
| 4774 | put_prev_task(rq, prev); | ||
| 4775 | |||
| 4776 | do { | ||
| 4777 | se = pick_next_entity(cfs_rq, NULL); | ||
| 4506 | set_next_entity(cfs_rq, se); | 4778 | set_next_entity(cfs_rq, se); |
| 4507 | cfs_rq = group_cfs_rq(se); | 4779 | cfs_rq = group_cfs_rq(se); |
| 4508 | } while (cfs_rq); | 4780 | } while (cfs_rq); |
| 4509 | 4781 | ||
| 4510 | p = task_of(se); | 4782 | p = task_of(se); |
| 4783 | |||
| 4511 | if (hrtick_enabled(rq)) | 4784 | if (hrtick_enabled(rq)) |
| 4512 | hrtick_start_fair(rq, p); | 4785 | hrtick_start_fair(rq, p); |
| 4513 | 4786 | ||
| 4514 | return p; | 4787 | return p; |
| 4788 | |||
| 4789 | idle: | ||
| 4790 | new_tasks = idle_balance(rq); | ||
| 4791 | /* | ||
| 4792 | * Because idle_balance() releases (and re-acquires) rq->lock, it is | ||
| 4793 | * possible for any higher priority task to appear. In that case we | ||
| 4794 | * must re-start the pick_next_entity() loop. | ||
| 4795 | */ | ||
| 4796 | if (new_tasks < 0) | ||
| 4797 | return RETRY_TASK; | ||
| 4798 | |||
| 4799 | if (new_tasks > 0) | ||
| 4800 | goto again; | ||
| 4801 | |||
| 4802 | return NULL; | ||
| 4515 | } | 4803 | } |
| 4516 | 4804 | ||
| 4517 | /* | 4805 | /* |
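The new pick_next_task_fair() avoids running put_prev_task()/set_next_entity() over the whole cgroup hierarchy when prev and the picked task share part of it: both entities are walked upward, using the se->depth field introduced in this series, until they meet, and only the differing levels are put and set. The sketch below mirrors that two-pointer walk on a toy tree; node, parent and depth are illustrative stand-ins, and put/set are reduced to counters.

        /*
         * Toy model of the depth-matched walk: count how many levels would
         * be "put" for prev and "set" for next before the two entities meet.
         * Purely illustrative; not the scheduler data structures.
         */
        #include <stdio.h>

        struct node {
                struct node *parent;
                int depth;
        };

        static void walk_to_common_ancestor(struct node *se, struct node *pse,
                                            int *nr_set, int *nr_put)
        {
                while (se != pse) {
                        int se_depth = se->depth;
                        int pse_depth = pse->depth;

                        if (se_depth <= pse_depth) {
                                (*nr_put)++;            /* put_prev_entity(pse) */
                                pse = pse->parent;
                        }
                        if (se_depth >= pse_depth) {
                                (*nr_set)++;            /* set_next_entity(se) */
                                se = se->parent;
                        }
                }
        }

        int main(void)
        {
                struct node root = { .parent = NULL, .depth = 0 };
                struct node a = { .parent = &root, .depth = 1 };
                struct node b = { .parent = &a, .depth = 2 };
                struct node c = { .parent = &root, .depth = 1 };
                int nr_set = 0, nr_put = 0;

                walk_to_common_ancestor(&b, &c, &nr_set, &nr_put);
                printf("set %d levels, put %d levels\n", nr_set, nr_put);
                return 0;
        }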
| @@ -4749,7 +5037,7 @@ static void move_task(struct task_struct *p, struct lb_env *env) | |||
| 4749 | * Is this task likely cache-hot: | 5037 | * Is this task likely cache-hot: |
| 4750 | */ | 5038 | */ |
| 4751 | static int | 5039 | static int |
| 4752 | task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | 5040 | task_hot(struct task_struct *p, u64 now) |
| 4753 | { | 5041 | { |
| 4754 | s64 delta; | 5042 | s64 delta; |
| 4755 | 5043 | ||
| @@ -4783,7 +5071,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) | |||
| 4783 | { | 5071 | { |
| 4784 | int src_nid, dst_nid; | 5072 | int src_nid, dst_nid; |
| 4785 | 5073 | ||
| 4786 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || | 5074 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory || |
| 4787 | !(env->sd->flags & SD_NUMA)) { | 5075 | !(env->sd->flags & SD_NUMA)) { |
| 4788 | return false; | 5076 | return false; |
| 4789 | } | 5077 | } |
| @@ -4814,7 +5102,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) | |||
| 4814 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) | 5102 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) |
| 4815 | return false; | 5103 | return false; |
| 4816 | 5104 | ||
| 4817 | if (!p->numa_faults || !(env->sd->flags & SD_NUMA)) | 5105 | if (!p->numa_faults_memory || !(env->sd->flags & SD_NUMA)) |
| 4818 | return false; | 5106 | return false; |
| 4819 | 5107 | ||
| 4820 | src_nid = cpu_to_node(env->src_cpu); | 5108 | src_nid = cpu_to_node(env->src_cpu); |
| @@ -4910,7 +5198,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) | |||
| 4910 | * 2) task is cache cold, or | 5198 | * 2) task is cache cold, or |
| 4911 | * 3) too many balance attempts have failed. | 5199 | * 3) too many balance attempts have failed. |
| 4912 | */ | 5200 | */ |
| 4913 | tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd); | 5201 | tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq)); |
| 4914 | if (!tsk_cache_hot) | 5202 | if (!tsk_cache_hot) |
| 4915 | tsk_cache_hot = migrate_degrades_locality(p, env); | 5203 | tsk_cache_hot = migrate_degrades_locality(p, env); |
| 4916 | 5204 | ||
| @@ -5773,12 +6061,10 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) | |||
| 5773 | pwr_now /= SCHED_POWER_SCALE; | 6061 | pwr_now /= SCHED_POWER_SCALE; |
| 5774 | 6062 | ||
| 5775 | /* Amount of load we'd subtract */ | 6063 | /* Amount of load we'd subtract */ |
| 5776 | tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / | 6064 | if (busiest->avg_load > scaled_busy_load_per_task) { |
| 5777 | busiest->group_power; | ||
| 5778 | if (busiest->avg_load > tmp) { | ||
| 5779 | pwr_move += busiest->group_power * | 6065 | pwr_move += busiest->group_power * |
| 5780 | min(busiest->load_per_task, | 6066 | min(busiest->load_per_task, |
| 5781 | busiest->avg_load - tmp); | 6067 | busiest->avg_load - scaled_busy_load_per_task); |
| 5782 | } | 6068 | } |
| 5783 | 6069 | ||
| 5784 | /* Amount of load we'd add */ | 6070 | /* Amount of load we'd add */ |
| @@ -6357,17 +6643,23 @@ out: | |||
| 6357 | * idle_balance is called by schedule() if this_cpu is about to become | 6643 | * idle_balance is called by schedule() if this_cpu is about to become |
| 6358 | * idle. Attempts to pull tasks from other CPUs. | 6644 | * idle. Attempts to pull tasks from other CPUs. |
| 6359 | */ | 6645 | */ |
| 6360 | void idle_balance(int this_cpu, struct rq *this_rq) | 6646 | static int idle_balance(struct rq *this_rq) |
| 6361 | { | 6647 | { |
| 6362 | struct sched_domain *sd; | 6648 | struct sched_domain *sd; |
| 6363 | int pulled_task = 0; | 6649 | int pulled_task = 0; |
| 6364 | unsigned long next_balance = jiffies + HZ; | 6650 | unsigned long next_balance = jiffies + HZ; |
| 6365 | u64 curr_cost = 0; | 6651 | u64 curr_cost = 0; |
| 6652 | int this_cpu = this_rq->cpu; | ||
| 6366 | 6653 | ||
| 6654 | idle_enter_fair(this_rq); | ||
| 6655 | /* | ||
| 6656 | * We must set idle_stamp _before_ calling idle_balance(), such that we | ||
| 6657 | * measure the duration of idle_balance() as idle time. | ||
| 6658 | */ | ||
| 6367 | this_rq->idle_stamp = rq_clock(this_rq); | 6659 | this_rq->idle_stamp = rq_clock(this_rq); |
| 6368 | 6660 | ||
| 6369 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | 6661 | if (this_rq->avg_idle < sysctl_sched_migration_cost) |
| 6370 | return; | 6662 | goto out; |
| 6371 | 6663 | ||
| 6372 | /* | 6664 | /* |
| 6373 | * Drop the rq->lock, but keep IRQ/preempt disabled. | 6665 | * Drop the rq->lock, but keep IRQ/preempt disabled. |
| @@ -6405,15 +6697,22 @@ void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 6405 | interval = msecs_to_jiffies(sd->balance_interval); | 6697 | interval = msecs_to_jiffies(sd->balance_interval); |
| 6406 | if (time_after(next_balance, sd->last_balance + interval)) | 6698 | if (time_after(next_balance, sd->last_balance + interval)) |
| 6407 | next_balance = sd->last_balance + interval; | 6699 | next_balance = sd->last_balance + interval; |
| 6408 | if (pulled_task) { | 6700 | if (pulled_task) |
| 6409 | this_rq->idle_stamp = 0; | ||
| 6410 | break; | 6701 | break; |
| 6411 | } | ||
| 6412 | } | 6702 | } |
| 6413 | rcu_read_unlock(); | 6703 | rcu_read_unlock(); |
| 6414 | 6704 | ||
| 6415 | raw_spin_lock(&this_rq->lock); | 6705 | raw_spin_lock(&this_rq->lock); |
| 6416 | 6706 | ||
| 6707 | /* | ||
| 6708 | * While browsing the domains, we released the rq lock. | ||
| 6709 | * A task could have be enqueued in the meantime | ||
| 6710 | */ | ||
| 6711 | if (this_rq->cfs.h_nr_running && !pulled_task) { | ||
| 6712 | pulled_task = 1; | ||
| 6713 | goto out; | ||
| 6714 | } | ||
| 6715 | |||
| 6417 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 6716 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
| 6418 | /* | 6717 | /* |
| 6419 | * We are going idle. next_balance may be set based on | 6718 | * We are going idle. next_balance may be set based on |
| @@ -6424,6 +6723,20 @@ void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 6424 | 6723 | ||
| 6425 | if (curr_cost > this_rq->max_idle_balance_cost) | 6724 | if (curr_cost > this_rq->max_idle_balance_cost) |
| 6426 | this_rq->max_idle_balance_cost = curr_cost; | 6725 | this_rq->max_idle_balance_cost = curr_cost; |
| 6726 | |||
| 6727 | out: | ||
| 6728 | /* Is there a task of a high priority class? */ | ||
| 6729 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && | ||
| 6730 | (this_rq->dl.dl_nr_running || | ||
| 6731 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) | ||
| 6732 | pulled_task = -1; | ||
| 6733 | |||
| 6734 | if (pulled_task) { | ||
| 6735 | idle_exit_fair(this_rq); | ||
| 6736 | this_rq->idle_stamp = 0; | ||
| 6737 | } | ||
| 6738 | |||
| 6739 | return pulled_task; | ||
| 6427 | } | 6740 | } |
| 6428 | 6741 | ||
| 6429 | /* | 6742 | /* |
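idle_balance() now returns a tri-state instead of void, which is what the idle: label in pick_next_task_fair() consumes: a positive count means CFS tasks were pulled (retry the pick), 0 means nothing changed, and -1 signals that a deadline or runnable RT task appeared while the rq lock was dropped, so the caller returns RETRY_TASK and lets the higher class pick. A hedged sketch of just that return-value plumbing follows; the names and simplified rq fields are illustrative.

        enum pick_result { PICK_NONE, PICK_RETRY_TASK, PICK_AGAIN };

        struct toy_rq {
                int cfs_tasks;          /* rq->cfs.h_nr_running              */
                int higher_class_tasks; /* runnable RT/deadline tasks        */
        };

        static int toy_idle_balance(struct toy_rq *rq, int pulled)
        {
                rq->cfs_tasks += pulled;
                /* A higher-priority task may have appeared while the lock was dropped. */
                if (rq->higher_class_tasks)
                        return -1;
                return pulled;
        }

        static enum pick_result after_idle_balance(struct toy_rq *rq, int pulled)
        {
                int new_tasks = toy_idle_balance(rq, pulled);

                if (new_tasks < 0)
                        return PICK_RETRY_TASK; /* let the RT/deadline class pick */
                if (new_tasks > 0)
                        return PICK_AGAIN;      /* retry the fair pick loop */
                return PICK_NONE;               /* go idle */
        }

        int main(void)
        {
                struct toy_rq rq = { .cfs_tasks = 0, .higher_class_tasks = 1 };

                return after_idle_balance(&rq, 0) == PICK_RETRY_TASK ? 0 : 1;
        }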
| @@ -6494,6 +6807,11 @@ out_unlock: | |||
| 6494 | return 0; | 6807 | return 0; |
| 6495 | } | 6808 | } |
| 6496 | 6809 | ||
| 6810 | static inline int on_null_domain(struct rq *rq) | ||
| 6811 | { | ||
| 6812 | return unlikely(!rcu_dereference_sched(rq->sd)); | ||
| 6813 | } | ||
| 6814 | |||
| 6497 | #ifdef CONFIG_NO_HZ_COMMON | 6815 | #ifdef CONFIG_NO_HZ_COMMON |
| 6498 | /* | 6816 | /* |
| 6499 | * idle load balancing details | 6817 | * idle load balancing details |
| @@ -6548,8 +6866,13 @@ static void nohz_balancer_kick(void) | |||
| 6548 | static inline void nohz_balance_exit_idle(int cpu) | 6866 | static inline void nohz_balance_exit_idle(int cpu) |
| 6549 | { | 6867 | { |
| 6550 | if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { | 6868 | if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { |
| 6551 | cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); | 6869 | /* |
| 6552 | atomic_dec(&nohz.nr_cpus); | 6870 | * Completely isolated CPUs don't ever set, so we must test. |
| 6871 | */ | ||
| 6872 | if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) { | ||
| 6873 | cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); | ||
| 6874 | atomic_dec(&nohz.nr_cpus); | ||
| 6875 | } | ||
| 6553 | clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); | 6876 | clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); |
| 6554 | } | 6877 | } |
| 6555 | } | 6878 | } |
| @@ -6603,6 +6926,12 @@ void nohz_balance_enter_idle(int cpu) | |||
| 6603 | if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) | 6926 | if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) |
| 6604 | return; | 6927 | return; |
| 6605 | 6928 | ||
| 6929 | /* | ||
| 6930 | * If we're a completely isolated CPU, we don't play. | ||
| 6931 | */ | ||
| 6932 | if (on_null_domain(cpu_rq(cpu))) | ||
| 6933 | return; | ||
| 6934 | |||
| 6606 | cpumask_set_cpu(cpu, nohz.idle_cpus_mask); | 6935 | cpumask_set_cpu(cpu, nohz.idle_cpus_mask); |
| 6607 | atomic_inc(&nohz.nr_cpus); | 6936 | atomic_inc(&nohz.nr_cpus); |
| 6608 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); | 6937 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); |
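The two nohz hunks above handle CPUs attached to no scheduler domain (fully isolated): such a CPU never enters nohz.idle_cpus_mask, so the exit path checks the mask before clearing the bit and decrementing the counter. A short hedged sketch of that enter/exit pairing with a plain bitmask standing in for the cpumask and atomic counter:

        #include <stdbool.h>

        static unsigned long idle_mask;
        static int nr_idle_cpus;

        static void toy_enter_idle(int cpu, bool isolated)
        {
                if (isolated)           /* null sched domain: don't play */
                        return;
                idle_mask |= 1UL << cpu;
                nr_idle_cpus++;
        }

        static void toy_exit_idle(int cpu)
        {
                /* Isolated CPUs never set their bit, so test before clearing. */
                if (idle_mask & (1UL << cpu)) {
                        idle_mask &= ~(1UL << cpu);
                        nr_idle_cpus--;
                }
        }

        int main(void)
        {
                toy_enter_idle(3, true);        /* isolated: nothing recorded    */
                toy_exit_idle(3);               /* must not underflow the count  */
                return nr_idle_cpus == 0 ? 0 : 1;
        }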
| @@ -6865,11 +7194,6 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
| 6865 | nohz_idle_balance(this_rq, idle); | 7194 | nohz_idle_balance(this_rq, idle); |
| 6866 | } | 7195 | } |
| 6867 | 7196 | ||
| 6868 | static inline int on_null_domain(struct rq *rq) | ||
| 6869 | { | ||
| 6870 | return !rcu_dereference_sched(rq->sd); | ||
| 6871 | } | ||
| 6872 | |||
| 6873 | /* | 7197 | /* |
| 6874 | * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. | 7198 | * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. |
| 6875 | */ | 7199 | */ |
| @@ -6999,15 +7323,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) | |||
| 6999 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 7323 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
| 7000 | 7324 | ||
| 7001 | /* | 7325 | /* |
| 7002 | * Ensure the task's vruntime is normalized, so that when its | 7326 | * Ensure the task's vruntime is normalized, so that when it's |
| 7003 | * switched back to the fair class the enqueue_entity(.flags=0) will | 7327 | * switched back to the fair class the enqueue_entity(.flags=0) will |
| 7004 | * do the right thing. | 7328 | * do the right thing. |
| 7005 | * | 7329 | * |
| 7006 | * If it was on_rq, then the dequeue_entity(.flags=0) will already | 7330 | * If it's on_rq, then the dequeue_entity(.flags=0) will already |
| 7007 | * have normalized the vruntime, if it was !on_rq, then only when | 7331 | * have normalized the vruntime, if it's !on_rq, then only when |
| 7008 | * the task is sleeping will it still have non-normalized vruntime. | 7332 | * the task is sleeping will it still have non-normalized vruntime. |
| 7009 | */ | 7333 | */ |
| 7010 | if (!se->on_rq && p->state != TASK_RUNNING) { | 7334 | if (!p->on_rq && p->state != TASK_RUNNING) { |
| 7011 | /* | 7335 | /* |
| 7012 | * Fix up our vruntime so that the current sleep doesn't | 7336 | * Fix up our vruntime so that the current sleep doesn't |
| 7013 | * cause 'unlimited' sleep bonus. | 7337 | * cause 'unlimited' sleep bonus. |
| @@ -7034,7 +7358,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) | |||
| 7034 | */ | 7358 | */ |
| 7035 | static void switched_to_fair(struct rq *rq, struct task_struct *p) | 7359 | static void switched_to_fair(struct rq *rq, struct task_struct *p) |
| 7036 | { | 7360 | { |
| 7037 | if (!p->se.on_rq) | 7361 | struct sched_entity *se = &p->se; |
| 7362 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 7363 | /* | ||
| 7364 | * Since the real-depth could have been changed (only FAIR | ||
| 7365 | * class maintain depth value), reset depth properly. | ||
| 7366 | */ | ||
| 7367 | se->depth = se->parent ? se->parent->depth + 1 : 0; | ||
| 7368 | #endif | ||
| 7369 | if (!se->on_rq) | ||
| 7038 | return; | 7370 | return; |
| 7039 | 7371 | ||
| 7040 | /* | 7372 | /* |
| @@ -7082,7 +7414,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) | |||
| 7082 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7414 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7083 | static void task_move_group_fair(struct task_struct *p, int on_rq) | 7415 | static void task_move_group_fair(struct task_struct *p, int on_rq) |
| 7084 | { | 7416 | { |
| 7417 | struct sched_entity *se = &p->se; | ||
| 7085 | struct cfs_rq *cfs_rq; | 7418 | struct cfs_rq *cfs_rq; |
| 7419 | |||
| 7086 | /* | 7420 | /* |
| 7087 | * If the task was not on the rq at the time of this cgroup movement | 7421 | * If the task was not on the rq at the time of this cgroup movement |
| 7088 | * it must have been asleep, sleeping tasks keep their ->vruntime | 7422 | * it must have been asleep, sleeping tasks keep their ->vruntime |
| @@ -7108,23 +7442,24 @@ static void task_move_group_fair(struct task_struct *p, int on_rq) | |||
| 7108 | * To prevent boost or penalty in the new cfs_rq caused by delta | 7442 | * To prevent boost or penalty in the new cfs_rq caused by delta |
| 7109 | * min_vruntime between the two cfs_rqs, we skip vruntime adjustment. | 7443 | * min_vruntime between the two cfs_rqs, we skip vruntime adjustment. |
| 7110 | */ | 7444 | */ |
| 7111 | if (!on_rq && (!p->se.sum_exec_runtime || p->state == TASK_WAKING)) | 7445 | if (!on_rq && (!se->sum_exec_runtime || p->state == TASK_WAKING)) |
| 7112 | on_rq = 1; | 7446 | on_rq = 1; |
| 7113 | 7447 | ||
| 7114 | if (!on_rq) | 7448 | if (!on_rq) |
| 7115 | p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; | 7449 | se->vruntime -= cfs_rq_of(se)->min_vruntime; |
| 7116 | set_task_rq(p, task_cpu(p)); | 7450 | set_task_rq(p, task_cpu(p)); |
| 7451 | se->depth = se->parent ? se->parent->depth + 1 : 0; | ||
| 7117 | if (!on_rq) { | 7452 | if (!on_rq) { |
| 7118 | cfs_rq = cfs_rq_of(&p->se); | 7453 | cfs_rq = cfs_rq_of(se); |
| 7119 | p->se.vruntime += cfs_rq->min_vruntime; | 7454 | se->vruntime += cfs_rq->min_vruntime; |
| 7120 | #ifdef CONFIG_SMP | 7455 | #ifdef CONFIG_SMP |
| 7121 | /* | 7456 | /* |
| 7122 | * migrate_task_rq_fair() will have removed our previous | 7457 | * migrate_task_rq_fair() will have removed our previous |
| 7123 | * contribution, but we must synchronize for ongoing future | 7458 | * contribution, but we must synchronize for ongoing future |
| 7124 | * decay. | 7459 | * decay. |
| 7125 | */ | 7460 | */ |
| 7126 | p->se.avg.decay_count = atomic64_read(&cfs_rq->decay_counter); | 7461 | se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); |
| 7127 | cfs_rq->blocked_load_avg += p->se.avg.load_avg_contrib; | 7462 | cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; |
| 7128 | #endif | 7463 | #endif |
| 7129 | } | 7464 | } |
| 7130 | } | 7465 | } |
| @@ -7220,10 +7555,13 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
| 7220 | if (!se) | 7555 | if (!se) |
| 7221 | return; | 7556 | return; |
| 7222 | 7557 | ||
| 7223 | if (!parent) | 7558 | if (!parent) { |
| 7224 | se->cfs_rq = &rq->cfs; | 7559 | se->cfs_rq = &rq->cfs; |
| 7225 | else | 7560 | se->depth = 0; |
| 7561 | } else { | ||
| 7226 | se->cfs_rq = parent->my_q; | 7562 | se->cfs_rq = parent->my_q; |
| 7563 | se->depth = parent->depth + 1; | ||
| 7564 | } | ||
| 7227 | 7565 | ||
| 7228 | se->my_q = cfs_rq; | 7566 | se->my_q = cfs_rq; |
| 7229 | /* guarantee group entities always have weight */ | 7567 | /* guarantee group entities always have weight */ |
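Several hunks above introduce and maintain se->depth (root entities at 0, each child one level deeper): init_tg_cfs_entry() sets it when a group entity is wired up, and switched_to_fair()/task_move_group_fair() re-derive it from the parent because only the fair class keeps it current. A hedged sketch of the invariant, with a toy struct standing in for sched_entity:

        #include <assert.h>
        #include <stddef.h>

        struct toy_se {
                struct toy_se *parent;
                int depth;
        };

        static void reset_depth(struct toy_se *se)
        {
                se->depth = se->parent ? se->parent->depth + 1 : 0;
        }

        int main(void)
        {
                struct toy_se root = { .parent = NULL };
                struct toy_se child = { .parent = &root };

                reset_depth(&root);
                reset_depth(&child);
                assert(root.depth == 0 && child.depth == 1);
                return 0;
        }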
diff --git a/kernel/cpu/idle.c b/kernel/sched/idle.c index 277f494c2a9a..b7976a127178 100644 --- a/kernel/cpu/idle.c +++ b/kernel/sched/idle.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | */ | 3 | */ |
| 4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
| 5 | #include <linux/cpu.h> | 5 | #include <linux/cpu.h> |
| 6 | #include <linux/cpuidle.h> | ||
| 6 | #include <linux/tick.h> | 7 | #include <linux/tick.h> |
| 7 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
| 8 | #include <linux/stackprotector.h> | 9 | #include <linux/stackprotector.h> |
| @@ -95,8 +96,10 @@ static void cpu_idle_loop(void) | |||
| 95 | if (!current_clr_polling_and_test()) { | 96 | if (!current_clr_polling_and_test()) { |
| 96 | stop_critical_timings(); | 97 | stop_critical_timings(); |
| 97 | rcu_idle_enter(); | 98 | rcu_idle_enter(); |
| 98 | arch_cpu_idle(); | 99 | if (cpuidle_idle_call()) |
| 99 | WARN_ON_ONCE(irqs_disabled()); | 100 | arch_cpu_idle(); |
| 101 | if (WARN_ON_ONCE(irqs_disabled())) | ||
| 102 | local_irq_enable(); | ||
| 100 | rcu_idle_exit(); | 103 | rcu_idle_exit(); |
| 101 | start_critical_timings(); | 104 | start_critical_timings(); |
| 102 | } else { | 105 | } else { |
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 516c3d9ceea1..879f2b75266a 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c | |||
| @@ -13,18 +13,8 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
| 13 | { | 13 | { |
| 14 | return task_cpu(p); /* IDLE tasks as never migrated */ | 14 | return task_cpu(p); /* IDLE tasks as never migrated */ |
| 15 | } | 15 | } |
| 16 | |||
| 17 | static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) | ||
| 18 | { | ||
| 19 | idle_exit_fair(rq); | ||
| 20 | rq_last_tick_reset(rq); | ||
| 21 | } | ||
| 22 | |||
| 23 | static void post_schedule_idle(struct rq *rq) | ||
| 24 | { | ||
| 25 | idle_enter_fair(rq); | ||
| 26 | } | ||
| 27 | #endif /* CONFIG_SMP */ | 16 | #endif /* CONFIG_SMP */ |
| 17 | |||
| 28 | /* | 18 | /* |
| 29 | * Idle tasks are unconditionally rescheduled: | 19 | * Idle tasks are unconditionally rescheduled: |
| 30 | */ | 20 | */ |
| @@ -33,13 +23,12 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl | |||
| 33 | resched_task(rq->idle); | 23 | resched_task(rq->idle); |
| 34 | } | 24 | } |
| 35 | 25 | ||
| 36 | static struct task_struct *pick_next_task_idle(struct rq *rq) | 26 | static struct task_struct * |
| 27 | pick_next_task_idle(struct rq *rq, struct task_struct *prev) | ||
| 37 | { | 28 | { |
| 29 | put_prev_task(rq, prev); | ||
| 30 | |||
| 38 | schedstat_inc(rq, sched_goidle); | 31 | schedstat_inc(rq, sched_goidle); |
| 39 | #ifdef CONFIG_SMP | ||
| 40 | /* Trigger the post schedule to do an idle_enter for CFS */ | ||
| 41 | rq->post_schedule = 1; | ||
| 42 | #endif | ||
| 43 | return rq->idle; | 32 | return rq->idle; |
| 44 | } | 33 | } |
| 45 | 34 | ||
| @@ -58,6 +47,8 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags) | |||
| 58 | 47 | ||
| 59 | static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) | 48 | static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) |
| 60 | { | 49 | { |
| 50 | idle_exit_fair(rq); | ||
| 51 | rq_last_tick_reset(rq); | ||
| 61 | } | 52 | } |
| 62 | 53 | ||
| 63 | static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) | 54 | static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) |
| @@ -101,8 +92,6 @@ const struct sched_class idle_sched_class = { | |||
| 101 | 92 | ||
| 102 | #ifdef CONFIG_SMP | 93 | #ifdef CONFIG_SMP |
| 103 | .select_task_rq = select_task_rq_idle, | 94 | .select_task_rq = select_task_rq_idle, |
| 104 | .pre_schedule = pre_schedule_idle, | ||
| 105 | .post_schedule = post_schedule_idle, | ||
| 106 | #endif | 95 | #endif |
| 107 | 96 | ||
| 108 | .set_curr_task = set_curr_task_idle, | 97 | .set_curr_task = set_curr_task_idle, |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a2740b775b45..d8cdf1618551 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
| @@ -229,6 +229,14 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 229 | 229 | ||
| 230 | #ifdef CONFIG_SMP | 230 | #ifdef CONFIG_SMP |
| 231 | 231 | ||
| 232 | static int pull_rt_task(struct rq *this_rq); | ||
| 233 | |||
| 234 | static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) | ||
| 235 | { | ||
| 236 | /* Try to pull RT tasks here if we lower this rq's prio */ | ||
| 237 | return rq->rt.highest_prio.curr > prev->prio; | ||
| 238 | } | ||
| 239 | |||
| 232 | static inline int rt_overloaded(struct rq *rq) | 240 | static inline int rt_overloaded(struct rq *rq) |
| 233 | { | 241 | { |
| 234 | return atomic_read(&rq->rd->rto_count); | 242 | return atomic_read(&rq->rd->rto_count); |
| @@ -315,6 +323,15 @@ static inline int has_pushable_tasks(struct rq *rq) | |||
| 315 | return !plist_head_empty(&rq->rt.pushable_tasks); | 323 | return !plist_head_empty(&rq->rt.pushable_tasks); |
| 316 | } | 324 | } |
| 317 | 325 | ||
| 326 | static inline void set_post_schedule(struct rq *rq) | ||
| 327 | { | ||
| 328 | /* | ||
| 329 | * We detect this state here so that we can avoid taking the RQ | ||
| 330 | * lock again later if there is no need to push | ||
| 331 | */ | ||
| 332 | rq->post_schedule = has_pushable_tasks(rq); | ||
| 333 | } | ||
| 334 | |||
| 318 | static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) | 335 | static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) |
| 319 | { | 336 | { |
| 320 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); | 337 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); |
| @@ -359,6 +376,19 @@ void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
| 359 | { | 376 | { |
| 360 | } | 377 | } |
| 361 | 378 | ||
| 379 | static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) | ||
| 380 | { | ||
| 381 | return false; | ||
| 382 | } | ||
| 383 | |||
| 384 | static inline int pull_rt_task(struct rq *this_rq) | ||
| 385 | { | ||
| 386 | return 0; | ||
| 387 | } | ||
| 388 | |||
| 389 | static inline void set_post_schedule(struct rq *rq) | ||
| 390 | { | ||
| 391 | } | ||
| 362 | #endif /* CONFIG_SMP */ | 392 | #endif /* CONFIG_SMP */ |
| 363 | 393 | ||
| 364 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) | 394 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) |
| @@ -440,11 +470,6 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | |||
| 440 | dequeue_rt_entity(rt_se); | 470 | dequeue_rt_entity(rt_se); |
| 441 | } | 471 | } |
| 442 | 472 | ||
| 443 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
| 444 | { | ||
| 445 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
| 446 | } | ||
| 447 | |||
| 448 | static int rt_se_boosted(struct sched_rt_entity *rt_se) | 473 | static int rt_se_boosted(struct sched_rt_entity *rt_se) |
| 449 | { | 474 | { |
| 450 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 475 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
| @@ -515,11 +540,6 @@ static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | |||
| 515 | { | 540 | { |
| 516 | } | 541 | } |
| 517 | 542 | ||
| 518 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
| 519 | { | ||
| 520 | return rt_rq->rt_throttled; | ||
| 521 | } | ||
| 522 | |||
| 523 | static inline const struct cpumask *sched_rt_period_mask(void) | 543 | static inline const struct cpumask *sched_rt_period_mask(void) |
| 524 | { | 544 | { |
| 525 | return cpu_online_mask; | 545 | return cpu_online_mask; |
| @@ -538,6 +558,14 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
| 538 | 558 | ||
| 539 | #endif /* CONFIG_RT_GROUP_SCHED */ | 559 | #endif /* CONFIG_RT_GROUP_SCHED */ |
| 540 | 560 | ||
| 561 | bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) | ||
| 562 | { | ||
| 563 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
| 564 | |||
| 565 | return (hrtimer_active(&rt_b->rt_period_timer) || | ||
| 566 | rt_rq->rt_time < rt_b->rt_runtime); | ||
| 567 | } | ||
| 568 | |||
| 541 | #ifdef CONFIG_SMP | 569 | #ifdef CONFIG_SMP |
| 542 | /* | 570 | /* |
| 543 | * We ran out of runtime, see if we can borrow some from our neighbours. | 571 | * We ran out of runtime, see if we can borrow some from our neighbours. |
| @@ -1310,15 +1338,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) | |||
| 1310 | { | 1338 | { |
| 1311 | struct sched_rt_entity *rt_se; | 1339 | struct sched_rt_entity *rt_se; |
| 1312 | struct task_struct *p; | 1340 | struct task_struct *p; |
| 1313 | struct rt_rq *rt_rq; | 1341 | struct rt_rq *rt_rq = &rq->rt; |
| 1314 | |||
| 1315 | rt_rq = &rq->rt; | ||
| 1316 | |||
| 1317 | if (!rt_rq->rt_nr_running) | ||
| 1318 | return NULL; | ||
| 1319 | |||
| 1320 | if (rt_rq_throttled(rt_rq)) | ||
| 1321 | return NULL; | ||
| 1322 | 1342 | ||
| 1323 | do { | 1343 | do { |
| 1324 | rt_se = pick_next_rt_entity(rq, rt_rq); | 1344 | rt_se = pick_next_rt_entity(rq, rt_rq); |
| @@ -1332,21 +1352,45 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) | |||
| 1332 | return p; | 1352 | return p; |
| 1333 | } | 1353 | } |
| 1334 | 1354 | ||
| 1335 | static struct task_struct *pick_next_task_rt(struct rq *rq) | 1355 | static struct task_struct * |
| 1356 | pick_next_task_rt(struct rq *rq, struct task_struct *prev) | ||
| 1336 | { | 1357 | { |
| 1337 | struct task_struct *p = _pick_next_task_rt(rq); | 1358 | struct task_struct *p; |
| 1359 | struct rt_rq *rt_rq = &rq->rt; | ||
| 1360 | |||
| 1361 | if (need_pull_rt_task(rq, prev)) { | ||
| 1362 | pull_rt_task(rq); | ||
| 1363 | /* | ||
| 1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | ||
| 1365 | * means a dl task can slip in, in which case we need to | ||
| 1366 | * re-start task selection. | ||
| 1367 | */ | ||
| 1368 | if (unlikely(rq->dl.dl_nr_running)) | ||
| 1369 | return RETRY_TASK; | ||
| 1370 | } | ||
| 1371 | |||
| 1372 | /* | ||
| 1373 | * We may dequeue prev's rt_rq in put_prev_task(). | ||
| 1374 | * So, we update time before rt_nr_running check. | ||
| 1375 | */ | ||
| 1376 | if (prev->sched_class == &rt_sched_class) | ||
| 1377 | update_curr_rt(rq); | ||
| 1378 | |||
| 1379 | if (!rt_rq->rt_nr_running) | ||
| 1380 | return NULL; | ||
| 1381 | |||
| 1382 | if (rt_rq_throttled(rt_rq)) | ||
| 1383 | return NULL; | ||
| 1384 | |||
| 1385 | put_prev_task(rq, prev); | ||
| 1386 | |||
| 1387 | p = _pick_next_task_rt(rq); | ||
| 1338 | 1388 | ||
| 1339 | /* The running task is never eligible for pushing */ | 1389 | /* The running task is never eligible for pushing */ |
| 1340 | if (p) | 1390 | if (p) |
| 1341 | dequeue_pushable_task(rq, p); | 1391 | dequeue_pushable_task(rq, p); |
| 1342 | 1392 | ||
| 1343 | #ifdef CONFIG_SMP | 1393 | set_post_schedule(rq); |
| 1344 | /* | ||
| 1345 | * We detect this state here so that we can avoid taking the RQ | ||
| 1346 | * lock again later if there is no need to push | ||
| 1347 | */ | ||
| 1348 | rq->post_schedule = has_pushable_tasks(rq); | ||
| 1349 | #endif | ||
| 1350 | 1394 | ||
| 1351 | return p; | 1395 | return p; |
| 1352 | } | 1396 | } |
| @@ -1716,13 +1760,6 @@ skip: | |||
| 1716 | return ret; | 1760 | return ret; |
| 1717 | } | 1761 | } |
| 1718 | 1762 | ||
| 1719 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) | ||
| 1720 | { | ||
| 1721 | /* Try to pull RT tasks here if we lower this rq's prio */ | ||
| 1722 | if (rq->rt.highest_prio.curr > prev->prio) | ||
| 1723 | pull_rt_task(rq); | ||
| 1724 | } | ||
| 1725 | |||
| 1726 | static void post_schedule_rt(struct rq *rq) | 1763 | static void post_schedule_rt(struct rq *rq) |
| 1727 | { | 1764 | { |
| 1728 | push_rt_tasks(rq); | 1765 | push_rt_tasks(rq); |
| @@ -1825,7 +1862,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
| 1825 | resched_task(rq->curr); | 1862 | resched_task(rq->curr); |
| 1826 | } | 1863 | } |
| 1827 | 1864 | ||
| 1828 | void init_sched_rt_class(void) | 1865 | void __init init_sched_rt_class(void) |
| 1829 | { | 1866 | { |
| 1830 | unsigned int i; | 1867 | unsigned int i; |
| 1831 | 1868 | ||
| @@ -1999,7 +2036,6 @@ const struct sched_class rt_sched_class = { | |||
| 1999 | .set_cpus_allowed = set_cpus_allowed_rt, | 2036 | .set_cpus_allowed = set_cpus_allowed_rt, |
| 2000 | .rq_online = rq_online_rt, | 2037 | .rq_online = rq_online_rt, |
| 2001 | .rq_offline = rq_offline_rt, | 2038 | .rq_offline = rq_offline_rt, |
| 2002 | .pre_schedule = pre_schedule_rt, | ||
| 2003 | .post_schedule = post_schedule_rt, | 2039 | .post_schedule = post_schedule_rt, |
| 2004 | .task_woken = task_woken_rt, | 2040 | .task_woken = task_woken_rt, |
| 2005 | .switched_from = switched_from_rt, | 2041 | .switched_from = switched_from_rt, |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd20f8b..f2de7a175620 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -24,24 +24,6 @@ extern long calc_load_fold_active(struct rq *this_rq); | |||
| 24 | extern void update_cpu_load_active(struct rq *this_rq); | 24 | extern void update_cpu_load_active(struct rq *this_rq); |
| 25 | 25 | ||
| 26 | /* | 26 | /* |
| 27 | * Convert user-nice values [ -20 ... 0 ... 19 ] | ||
| 28 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | ||
| 29 | * and back. | ||
| 30 | */ | ||
| 31 | #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) | ||
| 32 | #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) | ||
| 33 | #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) | ||
| 34 | |||
| 35 | /* | ||
| 36 | * 'User priority' is the nice value converted to something we | ||
| 37 | * can work with better when scaling various scheduler parameters, | ||
| 38 | * it's a [ 0 ... 39 ] range. | ||
| 39 | */ | ||
| 40 | #define USER_PRIO(p) ((p)-MAX_RT_PRIO) | ||
| 41 | #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) | ||
| 42 | #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Helpers for converting nanosecond timing to jiffy resolution | 27 | * Helpers for converting nanosecond timing to jiffy resolution |
| 46 | */ | 28 | */ |
| 47 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) | 29 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) |
| @@ -441,6 +423,18 @@ struct rt_rq { | |||
| 441 | #endif | 423 | #endif |
| 442 | }; | 424 | }; |
| 443 | 425 | ||
| 426 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 427 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
| 428 | { | ||
| 429 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
| 430 | } | ||
| 431 | #else | ||
| 432 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
| 433 | { | ||
| 434 | return rt_rq->rt_throttled; | ||
| 435 | } | ||
| 436 | #endif | ||
| 437 | |||
| 444 | /* Deadline class' related fields in a runqueue */ | 438 | /* Deadline class' related fields in a runqueue */ |
| 445 | struct dl_rq { | 439 | struct dl_rq { |
| 446 | /* runqueue is an rbtree, ordered by deadline */ | 440 | /* runqueue is an rbtree, ordered by deadline */ |
| @@ -462,7 +456,6 @@ struct dl_rq { | |||
| 462 | } earliest_dl; | 456 | } earliest_dl; |
| 463 | 457 | ||
| 464 | unsigned long dl_nr_migratory; | 458 | unsigned long dl_nr_migratory; |
| 465 | unsigned long dl_nr_total; | ||
| 466 | int overloaded; | 459 | int overloaded; |
| 467 | 460 | ||
| 468 | /* | 461 | /* |
| @@ -559,11 +552,9 @@ struct rq { | |||
| 559 | #ifdef CONFIG_FAIR_GROUP_SCHED | 552 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 560 | /* list of leaf cfs_rq on this cpu: */ | 553 | /* list of leaf cfs_rq on this cpu: */ |
| 561 | struct list_head leaf_cfs_rq_list; | 554 | struct list_head leaf_cfs_rq_list; |
| 562 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 563 | 555 | ||
| 564 | #ifdef CONFIG_RT_GROUP_SCHED | 556 | struct sched_avg avg; |
| 565 | struct list_head leaf_rt_rq_list; | 557 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
| 566 | #endif | ||
| 567 | 558 | ||
| 568 | /* | 559 | /* |
| 569 | * This is part of a global counter where only the total sum | 560 | * This is part of a global counter where only the total sum |
| @@ -652,8 +643,6 @@ struct rq { | |||
| 652 | #ifdef CONFIG_SMP | 643 | #ifdef CONFIG_SMP |
| 653 | struct llist_head wake_list; | 644 | struct llist_head wake_list; |
| 654 | #endif | 645 | #endif |
| 655 | |||
| 656 | struct sched_avg avg; | ||
| 657 | }; | 646 | }; |
| 658 | 647 | ||
| 659 | static inline int cpu_of(struct rq *rq) | 648 | static inline int cpu_of(struct rq *rq) |
| @@ -1113,6 +1102,8 @@ static const u32 prio_to_wmult[40] = { | |||
| 1113 | 1102 | ||
| 1114 | #define DEQUEUE_SLEEP 1 | 1103 | #define DEQUEUE_SLEEP 1 |
| 1115 | 1104 | ||
| 1105 | #define RETRY_TASK ((void *)-1UL) | ||
| 1106 | |||
| 1116 | struct sched_class { | 1107 | struct sched_class { |
| 1117 | const struct sched_class *next; | 1108 | const struct sched_class *next; |
| 1118 | 1109 | ||
| @@ -1123,14 +1114,22 @@ struct sched_class { | |||
| 1123 | 1114 | ||
| 1124 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); | 1115 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); |
| 1125 | 1116 | ||
| 1126 | struct task_struct * (*pick_next_task) (struct rq *rq); | 1117 | /* |
| 1118 | * It is the responsibility of the pick_next_task() method that will | ||
| 1119 | * return the next task to call put_prev_task() on the @prev task or | ||
| 1120 | * something equivalent. | ||
| 1121 | * | ||
| 1122 | * May return RETRY_TASK when it finds a higher prio class has runnable | ||
| 1123 | * tasks. | ||
| 1124 | */ | ||
| 1125 | struct task_struct * (*pick_next_task) (struct rq *rq, | ||
| 1126 | struct task_struct *prev); | ||
| 1127 | void (*put_prev_task) (struct rq *rq, struct task_struct *p); | 1127 | void (*put_prev_task) (struct rq *rq, struct task_struct *p); |
| 1128 | 1128 | ||
| 1129 | #ifdef CONFIG_SMP | 1129 | #ifdef CONFIG_SMP |
| 1130 | int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); | 1130 | int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); |
| 1131 | void (*migrate_task_rq)(struct task_struct *p, int next_cpu); | 1131 | void (*migrate_task_rq)(struct task_struct *p, int next_cpu); |
| 1132 | 1132 | ||
| 1133 | void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); | ||
| 1134 | void (*post_schedule) (struct rq *this_rq); | 1133 | void (*post_schedule) (struct rq *this_rq); |
| 1135 | void (*task_waking) (struct task_struct *task); | 1134 | void (*task_waking) (struct task_struct *task); |
| 1136 | void (*task_woken) (struct rq *this_rq, struct task_struct *task); | 1135 | void (*task_woken) (struct rq *this_rq, struct task_struct *task); |
| @@ -1160,6 +1159,11 @@ struct sched_class { | |||
| 1160 | #endif | 1159 | #endif |
| 1161 | }; | 1160 | }; |
| 1162 | 1161 | ||
| 1162 | static inline void put_prev_task(struct rq *rq, struct task_struct *prev) | ||
| 1163 | { | ||
| 1164 | prev->sched_class->put_prev_task(rq, prev); | ||
| 1165 | } | ||
| 1166 | |||
| 1163 | #define sched_class_highest (&stop_sched_class) | 1167 | #define sched_class_highest (&stop_sched_class) |
| 1164 | #define for_each_class(class) \ | 1168 | #define for_each_class(class) \ |
| 1165 | for (class = sched_class_highest; class; class = class->next) | 1169 | for (class = sched_class_highest; class; class = class->next) |
| @@ -1176,16 +1180,14 @@ extern const struct sched_class idle_sched_class; | |||
| 1176 | extern void update_group_power(struct sched_domain *sd, int cpu); | 1180 | extern void update_group_power(struct sched_domain *sd, int cpu); |
| 1177 | 1181 | ||
| 1178 | extern void trigger_load_balance(struct rq *rq); | 1182 | extern void trigger_load_balance(struct rq *rq); |
| 1179 | extern void idle_balance(int this_cpu, struct rq *this_rq); | ||
| 1180 | 1183 | ||
| 1181 | extern void idle_enter_fair(struct rq *this_rq); | 1184 | extern void idle_enter_fair(struct rq *this_rq); |
| 1182 | extern void idle_exit_fair(struct rq *this_rq); | 1185 | extern void idle_exit_fair(struct rq *this_rq); |
| 1183 | 1186 | ||
| 1184 | #else /* CONFIG_SMP */ | 1187 | #else |
| 1185 | 1188 | ||
| 1186 | static inline void idle_balance(int cpu, struct rq *rq) | 1189 | static inline void idle_enter_fair(struct rq *rq) { } |
| 1187 | { | 1190 | static inline void idle_exit_fair(struct rq *rq) { } |
| 1188 | } | ||
| 1189 | 1191 | ||
| 1190 | #endif | 1192 | #endif |
| 1191 | 1193 | ||
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index fdb6bb0b3356..d6ce65dde541 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c | |||
| @@ -23,16 +23,19 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | |||
| 23 | /* we're never preempted */ | 23 | /* we're never preempted */ |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | static struct task_struct *pick_next_task_stop(struct rq *rq) | 26 | static struct task_struct * |
| 27 | pick_next_task_stop(struct rq *rq, struct task_struct *prev) | ||
| 27 | { | 28 | { |
| 28 | struct task_struct *stop = rq->stop; | 29 | struct task_struct *stop = rq->stop; |
| 29 | 30 | ||
| 30 | if (stop && stop->on_rq) { | 31 | if (!stop || !stop->on_rq) |
| 31 | stop->se.exec_start = rq_clock_task(rq); | 32 | return NULL; |
| 32 | return stop; | ||
| 33 | } | ||
| 34 | 33 | ||
| 35 | return NULL; | 34 | put_prev_task(rq, prev); |
| 35 | |||
| 36 | stop->se.exec_start = rq_clock_task(rq); | ||
| 37 | |||
| 38 | return stop; | ||
| 36 | } | 39 | } |
| 37 | 40 | ||
| 38 | static void | 41 | static void |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 84571e09c907..01fbae5b97b7 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -293,7 +293,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | |||
| 293 | */ | 293 | */ |
| 294 | smp_call_function_single(min(cpu1, cpu2), | 294 | smp_call_function_single(min(cpu1, cpu2), |
| 295 | &irq_cpu_stop_queue_work, | 295 | &irq_cpu_stop_queue_work, |
| 296 | &call_args, 0); | 296 | &call_args, 1); |
| 297 | lg_local_unlock(&stop_cpus_lock); | 297 | lg_local_unlock(&stop_cpus_lock); |
| 298 | preempt_enable(); | 298 | preempt_enable(); |
| 299 | 299 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index c0a58be780a4..adaeab6f7a87 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -174,10 +174,10 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) | |||
| 174 | 174 | ||
| 175 | /* normalize: avoid signed division (rounding problems) */ | 175 | /* normalize: avoid signed division (rounding problems) */ |
| 176 | error = -ESRCH; | 176 | error = -ESRCH; |
| 177 | if (niceval < -20) | 177 | if (niceval < MIN_NICE) |
| 178 | niceval = -20; | 178 | niceval = MIN_NICE; |
| 179 | if (niceval > 19) | 179 | if (niceval > MAX_NICE) |
| 180 | niceval = 19; | 180 | niceval = MAX_NICE; |
| 181 | 181 | ||
| 182 | rcu_read_lock(); | 182 | rcu_read_lock(); |
| 183 | read_lock(&tasklist_lock); | 183 | read_lock(&tasklist_lock); |
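MIN_NICE and MAX_NICE are the symbolic names for the historical -20/19 bounds, introduced alongside the nice/priority conversion macros that this series moves out of kernel/sched/sched.h. For illustration only (not part of the patch), the open-coded range check above could equally be written with the generic clamp() helper:

	niceval = clamp(niceval, MIN_NICE, MAX_NICE);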
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 49e13e1f8fe6..7754ff16f334 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -386,13 +386,6 @@ static struct ctl_table kern_table[] = { | |||
| 386 | .proc_handler = proc_dointvec, | 386 | .proc_handler = proc_dointvec, |
| 387 | }, | 387 | }, |
| 388 | { | 388 | { |
| 389 | .procname = "numa_balancing_migrate_deferred", | ||
| 390 | .data = &sysctl_numa_balancing_migrate_deferred, | ||
| 391 | .maxlen = sizeof(unsigned int), | ||
| 392 | .mode = 0644, | ||
| 393 | .proc_handler = proc_dointvec, | ||
| 394 | }, | ||
| 395 | { | ||
| 396 | .procname = "numa_balancing", | 389 | .procname = "numa_balancing", |
| 397 | .data = NULL, /* filled in by handler */ | 390 | .data = NULL, /* filled in by handler */ |
| 398 | .maxlen = sizeof(unsigned int), | 391 | .maxlen = sizeof(unsigned int), |
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 0abb36464281..4d23dc4d8139 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c | |||
| @@ -116,20 +116,42 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) | |||
| 116 | void __init sched_clock_register(u64 (*read)(void), int bits, | 116 | void __init sched_clock_register(u64 (*read)(void), int bits, |
| 117 | unsigned long rate) | 117 | unsigned long rate) |
| 118 | { | 118 | { |
| 119 | u64 res, wrap, new_mask, new_epoch, cyc, ns; | ||
| 120 | u32 new_mult, new_shift; | ||
| 121 | ktime_t new_wrap_kt; | ||
| 119 | unsigned long r; | 122 | unsigned long r; |
| 120 | u64 res, wrap; | ||
| 121 | char r_unit; | 123 | char r_unit; |
| 122 | 124 | ||
| 123 | if (cd.rate > rate) | 125 | if (cd.rate > rate) |
| 124 | return; | 126 | return; |
| 125 | 127 | ||
| 126 | WARN_ON(!irqs_disabled()); | 128 | WARN_ON(!irqs_disabled()); |
| 127 | read_sched_clock = read; | ||
| 128 | sched_clock_mask = CLOCKSOURCE_MASK(bits); | ||
| 129 | cd.rate = rate; | ||
| 130 | 129 | ||
| 131 | /* calculate the mult/shift to convert counter ticks to ns. */ | 130 | /* calculate the mult/shift to convert counter ticks to ns. */ |
| 132 | clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600); | 131 | clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); |
| 132 | |||
| 133 | new_mask = CLOCKSOURCE_MASK(bits); | ||
| 134 | |||
| 135 | /* calculate how many ns until we wrap */ | ||
| 136 | wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); | ||
| 137 | new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); | ||
| 138 | |||
| 139 | /* update epoch for new counter and update epoch_ns from old counter */ | ||
| 140 | new_epoch = read(); | ||
| 141 | cyc = read_sched_clock(); | ||
| 142 | ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, | ||
| 143 | cd.mult, cd.shift); | ||
| 144 | |||
| 145 | raw_write_seqcount_begin(&cd.seq); | ||
| 146 | read_sched_clock = read; | ||
| 147 | sched_clock_mask = new_mask; | ||
| 148 | cd.rate = rate; | ||
| 149 | cd.wrap_kt = new_wrap_kt; | ||
| 150 | cd.mult = new_mult; | ||
| 151 | cd.shift = new_shift; | ||
| 152 | cd.epoch_cyc = new_epoch; | ||
| 153 | cd.epoch_ns = ns; | ||
| 154 | raw_write_seqcount_end(&cd.seq); | ||
| 133 | 155 | ||
| 134 | r = rate; | 156 | r = rate; |
| 135 | if (r >= 4000000) { | 157 | if (r >= 4000000) { |
| @@ -141,22 +163,12 @@ void __init sched_clock_register(u64 (*read)(void), int bits, | |||
| 141 | } else | 163 | } else |
| 142 | r_unit = ' '; | 164 | r_unit = ' '; |
| 143 | 165 | ||
| 144 | /* calculate how many ns until we wrap */ | ||
| 145 | wrap = clocks_calc_max_nsecs(cd.mult, cd.shift, 0, sched_clock_mask); | ||
| 146 | cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); | ||
| 147 | |||
| 148 | /* calculate the ns resolution of this counter */ | 166 | /* calculate the ns resolution of this counter */ |
| 149 | res = cyc_to_ns(1ULL, cd.mult, cd.shift); | 167 | res = cyc_to_ns(1ULL, new_mult, new_shift); |
| 168 | |||
| 150 | pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", | 169 | pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", |
| 151 | bits, r, r_unit, res, wrap); | 170 | bits, r, r_unit, res, wrap); |
| 152 | 171 | ||
| 153 | update_sched_clock(); | ||
| 154 | |||
| 155 | /* | ||
| 156 | * Ensure that sched_clock() starts off at 0ns | ||
| 157 | */ | ||
| 158 | cd.epoch_ns = 0; | ||
| 159 | |||
| 160 | /* Enable IRQ time accounting if we have a fast enough sched_clock */ | 172 | /* Enable IRQ time accounting if we have a fast enough sched_clock */ |
| 161 | if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) | 173 | if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) |
| 162 | enable_sched_clock_irqtime(); | 174 | enable_sched_clock_irqtime(); |
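The epoch handover above leans on the fixed-point mult/shift pair produced by clocks_calc_mult_shift(): nanoseconds are recovered as cycles * mult >> shift, which is what the cyc_to_ns() helper called in this file does. A small standalone sketch of that conversion for a hypothetical 1 MHz counter, with illustrative mult/shift values only:

#include <stdint.h>
#include <stdio.h>

/* ns = cyc * (mult / 2^shift); mult and shift encode ns-per-tick */
static uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
	return (cyc * mult) >> shift;
}

int main(void)
{
	uint32_t mult = 1000u << 20;	/* 1 MHz counter: ~1000 ns per tick */
	uint32_t shift = 20;

	printf("%llu\n", (unsigned long long)cyc_to_ns(1, mult, shift));	/* 1000 */
	printf("%llu\n", (unsigned long long)cyc_to_ns(5000, mult, shift));	/* 5000000 */
	return 0;
}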
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0aa4ce81bc16..5b40279ecd71 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -1435,7 +1435,8 @@ void update_wall_time(void) | |||
| 1435 | out: | 1435 | out: |
| 1436 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 1436 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| 1437 | if (clock_set) | 1437 | if (clock_set) |
| 1438 | clock_was_set(); | 1438 | /* Have to call _delayed version, since in irq context */ |
| 1439 | clock_was_set_delayed(); | ||
| 1439 | } | 1440 | } |
| 1440 | 1441 | ||
| 1441 | /** | 1442 | /** |
diff --git a/kernel/timer.c b/kernel/timer.c index accfd241b9e5..d78de047599b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -52,7 +52,7 @@ | |||
| 52 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
| 53 | #include <trace/events/timer.h> | 53 | #include <trace/events/timer.h> |
| 54 | 54 | ||
| 55 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; | 55 | __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; |
| 56 | 56 | ||
| 57 | EXPORT_SYMBOL(jiffies_64); | 57 | EXPORT_SYMBOL(jiffies_64); |
| 58 | 58 | ||
diff --git a/kernel/torture.c b/kernel/torture.c new file mode 100644 index 000000000000..acc9afc2f26e --- /dev/null +++ b/kernel/torture.c | |||
| @@ -0,0 +1,719 @@ | |||
| 1 | /* | ||
| 2 | * Common functions for in-kernel torture tests. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, you can access it online at | ||
| 16 | * http://www.gnu.org/licenses/gpl-2.0.html. | ||
| 17 | * | ||
| 18 | * Copyright (C) IBM Corporation, 2014 | ||
| 19 | * | ||
| 20 | * Author: Paul E. McKenney <paulmck@us.ibm.com> | ||
| 21 | * Based on kernel/rcu/torture.c. | ||
| 22 | */ | ||
| 23 | #include <linux/types.h> | ||
| 24 | #include <linux/kernel.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/module.h> | ||
| 27 | #include <linux/kthread.h> | ||
| 28 | #include <linux/err.h> | ||
| 29 | #include <linux/spinlock.h> | ||
| 30 | #include <linux/smp.h> | ||
| 31 | #include <linux/interrupt.h> | ||
| 32 | #include <linux/sched.h> | ||
| 33 | #include <linux/atomic.h> | ||
| 34 | #include <linux/bitops.h> | ||
| 35 | #include <linux/completion.h> | ||
| 36 | #include <linux/moduleparam.h> | ||
| 37 | #include <linux/percpu.h> | ||
| 38 | #include <linux/notifier.h> | ||
| 39 | #include <linux/reboot.h> | ||
| 40 | #include <linux/freezer.h> | ||
| 41 | #include <linux/cpu.h> | ||
| 42 | #include <linux/delay.h> | ||
| 43 | #include <linux/stat.h> | ||
| 44 | #include <linux/slab.h> | ||
| 45 | #include <linux/trace_clock.h> | ||
| 46 | #include <asm/byteorder.h> | ||
| 47 | #include <linux/torture.h> | ||
| 48 | |||
| 49 | MODULE_LICENSE("GPL"); | ||
| 50 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>"); | ||
| 51 | |||
| 52 | static char *torture_type; | ||
| 53 | static bool verbose; | ||
| 54 | |||
| 55 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ | ||
| 56 | #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ | ||
| 57 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with torture running. */ | ||
| 58 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */ | ||
| 59 | static int fullstop = FULLSTOP_RMMOD; | ||
| 60 | static DEFINE_MUTEX(fullstop_mutex); | ||
| 61 | static int *torture_runnable; | ||
| 62 | |||
| 63 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Variables for online-offline handling. Only present if CPU hotplug | ||
| 67 | * is enabled, otherwise does nothing. | ||
| 68 | */ | ||
| 69 | |||
| 70 | static struct task_struct *onoff_task; | ||
| 71 | static long onoff_holdoff; | ||
| 72 | static long onoff_interval; | ||
| 73 | static long n_offline_attempts; | ||
| 74 | static long n_offline_successes; | ||
| 75 | static unsigned long sum_offline; | ||
| 76 | static int min_offline = -1; | ||
| 77 | static int max_offline; | ||
| 78 | static long n_online_attempts; | ||
| 79 | static long n_online_successes; | ||
| 80 | static unsigned long sum_online; | ||
| 81 | static int min_online = -1; | ||
| 82 | static int max_online; | ||
| 83 | |||
| 84 | /* | ||
| 85 | * Execute random CPU-hotplug operations at the interval specified | ||
| 86 | * by the onoff_interval. | ||
| 87 | */ | ||
| 88 | static int | ||
| 89 | torture_onoff(void *arg) | ||
| 90 | { | ||
| 91 | int cpu; | ||
| 92 | unsigned long delta; | ||
| 93 | int maxcpu = -1; | ||
| 94 | DEFINE_TORTURE_RANDOM(rand); | ||
| 95 | int ret; | ||
| 96 | unsigned long starttime; | ||
| 97 | |||
| 98 | VERBOSE_TOROUT_STRING("torture_onoff task started"); | ||
| 99 | for_each_online_cpu(cpu) | ||
| 100 | maxcpu = cpu; | ||
| 101 | WARN_ON(maxcpu < 0); | ||
| 102 | if (onoff_holdoff > 0) { | ||
| 103 | VERBOSE_TOROUT_STRING("torture_onoff begin holdoff"); | ||
| 104 | schedule_timeout_interruptible(onoff_holdoff); | ||
| 105 | VERBOSE_TOROUT_STRING("torture_onoff end holdoff"); | ||
| 106 | } | ||
| 107 | while (!torture_must_stop()) { | ||
| 108 | cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); | ||
| 109 | if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { | ||
| 110 | if (verbose) | ||
| 111 | pr_alert("%s" TORTURE_FLAG | ||
| 112 | "torture_onoff task: offlining %d\n", | ||
| 113 | torture_type, cpu); | ||
| 114 | starttime = jiffies; | ||
| 115 | n_offline_attempts++; | ||
| 116 | ret = cpu_down(cpu); | ||
| 117 | if (ret) { | ||
| 118 | if (verbose) | ||
| 119 | pr_alert("%s" TORTURE_FLAG | ||
| 120 | "torture_onoff task: offline %d failed: errno %d\n", | ||
| 121 | torture_type, cpu, ret); | ||
| 122 | } else { | ||
| 123 | if (verbose) | ||
| 124 | pr_alert("%s" TORTURE_FLAG | ||
| 125 | "torture_onoff task: offlined %d\n", | ||
| 126 | torture_type, cpu); | ||
| 127 | n_offline_successes++; | ||
| 128 | delta = jiffies - starttime; | ||
| 129 | sum_offline += delta; | ||
| 130 | if (min_offline < 0) { | ||
| 131 | min_offline = delta; | ||
| 132 | max_offline = delta; | ||
| 133 | } | ||
| 134 | if (min_offline > delta) | ||
| 135 | min_offline = delta; | ||
| 136 | if (max_offline < delta) | ||
| 137 | max_offline = delta; | ||
| 138 | } | ||
| 139 | } else if (cpu_is_hotpluggable(cpu)) { | ||
| 140 | if (verbose) | ||
| 141 | pr_alert("%s" TORTURE_FLAG | ||
| 142 | "torture_onoff task: onlining %d\n", | ||
| 143 | torture_type, cpu); | ||
| 144 | starttime = jiffies; | ||
| 145 | n_online_attempts++; | ||
| 146 | ret = cpu_up(cpu); | ||
| 147 | if (ret) { | ||
| 148 | if (verbose) | ||
| 149 | pr_alert("%s" TORTURE_FLAG | ||
| 150 | "torture_onoff task: online %d failed: errno %d\n", | ||
| 151 | torture_type, cpu, ret); | ||
| 152 | } else { | ||
| 153 | if (verbose) | ||
| 154 | pr_alert("%s" TORTURE_FLAG | ||
| 155 | "torture_onoff task: onlined %d\n", | ||
| 156 | torture_type, cpu); | ||
| 157 | n_online_successes++; | ||
| 158 | delta = jiffies - starttime; | ||
| 159 | sum_online += delta; | ||
| 160 | if (min_online < 0) { | ||
| 161 | min_online = delta; | ||
| 162 | max_online = delta; | ||
| 163 | } | ||
| 164 | if (min_online > delta) | ||
| 165 | min_online = delta; | ||
| 166 | if (max_online < delta) | ||
| 167 | max_online = delta; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | schedule_timeout_interruptible(onoff_interval); | ||
| 171 | } | ||
| 172 | torture_kthread_stopping("torture_onoff"); | ||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 177 | |||
| 178 | /* | ||
| 179 | * Initiate online-offline handling. | ||
| 180 | */ | ||
| 181 | int torture_onoff_init(long ooholdoff, long oointerval) | ||
| 182 | { | ||
| 183 | int ret = 0; | ||
| 184 | |||
| 185 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 186 | onoff_holdoff = ooholdoff; | ||
| 187 | onoff_interval = oointerval; | ||
| 188 | if (onoff_interval <= 0) | ||
| 189 | return 0; | ||
| 190 | ret = torture_create_kthread(torture_onoff, NULL, onoff_task); | ||
| 191 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 192 | return ret; | ||
| 193 | } | ||
| 194 | EXPORT_SYMBOL_GPL(torture_onoff_init); | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Clean up after online/offline testing. | ||
| 198 | */ | ||
| 199 | static void torture_onoff_cleanup(void) | ||
| 200 | { | ||
| 201 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 202 | if (onoff_task == NULL) | ||
| 203 | return; | ||
| 204 | VERBOSE_TOROUT_STRING("Stopping torture_onoff task"); | ||
| 205 | kthread_stop(onoff_task); | ||
| 206 | onoff_task = NULL; | ||
| 207 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 208 | } | ||
| 209 | EXPORT_SYMBOL_GPL(torture_onoff_cleanup); | ||
| 210 | |||
| 211 | /* | ||
| 212 | * Print online/offline testing statistics. | ||
| 213 | */ | ||
| 214 | char *torture_onoff_stats(char *page) | ||
| 215 | { | ||
| 216 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 217 | page += sprintf(page, | ||
| 218 | "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", | ||
| 219 | n_online_successes, n_online_attempts, | ||
| 220 | n_offline_successes, n_offline_attempts, | ||
| 221 | min_online, max_online, | ||
| 222 | min_offline, max_offline, | ||
| 223 | sum_online, sum_offline, HZ); | ||
| 224 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 225 | return page; | ||
| 226 | } | ||
| 227 | EXPORT_SYMBOL_GPL(torture_onoff_stats); | ||
| 228 | |||
| 229 | /* | ||
| 230 | * Were all the online/offline operations successful? | ||
| 231 | */ | ||
| 232 | bool torture_onoff_failures(void) | ||
| 233 | { | ||
| 234 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 235 | return n_online_successes != n_online_attempts || | ||
| 236 | n_offline_successes != n_offline_attempts; | ||
| 237 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 238 | return false; | ||
| 239 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 240 | } | ||
| 241 | EXPORT_SYMBOL_GPL(torture_onoff_failures); | ||
| 242 | |||
| 243 | #define TORTURE_RANDOM_MULT 39916801 /* prime */ | ||
| 244 | #define TORTURE_RANDOM_ADD 479001701 /* prime */ | ||
| 245 | #define TORTURE_RANDOM_REFRESH 10000 | ||
| 246 | |||
| 247 | /* | ||
| 248 | * Crude but fast random-number generator. Uses a linear congruential | ||
| 249 | * generator, with occasional help from cpu_clock(). | ||
| 250 | */ | ||
| 251 | unsigned long | ||
| 252 | torture_random(struct torture_random_state *trsp) | ||
| 253 | { | ||
| 254 | if (--trsp->trs_count < 0) { | ||
| 255 | trsp->trs_state += (unsigned long)local_clock(); | ||
| 256 | trsp->trs_count = TORTURE_RANDOM_REFRESH; | ||
| 257 | } | ||
| 258 | trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT + | ||
| 259 | TORTURE_RANDOM_ADD; | ||
| 260 | return swahw32(trsp->trs_state); | ||
| 261 | } | ||
| 262 | EXPORT_SYMBOL_GPL(torture_random); | ||
| 263 | |||
| 264 | /* | ||
| 265 | * Variables for shuffling. The idea is to ensure that each CPU stays | ||
| 266 | * idle for an extended period to test interactions with dyntick idle, | ||
| 267 | * as well as interactions with any per-CPU variables. | ||
| 268 | */ | ||
| 269 | struct shuffle_task { | ||
| 270 | struct list_head st_l; | ||
| 271 | struct task_struct *st_t; | ||
| 272 | }; | ||
| 273 | |||
| 274 | static long shuffle_interval; /* In jiffies. */ | ||
| 275 | static struct task_struct *shuffler_task; | ||
| 276 | static cpumask_var_t shuffle_tmp_mask; | ||
| 277 | static int shuffle_idle_cpu; /* Force all torture tasks off this CPU */ | ||
| 278 | static struct list_head shuffle_task_list = LIST_HEAD_INIT(shuffle_task_list); | ||
| 279 | static DEFINE_MUTEX(shuffle_task_mutex); | ||
| 280 | |||
| 281 | /* | ||
| 282 | * Register a task to be shuffled. If there is no memory, just splat | ||
| 283 | * and don't bother registering. | ||
| 284 | */ | ||
| 285 | void torture_shuffle_task_register(struct task_struct *tp) | ||
| 286 | { | ||
| 287 | struct shuffle_task *stp; | ||
| 288 | |||
| 289 | if (WARN_ON_ONCE(tp == NULL)) | ||
| 290 | return; | ||
| 291 | stp = kmalloc(sizeof(*stp), GFP_KERNEL); | ||
| 292 | if (WARN_ON_ONCE(stp == NULL)) | ||
| 293 | return; | ||
| 294 | stp->st_t = tp; | ||
| 295 | mutex_lock(&shuffle_task_mutex); | ||
| 296 | list_add(&stp->st_l, &shuffle_task_list); | ||
| 297 | mutex_unlock(&shuffle_task_mutex); | ||
| 298 | } | ||
| 299 | EXPORT_SYMBOL_GPL(torture_shuffle_task_register); | ||
| 300 | |||
| 301 | /* | ||
| 302 | * Unregister all tasks, for example, at the end of the torture run. | ||
| 303 | */ | ||
| 304 | static void torture_shuffle_task_unregister_all(void) | ||
| 305 | { | ||
| 306 | struct shuffle_task *stp; | ||
| 307 | struct shuffle_task *p; | ||
| 308 | |||
| 309 | mutex_lock(&shuffle_task_mutex); | ||
| 310 | list_for_each_entry_safe(stp, p, &shuffle_task_list, st_l) { | ||
| 311 | list_del(&stp->st_l); | ||
| 312 | kfree(stp); | ||
| 313 | } | ||
| 314 | mutex_unlock(&shuffle_task_mutex); | ||
| 315 | } | ||
| 316 | |||
| 317 | /* Shuffle tasks such that we allow shuffle_idle_cpu to become idle. | ||
| 318 | * A special case is when shuffle_idle_cpu = -1, in which case we allow | ||
| 319 | * the tasks to run on all CPUs. | ||
| 320 | */ | ||
| 321 | static void torture_shuffle_tasks(void) | ||
| 322 | { | ||
| 323 | struct shuffle_task *stp; | ||
| 324 | |||
| 325 | cpumask_setall(shuffle_tmp_mask); | ||
| 326 | get_online_cpus(); | ||
| 327 | |||
| 328 | /* No point in shuffling if there is only one online CPU (ex: UP) */ | ||
| 329 | if (num_online_cpus() == 1) { | ||
| 330 | put_online_cpus(); | ||
| 331 | return; | ||
| 332 | } | ||
| 333 | |||
| 334 | /* Advance to the next CPU. Upon overflow, don't idle any CPUs. */ | ||
| 335 | shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask); | ||
| 336 | if (shuffle_idle_cpu >= nr_cpu_ids) | ||
| 337 | shuffle_idle_cpu = -1; | ||
| 338 | if (shuffle_idle_cpu != -1) { | ||
| 339 | cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask); | ||
| 340 | if (cpumask_empty(shuffle_tmp_mask)) { | ||
| 341 | put_online_cpus(); | ||
| 342 | return; | ||
| 343 | } | ||
| 344 | } | ||
| 345 | |||
| 346 | mutex_lock(&shuffle_task_mutex); | ||
| 347 | list_for_each_entry(stp, &shuffle_task_list, st_l) | ||
| 348 | set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask); | ||
| 349 | mutex_unlock(&shuffle_task_mutex); | ||
| 350 | |||
| 351 | put_online_cpus(); | ||
| 352 | } | ||
| 353 | |||
| 354 | /* Shuffle tasks across CPUs, with the intent of allowing each CPU in the | ||
| 355 | * system to become idle at a time and cut off its timer ticks. This is meant | ||
| 356 | * to test the support for such tickless idle CPUs in RCU. | ||
| 357 | */ | ||
| 358 | static int torture_shuffle(void *arg) | ||
| 359 | { | ||
| 360 | VERBOSE_TOROUT_STRING("torture_shuffle task started"); | ||
| 361 | do { | ||
| 362 | schedule_timeout_interruptible(shuffle_interval); | ||
| 363 | torture_shuffle_tasks(); | ||
| 364 | torture_shutdown_absorb("torture_shuffle"); | ||
| 365 | } while (!torture_must_stop()); | ||
| 366 | torture_kthread_stopping("torture_shuffle"); | ||
| 367 | return 0; | ||
| 368 | } | ||
| 369 | |||
| 370 | /* | ||
| 371 | * Start the shuffler, with shuffint in jiffies. | ||
| 372 | */ | ||
| 373 | int torture_shuffle_init(long shuffint) | ||
| 374 | { | ||
| 375 | shuffle_interval = shuffint; | ||
| 376 | |||
| 377 | shuffle_idle_cpu = -1; | ||
| 378 | |||
| 379 | if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { | ||
| 380 | VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask"); | ||
| 381 | return -ENOMEM; | ||
| 382 | } | ||
| 383 | |||
| 384 | /* Create the shuffler thread */ | ||
| 385 | return torture_create_kthread(torture_shuffle, NULL, shuffler_task); | ||
| 386 | } | ||
| 387 | EXPORT_SYMBOL_GPL(torture_shuffle_init); | ||
| 388 | |||
| 389 | /* | ||
| 390 | * Stop the shuffling. | ||
| 391 | */ | ||
| 392 | static void torture_shuffle_cleanup(void) | ||
| 393 | { | ||
| 394 | torture_shuffle_task_unregister_all(); | ||
| 395 | if (shuffler_task) { | ||
| 396 | VERBOSE_TOROUT_STRING("Stopping torture_shuffle task"); | ||
| 397 | kthread_stop(shuffler_task); | ||
| 398 | free_cpumask_var(shuffle_tmp_mask); | ||
| 399 | } | ||
| 400 | shuffler_task = NULL; | ||
| 401 | } | ||
| 402 | EXPORT_SYMBOL_GPL(torture_shuffle_cleanup); | ||
| 403 | |||
| 404 | /* | ||
| 405 | * Variables for auto-shutdown. This allows "lights out" torture runs | ||
| 406 | * to be fully scripted. | ||
| 407 | */ | ||
| 408 | static int shutdown_secs; /* desired test duration in seconds. */ | ||
| 409 | static struct task_struct *shutdown_task; | ||
| 410 | static unsigned long shutdown_time; /* jiffies to system shutdown. */ | ||
| 411 | static void (*torture_shutdown_hook)(void); | ||
| 412 | |||
| 413 | /* | ||
| 414 | * Absorb kthreads into a kernel function that won't return, so that | ||
| 415 | * they won't ever access module text or data again. | ||
| 416 | */ | ||
| 417 | void torture_shutdown_absorb(const char *title) | ||
| 418 | { | ||
| 419 | while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { | ||
| 420 | pr_notice("torture thread %s parking due to system shutdown\n", | ||
| 421 | title); | ||
| 422 | schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); | ||
| 423 | } | ||
| 424 | } | ||
| 425 | EXPORT_SYMBOL_GPL(torture_shutdown_absorb); | ||
| 426 | |||
| 427 | /* | ||
| 428 | * Cause the torture test to shut down the system after the test has | ||
| 429 | * run for the time specified by the shutdown_secs parameter. | ||
| 430 | */ | ||
| 431 | static int torture_shutdown(void *arg) | ||
| 432 | { | ||
| 433 | long delta; | ||
| 434 | unsigned long jiffies_snap; | ||
| 435 | |||
| 436 | VERBOSE_TOROUT_STRING("torture_shutdown task started"); | ||
| 437 | jiffies_snap = jiffies; | ||
| 438 | while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && | ||
| 439 | !torture_must_stop()) { | ||
| 440 | delta = shutdown_time - jiffies_snap; | ||
| 441 | if (verbose) | ||
| 442 | pr_alert("%s" TORTURE_FLAG | ||
| 443 | "torture_shutdown task: %lu jiffies remaining\n", | ||
| 444 | torture_type, delta); | ||
| 445 | schedule_timeout_interruptible(delta); | ||
| 446 | jiffies_snap = jiffies; | ||
| 447 | } | ||
| 448 | if (torture_must_stop()) { | ||
| 449 | torture_kthread_stopping("torture_shutdown"); | ||
| 450 | return 0; | ||
| 451 | } | ||
| 452 | |||
| 453 | /* OK, shut down the system. */ | ||
| 454 | |||
| 455 | VERBOSE_TOROUT_STRING("torture_shutdown task shutting down system"); | ||
| 456 | shutdown_task = NULL; /* Avoid self-kill deadlock. */ | ||
| 457 | if (torture_shutdown_hook) | ||
| 458 | torture_shutdown_hook(); | ||
| 459 | else | ||
| 460 | VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping."); | ||
| 461 | kernel_power_off(); /* Shut down the system. */ | ||
| 462 | return 0; | ||
| 463 | } | ||
| 464 | |||
| 465 | /* | ||
| 466 | * Start up the shutdown task. | ||
| 467 | */ | ||
| 468 | int torture_shutdown_init(int ssecs, void (*cleanup)(void)) | ||
| 469 | { | ||
| 470 | int ret = 0; | ||
| 471 | |||
| 472 | shutdown_secs = ssecs; | ||
| 473 | torture_shutdown_hook = cleanup; | ||
| 474 | if (shutdown_secs > 0) { | ||
| 475 | shutdown_time = jiffies + shutdown_secs * HZ; | ||
| 476 | ret = torture_create_kthread(torture_shutdown, NULL, | ||
| 477 | shutdown_task); | ||
| 478 | } | ||
| 479 | return ret; | ||
| 480 | } | ||
| 481 | EXPORT_SYMBOL_GPL(torture_shutdown_init); | ||
| 482 | |||
| 483 | /* | ||
| 484 | * Detect and respond to a system shutdown. | ||
| 485 | */ | ||
| 486 | static int torture_shutdown_notify(struct notifier_block *unused1, | ||
| 487 | unsigned long unused2, void *unused3) | ||
| 488 | { | ||
| 489 | mutex_lock(&fullstop_mutex); | ||
| 490 | if (ACCESS_ONCE(fullstop) == FULLSTOP_DONTSTOP) { | ||
| 491 | VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected"); | ||
| 492 | ACCESS_ONCE(fullstop) = FULLSTOP_SHUTDOWN; | ||
| 493 | } else { | ||
| 494 | pr_warn("Concurrent rmmod and shutdown illegal!\n"); | ||
| 495 | } | ||
| 496 | mutex_unlock(&fullstop_mutex); | ||
| 497 | return NOTIFY_DONE; | ||
| 498 | } | ||
| 499 | |||
| 500 | static struct notifier_block torture_shutdown_nb = { | ||
| 501 | .notifier_call = torture_shutdown_notify, | ||
| 502 | }; | ||
| 503 | |||
| 504 | /* | ||
| 505 | * Shut down the shutdown task. Say what??? Heh! This can happen if | ||
| 506 | * the torture module gets an rmmod before the shutdown time arrives. ;-) | ||
| 507 | */ | ||
| 508 | static void torture_shutdown_cleanup(void) | ||
| 509 | { | ||
| 510 | unregister_reboot_notifier(&torture_shutdown_nb); | ||
| 511 | if (shutdown_task != NULL) { | ||
| 512 | VERBOSE_TOROUT_STRING("Stopping torture_shutdown task"); | ||
| 513 | kthread_stop(shutdown_task); | ||
| 514 | } | ||
| 515 | shutdown_task = NULL; | ||
| 516 | } | ||
| 517 | |||
| 518 | /* | ||
| 519 | * Variables for stuttering, which means to periodically pause and | ||
| 520 | * restart testing in order to catch bugs that appear when load is | ||
| 521 | * suddenly applied to or removed from the system. | ||
| 522 | */ | ||
| 523 | static struct task_struct *stutter_task; | ||
| 524 | static int stutter_pause_test; | ||
| 525 | static int stutter; | ||
| 526 | |||
| 527 | /* | ||
| 528 | * Block until the stutter interval ends. This must be called periodically | ||
| 529 | * by all running kthreads that need to be subject to stuttering. | ||
| 530 | */ | ||
| 531 | void stutter_wait(const char *title) | ||
| 532 | { | ||
| 533 | while (ACCESS_ONCE(stutter_pause_test) || | ||
| 534 | (torture_runnable && !ACCESS_ONCE(*torture_runnable))) { | ||
| 535 | if (stutter_pause_test) | ||
| 536 | schedule_timeout_interruptible(1); | ||
| 537 | else | ||
| 538 | schedule_timeout_interruptible(round_jiffies_relative(HZ)); | ||
| 539 | torture_shutdown_absorb(title); | ||
| 540 | } | ||
| 541 | } | ||
| 542 | EXPORT_SYMBOL_GPL(stutter_wait); | ||
| 543 | |||
| 544 | /* | ||
| 545 | * Cause the torture test to "stutter", starting and stopping all | ||
| 546 | * threads periodically. | ||
| 547 | */ | ||
| 548 | static int torture_stutter(void *arg) | ||
| 549 | { | ||
| 550 | VERBOSE_TOROUT_STRING("torture_stutter task started"); | ||
| 551 | do { | ||
| 552 | if (!torture_must_stop()) { | ||
| 553 | schedule_timeout_interruptible(stutter); | ||
| 554 | ACCESS_ONCE(stutter_pause_test) = 1; | ||
| 555 | } | ||
| 556 | if (!torture_must_stop()) | ||
| 557 | schedule_timeout_interruptible(stutter); | ||
| 558 | ACCESS_ONCE(stutter_pause_test) = 0; | ||
| 559 | torture_shutdown_absorb("torture_stutter"); | ||
| 560 | } while (!torture_must_stop()); | ||
| 561 | torture_kthread_stopping("torture_stutter"); | ||
| 562 | return 0; | ||
| 563 | } | ||
| 564 | |||
| 565 | /* | ||
| 566 | * Initialize and kick off the torture_stutter kthread. | ||
| 567 | */ | ||
| 568 | int torture_stutter_init(int s) | ||
| 569 | { | ||
| 570 | int ret; | ||
| 571 | |||
| 572 | stutter = s; | ||
| 573 | ret = torture_create_kthread(torture_stutter, NULL, stutter_task); | ||
| 574 | return ret; | ||
| 575 | } | ||
| 576 | EXPORT_SYMBOL_GPL(torture_stutter_init); | ||
| 577 | |||
| 578 | /* | ||
| 579 | * Cleanup after the torture_stutter kthread. | ||
| 580 | */ | ||
| 581 | static void torture_stutter_cleanup(void) | ||
| 582 | { | ||
| 583 | if (!stutter_task) | ||
| 584 | return; | ||
| 585 | VERBOSE_TOROUT_STRING("Stopping torture_stutter task"); | ||
| 586 | kthread_stop(stutter_task); | ||
| 587 | stutter_task = NULL; | ||
| 588 | } | ||
| 589 | |||
| 590 | /* | ||
| 591 | * Initialize torture module. Please note that this is -not- invoked via | ||
| 592 | * the usual module_init() mechanism, but rather by an explicit call from | ||
| 593 | * the client torture module. This call must be paired with a later | ||
| 594 | * torture_init_end(). | ||
| 595 | * | ||
| 596 | * The runnable parameter points to a flag that controls whether or not | ||
| 597 | * the test is currently runnable. If there is no such flag, pass in NULL. | ||
| 598 | */ | ||
| 599 | void __init torture_init_begin(char *ttype, bool v, int *runnable) | ||
| 600 | { | ||
| 601 | mutex_lock(&fullstop_mutex); | ||
| 602 | torture_type = ttype; | ||
| 603 | verbose = v; | ||
| 604 | torture_runnable = runnable; | ||
| 605 | fullstop = FULLSTOP_DONTSTOP; | ||
| 606 | |||
| 607 | } | ||
| 608 | EXPORT_SYMBOL_GPL(torture_init_begin); | ||
| 609 | |||
| 610 | /* | ||
| 611 | * Tell the torture module that initialization is complete. | ||
| 612 | */ | ||
| 613 | void __init torture_init_end(void) | ||
| 614 | { | ||
| 615 | mutex_unlock(&fullstop_mutex); | ||
| 616 | register_reboot_notifier(&torture_shutdown_nb); | ||
| 617 | } | ||
| 618 | EXPORT_SYMBOL_GPL(torture_init_end); | ||
| 619 | |||
| 620 | /* | ||
| 621 | * Clean up torture module. Please note that this is -not- invoked via | ||
| 622 | * the usual module_exit() mechanism, but rather by an explicit call from | ||
| 623 | * the client torture module. Returns true if a race with system shutdown | ||
| 624 | * is detected, otherwise, all kthreads started by functions in this file | ||
| 625 | * will be shut down. | ||
| 626 | * | ||
| 627 | * This must be called before the caller starts shutting down its own | ||
| 628 | * kthreads. | ||
| 629 | */ | ||
| 630 | bool torture_cleanup(void) | ||
| 631 | { | ||
| 632 | mutex_lock(&fullstop_mutex); | ||
| 633 | if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { | ||
| 634 | pr_warn("Concurrent rmmod and shutdown illegal!\n"); | ||
| 635 | mutex_unlock(&fullstop_mutex); | ||
| 636 | schedule_timeout_uninterruptible(10); | ||
| 637 | return true; | ||
| 638 | } | ||
| 639 | ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD; | ||
| 640 | mutex_unlock(&fullstop_mutex); | ||
| 641 | torture_shutdown_cleanup(); | ||
| 642 | torture_shuffle_cleanup(); | ||
| 643 | torture_stutter_cleanup(); | ||
| 644 | torture_onoff_cleanup(); | ||
| 645 | return false; | ||
| 646 | } | ||
| 647 | EXPORT_SYMBOL_GPL(torture_cleanup); | ||
| 648 | |||
| 649 | /* | ||
| 650 | * Is it time for the current torture test to stop? | ||
| 651 | */ | ||
| 652 | bool torture_must_stop(void) | ||
| 653 | { | ||
| 654 | return torture_must_stop_irq() || kthread_should_stop(); | ||
| 655 | } | ||
| 656 | EXPORT_SYMBOL_GPL(torture_must_stop); | ||
| 657 | |||
| 658 | /* | ||
| 659 | * Is it time for the current torture test to stop? This is the irq-safe | ||
| 660 | * version, hence no check for kthread_should_stop(). | ||
| 661 | */ | ||
| 662 | bool torture_must_stop_irq(void) | ||
| 663 | { | ||
| 664 | return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP; | ||
| 665 | } | ||
| 666 | EXPORT_SYMBOL_GPL(torture_must_stop_irq); | ||
| 667 | |||
| 668 | /* | ||
| 669 | * Each kthread must wait for kthread_should_stop() before returning from | ||
| 670 | * its top-level function, otherwise segfaults ensue. This function | ||
| 671 | * prints a "stopping" message and waits for kthread_should_stop(), and | ||
| 672 | * should be called from all torture kthreads immediately prior to | ||
| 673 | * returning. | ||
| 674 | */ | ||
| 675 | void torture_kthread_stopping(char *title) | ||
| 676 | { | ||
| 677 | if (verbose) | ||
| 678 | VERBOSE_TOROUT_STRING(title); | ||
| 679 | while (!kthread_should_stop()) { | ||
| 680 | torture_shutdown_absorb(title); | ||
| 681 | schedule_timeout_uninterruptible(1); | ||
| 682 | } | ||
| 683 | } | ||
| 684 | EXPORT_SYMBOL_GPL(torture_kthread_stopping); | ||
| 685 | |||
| 686 | /* | ||
| 687 | * Create a generic torture kthread that is immediately runnable. If you | ||
| 688 | * need the kthread to be stopped so that you can do something to it before | ||
| 689 | * it starts, you will need to open-code your own. | ||
| 690 | */ | ||
| 691 | int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, | ||
| 692 | char *f, struct task_struct **tp) | ||
| 693 | { | ||
| 694 | int ret = 0; | ||
| 695 | |||
| 696 | VERBOSE_TOROUT_STRING(m); | ||
| 697 | *tp = kthread_run(fn, arg, s); | ||
| 698 | if (IS_ERR(*tp)) { | ||
| 699 | ret = PTR_ERR(*tp); | ||
| 700 | VERBOSE_TOROUT_ERRSTRING(f); | ||
| 701 | *tp = NULL; | ||
| 702 | } | ||
| 703 | torture_shuffle_task_register(*tp); | ||
| 704 | return ret; | ||
| 705 | } | ||
| 706 | EXPORT_SYMBOL_GPL(_torture_create_kthread); | ||
| 707 | |||
| 708 | /* | ||
| 709 | * Stop a generic kthread, emitting a message. | ||
| 710 | */ | ||
| 711 | void _torture_stop_kthread(char *m, struct task_struct **tp) | ||
| 712 | { | ||
| 713 | if (*tp == NULL) | ||
| 714 | return; | ||
| 715 | VERBOSE_TOROUT_STRING(m); | ||
| 716 | kthread_stop(*tp); | ||
| 717 | *tp = NULL; | ||
| 718 | } | ||
| 719 | EXPORT_SYMBOL_GPL(_torture_stop_kthread); | ||
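The exports above form a small lifecycle API for client torture modules (rcutorture, for example): begin/end initialization under fullstop_mutex, optional onoff/shuffle/shutdown/stutter helpers, and a matching cleanup on rmmod. The sketch below shows how a client's init and exit paths might wire this together; the module name, runnable flag, and parameter values are purely illustrative and not taken from an actual client.

static int my_torture_runnable = 1;

static void my_torture_shutdown_hook(void)
{
	/* client-specific teardown before kernel_power_off() */
}

static int __init my_torture_init(void)
{
	int ret;

	torture_init_begin("my_torture", true, &my_torture_runnable);

	ret = torture_onoff_init(30 * HZ, 3 * HZ);	/* holdoff, interval (jiffies) */
	if (!ret)
		ret = torture_shuffle_init(3 * HZ);
	if (!ret)
		ret = torture_shutdown_init(60, my_torture_shutdown_hook); /* seconds */
	if (!ret)
		ret = torture_stutter_init(5 * HZ);

	torture_init_end();
	return ret;
}

static void __exit my_torture_exit(void)
{
	if (torture_cleanup())
		return;	/* raced with system shutdown; kthreads already parked */
	/* stop any client-created kthreads with _torture_stop_kthread() here */
}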
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index a5457d577b98..0434ff1b808e 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
| @@ -40,8 +40,8 @@ static int write_iteration = 50; | |||
| 40 | module_param(write_iteration, uint, 0644); | 40 | module_param(write_iteration, uint, 0644); |
| 41 | MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); | 41 | MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); |
| 42 | 42 | ||
| 43 | static int producer_nice = 19; | 43 | static int producer_nice = MAX_NICE; |
| 44 | static int consumer_nice = 19; | 44 | static int consumer_nice = MAX_NICE; |
| 45 | 45 | ||
| 46 | static int producer_fifo = -1; | 46 | static int producer_fifo = -1; |
| 47 | static int consumer_fifo = -1; | 47 | static int consumer_fifo = -1; |
| @@ -308,7 +308,7 @@ static void ring_buffer_producer(void) | |||
| 308 | 308 | ||
| 309 | /* Let the user know that the test is running at low priority */ | 309 | /* Let the user know that the test is running at low priority */ |
| 310 | if (producer_fifo < 0 && consumer_fifo < 0 && | 310 | if (producer_fifo < 0 && consumer_fifo < 0 && |
| 311 | producer_nice == 19 && consumer_nice == 19) | 311 | producer_nice == MAX_NICE && consumer_nice == MAX_NICE) |
| 312 | trace_printk("WARNING!!! This test is running at lowest priority.\n"); | 312 | trace_printk("WARNING!!! This test is running at lowest priority.\n"); |
| 313 | 313 | ||
| 314 | trace_printk("Time: %lld (usecs)\n", time); | 314 | trace_printk("Time: %lld (usecs)\n", time); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 815c878f409b..24c1f2382557 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -1600,15 +1600,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer, | |||
| 1600 | } | 1600 | } |
| 1601 | EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); | 1601 | EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); |
| 1602 | 1602 | ||
| 1603 | static struct ring_buffer *temp_buffer; | ||
| 1604 | |||
| 1603 | struct ring_buffer_event * | 1605 | struct ring_buffer_event * |
| 1604 | trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, | 1606 | trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, |
| 1605 | struct ftrace_event_file *ftrace_file, | 1607 | struct ftrace_event_file *ftrace_file, |
| 1606 | int type, unsigned long len, | 1608 | int type, unsigned long len, |
| 1607 | unsigned long flags, int pc) | 1609 | unsigned long flags, int pc) |
| 1608 | { | 1610 | { |
| 1611 | struct ring_buffer_event *entry; | ||
| 1612 | |||
| 1609 | *current_rb = ftrace_file->tr->trace_buffer.buffer; | 1613 | *current_rb = ftrace_file->tr->trace_buffer.buffer; |
| 1610 | return trace_buffer_lock_reserve(*current_rb, | 1614 | entry = trace_buffer_lock_reserve(*current_rb, |
| 1611 | type, len, flags, pc); | 1615 | type, len, flags, pc); |
| 1616 | /* | ||
| 1617 | * If tracing is off, but we have triggers enabled | ||
| 1618 | * we still need to look at the event data. Use the temp_buffer | ||
| 1619 | * to store the trace event for the tigger to use. It's recusive | ||
| 1620 | * safe and will not be recorded anywhere. | ||
| 1621 | */ | ||
| 1622 | if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { | ||
| 1623 | *current_rb = temp_buffer; | ||
| 1624 | entry = trace_buffer_lock_reserve(*current_rb, | ||
| 1625 | type, len, flags, pc); | ||
| 1626 | } | ||
| 1627 | return entry; | ||
| 1612 | } | 1628 | } |
| 1613 | EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); | 1629 | EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); |
| 1614 | 1630 | ||
| @@ -6494,11 +6510,16 @@ __init static int tracer_alloc_buffers(void) | |||
| 6494 | 6510 | ||
| 6495 | raw_spin_lock_init(&global_trace.start_lock); | 6511 | raw_spin_lock_init(&global_trace.start_lock); |
| 6496 | 6512 | ||
| 6513 | /* Used for event triggers */ | ||
| 6514 | temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); | ||
| 6515 | if (!temp_buffer) | ||
| 6516 | goto out_free_cpumask; | ||
| 6517 | |||
| 6497 | /* TODO: make the number of buffers hot pluggable with CPUS */ | 6518 | /* TODO: make the number of buffers hot pluggable with CPUS */ |
| 6498 | if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { | 6519 | if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { |
| 6499 | printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); | 6520 | printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); |
| 6500 | WARN_ON(1); | 6521 | WARN_ON(1); |
| 6501 | goto out_free_cpumask; | 6522 | goto out_free_temp_buffer; |
| 6502 | } | 6523 | } |
| 6503 | 6524 | ||
| 6504 | if (global_trace.buffer_disabled) | 6525 | if (global_trace.buffer_disabled) |
| @@ -6540,6 +6561,8 @@ __init static int tracer_alloc_buffers(void) | |||
| 6540 | 6561 | ||
| 6541 | return 0; | 6562 | return 0; |
| 6542 | 6563 | ||
| 6564 | out_free_temp_buffer: | ||
| 6565 | ring_buffer_free(temp_buffer); | ||
| 6543 | out_free_cpumask: | 6566 | out_free_cpumask: |
| 6544 | free_percpu(global_trace.trace_buffer.data); | 6567 | free_percpu(global_trace.trace_buffer.data); |
| 6545 | #ifdef CONFIG_TRACER_MAX_TRACE | 6568 | #ifdef CONFIG_TRACER_MAX_TRACE |
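A hedged sketch of the caller shape this fallback enables: when tracing is off but a trigger is armed, the event is reserved from the temporary buffer, filled in so the trigger can inspect it, and then discarded instead of committed. The helper name and the "tracing_off" flag below are illustrative, not lifted from this patch:

	/* Sketch of the intended caller shape (names assumed, not from this
	 * patch); "tracing_off" stands in for however the caller learns that
	 * the reserve was satisfied from the temporary buffer. */
	static void emit_sketch(struct ftrace_event_file *file, int type,
				unsigned long len, unsigned long flags, int pc,
				bool tracing_off)
	{
		struct ring_buffer *buffer;
		struct ring_buffer_event *event;

		event = trace_event_buffer_lock_reserve(&buffer, file, type,
							len, flags, pc);
		if (!event)
			return;

		/* ... fill in the payload; armed triggers inspect it here ... */

		if (tracing_off)
			/* temp_buffer event: seen by the trigger, never by readers */
			ring_buffer_discard_commit(buffer, event);
		else
			trace_buffer_unlock_commit(buffer, event, flags, pc);
	}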
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index e854f420e033..c894614de14d 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -31,9 +31,25 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | |||
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | /* The ftrace function trace is allowed only for root. */ | 33 | /* The ftrace function trace is allowed only for root. */ |
| 34 | if (ftrace_event_is_function(tp_event) && | 34 | if (ftrace_event_is_function(tp_event)) { |
| 35 | perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) | 35 | if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) |
| 36 | return -EPERM; | 36 | return -EPERM; |
| 37 | |||
| 38 | /* | ||
| 39 | * We don't allow user space callchains for function trace | ||
| 40 | * event, due to issues with page faults while tracing page | ||
| 41 | * fault handler and its overall tricky nature. | ||
| 42 | */ | ||
| 43 | if (!p_event->attr.exclude_callchain_user) | ||
| 44 | return -EINVAL; | ||
| 45 | |||
| 46 | /* | ||
| 47 | * Same reason to disable user stack dump as for user space | ||
| 48 | * callchains above. | ||
| 49 | */ | ||
| 50 | if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) | ||
| 51 | return -EINVAL; | ||
| 52 | } | ||
| 37 | 53 | ||
| 38 | /* No tracing, just counting, so no obvious leak */ | 54 | /* No tracing, just counting, so no obvious leak */ |
| 39 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) | 55 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e71ffd4eccb5..7b16d40bd64d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -27,12 +27,6 @@ | |||
| 27 | 27 | ||
| 28 | DEFINE_MUTEX(event_mutex); | 28 | DEFINE_MUTEX(event_mutex); |
| 29 | 29 | ||
| 30 | DEFINE_MUTEX(event_storage_mutex); | ||
| 31 | EXPORT_SYMBOL_GPL(event_storage_mutex); | ||
| 32 | |||
| 33 | char event_storage[EVENT_STORAGE_SIZE]; | ||
| 34 | EXPORT_SYMBOL_GPL(event_storage); | ||
| 35 | |||
| 36 | LIST_HEAD(ftrace_events); | 30 | LIST_HEAD(ftrace_events); |
| 37 | static LIST_HEAD(ftrace_common_fields); | 31 | static LIST_HEAD(ftrace_common_fields); |
| 38 | 32 | ||
| @@ -1777,6 +1771,16 @@ static void trace_module_add_events(struct module *mod) | |||
| 1777 | { | 1771 | { |
| 1778 | struct ftrace_event_call **call, **start, **end; | 1772 | struct ftrace_event_call **call, **start, **end; |
| 1779 | 1773 | ||
| 1774 | if (!mod->num_trace_events) | ||
| 1775 | return; | ||
| 1776 | |||
| 1777 | /* Don't add infrastructure for mods without tracepoints */ | ||
| 1778 | if (trace_module_has_bad_taint(mod)) { | ||
| 1779 | pr_err("%s: module has bad taint, not creating trace events\n", | ||
| 1780 | mod->name); | ||
| 1781 | return; | ||
| 1782 | } | ||
| 1783 | |||
| 1780 | start = mod->trace_events; | 1784 | start = mod->trace_events; |
| 1781 | end = mod->trace_events + mod->num_trace_events; | 1785 | end = mod->trace_events + mod->num_trace_events; |
| 1782 | 1786 | ||
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7c3e3e72e2b6..ee0a5098ac43 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
| @@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
| 95 | #undef __array | 95 | #undef __array |
| 96 | #define __array(type, item, len) \ | 96 | #define __array(type, item, len) \ |
| 97 | do { \ | 97 | do { \ |
| 98 | char *type_str = #type"["__stringify(len)"]"; \ | ||
| 98 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | 99 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ |
| 99 | mutex_lock(&event_storage_mutex); \ | 100 | ret = trace_define_field(event_call, type_str, #item, \ |
| 100 | snprintf(event_storage, sizeof(event_storage), \ | ||
| 101 | "%s[%d]", #type, len); \ | ||
| 102 | ret = trace_define_field(event_call, event_storage, #item, \ | ||
| 103 | offsetof(typeof(field), item), \ | 101 | offsetof(typeof(field), item), \ |
| 104 | sizeof(field.item), \ | 102 | sizeof(field.item), \ |
| 105 | is_signed_type(type), filter_type); \ | 103 | is_signed_type(type), filter_type); \ |
| 106 | mutex_unlock(&event_storage_mutex); \ | ||
| 107 | if (ret) \ | 104 | if (ret) \ |
| 108 | return ret; \ | 105 | return ret; \ |
| 109 | } while (0); | 106 | } while (0); |
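The replacement works because the preprocessor now builds the type string per call site, so the shared event_storage buffer and event_storage_mutex (removed in the trace_events.c hunk above) are no longer needed. A minimal illustration, assuming __stringify() from <linux/stringify.h> macro-expands its argument before stringizing:

	#include <linux/stringify.h>

	#define TYPE_STR(type, len)	#type "[" __stringify(len) "]"

	/* TYPE_STR(char, 16)            -> "char" "[" "16" "]" -> "char[16]"
	 * TYPE_STR(char, TASK_COMM_LEN) expands TASK_COMM_LEN first, yielding
	 * the same "char[16]" literal -- one per call site, no locking. */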
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2aefbee93a6d..887ef88b0bc7 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
| @@ -498,14 +498,14 @@ void trace_hardirqs_off(void) | |||
| 498 | } | 498 | } |
| 499 | EXPORT_SYMBOL(trace_hardirqs_off); | 499 | EXPORT_SYMBOL(trace_hardirqs_off); |
| 500 | 500 | ||
| 501 | void trace_hardirqs_on_caller(unsigned long caller_addr) | 501 | __visible void trace_hardirqs_on_caller(unsigned long caller_addr) |
| 502 | { | 502 | { |
| 503 | if (!preempt_trace() && irq_trace()) | 503 | if (!preempt_trace() && irq_trace()) |
| 504 | stop_critical_timing(CALLER_ADDR0, caller_addr); | 504 | stop_critical_timing(CALLER_ADDR0, caller_addr); |
| 505 | } | 505 | } |
| 506 | EXPORT_SYMBOL(trace_hardirqs_on_caller); | 506 | EXPORT_SYMBOL(trace_hardirqs_on_caller); |
| 507 | 507 | ||
| 508 | void trace_hardirqs_off_caller(unsigned long caller_addr) | 508 | __visible void trace_hardirqs_off_caller(unsigned long caller_addr) |
| 509 | { | 509 | { |
| 510 | if (!preempt_trace() && irq_trace()) | 510 | if (!preempt_trace() && irq_trace()) |
| 511 | start_critical_timing(CALLER_ADDR0, caller_addr); | 511 | start_critical_timing(CALLER_ADDR0, caller_addr); |
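__visible matters here because these functions are reached from architecture irq-entry and lockdep assembly paths the compiler cannot see; under LTO an apparently unreferenced global could otherwise be localized or dropped. Its definition is roughly the following (paraphrased from compiler-gcc4.h of this era; treat as an assumption):

	#ifndef __visible
	# define __visible	__attribute__((externally_visible))
	#endif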
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 29f26540e9c9..031cc5655a51 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
| @@ -631,6 +631,11 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter) | |||
| 631 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); | 631 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); |
| 632 | 632 | ||
| 633 | #ifdef CONFIG_MODULES | 633 | #ifdef CONFIG_MODULES |
| 634 | bool trace_module_has_bad_taint(struct module *mod) | ||
| 635 | { | ||
| 636 | return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)); | ||
| 637 | } | ||
| 638 | |||
| 634 | static int tracepoint_module_coming(struct module *mod) | 639 | static int tracepoint_module_coming(struct module *mod) |
| 635 | { | 640 | { |
| 636 | struct tp_module *tp_mod, *iter; | 641 | struct tp_module *tp_mod, *iter; |
| @@ -641,7 +646,7 @@ static int tracepoint_module_coming(struct module *mod) | |||
| 641 | * module headers (for forced load), to make sure we don't cause a crash. | 646 | * module headers (for forced load), to make sure we don't cause a crash. |
| 642 | * Staging and out-of-tree GPL modules are fine. | 647 | * Staging and out-of-tree GPL modules are fine. |
| 643 | */ | 648 | */ |
| 644 | if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) | 649 | if (trace_module_has_bad_taint(mod)) |
| 645 | return 0; | 650 | return 0; |
| 646 | mutex_lock(&tracepoints_mutex); | 651 | mutex_lock(&tracepoints_mutex); |
| 647 | tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); | 652 | tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); |
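A worked example of the taint test the new helper shares with trace_events.c: only the out-of-tree and staging bits are tolerated, so a force-loaded module trips it. Taint bit names come from <linux/kernel.h>; the scenario is illustrative:

	unsigned int taints    = (1 << TAINT_OOT_MODULE) | (1 << TAINT_FORCED_MODULE);
	unsigned int tolerated = (1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP);

	/* taints & ~tolerated still has TAINT_FORCED_MODULE set, so
	 * trace_module_has_bad_taint() returns true and both tracepoint and
	 * trace-event registration are skipped for that module. */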
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 240fb62cf394..dd06439b9c84 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
| @@ -225,7 +225,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) | |||
| 225 | * | 225 | * |
| 226 | * When there is no mapping defined for the user-namespace uid | 226 | * When there is no mapping defined for the user-namespace uid |
| 227 | * pair INVALID_UID is returned. Callers are expected to test | 227 | * pair INVALID_UID is returned. Callers are expected to test |
| 228 | * for and handle handle INVALID_UID being returned. INVALID_UID | 228 | * for and handle INVALID_UID being returned. INVALID_UID |
| 229 | * may be tested for using uid_valid(). | 229 | * may be tested for using uid_valid(). |
| 230 | */ | 230 | */ |
| 231 | kuid_t make_kuid(struct user_namespace *ns, uid_t uid) | 231 | kuid_t make_kuid(struct user_namespace *ns, uid_t uid) |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 82ef9f3b7473..3fa5b8f3aae3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -1851,6 +1851,12 @@ static void destroy_worker(struct worker *worker) | |||
| 1851 | if (worker->flags & WORKER_IDLE) | 1851 | if (worker->flags & WORKER_IDLE) |
| 1852 | pool->nr_idle--; | 1852 | pool->nr_idle--; |
| 1853 | 1853 | ||
| 1854 | /* | ||
| 1855 | * Once WORKER_DIE is set, the kworker may destroy itself at any | ||
| 1856 | * point. Pin to ensure the task stays until we're done with it. | ||
| 1857 | */ | ||
| 1858 | get_task_struct(worker->task); | ||
| 1859 | |||
| 1854 | list_del_init(&worker->entry); | 1860 | list_del_init(&worker->entry); |
| 1855 | worker->flags |= WORKER_DIE; | 1861 | worker->flags |= WORKER_DIE; |
| 1856 | 1862 | ||
| @@ -1859,6 +1865,7 @@ static void destroy_worker(struct worker *worker) | |||
| 1859 | spin_unlock_irq(&pool->lock); | 1865 | spin_unlock_irq(&pool->lock); |
| 1860 | 1866 | ||
| 1861 | kthread_stop(worker->task); | 1867 | kthread_stop(worker->task); |
| 1868 | put_task_struct(worker->task); | ||
| 1862 | kfree(worker); | 1869 | kfree(worker); |
| 1863 | 1870 | ||
| 1864 | spin_lock_irq(&pool->lock); | 1871 | spin_lock_irq(&pool->lock); |
| @@ -3218,7 +3225,7 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, | |||
| 3218 | return -ENOMEM; | 3225 | return -ENOMEM; |
| 3219 | 3226 | ||
| 3220 | if (sscanf(buf, "%d", &attrs->nice) == 1 && | 3227 | if (sscanf(buf, "%d", &attrs->nice) == 1 && |
| 3221 | attrs->nice >= -20 && attrs->nice <= 19) | 3228 | attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) |
| 3222 | ret = apply_workqueue_attrs(wq, attrs); | 3229 | ret = apply_workqueue_attrs(wq, attrs); |
| 3223 | else | 3230 | else |
| 3224 | ret = -EINVAL; | 3231 | ret = -EINVAL; |
