Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/syscall.c         |   5
-rw-r--r--  kernel/bpf/verifier.c        | 145
-rw-r--r--  kernel/fork.c                |   8
-rw-r--r--  kernel/irq/chip.c            |   2
-rw-r--r--  kernel/kprobes.c             |   8
-rw-r--r--  kernel/pid_namespace.c       |   2
-rw-r--r--  kernel/sched/core.c          |  25
-rw-r--r--  kernel/sched/idle.c          |   2
-rw-r--r--  kernel/sched/sched.h         |   2
-rw-r--r--  kernel/trace/blktrace.c      |   4
-rw-r--r--  kernel/trace/ftrace.c        |  12
-rw-r--r--  kernel/trace/trace.c         |  34
-rw-r--r--  kernel/trace/trace.h         |   5
-rw-r--r--  kernel/trace/trace_kprobe.c  |   5
14 files changed, 221 insertions(+), 38 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd2411fd6914..265a0d854e33 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -783,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD kern_version
+#define BPF_PROG_LOAD_LAST_FIELD prog_flags
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -796,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr)
         if (CHECK_ATTR(BPF_PROG_LOAD))
                 return -EINVAL;
 
+        if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
+                return -EINVAL;
+
         /* copy eBPF program license from user space */
         if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
                               sizeof(license) - 1) < 0)
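
With this hunk applied, any bit in prog_flags other than BPF_F_STRICT_ALIGNMENT is rejected with -EINVAL before the program is copied in. As a rough, hypothetical user-space sketch (not part of this patch, and assuming a kernel and uapi header that already carry the new flag), a loader would opt into strict alignment checking like this:

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        /* Smallest valid program: r0 = 0; exit. */
        struct bpf_insn prog[] = {
                { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
                { .code = BPF_JMP | BPF_EXIT },
        };
        union bpf_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.prog_type  = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns      = (unsigned long)prog;
        attr.insn_cnt   = 2;
        attr.license    = (unsigned long)"GPL";
        attr.prog_flags = BPF_F_STRICT_ALIGNMENT;  /* new in this patch */

        /* Any other prog_flags bit now fails the check above with -EINVAL. */
        fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
        if (fd < 0)
                perror("BPF_PROG_LOAD");
        return 0;
}
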
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c5b56c92f8e2..1eddb713b815 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem {
         struct bpf_verifier_stack_elem *next;
 };
 
-#define BPF_COMPLEXITY_LIMIT_INSNS      65536
+#define BPF_COMPLEXITY_LIMIT_INSNS      98304
 #define BPF_COMPLEXITY_LIMIT_STACK      1024
 
 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
@@ -241,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state)
                 if (reg->max_value != BPF_REGISTER_MAX_RANGE)
                         verbose(",max_value=%llu",
                                 (unsigned long long)reg->max_value);
+                if (reg->min_align)
+                        verbose(",min_align=%u", reg->min_align);
+                if (reg->aux_off)
+                        verbose(",aux_off=%u", reg->aux_off);
+                if (reg->aux_off_align)
+                        verbose(",aux_off_align=%u", reg->aux_off_align);
         }
         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
                 if (state->stack_slot_type[i] == STACK_SPILL)
@@ -466,6 +472,9 @@ static void init_reg_state(struct bpf_reg_state *regs)
                 regs[i].imm = 0;
                 regs[i].min_value = BPF_REGISTER_MIN_RANGE;
                 regs[i].max_value = BPF_REGISTER_MAX_RANGE;
+                regs[i].min_align = 0;
+                regs[i].aux_off = 0;
+                regs[i].aux_off_align = 0;
         }
 
         /* frame pointer */
@@ -492,6 +501,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 {
         regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
         regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+        regs[regno].min_align = 0;
 }
 
 static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
@@ -779,17 +789,33 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 }
 
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
-                                   int off, int size)
+                                   int off, int size, bool strict)
 {
-        if (reg->id && size != 1) {
-                verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
-                return -EACCES;
+        int ip_align;
+        int reg_off;
+
+        /* Byte size accesses are always allowed. */
+        if (!strict || size == 1)
+                return 0;
+
+        reg_off = reg->off;
+        if (reg->id) {
+                if (reg->aux_off_align % size) {
+                        verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
+                                reg->aux_off_align, size);
+                        return -EACCES;
+                }
+                reg_off += reg->aux_off;
         }
 
-        /* skb->data is NET_IP_ALIGN-ed */
-        if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+        /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking
+         * we force this to 2 which is universally what architectures use
+         * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+         */
+        ip_align = strict ? 2 : NET_IP_ALIGN;
+        if ((ip_align + reg_off + off) % size != 0) {
                 verbose("misaligned packet access off %d+%d+%d size %d\n",
-                        NET_IP_ALIGN, reg->off, off, size);
+                        ip_align, reg_off, off, size);
                 return -EACCES;
         }
 
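
The strict check above boils down to (ip_align + reg_off + off) % size == 0 with ip_align forced to 2. A small stand-alone sketch of that arithmetic, useful for convincing yourself which packet loads survive strict mode (illustrative only, not kernel code):

#include <stdio.h>

static int misaligned(int ip_align, int reg_off, int off, int size)
{
        /* Mirrors the strict-mode test in check_pkt_ptr_alignment(). */
        return (ip_align + reg_off + off) % size != 0;
}

int main(void)
{
        /* skb->data sits 2 bytes into a 4-byte-aligned buffer (NET_IP_ALIGN). */
        printf("4-byte load at off 0: %s\n",
               misaligned(2, 0, 0, 4) ? "rejected" : "allowed");  /* rejected: 2 % 4 */
        printf("4-byte load at off 2: %s\n",
               misaligned(2, 0, 2, 4) ? "rejected" : "allowed");  /* allowed: 2+2 = 4 */
        printf("2-byte load at off 0: %s\n",
               misaligned(2, 0, 0, 2) ? "rejected" : "allowed");  /* allowed */
        return 0;
}
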
@@ -797,9 +823,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 }
 
 static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
-                                   int size)
+                                   int size, bool strict)
 {
-        if (size != 1) {
+        if (strict && size != 1) {
                 verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
                 return -EACCES;
         }
@@ -807,16 +833,20 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
         return 0;
 }
 
-static int check_ptr_alignment(const struct bpf_reg_state *reg,
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+                               const struct bpf_reg_state *reg,
                                int off, int size)
 {
+        bool strict = env->strict_alignment;
+
+        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+                strict = true;
+
         switch (reg->type) {
         case PTR_TO_PACKET:
-                return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
-                       check_pkt_ptr_alignment(reg, off, size);
+                return check_pkt_ptr_alignment(reg, off, size, strict);
         case PTR_TO_MAP_VALUE_ADJ:
-                return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
-                       check_val_ptr_alignment(reg, size);
+                return check_val_ptr_alignment(reg, size, strict);
         default:
                 if (off % size != 0) {
                         verbose("misaligned access off %d size %d\n",
@@ -849,7 +879,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
         if (size < 0)
                 return size;
 
-        err = check_ptr_alignment(reg, off, size);
+        err = check_ptr_alignment(env, reg, off, size);
         if (err)
                 return err;
 
@@ -883,6 +913,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
                                        value_regno);
                 /* note that reg.[id|off|range] == 0 */
                 state->regs[value_regno].type = reg_type;
+                state->regs[value_regno].aux_off = 0;
+                state->regs[value_regno].aux_off_align = 0;
         }
 
         } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -1455,6 +1487,8 @@ add_imm:
                  */
                 dst_reg->off += imm;
         } else {
+                bool had_id;
+
                 if (src_reg->type == PTR_TO_PACKET) {
                         /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
                         tmp_reg = *dst_reg; /* save r7 state */
@@ -1488,14 +1522,23 @@ add_imm:
                                 src_reg->imm);
                         return -EACCES;
                 }
+
+                had_id = (dst_reg->id != 0);
+
                 /* dst_reg stays as pkt_ptr type and since some positive
                  * integer value was added to the pointer, increment its 'id'
                  */
                 dst_reg->id = ++env->id_gen;
 
-                /* something was added to pkt_ptr, set range and off to zero */
+                /* something was added to pkt_ptr, set range to zero */
+                dst_reg->aux_off += dst_reg->off;
                 dst_reg->off = 0;
                 dst_reg->range = 0;
+                if (had_id)
+                        dst_reg->aux_off_align = min(dst_reg->aux_off_align,
+                                                     src_reg->min_align);
+                else
+                        dst_reg->aux_off_align = src_reg->min_align;
         }
         return 0;
 }
@@ -1669,6 +1712,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg)
                 reg->min_value = BPF_REGISTER_MIN_RANGE;
 }
 
+static u32 calc_align(u32 imm)
+{
+        if (!imm)
+                return 1U << 31;
+        return imm - ((imm - 1) & imm);
+}
+
 static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                                     struct bpf_insn *insn)
 {
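
calc_align() returns the lowest set bit of imm, i.e. the largest power of two guaranteed to divide it; zero maps to 1U << 31, since zero is divisible by any alignment the verifier tracks. A stand-alone sketch verifying the identity (not kernel code):

#include <stdio.h>

static unsigned int calc_align(unsigned int imm)
{
        if (!imm)
                return 1U << 31;
        return imm - ((imm - 1) & imm);  /* isolates the lowest set bit */
}

int main(void)
{
        printf("%u\n", calc_align(12));  /* 4: 12 = 0b1100 */
        printf("%u\n", calc_align(8));   /* 8: already a power of two */
        printf("%u\n", calc_align(7));   /* 1: odd values carry no alignment */
        printf("%u\n", calc_align(0));   /* 2147483648 */
        return 0;
}
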
@@ -1676,8 +1726,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
         s64 min_val = BPF_REGISTER_MIN_RANGE;
         u64 max_val = BPF_REGISTER_MAX_RANGE;
         u8 opcode = BPF_OP(insn->code);
+        u32 dst_align, src_align;
 
         dst_reg = &regs[insn->dst_reg];
+        src_align = 0;
         if (BPF_SRC(insn->code) == BPF_X) {
                 check_reg_overflow(&regs[insn->src_reg]);
                 min_val = regs[insn->src_reg].min_value;
@@ -1693,12 +1745,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                            regs[insn->src_reg].type != UNKNOWN_VALUE) {
                         min_val = BPF_REGISTER_MIN_RANGE;
                         max_val = BPF_REGISTER_MAX_RANGE;
+                        src_align = 0;
+                } else {
+                        src_align = regs[insn->src_reg].min_align;
                 }
         } else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
                    (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
                 min_val = max_val = insn->imm;
+                src_align = calc_align(insn->imm);
         }
 
+        dst_align = dst_reg->min_align;
+
         /* We don't know anything about what was done to this register, mark it
          * as unknown.
          */
@@ -1723,18 +1781,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                         dst_reg->min_value += min_val;
                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
                         dst_reg->max_value += max_val;
+                dst_reg->min_align = min(src_align, dst_align);
                 break;
         case BPF_SUB:
                 if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
                         dst_reg->min_value -= min_val;
                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
                         dst_reg->max_value -= max_val;
+                dst_reg->min_align = min(src_align, dst_align);
                 break;
         case BPF_MUL:
                 if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
                         dst_reg->min_value *= min_val;
                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
                         dst_reg->max_value *= max_val;
+                dst_reg->min_align = max(src_align, dst_align);
                 break;
         case BPF_AND:
                 /* Disallow AND'ing of negative numbers, ain't nobody got time
@@ -1746,17 +1807,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                 else
                         dst_reg->min_value = 0;
                 dst_reg->max_value = max_val;
+                dst_reg->min_align = max(src_align, dst_align);
                 break;
         case BPF_LSH:
                 /* Gotta have special overflow logic here, if we're shifting
                  * more than MAX_RANGE then just assume we have an invalid
                  * range.
                  */
-                if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+                if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
                         dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-                else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-                        dst_reg->min_value <<= min_val;
-
+                        dst_reg->min_align = 1;
+                } else {
+                        if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+                                dst_reg->min_value <<= min_val;
+                        if (!dst_reg->min_align)
+                                dst_reg->min_align = 1;
+                        dst_reg->min_align <<= min_val;
+                }
                 if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
                         dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
                 else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
@@ -1766,11 +1833,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                 /* RSH by a negative number is undefined, and the BPF_RSH is an
                  * unsigned shift, so make the appropriate casts.
                  */
-                if (min_val < 0 || dst_reg->min_value < 0)
+                if (min_val < 0 || dst_reg->min_value < 0) {
                         dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-                else
+                } else {
                         dst_reg->min_value =
                                 (u64)(dst_reg->min_value) >> min_val;
+                }
+                if (min_val < 0) {
+                        dst_reg->min_align = 1;
+                } else {
+                        dst_reg->min_align >>= (u64) min_val;
+                        if (!dst_reg->min_align)
+                                dst_reg->min_align = 1;
+                }
                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
                         dst_reg->max_value >>= max_val;
                 break;
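
Taken together, the cases above propagate known alignment conservatively: ADD and SUB keep the weaker operand alignment (min), MUL and AND can only keep or improve it (max), and the shifts scale it while clamping at 1. A stand-alone replay of those rules on example values (illustrative only, not kernel code):

#include <stdio.h>

static unsigned int u_min(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int u_max(unsigned int a, unsigned int b) { return a > b ? a : b; }

int main(void)
{
        unsigned int dst = 8, src = 4;  /* e.g. r1 known 8-byte aligned, r2 4-byte */

        printf("add: %u\n", u_min(src, dst));          /* 4: 8k + 4k is only 4-aligned */
        printf("mul: %u\n", u_max(src, dst));          /* 8: 8k * 4k is at least 8-aligned */
        printf("lsh by 1: %u\n", dst << 1);            /* 16: left shift doubles it */
        printf("rsh by 2: %u\n", u_max(dst >> 2, 1));  /* 2, clamped to at least 1 */
        return 0;
}
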
@@ -1872,6 +1947,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                         regs[insn->dst_reg].imm = insn->imm;
                         regs[insn->dst_reg].max_value = insn->imm;
                         regs[insn->dst_reg].min_value = insn->imm;
+                        regs[insn->dst_reg].min_align = calc_align(insn->imm);
                 }
 
         } else if (opcode > BPF_END) {
@@ -2564,6 +2640,7 @@ peek_stack:
                         env->explored_states[t + 1] = STATE_LIST_MARK;
         } else {
                 /* conditional jump with two edges */
+                env->explored_states[t] = STATE_LIST_MARK;
                 ret = push_insn(t, t + 1, FALLTHROUGH, env);
                 if (ret == 1)
                         goto peek_stack;
@@ -2722,6 +2799,12 @@ static bool states_equal(struct bpf_verifier_env *env,
                      rcur->type != NOT_INIT))
                         continue;
 
+                /* Don't care about the reg->id in this case. */
+                if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
+                    rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
+                    rold->map_ptr == rcur->map_ptr)
+                        continue;
+
                 if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
                     compare_ptrs_to_packet(rold, rcur))
                         continue;
@@ -2856,8 +2939,15 @@ static int do_check(struct bpf_verifier_env *env)
                         goto process_bpf_exit;
                 }
 
-                if (log_level && do_print_state) {
-                        verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+                if (need_resched())
+                        cond_resched();
+
+                if (log_level > 1 || (log_level && do_print_state)) {
+                        if (log_level > 1)
+                                verbose("%d:", insn_idx);
+                        else
+                                verbose("\nfrom %d to %d:",
+                                        prev_insn_idx, insn_idx);
                         print_verifier_state(&env->cur_state);
                         do_print_state = false;
                 }
@@ -3494,6 +3584,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
         } else {
                 log_level = 0;
         }
+        if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
+                env->strict_alignment = true;
+        else
+                env->strict_alignment = false;
 
         ret = replace_map_fd_with_map_ptr(env);
         if (ret < 0)
@@ -3599,6 +3693,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
         mutex_lock(&bpf_verifier_lock);
 
         log_level = 0;
+        env->strict_alignment = false;
 
         env->explored_states = kcalloc(env->prog->len,
                                        sizeof(struct bpf_verifier_state_list *),
diff --git a/kernel/fork.c b/kernel/fork.c
index 06d759ab4c62..aa1076c5e4a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1845,11 +1845,13 @@ static __latent_entropy struct task_struct *copy_process(
          */
         recalc_sigpending();
         if (signal_pending(current)) {
-                spin_unlock(&current->sighand->siglock);
-                write_unlock_irq(&tasklist_lock);
                 retval = -ERESTARTNOINTR;
                 goto bad_fork_cancel_cgroup;
         }
+        if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+                retval = -ENOMEM;
+                goto bad_fork_cancel_cgroup;
+        }
 
         if (likely(p->pid)) {
                 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1907,6 +1909,8 @@ static __latent_entropy struct task_struct *copy_process(
         return p;
 
 bad_fork_cancel_cgroup:
+        spin_unlock(&current->sighand->siglock);
+        write_unlock_irq(&tasklist_lock);
         cgroup_cancel_fork(p);
 bad_fork_free_pid:
         cgroup_threadgroup_change_end(current);
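
The two unlock calls move out of the signal_pending() branch and into the shared bad_fork_cancel_cgroup label, so the newly added PIDNS_HASH_ADDING check can reuse the same exit path. A generic, hypothetical sketch of that cleanup-label shape (plain pthreads, not the actual fork.c locks):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

static int do_work(int fail_early, int fail_late)
{
        int ret = 0;

        pthread_mutex_lock(&a);
        pthread_mutex_lock(&b);

        if (fail_early) {       /* first failure site */
                ret = -1;
                goto unlock;    /* no per-site unlock duplication */
        }
        if (fail_late) {        /* second failure site, added later for free */
                ret = -2;
                goto unlock;
        }
unlock:
        pthread_mutex_unlock(&b);
        pthread_mutex_unlock(&a);
        return ret;
}

int main(void)
{
        printf("%d %d %d\n", do_work(0, 0), do_work(1, 0), do_work(0, 1));
        return 0;
}
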
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 686be4b73018..c94da688ee9b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -880,8 +880,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
         if (!desc)
                 return;
 
-        __irq_do_set_handler(desc, handle, 1, NULL);
         desc->irq_common_data.handler_data = data;
+        __irq_do_set_handler(desc, handle, 1, NULL);
 
         irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7367e0ec6f81..2d2d3a568e4e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -595,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work)
 }
 
 /* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
 {
         mutex_lock(&kprobe_mutex);
 
@@ -2183,6 +2183,12 @@ static int kprobes_module_callback(struct notifier_block *nb,
                          * The vaddr this probe is installed will soon
                          * be vfreed buy not synced to disk. Hence,
                          * disarming the breakpoint isn't needed.
+                         *
+                         * Note, this will also move any optimized probes
+                         * that are pending to be removed from their
+                         * corresponding lists to the freeing_list and
+                         * will not be touched by the delayed
+                         * kprobe_optimizer work handler.
                          */
                         kill_kprobe(p);
                 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index d1f3e9f558b8..74a5a7255b4d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
          * if reparented.
          */
         for (;;) {
-                set_current_state(TASK_UNINTERRUPTIBLE);
+                set_current_state(TASK_INTERRUPTIBLE);
                 if (pid_ns->nr_hashed == init_pids)
                         break;
                 schedule();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 759f4bd52cd6..803c3bc274c4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3502,6 +3502,31 @@ asmlinkage __visible void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+/*
+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
+ * state (have scheduled out non-voluntarily) by making sure that all
+ * tasks have either left the run queue or have gone into user space.
+ * As idle tasks do not do either, they must not ever be preempted
+ * (schedule out non-voluntarily).
+ *
+ * schedule_idle() is similar to schedule_preempt_disable() except that it
+ * never enables preemption because it does not call sched_submit_work().
+ */
+void __sched schedule_idle(void)
+{
+        /*
+         * As this skips calling sched_submit_work(), which the idle task does
+         * regardless because that function is a nop when the task is in a
+         * TASK_RUNNING state, make sure this isn't used someplace that the
+         * current task can be in any other state. Note, idle is always in the
+         * TASK_RUNNING state.
+         */
+        WARN_ON_ONCE(current->state);
+        do {
+                __schedule(false);
+        } while (need_resched());
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 asmlinkage __visible void __sched schedule_user(void)
 {
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 2a25a9ec2c6e..ef63adce0c9c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -265,7 +265,7 @@ static void do_idle(void)
         smp_mb__after_atomic();
 
         sched_ttwu_pending();
-        schedule_preempt_disabled();
+        schedule_idle();
 
         if (unlikely(klp_patch_pending(current)))
                 klp_update_patch_state(current);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7808ab050599..6dda2aab731e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1467,6 +1467,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
 }
 #endif
 
+extern void schedule_idle(void);
+
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bd8ae8d5ae9c..193c5f5e3f79 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1662,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
                 goto out;
 
         if (attr == &dev_attr_act_mask) {
-                if (sscanf(buf, "%llx", &value) != 1) {
+                if (kstrtoull(buf, 0, &value)) {
                         /* Assume it is a list of trace category names */
                         ret = blk_trace_str2mask(buf);
                         if (ret < 0)
                                 goto out;
                         value = ret;
                 }
-        } else if (sscanf(buf, "%llu", &value) != 1)
+        } else if (kstrtoull(buf, 0, &value))
                 goto out;
 
         ret = -ENXIO;
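
Unlike sscanf(), which stops at the first non-digit and still reports a successful conversion, kstrtoull() fails on trailing garbage (tolerating the single newline a sysfs write carries) and, with base 0, auto-detects 0x and octal prefixes. A rough user-space approximation for comparison (the kernel helper additionally distinguishes overflow as -ERANGE):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int kstrtoull_like(const char *s, int base, unsigned long long *res)
{
        char *end;

        errno = 0;
        *res = strtoull(s, &end, base);
        if (errno || end == s)
                return -EINVAL;
        if (*end == '\n')               /* sysfs writes usually end in '\n' */
                end++;
        return *end ? -EINVAL : 0;      /* trailing garbage is an error */
}

int main(void)
{
        unsigned long long v;

        printf("%d\n", kstrtoull_like("0x10\n", 0, &v));  /* 0, v == 16 */
        printf("%d\n", kstrtoull_like("16abc", 0, &v));   /* -EINVAL */
        sscanf("16abc", "%llu", &v);                      /* "succeeds", v == 16 */
        printf("%llu\n", v);
        return 0;
}
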
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39dca4e86a94..74fdfe9ed3db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4144,9 +4144,9 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
         int i, ret = -ENODEV;
         int size;
 
-        if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
+        if (!glob || !strlen(glob) || !strcmp(glob, "*"))
                 func_g.search = NULL;
-        else if (glob) {
+        else {
                 int not;
 
                 func_g.type = filter_parse_regex(glob, strlen(glob),
@@ -4256,6 +4256,14 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
         return ret;
 }
 
+void clear_ftrace_function_probes(struct trace_array *tr)
+{
+        struct ftrace_func_probe *probe, *n;
+
+        list_for_each_entry_safe(probe, n, &tr->func_probes, list)
+                unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops);
+}
+
 static LIST_HEAD(ftrace_commands);
 static DEFINE_MUTEX(ftrace_cmd_mutex);
 
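
clear_ftrace_function_probes() has to iterate with list_for_each_entry_safe() because each unregister call unlinks and frees the probe being visited; the _safe variant caches the successor before the loop body runs. A minimal user-space sketch of the same idiom (hypothetical types, not the ftrace structures):

#include <stdio.h>
#include <stdlib.h>

struct probe {
        int id;
        struct probe *next;
};

int main(void)
{
        struct probe *head = NULL, *p, *n;
        int i;

        for (i = 0; i < 3; i++) {       /* build a small list: 2 -> 1 -> 0 */
                p = malloc(sizeof(*p));
                p->id = i;
                p->next = head;
                head = p;
        }

        /* 'n' holds the successor before the body runs, so freeing 'p' is safe. */
        for (p = head, n = p ? p->next : NULL; p; p = n, n = p ? p->next : NULL) {
                printf("unregistering probe %d\n", p->id);
                free(p);
        }
        return 0;
}
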
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c4536c449021..1122f151466f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1558,7 +1558,7 @@ static __init int init_trace_selftests(void)
 
         return 0;
 }
-early_initcall(init_trace_selftests);
+core_initcall(init_trace_selftests);
 #else
 static inline int run_tracer_selftest(struct tracer *type)
 {
@@ -2568,7 +2568,36 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                    int pc)
 {
-        __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
+        struct ring_buffer *buffer = tr->trace_buffer.buffer;
+
+        if (rcu_is_watching()) {
+                __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+                return;
+        }
+
+        /*
+         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
+         * but if the above rcu_is_watching() failed, then the NMI
+         * triggered someplace critical, and rcu_irq_enter() should
+         * not be called from NMI.
+         */
+        if (unlikely(in_nmi()))
+                return;
+
+        /*
+         * It is possible that a function is being traced in a
+         * location that RCU is not watching. A call to
+         * rcu_irq_enter() will make sure that it is, but there's
+         * a few internal rcu functions that could be traced
+         * where that wont work either. In those cases, we just
+         * do nothing.
+         */
+        if (unlikely(rcu_irq_enter_disabled()))
+                return;
+
+        rcu_irq_enter_irqson();
+        __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+        rcu_irq_exit_irqson();
 }
 
 /**
@@ -7550,6 +7579,7 @@ static int instance_rmdir(const char *name)
         }
 
         tracing_set_nop(tr);
+        clear_ftrace_function_probes(tr);
         event_trace_del_tracer(tr);
         ftrace_clear_pids(tr);
         ftrace_destroy_function_files(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 291a1bca5748..39fd77330aab 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -980,6 +980,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 extern int
 unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
                                       struct ftrace_probe_ops *ops);
+extern void clear_ftrace_function_probes(struct trace_array *tr);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -998,6 +999,10 @@ static inline __init int unregister_ftrace_command(char *cmd_name)
 {
         return -EINVAL;
 }
+static inline void clear_ftrace_function_probes(struct trace_array *tr)
+{
+}
+
 /*
  * The ops parameter passed in is usually undefined.
  * This must be a macro.
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8485f6738a87..c129fca6ec99 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1535,6 +1535,11 @@ static __init int kprobe_trace_self_tests_init(void)
 
 end:
         release_all_trace_kprobes();
+        /*
+         * Wait for the optimizer work to finish. Otherwise it might fiddle
+         * with probes in already freed __init text.
+         */
+        wait_for_kprobe_optimizer();
         if (warn)
                 pr_cont("NG: Some tests are failed. Please check them.\n");
         else