diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-01-02 08:46:35 -0500 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-01-02 08:46:35 -0500 |
| commit | b6a09416e83ffe4eccfb4ef1b91b3b66483fa810 (patch) | |
| tree | b30f266e85047244dcdb47d5afc134e76aec530d /kernel | |
| parent | db809859c8cee415293b830e67178f526d1eb2be (diff) | |
| parent | 30a7acd573899fd8b8ac39236eff6468b195ac7d (diff) | |
Merge 4.15-rc6 into char-misc-next
We want the fixes in here as well.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
31 files changed, 462 insertions, 887 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index e469e05c8e83..3905d4bc5b80 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c | |||
| @@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab) | |||
| 114 | pptr = htab_elem_get_ptr(get_htab_elem(htab, i), | 114 | pptr = htab_elem_get_ptr(get_htab_elem(htab, i), |
| 115 | htab->map.key_size); | 115 | htab->map.key_size); |
| 116 | free_percpu(pptr); | 116 | free_percpu(pptr); |
| 117 | cond_resched(); | ||
| 117 | } | 118 | } |
| 118 | free_elems: | 119 | free_elems: |
| 119 | bpf_map_area_free(htab->elems); | 120 | bpf_map_area_free(htab->elems); |
| @@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab) | |||
| 159 | goto free_elems; | 160 | goto free_elems; |
| 160 | htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, | 161 | htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, |
| 161 | pptr); | 162 | pptr); |
| 163 | cond_resched(); | ||
| 162 | } | 164 | } |
| 163 | 165 | ||
| 164 | skip_percpu_elems: | 166 | skip_percpu_elems: |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d4593571c404..04b24876cd23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
| @@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, | |||
| 1059 | break; | 1059 | break; |
| 1060 | case PTR_TO_STACK: | 1060 | case PTR_TO_STACK: |
| 1061 | pointer_desc = "stack "; | 1061 | pointer_desc = "stack "; |
| 1062 | /* The stack spill tracking logic in check_stack_write() | ||
| 1063 | * and check_stack_read() relies on stack accesses being | ||
| 1064 | * aligned. | ||
| 1065 | */ | ||
| 1066 | strict = true; | ||
| 1062 | break; | 1067 | break; |
| 1063 | default: | 1068 | default: |
| 1064 | break; | 1069 | break; |
| @@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, | |||
| 1067 | strict); | 1072 | strict); |
| 1068 | } | 1073 | } |
| 1069 | 1074 | ||
| 1075 | /* truncate register to smaller size (in bytes) | ||
| 1076 | * must be called with size < BPF_REG_SIZE | ||
| 1077 | */ | ||
| 1078 | static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) | ||
| 1079 | { | ||
| 1080 | u64 mask; | ||
| 1081 | |||
| 1082 | /* clear high bits in bit representation */ | ||
| 1083 | reg->var_off = tnum_cast(reg->var_off, size); | ||
| 1084 | |||
| 1085 | /* fix arithmetic bounds */ | ||
| 1086 | mask = ((u64)1 << (size * 8)) - 1; | ||
| 1087 | if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { | ||
| 1088 | reg->umin_value &= mask; | ||
| 1089 | reg->umax_value &= mask; | ||
| 1090 | } else { | ||
| 1091 | reg->umin_value = 0; | ||
| 1092 | reg->umax_value = mask; | ||
| 1093 | } | ||
| 1094 | reg->smin_value = reg->umin_value; | ||
| 1095 | reg->smax_value = reg->umax_value; | ||
| 1096 | } | ||
| 1097 | |||
| 1070 | /* check whether memory at (regno + off) is accessible for t = (read | write) | 1098 | /* check whether memory at (regno + off) is accessible for t = (read | write) |
| 1071 | * if t==write, value_regno is a register which value is stored into memory | 1099 | * if t==write, value_regno is a register which value is stored into memory |
| 1072 | * if t==read, value_regno is a register which will receive the value from memory | 1100 | * if t==read, value_regno is a register which will receive the value from memory |
| @@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | |||
| 1200 | if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && | 1228 | if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && |
| 1201 | regs[value_regno].type == SCALAR_VALUE) { | 1229 | regs[value_regno].type == SCALAR_VALUE) { |
| 1202 | /* b/h/w load zero-extends, mark upper bits as known 0 */ | 1230 | /* b/h/w load zero-extends, mark upper bits as known 0 */ |
| 1203 | regs[value_regno].var_off = | 1231 | coerce_reg_to_size(&regs[value_regno], size); |
| 1204 | tnum_cast(regs[value_regno].var_off, size); | ||
| 1205 | __update_reg_bounds(&regs[value_regno]); | ||
| 1206 | } | 1232 | } |
| 1207 | return err; | 1233 | return err; |
| 1208 | } | 1234 | } |
| @@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, | |||
| 1282 | tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); | 1308 | tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); |
| 1283 | verbose(env, "invalid variable stack read R%d var_off=%s\n", | 1309 | verbose(env, "invalid variable stack read R%d var_off=%s\n", |
| 1284 | regno, tn_buf); | 1310 | regno, tn_buf); |
| 1311 | return -EACCES; | ||
| 1285 | } | 1312 | } |
| 1286 | off = regs[regno].off + regs[regno].var_off.value; | 1313 | off = regs[regno].off + regs[regno].var_off.value; |
| 1287 | if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || | 1314 | if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || |
| @@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) | |||
| 1674 | return -EINVAL; | 1701 | return -EINVAL; |
| 1675 | } | 1702 | } |
| 1676 | 1703 | ||
| 1704 | /* With LD_ABS/IND some JITs save/restore skb from r1. */ | ||
| 1677 | changes_data = bpf_helper_changes_pkt_data(fn->func); | 1705 | changes_data = bpf_helper_changes_pkt_data(fn->func); |
| 1706 | if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { | ||
| 1707 | verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", | ||
| 1708 | func_id_name(func_id), func_id); | ||
| 1709 | return -EINVAL; | ||
| 1710 | } | ||
| 1678 | 1711 | ||
| 1679 | memset(&meta, 0, sizeof(meta)); | 1712 | memset(&meta, 0, sizeof(meta)); |
| 1680 | meta.pkt_access = fn->pkt_access; | 1713 | meta.pkt_access = fn->pkt_access; |
| @@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) | |||
| 1766 | return 0; | 1799 | return 0; |
| 1767 | } | 1800 | } |
| 1768 | 1801 | ||
| 1769 | static void coerce_reg_to_32(struct bpf_reg_state *reg) | ||
| 1770 | { | ||
| 1771 | /* clear high 32 bits */ | ||
| 1772 | reg->var_off = tnum_cast(reg->var_off, 4); | ||
| 1773 | /* Update bounds */ | ||
| 1774 | __update_reg_bounds(reg); | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | static bool signed_add_overflows(s64 a, s64 b) | 1802 | static bool signed_add_overflows(s64 a, s64 b) |
| 1778 | { | 1803 | { |
| 1779 | /* Do the add in u64, where overflow is well-defined */ | 1804 | /* Do the add in u64, where overflow is well-defined */ |
| @@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b) | |||
| 1794 | return res > a; | 1819 | return res > a; |
| 1795 | } | 1820 | } |
| 1796 | 1821 | ||
| 1822 | static bool check_reg_sane_offset(struct bpf_verifier_env *env, | ||
| 1823 | const struct bpf_reg_state *reg, | ||
| 1824 | enum bpf_reg_type type) | ||
| 1825 | { | ||
| 1826 | bool known = tnum_is_const(reg->var_off); | ||
| 1827 | s64 val = reg->var_off.value; | ||
| 1828 | s64 smin = reg->smin_value; | ||
| 1829 | |||
| 1830 | if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { | ||
| 1831 | verbose(env, "math between %s pointer and %lld is not allowed\n", | ||
| 1832 | reg_type_str[type], val); | ||
| 1833 | return false; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { | ||
| 1837 | verbose(env, "%s pointer offset %d is not allowed\n", | ||
| 1838 | reg_type_str[type], reg->off); | ||
| 1839 | return false; | ||
| 1840 | } | ||
| 1841 | |||
| 1842 | if (smin == S64_MIN) { | ||
| 1843 | verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", | ||
| 1844 | reg_type_str[type]); | ||
| 1845 | return false; | ||
| 1846 | } | ||
| 1847 | |||
| 1848 | if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { | ||
| 1849 | verbose(env, "value %lld makes %s pointer be out of bounds\n", | ||
| 1850 | smin, reg_type_str[type]); | ||
| 1851 | return false; | ||
| 1852 | } | ||
| 1853 | |||
| 1854 | return true; | ||
| 1855 | } | ||
| 1856 | |||
| 1797 | /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. | 1857 | /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. |
| 1798 | * Caller should also handle BPF_MOV case separately. | 1858 | * Caller should also handle BPF_MOV case separately. |
| 1799 | * If we return -EACCES, caller may want to try again treating pointer as a | 1859 | * If we return -EACCES, caller may want to try again treating pointer as a |
| @@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | |||
| 1830 | 1890 | ||
| 1831 | if (BPF_CLASS(insn->code) != BPF_ALU64) { | 1891 | if (BPF_CLASS(insn->code) != BPF_ALU64) { |
| 1832 | /* 32-bit ALU ops on pointers produce (meaningless) scalars */ | 1892 | /* 32-bit ALU ops on pointers produce (meaningless) scalars */ |
| 1833 | if (!env->allow_ptr_leaks) | 1893 | verbose(env, |
| 1834 | verbose(env, | 1894 | "R%d 32-bit pointer arithmetic prohibited\n", |
| 1835 | "R%d 32-bit pointer arithmetic prohibited\n", | 1895 | dst); |
| 1836 | dst); | ||
| 1837 | return -EACCES; | 1896 | return -EACCES; |
| 1838 | } | 1897 | } |
| 1839 | 1898 | ||
| 1840 | if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { | 1899 | if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { |
| 1841 | if (!env->allow_ptr_leaks) | 1900 | verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", |
| 1842 | verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", | 1901 | dst); |
| 1843 | dst); | ||
| 1844 | return -EACCES; | 1902 | return -EACCES; |
| 1845 | } | 1903 | } |
| 1846 | if (ptr_reg->type == CONST_PTR_TO_MAP) { | 1904 | if (ptr_reg->type == CONST_PTR_TO_MAP) { |
| 1847 | if (!env->allow_ptr_leaks) | 1905 | verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", |
| 1848 | verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", | 1906 | dst); |
| 1849 | dst); | ||
| 1850 | return -EACCES; | 1907 | return -EACCES; |
| 1851 | } | 1908 | } |
| 1852 | if (ptr_reg->type == PTR_TO_PACKET_END) { | 1909 | if (ptr_reg->type == PTR_TO_PACKET_END) { |
| 1853 | if (!env->allow_ptr_leaks) | 1910 | verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", |
| 1854 | verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", | 1911 | dst); |
| 1855 | dst); | ||
| 1856 | return -EACCES; | 1912 | return -EACCES; |
| 1857 | } | 1913 | } |
| 1858 | 1914 | ||
| @@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | |||
| 1862 | dst_reg->type = ptr_reg->type; | 1918 | dst_reg->type = ptr_reg->type; |
| 1863 | dst_reg->id = ptr_reg->id; | 1919 | dst_reg->id = ptr_reg->id; |
| 1864 | 1920 | ||
| 1921 | if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || | ||
| 1922 | !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) | ||
| 1923 | return -EINVAL; | ||
| 1924 | |||
| 1865 | switch (opcode) { | 1925 | switch (opcode) { |
| 1866 | case BPF_ADD: | 1926 | case BPF_ADD: |
| 1867 | /* We can take a fixed offset as long as it doesn't overflow | 1927 | /* We can take a fixed offset as long as it doesn't overflow |
| @@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | |||
| 1915 | case BPF_SUB: | 1975 | case BPF_SUB: |
| 1916 | if (dst_reg == off_reg) { | 1976 | if (dst_reg == off_reg) { |
| 1917 | /* scalar -= pointer. Creates an unknown scalar */ | 1977 | /* scalar -= pointer. Creates an unknown scalar */ |
| 1918 | if (!env->allow_ptr_leaks) | 1978 | verbose(env, "R%d tried to subtract pointer from scalar\n", |
| 1919 | verbose(env, "R%d tried to subtract pointer from scalar\n", | 1979 | dst); |
| 1920 | dst); | ||
| 1921 | return -EACCES; | 1980 | return -EACCES; |
| 1922 | } | 1981 | } |
| 1923 | /* We don't allow subtraction from FP, because (according to | 1982 | /* We don't allow subtraction from FP, because (according to |
| @@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | |||
| 1925 | * be able to deal with it. | 1984 | * be able to deal with it. |
| 1926 | */ | 1985 | */ |
| 1927 | if (ptr_reg->type == PTR_TO_STACK) { | 1986 | if (ptr_reg->type == PTR_TO_STACK) { |
| 1928 | if (!env->allow_ptr_leaks) | 1987 | verbose(env, "R%d subtraction from stack pointer prohibited\n", |
| 1929 | verbose(env, "R%d subtraction from stack pointer prohibited\n", | 1988 | dst); |
| 1930 | dst); | ||
| 1931 | return -EACCES; | 1989 | return -EACCES; |
| 1932 | } | 1990 | } |
| 1933 | if (known && (ptr_reg->off - smin_val == | 1991 | if (known && (ptr_reg->off - smin_val == |
| @@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | |||
| 1976 | case BPF_AND: | 2034 | case BPF_AND: |
| 1977 | case BPF_OR: | 2035 | case BPF_OR: |
| 1978 | case BPF_XOR: | 2036 | case BPF_XOR: |
| 1979 | /* bitwise ops on pointers are troublesome, prohibit for now. | 2037 | /* bitwise ops on pointers are troublesome, prohibit. */ |
| 1980 | * (However, in principle we could allow some cases, e.g. | 2038 | verbose(env, "R%d bitwise operator %s on pointer prohibited\n", |
| 1981 | * ptr &= ~3 which would reduce min_value by 3.) | 2039 | dst, bpf_alu_string[opcode >> 4]); |
| 1982 | */ | ||
| 1983 | if (!env->allow_ptr_leaks) | ||
| 1984 | verbose(env, "R%d bitwise operator %s on pointer prohibited\n", | ||
| 1985 | dst, bpf_alu_string[opcode >> 4]); | ||
| 1986 | return -EACCES; | 2040 | return -EACCES; |
| 1987 | default: | 2041 | default: |
| 1988 | /* other operators (e.g. MUL,LSH) produce non-pointer results */ | 2042 | /* other operators (e.g. MUL,LSH) produce non-pointer results */ |
| 1989 | if (!env->allow_ptr_leaks) | 2043 | verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", |
| 1990 | verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", | 2044 | dst, bpf_alu_string[opcode >> 4]); |
| 1991 | dst, bpf_alu_string[opcode >> 4]); | ||
| 1992 | return -EACCES; | 2045 | return -EACCES; |
| 1993 | } | 2046 | } |
| 1994 | 2047 | ||
| 2048 | if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) | ||
| 2049 | return -EINVAL; | ||
| 2050 | |||
| 1995 | __update_reg_bounds(dst_reg); | 2051 | __update_reg_bounds(dst_reg); |
| 1996 | __reg_deduce_bounds(dst_reg); | 2052 | __reg_deduce_bounds(dst_reg); |
| 1997 | __reg_bound_offset(dst_reg); | 2053 | __reg_bound_offset(dst_reg); |
| 1998 | return 0; | 2054 | return 0; |
| 1999 | } | 2055 | } |
| 2000 | 2056 | ||
| 2057 | /* WARNING: This function does calculations on 64-bit values, but the actual | ||
| 2058 | * execution may occur on 32-bit values. Therefore, things like bitshifts | ||
| 2059 | * need extra checks in the 32-bit case. | ||
| 2060 | */ | ||
| 2001 | static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | 2061 | static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
| 2002 | struct bpf_insn *insn, | 2062 | struct bpf_insn *insn, |
| 2003 | struct bpf_reg_state *dst_reg, | 2063 | struct bpf_reg_state *dst_reg, |
| @@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2008 | bool src_known, dst_known; | 2068 | bool src_known, dst_known; |
| 2009 | s64 smin_val, smax_val; | 2069 | s64 smin_val, smax_val; |
| 2010 | u64 umin_val, umax_val; | 2070 | u64 umin_val, umax_val; |
| 2071 | u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; | ||
| 2011 | 2072 | ||
| 2012 | if (BPF_CLASS(insn->code) != BPF_ALU64) { | ||
| 2013 | /* 32-bit ALU ops are (32,32)->64 */ | ||
| 2014 | coerce_reg_to_32(dst_reg); | ||
| 2015 | coerce_reg_to_32(&src_reg); | ||
| 2016 | } | ||
| 2017 | smin_val = src_reg.smin_value; | 2073 | smin_val = src_reg.smin_value; |
| 2018 | smax_val = src_reg.smax_value; | 2074 | smax_val = src_reg.smax_value; |
| 2019 | umin_val = src_reg.umin_value; | 2075 | umin_val = src_reg.umin_value; |
| @@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2021 | src_known = tnum_is_const(src_reg.var_off); | 2077 | src_known = tnum_is_const(src_reg.var_off); |
| 2022 | dst_known = tnum_is_const(dst_reg->var_off); | 2078 | dst_known = tnum_is_const(dst_reg->var_off); |
| 2023 | 2079 | ||
| 2080 | if (!src_known && | ||
| 2081 | opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { | ||
| 2082 | __mark_reg_unknown(dst_reg); | ||
| 2083 | return 0; | ||
| 2084 | } | ||
| 2085 | |||
| 2024 | switch (opcode) { | 2086 | switch (opcode) { |
| 2025 | case BPF_ADD: | 2087 | case BPF_ADD: |
| 2026 | if (signed_add_overflows(dst_reg->smin_value, smin_val) || | 2088 | if (signed_add_overflows(dst_reg->smin_value, smin_val) || |
| @@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2149 | __update_reg_bounds(dst_reg); | 2211 | __update_reg_bounds(dst_reg); |
| 2150 | break; | 2212 | break; |
| 2151 | case BPF_LSH: | 2213 | case BPF_LSH: |
| 2152 | if (umax_val > 63) { | 2214 | if (umax_val >= insn_bitness) { |
| 2153 | /* Shifts greater than 63 are undefined. This includes | 2215 | /* Shifts greater than 31 or 63 are undefined. |
| 2154 | * shifts by a negative number. | 2216 | * This includes shifts by a negative number. |
| 2155 | */ | 2217 | */ |
| 2156 | mark_reg_unknown(env, regs, insn->dst_reg); | 2218 | mark_reg_unknown(env, regs, insn->dst_reg); |
| 2157 | break; | 2219 | break; |
| @@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2177 | __update_reg_bounds(dst_reg); | 2239 | __update_reg_bounds(dst_reg); |
| 2178 | break; | 2240 | break; |
| 2179 | case BPF_RSH: | 2241 | case BPF_RSH: |
| 2180 | if (umax_val > 63) { | 2242 | if (umax_val >= insn_bitness) { |
| 2181 | /* Shifts greater than 63 are undefined. This includes | 2243 | /* Shifts greater than 31 or 63 are undefined. |
| 2182 | * shifts by a negative number. | 2244 | * This includes shifts by a negative number. |
| 2183 | */ | 2245 | */ |
| 2184 | mark_reg_unknown(env, regs, insn->dst_reg); | 2246 | mark_reg_unknown(env, regs, insn->dst_reg); |
| 2185 | break; | 2247 | break; |
| 2186 | } | 2248 | } |
| 2187 | /* BPF_RSH is an unsigned shift, so make the appropriate casts */ | 2249 | /* BPF_RSH is an unsigned shift. If the value in dst_reg might |
| 2188 | if (dst_reg->smin_value < 0) { | 2250 | * be negative, then either: |
| 2189 | if (umin_val) { | 2251 | * 1) src_reg might be zero, so the sign bit of the result is |
| 2190 | /* Sign bit will be cleared */ | 2252 | * unknown, so we lose our signed bounds |
| 2191 | dst_reg->smin_value = 0; | 2253 | * 2) it's known negative, thus the unsigned bounds capture the |
| 2192 | } else { | 2254 | * signed bounds |
| 2193 | /* Lost sign bit information */ | 2255 | * 3) the signed bounds cross zero, so they tell us nothing |
| 2194 | dst_reg->smin_value = S64_MIN; | 2256 | * about the result |
| 2195 | dst_reg->smax_value = S64_MAX; | 2257 | * If the value in dst_reg is known nonnegative, then again the |
| 2196 | } | 2258 | * unsigned bounts capture the signed bounds. |
| 2197 | } else { | 2259 | * Thus, in all cases it suffices to blow away our signed bounds |
| 2198 | dst_reg->smin_value = | 2260 | * and rely on inferring new ones from the unsigned bounds and |
| 2199 | (u64)(dst_reg->smin_value) >> umax_val; | 2261 | * var_off of the result. |
| 2200 | } | 2262 | */ |
| 2263 | dst_reg->smin_value = S64_MIN; | ||
| 2264 | dst_reg->smax_value = S64_MAX; | ||
| 2201 | if (src_known) | 2265 | if (src_known) |
| 2202 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, | 2266 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, |
| 2203 | umin_val); | 2267 | umin_val); |
| @@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2213 | break; | 2277 | break; |
| 2214 | } | 2278 | } |
| 2215 | 2279 | ||
| 2280 | if (BPF_CLASS(insn->code) != BPF_ALU64) { | ||
| 2281 | /* 32-bit ALU ops are (32,32)->32 */ | ||
| 2282 | coerce_reg_to_size(dst_reg, 4); | ||
| 2283 | coerce_reg_to_size(&src_reg, 4); | ||
| 2284 | } | ||
| 2285 | |||
| 2216 | __reg_deduce_bounds(dst_reg); | 2286 | __reg_deduce_bounds(dst_reg); |
| 2217 | __reg_bound_offset(dst_reg); | 2287 | __reg_bound_offset(dst_reg); |
| 2218 | return 0; | 2288 | return 0; |
| @@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, | |||
| 2227 | struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; | 2297 | struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; |
| 2228 | struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; | 2298 | struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; |
| 2229 | u8 opcode = BPF_OP(insn->code); | 2299 | u8 opcode = BPF_OP(insn->code); |
| 2230 | int rc; | ||
| 2231 | 2300 | ||
| 2232 | dst_reg = &regs[insn->dst_reg]; | 2301 | dst_reg = &regs[insn->dst_reg]; |
| 2233 | src_reg = NULL; | 2302 | src_reg = NULL; |
| @@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, | |||
| 2238 | if (src_reg->type != SCALAR_VALUE) { | 2307 | if (src_reg->type != SCALAR_VALUE) { |
| 2239 | if (dst_reg->type != SCALAR_VALUE) { | 2308 | if (dst_reg->type != SCALAR_VALUE) { |
| 2240 | /* Combining two pointers by any ALU op yields | 2309 | /* Combining two pointers by any ALU op yields |
| 2241 | * an arbitrary scalar. | 2310 | * an arbitrary scalar. Disallow all math except |
| 2311 | * pointer subtraction | ||
| 2242 | */ | 2312 | */ |
| 2243 | if (!env->allow_ptr_leaks) { | 2313 | if (opcode == BPF_SUB){ |
| 2244 | verbose(env, "R%d pointer %s pointer prohibited\n", | 2314 | mark_reg_unknown(env, regs, insn->dst_reg); |
| 2245 | insn->dst_reg, | 2315 | return 0; |
| 2246 | bpf_alu_string[opcode >> 4]); | ||
| 2247 | return -EACCES; | ||
| 2248 | } | 2316 | } |
| 2249 | mark_reg_unknown(env, regs, insn->dst_reg); | 2317 | verbose(env, "R%d pointer %s pointer prohibited\n", |
| 2250 | return 0; | 2318 | insn->dst_reg, |
| 2319 | bpf_alu_string[opcode >> 4]); | ||
| 2320 | return -EACCES; | ||
| 2251 | } else { | 2321 | } else { |
| 2252 | /* scalar += pointer | 2322 | /* scalar += pointer |
| 2253 | * This is legal, but we have to reverse our | 2323 | * This is legal, but we have to reverse our |
| 2254 | * src/dest handling in computing the range | 2324 | * src/dest handling in computing the range |
| 2255 | */ | 2325 | */ |
| 2256 | rc = adjust_ptr_min_max_vals(env, insn, | 2326 | return adjust_ptr_min_max_vals(env, insn, |
| 2257 | src_reg, dst_reg); | 2327 | src_reg, dst_reg); |
| 2258 | if (rc == -EACCES && env->allow_ptr_leaks) { | ||
| 2259 | /* scalar += unknown scalar */ | ||
| 2260 | __mark_reg_unknown(&off_reg); | ||
| 2261 | return adjust_scalar_min_max_vals( | ||
| 2262 | env, insn, | ||
| 2263 | dst_reg, off_reg); | ||
| 2264 | } | ||
| 2265 | return rc; | ||
| 2266 | } | 2328 | } |
| 2267 | } else if (ptr_reg) { | 2329 | } else if (ptr_reg) { |
| 2268 | /* pointer += scalar */ | 2330 | /* pointer += scalar */ |
| 2269 | rc = adjust_ptr_min_max_vals(env, insn, | 2331 | return adjust_ptr_min_max_vals(env, insn, |
| 2270 | dst_reg, src_reg); | 2332 | dst_reg, src_reg); |
| 2271 | if (rc == -EACCES && env->allow_ptr_leaks) { | ||
| 2272 | /* unknown scalar += scalar */ | ||
| 2273 | __mark_reg_unknown(dst_reg); | ||
| 2274 | return adjust_scalar_min_max_vals( | ||
| 2275 | env, insn, dst_reg, *src_reg); | ||
| 2276 | } | ||
| 2277 | return rc; | ||
| 2278 | } | 2333 | } |
| 2279 | } else { | 2334 | } else { |
| 2280 | /* Pretend the src is a reg with a known value, since we only | 2335 | /* Pretend the src is a reg with a known value, since we only |
| @@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, | |||
| 2283 | off_reg.type = SCALAR_VALUE; | 2338 | off_reg.type = SCALAR_VALUE; |
| 2284 | __mark_reg_known(&off_reg, insn->imm); | 2339 | __mark_reg_known(&off_reg, insn->imm); |
| 2285 | src_reg = &off_reg; | 2340 | src_reg = &off_reg; |
| 2286 | if (ptr_reg) { /* pointer += K */ | 2341 | if (ptr_reg) /* pointer += K */ |
| 2287 | rc = adjust_ptr_min_max_vals(env, insn, | 2342 | return adjust_ptr_min_max_vals(env, insn, |
| 2288 | ptr_reg, src_reg); | 2343 | ptr_reg, src_reg); |
| 2289 | if (rc == -EACCES && env->allow_ptr_leaks) { | ||
| 2290 | /* unknown scalar += K */ | ||
| 2291 | __mark_reg_unknown(dst_reg); | ||
| 2292 | return adjust_scalar_min_max_vals( | ||
| 2293 | env, insn, dst_reg, off_reg); | ||
| 2294 | } | ||
| 2295 | return rc; | ||
| 2296 | } | ||
| 2297 | } | 2344 | } |
| 2298 | 2345 | ||
| 2299 | /* Got here implies adding two SCALAR_VALUEs */ | 2346 | /* Got here implies adding two SCALAR_VALUEs */ |
| @@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
| 2390 | return -EACCES; | 2437 | return -EACCES; |
| 2391 | } | 2438 | } |
| 2392 | mark_reg_unknown(env, regs, insn->dst_reg); | 2439 | mark_reg_unknown(env, regs, insn->dst_reg); |
| 2393 | /* high 32 bits are known zero. */ | 2440 | coerce_reg_to_size(&regs[insn->dst_reg], 4); |
| 2394 | regs[insn->dst_reg].var_off = tnum_cast( | ||
| 2395 | regs[insn->dst_reg].var_off, 4); | ||
| 2396 | __update_reg_bounds(&regs[insn->dst_reg]); | ||
| 2397 | } | 2441 | } |
| 2398 | } else { | 2442 | } else { |
| 2399 | /* case: R = imm | 2443 | /* case: R = imm |
| 2400 | * remember the value we stored into this reg | 2444 | * remember the value we stored into this reg |
| 2401 | */ | 2445 | */ |
| 2402 | regs[insn->dst_reg].type = SCALAR_VALUE; | 2446 | regs[insn->dst_reg].type = SCALAR_VALUE; |
| 2403 | __mark_reg_known(regs + insn->dst_reg, insn->imm); | 2447 | if (BPF_CLASS(insn->code) == BPF_ALU64) { |
| 2448 | __mark_reg_known(regs + insn->dst_reg, | ||
| 2449 | insn->imm); | ||
| 2450 | } else { | ||
| 2451 | __mark_reg_known(regs + insn->dst_reg, | ||
| 2452 | (u32)insn->imm); | ||
| 2453 | } | ||
| 2404 | } | 2454 | } |
| 2405 | 2455 | ||
| 2406 | } else if (opcode > BPF_END) { | 2456 | } else if (opcode > BPF_END) { |
| @@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, | |||
| 3431 | return range_within(rold, rcur) && | 3481 | return range_within(rold, rcur) && |
| 3432 | tnum_in(rold->var_off, rcur->var_off); | 3482 | tnum_in(rold->var_off, rcur->var_off); |
| 3433 | } else { | 3483 | } else { |
| 3434 | /* if we knew anything about the old value, we're not | 3484 | /* We're trying to use a pointer in place of a scalar. |
| 3435 | * equal, because we can't know anything about the | 3485 | * Even if the scalar was unbounded, this could lead to |
| 3436 | * scalar value of the pointer in the new value. | 3486 | * pointer leaks because scalars are allowed to leak |
| 3487 | * while pointers are not. We could make this safe in | ||
| 3488 | * special cases if root is calling us, but it's | ||
| 3489 | * probably not worth the hassle. | ||
| 3437 | */ | 3490 | */ |
| 3438 | return rold->umin_value == 0 && | 3491 | return false; |
| 3439 | rold->umax_value == U64_MAX && | ||
| 3440 | rold->smin_value == S64_MIN && | ||
| 3441 | rold->smax_value == S64_MAX && | ||
| 3442 | tnum_is_unknown(rold->var_off); | ||
| 3443 | } | 3492 | } |
| 3444 | case PTR_TO_MAP_VALUE: | 3493 | case PTR_TO_MAP_VALUE: |
| 3445 | /* If the new min/max/var_off satisfy the old ones and | 3494 | /* If the new min/max/var_off satisfy the old ones and |
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index 5f780d8f6a9d..9caeda610249 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c | |||
| @@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v) | |||
| 50 | 50 | ||
| 51 | spin_lock_irq(&css_set_lock); | 51 | spin_lock_irq(&css_set_lock); |
| 52 | rcu_read_lock(); | 52 | rcu_read_lock(); |
| 53 | cset = rcu_dereference(current->cgroups); | 53 | cset = task_css_set(current); |
| 54 | refcnt = refcount_read(&cset->refcount); | 54 | refcnt = refcount_read(&cset->refcount); |
| 55 | seq_printf(seq, "css_set %pK %d", cset, refcnt); | 55 | seq_printf(seq, "css_set %pK %d", cset, refcnt); |
| 56 | if (refcnt > cset->nr_tasks) | 56 | if (refcnt > cset->nr_tasks) |
| @@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) | |||
| 96 | 96 | ||
| 97 | spin_lock_irq(&css_set_lock); | 97 | spin_lock_irq(&css_set_lock); |
| 98 | rcu_read_lock(); | 98 | rcu_read_lock(); |
| 99 | cset = rcu_dereference(current->cgroups); | 99 | cset = task_css_set(current); |
| 100 | list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { | 100 | list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { |
| 101 | struct cgroup *c = link->cgrp; | 101 | struct cgroup *c = link->cgrp; |
| 102 | 102 | ||
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c index 133b465691d6..1e111dd455c4 100644 --- a/kernel/cgroup/stat.c +++ b/kernel/cgroup/stat.c | |||
| @@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp) | |||
| 296 | } | 296 | } |
| 297 | 297 | ||
| 298 | /* ->updated_children list is self terminated */ | 298 | /* ->updated_children list is self terminated */ |
| 299 | for_each_possible_cpu(cpu) | 299 | for_each_possible_cpu(cpu) { |
| 300 | cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp; | 300 | struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); |
| 301 | |||
| 302 | cstat->updated_children = cgrp; | ||
| 303 | u64_stats_init(&cstat->sync); | ||
| 304 | } | ||
| 301 | 305 | ||
| 302 | prev_cputime_init(&cgrp->stat.prev_cputime); | 306 | prev_cputime_init(&cgrp->stat.prev_cputime); |
| 303 | 307 | ||
diff --git a/kernel/cpu.c b/kernel/cpu.c index 41376c3ac93b..53f7dc65f9a3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map = | |||
| 80 | STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); | 80 | STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); |
| 81 | 81 | ||
| 82 | 82 | ||
| 83 | static void inline cpuhp_lock_acquire(bool bringup) | 83 | static inline void cpuhp_lock_acquire(bool bringup) |
| 84 | { | 84 | { |
| 85 | lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); | 85 | lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | static void inline cpuhp_lock_release(bool bringup) | 88 | static inline void cpuhp_lock_release(bool bringup) |
| 89 | { | 89 | { |
| 90 | lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); | 90 | lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); |
| 91 | } | 91 | } |
| 92 | #else | 92 | #else |
| 93 | 93 | ||
| 94 | static void inline cpuhp_lock_acquire(bool bringup) { } | 94 | static inline void cpuhp_lock_acquire(bool bringup) { } |
| 95 | static void inline cpuhp_lock_release(bool bringup) { } | 95 | static inline void cpuhp_lock_release(bool bringup) { } |
| 96 | 96 | ||
| 97 | #endif | 97 | #endif |
| 98 | 98 | ||
| @@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { | |||
| 1277 | * before blk_mq_queue_reinit_notify() from notify_dead(), | 1277 | * before blk_mq_queue_reinit_notify() from notify_dead(), |
| 1278 | * otherwise a RCU stall occurs. | 1278 | * otherwise a RCU stall occurs. |
| 1279 | */ | 1279 | */ |
| 1280 | [CPUHP_TIMERS_DEAD] = { | 1280 | [CPUHP_TIMERS_PREPARE] = { |
| 1281 | .name = "timers:dead", | 1281 | .name = "timers:dead", |
| 1282 | .startup.single = NULL, | 1282 | .startup.single = timers_prepare_cpu, |
| 1283 | .teardown.single = timers_dead_cpu, | 1283 | .teardown.single = timers_dead_cpu, |
| 1284 | }, | 1284 | }, |
| 1285 | /* Kicks the plugged cpu into life */ | 1285 | /* Kicks the plugged cpu into life */ |
diff --git a/kernel/exit.c b/kernel/exit.c index 6b4298a41167..df0c91d5606c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -1755,3 +1755,11 @@ Efault: | |||
| 1755 | return -EFAULT; | 1755 | return -EFAULT; |
| 1756 | } | 1756 | } |
| 1757 | #endif | 1757 | #endif |
| 1758 | |||
| 1759 | __weak void abort(void) | ||
| 1760 | { | ||
| 1761 | BUG(); | ||
| 1762 | |||
| 1763 | /* if that doesn't kill us, halt */ | ||
| 1764 | panic("Oops failed to kill thread"); | ||
| 1765 | } | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 432eadf6b58c..2295fc69717f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, | |||
| 721 | goto out; | 721 | goto out; |
| 722 | } | 722 | } |
| 723 | /* a new mm has just been created */ | 723 | /* a new mm has just been created */ |
| 724 | arch_dup_mmap(oldmm, mm); | 724 | retval = arch_dup_mmap(oldmm, mm); |
| 725 | retval = 0; | ||
| 726 | out: | 725 | out: |
| 727 | up_write(&mm->mmap_sem); | 726 | up_write(&mm->mmap_sem); |
| 728 | flush_tlb_mm(oldmm); | 727 | flush_tlb_mm(oldmm); |
diff --git a/kernel/groups.c b/kernel/groups.c index e357bc800111..daae2f2dc6d4 100644 --- a/kernel/groups.c +++ b/kernel/groups.c | |||
| @@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const void *_b) | |||
| 86 | return gid_gt(a, b) - gid_lt(a, b); | 86 | return gid_gt(a, b) - gid_lt(a, b); |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | static void groups_sort(struct group_info *group_info) | 89 | void groups_sort(struct group_info *group_info) |
| 90 | { | 90 | { |
| 91 | sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid), | 91 | sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid), |
| 92 | gid_cmp, NULL); | 92 | gid_cmp, NULL); |
| 93 | } | 93 | } |
| 94 | EXPORT_SYMBOL(groups_sort); | ||
| 94 | 95 | ||
| 95 | /* a simple bsearch */ | 96 | /* a simple bsearch */ |
| 96 | int groups_search(const struct group_info *group_info, kgid_t grp) | 97 | int groups_search(const struct group_info *group_info, kgid_t grp) |
| @@ -122,7 +123,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp) | |||
| 122 | void set_groups(struct cred *new, struct group_info *group_info) | 123 | void set_groups(struct cred *new, struct group_info *group_info) |
| 123 | { | 124 | { |
| 124 | put_group_info(new->group_info); | 125 | put_group_info(new->group_info); |
| 125 | groups_sort(group_info); | ||
| 126 | get_group_info(group_info); | 126 | get_group_info(group_info); |
| 127 | new->group_info = group_info; | 127 | new->group_info = group_info; |
| 128 | } | 128 | } |
| @@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) | |||
| 206 | return retval; | 206 | return retval; |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | groups_sort(group_info); | ||
| 209 | retval = set_current_groups(group_info); | 210 | retval = set_current_groups(group_info); |
| 210 | put_group_info(group_info); | 211 | put_group_info(group_info); |
| 211 | 212 | ||
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 17f05ef8f575..e4d3819a91cc 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h | |||
| @@ -12,6 +12,11 @@ | |||
| 12 | 12 | ||
| 13 | static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | 13 | static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) |
| 14 | { | 14 | { |
| 15 | static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); | ||
| 16 | |||
| 17 | if (!__ratelimit(&ratelimit)) | ||
| 18 | return; | ||
| 19 | |||
| 15 | printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", | 20 | printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", |
| 16 | irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); | 21 | irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); |
| 17 | printk("->handle_irq(): %p, ", desc->handle_irq); | 22 | printk("->handle_irq(): %p, ", desc->handle_irq); |
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 7f608ac39653..acfaaef8672a 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c | |||
| @@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = { | |||
| 113 | BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), | 113 | BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), |
| 114 | BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), | 114 | BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), |
| 115 | BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), | 115 | BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), |
| 116 | BIT_MASK_DESCR(IRQD_CAN_RESERVE), | ||
| 116 | 117 | ||
| 117 | BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), | 118 | BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), |
| 118 | 119 | ||
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c26c5bb6b491..508c03dfef25 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
| @@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) | |||
| 364 | EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); | 364 | EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); |
| 365 | 365 | ||
| 366 | /* | 366 | /* |
| 367 | * Separate lockdep class for interrupt chip which can nest irq_desc | 367 | * Separate lockdep classes for interrupt chip which can nest irq_desc |
| 368 | * lock. | 368 | * lock and request mutex. |
| 369 | */ | 369 | */ |
| 370 | static struct lock_class_key irq_nested_lock_class; | 370 | static struct lock_class_key irq_nested_lock_class; |
| 371 | static struct lock_class_key irq_nested_request_class; | ||
| 371 | 372 | ||
| 372 | /* | 373 | /* |
| 373 | * irq_map_generic_chip - Map a generic chip for an irq domain | 374 | * irq_map_generic_chip - Map a generic chip for an irq domain |
| @@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, | |||
| 409 | set_bit(idx, &gc->installed); | 410 | set_bit(idx, &gc->installed); |
| 410 | 411 | ||
| 411 | if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) | 412 | if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) |
| 412 | irq_set_lockdep_class(virq, &irq_nested_lock_class); | 413 | irq_set_lockdep_class(virq, &irq_nested_lock_class, |
| 414 | &irq_nested_request_class); | ||
| 413 | 415 | ||
| 414 | if (chip->irq_calc_mask) | 416 | if (chip->irq_calc_mask) |
| 415 | chip->irq_calc_mask(data); | 417 | chip->irq_calc_mask(data); |
| @@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
| 479 | continue; | 481 | continue; |
| 480 | 482 | ||
| 481 | if (flags & IRQ_GC_INIT_NESTED_LOCK) | 483 | if (flags & IRQ_GC_INIT_NESTED_LOCK) |
| 482 | irq_set_lockdep_class(i, &irq_nested_lock_class); | 484 | irq_set_lockdep_class(i, &irq_nested_lock_class, |
| 485 | &irq_nested_request_class); | ||
| 483 | 486 | ||
| 484 | if (!(flags & IRQ_GC_NO_MASK)) { | 487 | if (!(flags & IRQ_GC_NO_MASK)) { |
| 485 | struct irq_data *d = irq_get_irq_data(i); | 488 | struct irq_data *d = irq_get_irq_data(i); |
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 07d08ca701ec..ab19371eab9b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
| @@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) | |||
| 440 | #endif /* !CONFIG_GENERIC_PENDING_IRQ */ | 440 | #endif /* !CONFIG_GENERIC_PENDING_IRQ */ |
| 441 | 441 | ||
| 442 | #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) | 442 | #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) |
| 443 | static inline int irq_domain_activate_irq(struct irq_data *data, bool early) | 443 | static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) |
| 444 | { | 444 | { |
| 445 | irqd_set_activated(data); | 445 | irqd_set_activated(data); |
| 446 | return 0; | 446 | return 0; |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4f4f60015e8a..62068ad46930 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data) | |||
| 1693 | } | 1693 | } |
| 1694 | } | 1694 | } |
| 1695 | 1695 | ||
| 1696 | static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) | 1696 | static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) |
| 1697 | { | 1697 | { |
| 1698 | int ret = 0; | 1698 | int ret = 0; |
| 1699 | 1699 | ||
| @@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) | |||
| 1702 | 1702 | ||
| 1703 | if (irqd->parent_data) | 1703 | if (irqd->parent_data) |
| 1704 | ret = __irq_domain_activate_irq(irqd->parent_data, | 1704 | ret = __irq_domain_activate_irq(irqd->parent_data, |
| 1705 | early); | 1705 | reserve); |
| 1706 | if (!ret && domain->ops->activate) { | 1706 | if (!ret && domain->ops->activate) { |
| 1707 | ret = domain->ops->activate(domain, irqd, early); | 1707 | ret = domain->ops->activate(domain, irqd, reserve); |
| 1708 | /* Rollback in case of error */ | 1708 | /* Rollback in case of error */ |
| 1709 | if (ret && irqd->parent_data) | 1709 | if (ret && irqd->parent_data) |
| 1710 | __irq_domain_deactivate_irq(irqd->parent_data); | 1710 | __irq_domain_deactivate_irq(irqd->parent_data); |
| @@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) | |||
| 1716 | /** | 1716 | /** |
| 1717 | * irq_domain_activate_irq - Call domain_ops->activate recursively to activate | 1717 | * irq_domain_activate_irq - Call domain_ops->activate recursively to activate |
| 1718 | * interrupt | 1718 | * interrupt |
| 1719 | * @irq_data: outermost irq_data associated with interrupt | 1719 | * @irq_data: Outermost irq_data associated with interrupt |
| 1720 | * @reserve: If set only reserve an interrupt vector instead of assigning one | ||
| 1720 | * | 1721 | * |
| 1721 | * This is the second step to call domain_ops->activate to program interrupt | 1722 | * This is the second step to call domain_ops->activate to program interrupt |
| 1722 | * controllers, so the interrupt could actually get delivered. | 1723 | * controllers, so the interrupt could actually get delivered. |
| 1723 | */ | 1724 | */ |
| 1724 | int irq_domain_activate_irq(struct irq_data *irq_data, bool early) | 1725 | int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) |
| 1725 | { | 1726 | { |
| 1726 | int ret = 0; | 1727 | int ret = 0; |
| 1727 | 1728 | ||
| 1728 | if (!irqd_is_activated(irq_data)) | 1729 | if (!irqd_is_activated(irq_data)) |
| 1729 | ret = __irq_domain_activate_irq(irq_data, early); | 1730 | ret = __irq_domain_activate_irq(irq_data, reserve); |
| 1730 | if (!ret) | 1731 | if (!ret) |
| 1731 | irqd_set_activated(irq_data); | 1732 | irqd_set_activated(irq_data); |
| 1732 | return ret; | 1733 | return ret; |
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index edb987b2c58d..2f3c4f5382cc 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c | |||
| @@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, | |||
| 339 | return ret; | 339 | return ret; |
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | /* | ||
| 343 | * Carefully check whether the device can use reservation mode. If | ||
| 344 | * reservation mode is enabled then the early activation will assign a | ||
| 345 | * dummy vector to the device. If the PCI/MSI device does not support | ||
| 346 | * masking of the entry then this can result in spurious interrupts when | ||
| 347 | * the device driver is not absolutely careful. But even then a malfunction | ||
| 348 | * of the hardware could result in a spurious interrupt on the dummy vector | ||
| 349 | * and render the device unusable. If the entry can be masked then the core | ||
| 350 | * logic will prevent the spurious interrupt and reservation mode can be | ||
| 351 | * used. For now reservation mode is restricted to PCI/MSI. | ||
| 352 | */ | ||
| 353 | static bool msi_check_reservation_mode(struct irq_domain *domain, | ||
| 354 | struct msi_domain_info *info, | ||
| 355 | struct device *dev) | ||
| 356 | { | ||
| 357 | struct msi_desc *desc; | ||
| 358 | |||
| 359 | if (domain->bus_token != DOMAIN_BUS_PCI_MSI) | ||
| 360 | return false; | ||
| 361 | |||
| 362 | if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) | ||
| 363 | return false; | ||
| 364 | |||
| 365 | if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask) | ||
| 366 | return false; | ||
| 367 | |||
| 368 | /* | ||
| 369 | * Checking the first MSI descriptor is sufficient. MSIX supports | ||
| 370 | * masking and MSI does so when the maskbit is set. | ||
| 371 | */ | ||
| 372 | desc = first_msi_entry(dev); | ||
| 373 | return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit; | ||
| 374 | } | ||
| 375 | |||
| 342 | /** | 376 | /** |
| 343 | * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain | 377 | * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain |
| 344 | * @domain: The domain to allocate from | 378 | * @domain: The domain to allocate from |
| @@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, | |||
| 353 | { | 387 | { |
| 354 | struct msi_domain_info *info = domain->host_data; | 388 | struct msi_domain_info *info = domain->host_data; |
| 355 | struct msi_domain_ops *ops = info->ops; | 389 | struct msi_domain_ops *ops = info->ops; |
| 356 | msi_alloc_info_t arg; | 390 | struct irq_data *irq_data; |
| 357 | struct msi_desc *desc; | 391 | struct msi_desc *desc; |
| 392 | msi_alloc_info_t arg; | ||
| 358 | int i, ret, virq; | 393 | int i, ret, virq; |
| 394 | bool can_reserve; | ||
| 359 | 395 | ||
| 360 | ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); | 396 | ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); |
| 361 | if (ret) | 397 | if (ret) |
| @@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, | |||
| 385 | if (ops->msi_finish) | 421 | if (ops->msi_finish) |
| 386 | ops->msi_finish(&arg, 0); | 422 | ops->msi_finish(&arg, 0); |
| 387 | 423 | ||
| 424 | can_reserve = msi_check_reservation_mode(domain, info, dev); | ||
| 425 | |||
| 388 | for_each_msi_entry(desc, dev) { | 426 | for_each_msi_entry(desc, dev) { |
| 389 | virq = desc->irq; | 427 | virq = desc->irq; |
| 390 | if (desc->nvec_used == 1) | 428 | if (desc->nvec_used == 1) |
| @@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, | |||
| 397 | * the MSI entries before the PCI layer enables MSI in the | 435 | * the MSI entries before the PCI layer enables MSI in the |
| 398 | * card. Otherwise the card latches a random msi message. | 436 | * card. Otherwise the card latches a random msi message. |
| 399 | */ | 437 | */ |
| 400 | if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { | 438 | if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) |
| 401 | struct irq_data *irq_data; | 439 | continue; |
| 402 | 440 | ||
| 441 | irq_data = irq_domain_get_irq_data(domain, desc->irq); | ||
| 442 | if (!can_reserve) | ||
| 443 | irqd_clr_can_reserve(irq_data); | ||
| 444 | ret = irq_domain_activate_irq(irq_data, can_reserve); | ||
| 445 | if (ret) | ||
| 446 | goto cleanup; | ||
| 447 | } | ||
| 448 | |||
| 449 | /* | ||
| 450 | * If these interrupts use reservation mode, clear the activated bit | ||
| 451 | * so request_irq() will assign the final vector. | ||
| 452 | */ | ||
| 453 | if (can_reserve) { | ||
| 454 | for_each_msi_entry(desc, dev) { | ||
| 403 | irq_data = irq_domain_get_irq_data(domain, desc->irq); | 455 | irq_data = irq_domain_get_irq_data(domain, desc->irq); |
| 404 | ret = irq_domain_activate_irq(irq_data, true); | 456 | irqd_clr_activated(irq_data); |
| 405 | if (ret) | ||
| 406 | goto cleanup; | ||
| 407 | if (info->flags & MSI_FLAG_MUST_REACTIVATE) | ||
| 408 | irqd_clr_activated(irq_data); | ||
| 409 | } | 457 | } |
| 410 | } | 458 | } |
| 411 | return 0; | 459 | return 0; |
diff --git a/kernel/kcov.c b/kernel/kcov.c index 15f33faf4013..7594c033d98a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c | |||
| @@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) | |||
| 157 | } | 157 | } |
| 158 | EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); | 158 | EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); |
| 159 | 159 | ||
| 160 | void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) | 160 | void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) |
| 161 | { | 161 | { |
| 162 | write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); | 162 | write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); |
| 163 | } | 163 | } |
| @@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) | |||
| 183 | } | 183 | } |
| 184 | EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); | 184 | EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); |
| 185 | 185 | ||
| 186 | void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) | 186 | void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) |
| 187 | { | 187 | { |
| 188 | write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, | 188 | write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, |
| 189 | _RET_IP_); | 189 | _RET_IP_); |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 670d8d7d8087..5fa1324a4f29 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
| @@ -57,10 +57,6 @@ | |||
| 57 | #define CREATE_TRACE_POINTS | 57 | #define CREATE_TRACE_POINTS |
| 58 | #include <trace/events/lock.h> | 58 | #include <trace/events/lock.h> |
| 59 | 59 | ||
| 60 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 61 | #include <linux/slab.h> | ||
| 62 | #endif | ||
| 63 | |||
| 64 | #ifdef CONFIG_PROVE_LOCKING | 60 | #ifdef CONFIG_PROVE_LOCKING |
| 65 | int prove_locking = 1; | 61 | int prove_locking = 1; |
| 66 | module_param(prove_locking, int, 0644); | 62 | module_param(prove_locking, int, 0644); |
| @@ -75,19 +71,6 @@ module_param(lock_stat, int, 0644); | |||
| 75 | #define lock_stat 0 | 71 | #define lock_stat 0 |
| 76 | #endif | 72 | #endif |
| 77 | 73 | ||
| 78 | #ifdef CONFIG_BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK | ||
| 79 | static int crossrelease_fullstack = 1; | ||
| 80 | #else | ||
| 81 | static int crossrelease_fullstack; | ||
| 82 | #endif | ||
| 83 | static int __init allow_crossrelease_fullstack(char *str) | ||
| 84 | { | ||
| 85 | crossrelease_fullstack = 1; | ||
| 86 | return 0; | ||
| 87 | } | ||
| 88 | |||
| 89 | early_param("crossrelease_fullstack", allow_crossrelease_fullstack); | ||
| 90 | |||
| 91 | /* | 74 | /* |
| 92 | * lockdep_lock: protects the lockdep graph, the hashes and the | 75 | * lockdep_lock: protects the lockdep graph, the hashes and the |
| 93 | * class/list/hash allocators. | 76 | * class/list/hash allocators. |
| @@ -740,18 +723,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
| 740 | return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); | 723 | return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); |
| 741 | } | 724 | } |
| 742 | 725 | ||
| 743 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 744 | static void cross_init(struct lockdep_map *lock, int cross); | ||
| 745 | static int cross_lock(struct lockdep_map *lock); | ||
| 746 | static int lock_acquire_crosslock(struct held_lock *hlock); | ||
| 747 | static int lock_release_crosslock(struct lockdep_map *lock); | ||
| 748 | #else | ||
| 749 | static inline void cross_init(struct lockdep_map *lock, int cross) {} | ||
| 750 | static inline int cross_lock(struct lockdep_map *lock) { return 0; } | ||
| 751 | static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; } | ||
| 752 | static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; } | ||
| 753 | #endif | ||
| 754 | |||
| 755 | /* | 726 | /* |
| 756 | * Register a lock's class in the hash-table, if the class is not present | 727 | * Register a lock's class in the hash-table, if the class is not present |
| 757 | * yet. Otherwise we look it up. We cache the result in the lock object | 728 | * yet. Otherwise we look it up. We cache the result in the lock object |
| @@ -1151,41 +1122,22 @@ print_circular_lock_scenario(struct held_lock *src, | |||
| 1151 | printk(KERN_CONT "\n\n"); | 1122 | printk(KERN_CONT "\n\n"); |
| 1152 | } | 1123 | } |
| 1153 | 1124 | ||
| 1154 | if (cross_lock(tgt->instance)) { | 1125 | printk(" Possible unsafe locking scenario:\n\n"); |
| 1155 | printk(" Possible unsafe locking scenario by crosslock:\n\n"); | 1126 | printk(" CPU0 CPU1\n"); |
| 1156 | printk(" CPU0 CPU1\n"); | 1127 | printk(" ---- ----\n"); |
| 1157 | printk(" ---- ----\n"); | 1128 | printk(" lock("); |
| 1158 | printk(" lock("); | 1129 | __print_lock_name(target); |
| 1159 | __print_lock_name(parent); | 1130 | printk(KERN_CONT ");\n"); |
| 1160 | printk(KERN_CONT ");\n"); | 1131 | printk(" lock("); |
| 1161 | printk(" lock("); | 1132 | __print_lock_name(parent); |
| 1162 | __print_lock_name(target); | 1133 | printk(KERN_CONT ");\n"); |
| 1163 | printk(KERN_CONT ");\n"); | 1134 | printk(" lock("); |
| 1164 | printk(" lock("); | 1135 | __print_lock_name(target); |
| 1165 | __print_lock_name(source); | 1136 | printk(KERN_CONT ");\n"); |
| 1166 | printk(KERN_CONT ");\n"); | 1137 | printk(" lock("); |
| 1167 | printk(" unlock("); | 1138 | __print_lock_name(source); |
| 1168 | __print_lock_name(target); | 1139 | printk(KERN_CONT ");\n"); |
| 1169 | printk(KERN_CONT ");\n"); | 1140 | printk("\n *** DEADLOCK ***\n\n"); |
| 1170 | printk("\n *** DEADLOCK ***\n\n"); | ||
| 1171 | } else { | ||
| 1172 | printk(" Possible unsafe locking scenario:\n\n"); | ||
| 1173 | printk(" CPU0 CPU1\n"); | ||
| 1174 | printk(" ---- ----\n"); | ||
| 1175 | printk(" lock("); | ||
| 1176 | __print_lock_name(target); | ||
| 1177 | printk(KERN_CONT ");\n"); | ||
| 1178 | printk(" lock("); | ||
| 1179 | __print_lock_name(parent); | ||
| 1180 | printk(KERN_CONT ");\n"); | ||
| 1181 | printk(" lock("); | ||
| 1182 | __print_lock_name(target); | ||
| 1183 | printk(KERN_CONT ");\n"); | ||
| 1184 | printk(" lock("); | ||
| 1185 | __print_lock_name(source); | ||
| 1186 | printk(KERN_CONT ");\n"); | ||
| 1187 | printk("\n *** DEADLOCK ***\n\n"); | ||
| 1188 | } | ||
| 1189 | } | 1141 | } |
| 1190 | 1142 | ||
| 1191 | /* | 1143 | /* |
| @@ -1211,10 +1163,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
| 1211 | curr->comm, task_pid_nr(curr)); | 1163 | curr->comm, task_pid_nr(curr)); |
| 1212 | print_lock(check_src); | 1164 | print_lock(check_src); |
| 1213 | 1165 | ||
| 1214 | if (cross_lock(check_tgt->instance)) | 1166 | pr_warn("\nbut task is already holding lock:\n"); |
| 1215 | pr_warn("\nbut now in release context of a crosslock acquired at the following:\n"); | ||
| 1216 | else | ||
| 1217 | pr_warn("\nbut task is already holding lock:\n"); | ||
| 1218 | 1167 | ||
| 1219 | print_lock(check_tgt); | 1168 | print_lock(check_tgt); |
| 1220 | pr_warn("\nwhich lock already depends on the new lock.\n\n"); | 1169 | pr_warn("\nwhich lock already depends on the new lock.\n\n"); |
| @@ -1244,9 +1193,7 @@ static noinline int print_circular_bug(struct lock_list *this, | |||
| 1244 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1193 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 1245 | return 0; | 1194 | return 0; |
| 1246 | 1195 | ||
| 1247 | if (cross_lock(check_tgt->instance)) | 1196 | if (!save_trace(&this->trace)) |
| 1248 | this->trace = *trace; | ||
| 1249 | else if (!save_trace(&this->trace)) | ||
| 1250 | return 0; | 1197 | return 0; |
| 1251 | 1198 | ||
| 1252 | depth = get_lock_depth(target); | 1199 | depth = get_lock_depth(target); |
| @@ -1850,9 +1797,6 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, | |||
| 1850 | if (nest) | 1797 | if (nest) |
| 1851 | return 2; | 1798 | return 2; |
| 1852 | 1799 | ||
| 1853 | if (cross_lock(prev->instance)) | ||
| 1854 | continue; | ||
| 1855 | |||
| 1856 | return print_deadlock_bug(curr, prev, next); | 1800 | return print_deadlock_bug(curr, prev, next); |
| 1857 | } | 1801 | } |
| 1858 | return 1; | 1802 | return 1; |
| @@ -2018,31 +1962,26 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
| 2018 | for (;;) { | 1962 | for (;;) { |
| 2019 | int distance = curr->lockdep_depth - depth + 1; | 1963 | int distance = curr->lockdep_depth - depth + 1; |
| 2020 | hlock = curr->held_locks + depth - 1; | 1964 | hlock = curr->held_locks + depth - 1; |
| 1965 | |||
| 2021 | /* | 1966 | /* |
| 2022 | * Only non-crosslock entries get new dependencies added. | 1967 | * Only non-recursive-read entries get new dependencies |
| 2023 | * Crosslock entries will be added by commit later: | 1968 | * added: |
| 2024 | */ | 1969 | */ |
| 2025 | if (!cross_lock(hlock->instance)) { | 1970 | if (hlock->read != 2 && hlock->check) { |
| 1971 | int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace); | ||
| 1972 | if (!ret) | ||
| 1973 | return 0; | ||
| 1974 | |||
| 2026 | /* | 1975 | /* |
| 2027 | * Only non-recursive-read entries get new dependencies | 1976 | * Stop after the first non-trylock entry, |
| 2028 | * added: | 1977 | * as non-trylock entries have added their |
| 1978 | * own direct dependencies already, so this | ||
| 1979 | * lock is connected to them indirectly: | ||
| 2029 | */ | 1980 | */ |
| 2030 | if (hlock->read != 2 && hlock->check) { | 1981 | if (!hlock->trylock) |
| 2031 | int ret = check_prev_add(curr, hlock, next, | 1982 | break; |
| 2032 | distance, &trace, save_trace); | ||
| 2033 | if (!ret) | ||
| 2034 | return 0; | ||
| 2035 | |||
| 2036 | /* | ||
| 2037 | * Stop after the first non-trylock entry, | ||
| 2038 | * as non-trylock entries have added their | ||
| 2039 | * own direct dependencies already, so this | ||
| 2040 | * lock is connected to them indirectly: | ||
| 2041 | */ | ||
| 2042 | if (!hlock->trylock) | ||
| 2043 | break; | ||
| 2044 | } | ||
| 2045 | } | 1983 | } |
| 1984 | |||
| 2046 | depth--; | 1985 | depth--; |
| 2047 | /* | 1986 | /* |
| 2048 | * End of lock-stack? | 1987 | * End of lock-stack? |
| @@ -3292,21 +3231,10 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
| 3292 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 3231 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
| 3293 | struct lock_class_key *key, int subclass) | 3232 | struct lock_class_key *key, int subclass) |
| 3294 | { | 3233 | { |
| 3295 | cross_init(lock, 0); | ||
| 3296 | __lockdep_init_map(lock, name, key, subclass); | 3234 | __lockdep_init_map(lock, name, key, subclass); |
| 3297 | } | 3235 | } |
| 3298 | EXPORT_SYMBOL_GPL(lockdep_init_map); | 3236 | EXPORT_SYMBOL_GPL(lockdep_init_map); |
| 3299 | 3237 | ||
| 3300 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 3301 | void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name, | ||
| 3302 | struct lock_class_key *key, int subclass) | ||
| 3303 | { | ||
| 3304 | cross_init(lock, 1); | ||
| 3305 | __lockdep_init_map(lock, name, key, subclass); | ||
| 3306 | } | ||
| 3307 | EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock); | ||
| 3308 | #endif | ||
| 3309 | |||
| 3310 | struct lock_class_key __lockdep_no_validate__; | 3238 | struct lock_class_key __lockdep_no_validate__; |
| 3311 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); | 3239 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); |
| 3312 | 3240 | ||
| @@ -3362,7 +3290,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3362 | int chain_head = 0; | 3290 | int chain_head = 0; |
| 3363 | int class_idx; | 3291 | int class_idx; |
| 3364 | u64 chain_key; | 3292 | u64 chain_key; |
| 3365 | int ret; | ||
| 3366 | 3293 | ||
| 3367 | if (unlikely(!debug_locks)) | 3294 | if (unlikely(!debug_locks)) |
| 3368 | return 0; | 3295 | return 0; |
| @@ -3411,8 +3338,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3411 | 3338 | ||
| 3412 | class_idx = class - lock_classes + 1; | 3339 | class_idx = class - lock_classes + 1; |
| 3413 | 3340 | ||
| 3414 | /* TODO: nest_lock is not implemented for crosslock yet. */ | 3341 | if (depth) { |
| 3415 | if (depth && !cross_lock(lock)) { | ||
| 3416 | hlock = curr->held_locks + depth - 1; | 3342 | hlock = curr->held_locks + depth - 1; |
| 3417 | if (hlock->class_idx == class_idx && nest_lock) { | 3343 | if (hlock->class_idx == class_idx && nest_lock) { |
| 3418 | if (hlock->references) { | 3344 | if (hlock->references) { |
| @@ -3500,14 +3426,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3500 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) | 3426 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) |
| 3501 | return 0; | 3427 | return 0; |
| 3502 | 3428 | ||
| 3503 | ret = lock_acquire_crosslock(hlock); | ||
| 3504 | /* | ||
| 3505 | * 2 means normal acquire operations are needed. Otherwise, it's | ||
| 3506 | * ok just to return with '0:fail, 1:success'. | ||
| 3507 | */ | ||
| 3508 | if (ret != 2) | ||
| 3509 | return ret; | ||
| 3510 | |||
| 3511 | curr->curr_chain_key = chain_key; | 3429 | curr->curr_chain_key = chain_key; |
| 3512 | curr->lockdep_depth++; | 3430 | curr->lockdep_depth++; |
| 3513 | check_chain_key(curr); | 3431 | check_chain_key(curr); |
| @@ -3745,19 +3663,11 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
| 3745 | struct task_struct *curr = current; | 3663 | struct task_struct *curr = current; |
| 3746 | struct held_lock *hlock; | 3664 | struct held_lock *hlock; |
| 3747 | unsigned int depth; | 3665 | unsigned int depth; |
| 3748 | int ret, i; | 3666 | int i; |
| 3749 | 3667 | ||
| 3750 | if (unlikely(!debug_locks)) | 3668 | if (unlikely(!debug_locks)) |
| 3751 | return 0; | 3669 | return 0; |
| 3752 | 3670 | ||
| 3753 | ret = lock_release_crosslock(lock); | ||
| 3754 | /* | ||
| 3755 | * 2 means normal release operations are needed. Otherwise, it's | ||
| 3756 | * ok just to return with '0:fail, 1:success'. | ||
| 3757 | */ | ||
| 3758 | if (ret != 2) | ||
| 3759 | return ret; | ||
| 3760 | |||
| 3761 | depth = curr->lockdep_depth; | 3671 | depth = curr->lockdep_depth; |
| 3762 | /* | 3672 | /* |
| 3763 | * So we're all set to release this lock.. wait what lock? We don't | 3673 | * So we're all set to release this lock.. wait what lock? We don't |
| @@ -4675,495 +4585,3 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | |||
| 4675 | dump_stack(); | 4585 | dump_stack(); |
| 4676 | } | 4586 | } |
| 4677 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); | 4587 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); |
| 4678 | |||
| 4679 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 4680 | |||
| 4681 | /* | ||
| 4682 | * Crossrelease works by recording a lock history for each thread and | ||
| 4683 | * connecting those historic locks that were taken after the | ||
| 4684 | * wait_for_completion() in the complete() context. | ||
| 4685 | * | ||
| 4686 | * Task-A Task-B | ||
| 4687 | * | ||
| 4688 | * mutex_lock(&A); | ||
| 4689 | * mutex_unlock(&A); | ||
| 4690 | * | ||
| 4691 | * wait_for_completion(&C); | ||
| 4692 | * lock_acquire_crosslock(); | ||
| 4693 | * atomic_inc_return(&cross_gen_id); | ||
| 4694 | * | | ||
| 4695 | * | mutex_lock(&B); | ||
| 4696 | * | mutex_unlock(&B); | ||
| 4697 | * | | ||
| 4698 | * | complete(&C); | ||
| 4699 | * `-- lock_commit_crosslock(); | ||
| 4700 | * | ||
| 4701 | * Which will then add a dependency between B and C. | ||
| 4702 | */ | ||
| 4703 | |||
| 4704 | #define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR]) | ||
| 4705 | |||
| 4706 | /* | ||
| 4707 | * Whenever a crosslock is held, cross_gen_id will be increased. | ||
| 4708 | */ | ||
| 4709 | static atomic_t cross_gen_id; /* Can be wrapped */ | ||
| 4710 | |||
| 4711 | /* | ||
| 4712 | * Make an entry of the ring buffer invalid. | ||
| 4713 | */ | ||
| 4714 | static inline void invalidate_xhlock(struct hist_lock *xhlock) | ||
| 4715 | { | ||
| 4716 | /* | ||
| 4717 | * Normally, xhlock->hlock.instance must be !NULL. | ||
| 4718 | */ | ||
| 4719 | xhlock->hlock.instance = NULL; | ||
| 4720 | } | ||
| 4721 | |||
| 4722 | /* | ||
| 4723 | * Lock history stacks; we have 2 nested lock history stacks: | ||
| 4724 | * | ||
| 4725 | * HARD(IRQ) | ||
| 4726 | * SOFT(IRQ) | ||
| 4727 | * | ||
| 4728 | * The thing is that once we complete a HARD/SOFT IRQ the future task locks | ||
| 4729 | * should not depend on any of the locks observed while running the IRQ. So | ||
| 4730 | * what we do is rewind the history buffer and erase all our knowledge of that | ||
| 4731 | * temporal event. | ||
| 4732 | */ | ||
| 4733 | |||
| 4734 | void crossrelease_hist_start(enum xhlock_context_t c) | ||
| 4735 | { | ||
| 4736 | struct task_struct *cur = current; | ||
| 4737 | |||
| 4738 | if (!cur->xhlocks) | ||
| 4739 | return; | ||
| 4740 | |||
| 4741 | cur->xhlock_idx_hist[c] = cur->xhlock_idx; | ||
| 4742 | cur->hist_id_save[c] = cur->hist_id; | ||
| 4743 | } | ||
| 4744 | |||
| 4745 | void crossrelease_hist_end(enum xhlock_context_t c) | ||
| 4746 | { | ||
| 4747 | struct task_struct *cur = current; | ||
| 4748 | |||
| 4749 | if (cur->xhlocks) { | ||
| 4750 | unsigned int idx = cur->xhlock_idx_hist[c]; | ||
| 4751 | struct hist_lock *h = &xhlock(idx); | ||
| 4752 | |||
| 4753 | cur->xhlock_idx = idx; | ||
| 4754 | |||
| 4755 | /* Check if the ring was overwritten. */ | ||
| 4756 | if (h->hist_id != cur->hist_id_save[c]) | ||
| 4757 | invalidate_xhlock(h); | ||
| 4758 | } | ||
| 4759 | } | ||
| 4760 | |||
| 4761 | /* | ||
| 4762 | * lockdep_invariant_state() is used to annotate independence inside a task, to | ||
| 4763 | * make one task look like multiple independent 'tasks'. | ||
| 4764 | * | ||
| 4765 | * Take for instance workqueues; each work is independent of the last. The | ||
| 4766 | * completion of a future work does not depend on the completion of a past work | ||
| 4767 | * (in general). Therefore we must not carry that (lock) dependency across | ||
| 4768 | * works. | ||
| 4769 | * | ||
| 4770 | * This is true for many things; pretty much all kthreads fall into this | ||
| 4771 | * pattern, where they have an invariant state and future completions do not | ||
| 4772 | * depend on past completions. It's just that since they all have the 'same' | ||
| 4773 | * form -- the kthread does the same over and over -- it doesn't typically | ||
| 4774 | * matter. | ||
| 4775 | * | ||
| 4776 | * The same is true for system-calls, once a system call is completed (we've | ||
| 4777 | * returned to userspace) the next system call does not depend on the lock | ||
| 4778 | * history of the previous system call. | ||
| 4779 | * | ||
| 4780 | * The key property for independence, this invariant state, is that it must be | ||
| 4781 | * a point where we hold no locks and have no history. Because if we were to | ||
| 4782 | * hold locks, the restore at _end() would not necessarily recover its history | ||
| 4783 | * entry. Similarly, independence per-definition means it does not depend on | ||
| 4784 | * prior state. | ||
| 4785 | */ | ||
| 4786 | void lockdep_invariant_state(bool force) | ||
| 4787 | { | ||
| 4788 | /* | ||
| 4789 | * We call this at an invariant point, no current state, no history. | ||
| 4790 | * Verify the former, enforce the latter. | ||
| 4791 | */ | ||
| 4792 | WARN_ON_ONCE(!force && current->lockdep_depth); | ||
| 4793 | if (current->xhlocks) | ||
| 4794 | invalidate_xhlock(&xhlock(current->xhlock_idx)); | ||
| 4795 | } | ||
| 4796 | |||
| 4797 | static int cross_lock(struct lockdep_map *lock) | ||
| 4798 | { | ||
| 4799 | return lock ? lock->cross : 0; | ||
| 4800 | } | ||
| 4801 | |||
| 4802 | /* | ||
| 4803 | * This is needed to decide the relationship between wrapable variables. | ||
| 4804 | */ | ||
| 4805 | static inline int before(unsigned int a, unsigned int b) | ||
| 4806 | { | ||
| 4807 | return (int)(a - b) < 0; | ||
| 4808 | } | ||
| 4809 | |||
| 4810 | static inline struct lock_class *xhlock_class(struct hist_lock *xhlock) | ||
| 4811 | { | ||
| 4812 | return hlock_class(&xhlock->hlock); | ||
| 4813 | } | ||
| 4814 | |||
| 4815 | static inline struct lock_class *xlock_class(struct cross_lock *xlock) | ||
| 4816 | { | ||
| 4817 | return hlock_class(&xlock->hlock); | ||
| 4818 | } | ||
| 4819 | |||
| 4820 | /* | ||
| 4821 | * Should we check a dependency with previous one? | ||
| 4822 | */ | ||
| 4823 | static inline int depend_before(struct held_lock *hlock) | ||
| 4824 | { | ||
| 4825 | return hlock->read != 2 && hlock->check && !hlock->trylock; | ||
| 4826 | } | ||
| 4827 | |||
| 4828 | /* | ||
| 4829 | * Should we check a dependency with next one? | ||
| 4830 | */ | ||
| 4831 | static inline int depend_after(struct held_lock *hlock) | ||
| 4832 | { | ||
| 4833 | return hlock->read != 2 && hlock->check; | ||
| 4834 | } | ||
| 4835 | |||
| 4836 | /* | ||
| 4837 | * Check if the xhlock is valid, which would be false if, | ||
| 4838 | * | ||
| 4839 | * 1. Has not been used after initialization yet. | ||
| 4840 | * 2. Got invalidated. | ||
| 4841 | * | ||
| 4842 | * Remind hist_lock is implemented as a ring buffer. | ||
| 4843 | */ | ||
| 4844 | static inline int xhlock_valid(struct hist_lock *xhlock) | ||
| 4845 | { | ||
| 4846 | /* | ||
| 4847 | * xhlock->hlock.instance must be !NULL. | ||
| 4848 | */ | ||
| 4849 | return !!xhlock->hlock.instance; | ||
| 4850 | } | ||
| 4851 | |||
| 4852 | /* | ||
| 4853 | * Record a hist_lock entry. | ||
| 4854 | * | ||
| 4855 | * Irq disable is only required. | ||
| 4856 | */ | ||
| 4857 | static void add_xhlock(struct held_lock *hlock) | ||
| 4858 | { | ||
| 4859 | unsigned int idx = ++current->xhlock_idx; | ||
| 4860 | struct hist_lock *xhlock = &xhlock(idx); | ||
| 4861 | |||
| 4862 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 4863 | /* | ||
| 4864 | * This can be done locklessly because they are all task-local | ||
| 4865 | * state, we must however ensure IRQs are disabled. | ||
| 4866 | */ | ||
| 4867 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 4868 | #endif | ||
| 4869 | |||
| 4870 | /* Initialize hist_lock's members */ | ||
| 4871 | xhlock->hlock = *hlock; | ||
| 4872 | xhlock->hist_id = ++current->hist_id; | ||
| 4873 | |||
| 4874 | xhlock->trace.nr_entries = 0; | ||
| 4875 | xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; | ||
| 4876 | xhlock->trace.entries = xhlock->trace_entries; | ||
| 4877 | |||
| 4878 | if (crossrelease_fullstack) { | ||
| 4879 | xhlock->trace.skip = 3; | ||
| 4880 | save_stack_trace(&xhlock->trace); | ||
| 4881 | } else { | ||
| 4882 | xhlock->trace.nr_entries = 1; | ||
| 4883 | xhlock->trace.entries[0] = hlock->acquire_ip; | ||
| 4884 | } | ||
| 4885 | } | ||
| 4886 | |||
| 4887 | static inline int same_context_xhlock(struct hist_lock *xhlock) | ||
| 4888 | { | ||
| 4889 | return xhlock->hlock.irq_context == task_irq_context(current); | ||
| 4890 | } | ||
| 4891 | |||
| 4892 | /* | ||
| 4893 | * This should be lockless as far as possible because this would be | ||
| 4894 | * called very frequently. | ||
| 4895 | */ | ||
| 4896 | static void check_add_xhlock(struct held_lock *hlock) | ||
| 4897 | { | ||
| 4898 | /* | ||
| 4899 | * Record a hist_lock, only in case that acquisitions ahead | ||
| 4900 | * could depend on the held_lock. For example, if the held_lock | ||
| 4901 | * is trylock then acquisitions ahead never depends on that. | ||
| 4902 | * In that case, we don't need to record it. Just return. | ||
| 4903 | */ | ||
| 4904 | if (!current->xhlocks || !depend_before(hlock)) | ||
| 4905 | return; | ||
| 4906 | |||
| 4907 | add_xhlock(hlock); | ||
| 4908 | } | ||
| 4909 | |||
| 4910 | /* | ||
| 4911 | * For crosslock. | ||
| 4912 | */ | ||
| 4913 | static int add_xlock(struct held_lock *hlock) | ||
| 4914 | { | ||
| 4915 | struct cross_lock *xlock; | ||
| 4916 | unsigned int gen_id; | ||
| 4917 | |||
| 4918 | if (!graph_lock()) | ||
| 4919 | return 0; | ||
| 4920 | |||
| 4921 | xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; | ||
| 4922 | |||
| 4923 | /* | ||
| 4924 | * When acquisitions for a crosslock are overlapped, we use | ||
| 4925 | * nr_acquire to perform commit for them, based on cross_gen_id | ||
| 4926 | * of the first acquisition, which allows to add additional | ||
| 4927 | * dependencies. | ||
| 4928 | * | ||
| 4929 | * Moreover, when no acquisition of a crosslock is in progress, | ||
| 4930 | * we should not perform commit because the lock might not exist | ||
| 4931 | * any more, which might cause incorrect memory access. So we | ||
| 4932 | * have to track the number of acquisitions of a crosslock. | ||
| 4933 | * | ||
| 4934 | * depend_after() is necessary to initialize only the first | ||
| 4935 | * valid xlock so that the xlock can be used on its commit. | ||
| 4936 | */ | ||
| 4937 | if (xlock->nr_acquire++ && depend_after(&xlock->hlock)) | ||
| 4938 | goto unlock; | ||
| 4939 | |||
| 4940 | gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); | ||
| 4941 | xlock->hlock = *hlock; | ||
| 4942 | xlock->hlock.gen_id = gen_id; | ||
| 4943 | unlock: | ||
| 4944 | graph_unlock(); | ||
| 4945 | return 1; | ||
| 4946 | } | ||
| 4947 | |||
| 4948 | /* | ||
| 4949 | * Called for both normal and crosslock acquires. Normal locks will be | ||
| 4950 | * pushed on the hist_lock queue. Cross locks will record state and | ||
| 4951 | * stop regular lock_acquire() to avoid being placed on the held_lock | ||
| 4952 | * stack. | ||
| 4953 | * | ||
| 4954 | * Return: 0 - failure; | ||
| 4955 | * 1 - crosslock, done; | ||
| 4956 | * 2 - normal lock, continue to held_lock[] ops. | ||
| 4957 | */ | ||
| 4958 | static int lock_acquire_crosslock(struct held_lock *hlock) | ||
| 4959 | { | ||
| 4960 | /* | ||
| 4961 | * CONTEXT 1 CONTEXT 2 | ||
| 4962 | * --------- --------- | ||
| 4963 | * lock A (cross) | ||
| 4964 | * X = atomic_inc_return(&cross_gen_id) | ||
| 4965 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
| 4966 | * Y = atomic_read_acquire(&cross_gen_id) | ||
| 4967 | * lock B | ||
| 4968 | * | ||
| 4969 | * atomic_read_acquire() is for ordering between A and B, | ||
| 4970 | * IOW, A happens before B, when CONTEXT 2 see Y >= X. | ||
| 4971 | * | ||
| 4972 | * Pairs with atomic_inc_return() in add_xlock(). | ||
| 4973 | */ | ||
| 4974 | hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id); | ||
| 4975 | |||
| 4976 | if (cross_lock(hlock->instance)) | ||
| 4977 | return add_xlock(hlock); | ||
| 4978 | |||
| 4979 | check_add_xhlock(hlock); | ||
| 4980 | return 2; | ||
| 4981 | } | ||
| 4982 | |||
| 4983 | static int copy_trace(struct stack_trace *trace) | ||
| 4984 | { | ||
| 4985 | unsigned long *buf = stack_trace + nr_stack_trace_entries; | ||
| 4986 | unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; | ||
| 4987 | unsigned int nr = min(max_nr, trace->nr_entries); | ||
| 4988 | |||
| 4989 | trace->nr_entries = nr; | ||
| 4990 | memcpy(buf, trace->entries, nr * sizeof(trace->entries[0])); | ||
| 4991 | trace->entries = buf; | ||
| 4992 | nr_stack_trace_entries += nr; | ||
| 4993 | |||
| 4994 | if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { | ||
| 4995 | if (!debug_locks_off_graph_unlock()) | ||
| 4996 | return 0; | ||
| 4997 | |||
| 4998 | print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); | ||
| 4999 | dump_stack(); | ||
| 5000 | |||
| 5001 | return 0; | ||
| 5002 | } | ||
| 5003 | |||
| 5004 | return 1; | ||
| 5005 | } | ||
| 5006 | |||
| 5007 | static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) | ||
| 5008 | { | ||
| 5009 | unsigned int xid, pid; | ||
| 5010 | u64 chain_key; | ||
| 5011 | |||
| 5012 | xid = xlock_class(xlock) - lock_classes; | ||
| 5013 | chain_key = iterate_chain_key((u64)0, xid); | ||
| 5014 | pid = xhlock_class(xhlock) - lock_classes; | ||
| 5015 | chain_key = iterate_chain_key(chain_key, pid); | ||
| 5016 | |||
| 5017 | if (lookup_chain_cache(chain_key)) | ||
| 5018 | return 1; | ||
| 5019 | |||
| 5020 | if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context, | ||
| 5021 | chain_key)) | ||
| 5022 | return 0; | ||
| 5023 | |||
| 5024 | if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1, | ||
| 5025 | &xhlock->trace, copy_trace)) | ||
| 5026 | return 0; | ||
| 5027 | |||
| 5028 | return 1; | ||
| 5029 | } | ||
| 5030 | |||
| 5031 | static void commit_xhlocks(struct cross_lock *xlock) | ||
| 5032 | { | ||
| 5033 | unsigned int cur = current->xhlock_idx; | ||
| 5034 | unsigned int prev_hist_id = xhlock(cur).hist_id; | ||
| 5035 | unsigned int i; | ||
| 5036 | |||
| 5037 | if (!graph_lock()) | ||
| 5038 | return; | ||
| 5039 | |||
| 5040 | if (xlock->nr_acquire) { | ||
| 5041 | for (i = 0; i < MAX_XHLOCKS_NR; i++) { | ||
| 5042 | struct hist_lock *xhlock = &xhlock(cur - i); | ||
| 5043 | |||
| 5044 | if (!xhlock_valid(xhlock)) | ||
| 5045 | break; | ||
| 5046 | |||
| 5047 | if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) | ||
| 5048 | break; | ||
| 5049 | |||
| 5050 | if (!same_context_xhlock(xhlock)) | ||
| 5051 | break; | ||
| 5052 | |||
| 5053 | /* | ||
| 5054 | * Filter out the cases where the ring buffer was | ||
| 5055 | * overwritten and the current entry has a bigger | ||
| 5056 | * hist_id than the previous one, which is impossible | ||
| 5057 | * otherwise: | ||
| 5058 | */ | ||
| 5059 | if (unlikely(before(prev_hist_id, xhlock->hist_id))) | ||
| 5060 | break; | ||
| 5061 | |||
| 5062 | prev_hist_id = xhlock->hist_id; | ||
| 5063 | |||
| 5064 | /* | ||
| 5065 | * commit_xhlock() returns 0 with graph_lock already | ||
| 5066 | * released if fail. | ||
| 5067 | */ | ||
| 5068 | if (!commit_xhlock(xlock, xhlock)) | ||
| 5069 | return; | ||
| 5070 | } | ||
| 5071 | } | ||
| 5072 | |||
| 5073 | graph_unlock(); | ||
| 5074 | } | ||
| 5075 | |||
| 5076 | void lock_commit_crosslock(struct lockdep_map *lock) | ||
| 5077 | { | ||
| 5078 | struct cross_lock *xlock; | ||
| 5079 | unsigned long flags; | ||
| 5080 | |||
| 5081 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
| 5082 | return; | ||
| 5083 | |||
| 5084 | if (!current->xhlocks) | ||
| 5085 | return; | ||
| 5086 | |||
| 5087 | /* | ||
| 5088 | * Do commit hist_locks with the cross_lock, only in case that | ||
| 5089 | * the cross_lock could depend on acquisitions after that. | ||
| 5090 | * | ||
| 5091 | * For example, if the cross_lock does not have the 'check' flag | ||
| 5092 | * then we don't need to check dependencies and commit for that. | ||
| 5093 | * Just skip it. In that case, of course, the cross_lock does | ||
| 5094 | * not depend on acquisitions ahead, either. | ||
| 5095 | * | ||
| 5096 | * WARNING: Don't do that in add_xlock() in advance. When an | ||
| 5097 | * acquisition context is different from the commit context, | ||
| 5098 | * invalid(skipped) cross_lock might be accessed. | ||
| 5099 | */ | ||
| 5100 | if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock)) | ||
| 5101 | return; | ||
| 5102 | |||
| 5103 | raw_local_irq_save(flags); | ||
| 5104 | check_flags(flags); | ||
| 5105 | current->lockdep_recursion = 1; | ||
| 5106 | xlock = &((struct lockdep_map_cross *)lock)->xlock; | ||
| 5107 | commit_xhlocks(xlock); | ||
| 5108 | current->lockdep_recursion = 0; | ||
| 5109 | raw_local_irq_restore(flags); | ||
| 5110 | } | ||
| 5111 | EXPORT_SYMBOL_GPL(lock_commit_crosslock); | ||
| 5112 | |||
| 5113 | /* | ||
| 5114 | * Return: 0 - failure; | ||
| 5115 | * 1 - crosslock, done; | ||
| 5116 | * 2 - normal lock, continue to held_lock[] ops. | ||
| 5117 | */ | ||
| 5118 | static int lock_release_crosslock(struct lockdep_map *lock) | ||
| 5119 | { | ||
| 5120 | if (cross_lock(lock)) { | ||
| 5121 | if (!graph_lock()) | ||
| 5122 | return 0; | ||
| 5123 | ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--; | ||
| 5124 | graph_unlock(); | ||
| 5125 | return 1; | ||
| 5126 | } | ||
| 5127 | return 2; | ||
| 5128 | } | ||
| 5129 | |||
| 5130 | static void cross_init(struct lockdep_map *lock, int cross) | ||
| 5131 | { | ||
| 5132 | if (cross) | ||
| 5133 | ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0; | ||
| 5134 | |||
| 5135 | lock->cross = cross; | ||
| 5136 | |||
| 5137 | /* | ||
| 5138 | * Crossrelease assumes that the ring buffer size of xhlocks | ||
| 5139 | * is aligned with power of 2. So force it on build. | ||
| 5140 | */ | ||
| 5141 | BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1)); | ||
| 5142 | } | ||
| 5143 | |||
| 5144 | void lockdep_init_task(struct task_struct *task) | ||
| 5145 | { | ||
| 5146 | int i; | ||
| 5147 | |||
| 5148 | task->xhlock_idx = UINT_MAX; | ||
| 5149 | task->hist_id = 0; | ||
| 5150 | |||
| 5151 | for (i = 0; i < XHLOCK_CTX_NR; i++) { | ||
| 5152 | task->xhlock_idx_hist[i] = UINT_MAX; | ||
| 5153 | task->hist_id_save[i] = 0; | ||
| 5154 | } | ||
| 5155 | |||
| 5156 | task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, | ||
| 5157 | GFP_KERNEL); | ||
| 5158 | } | ||
| 5159 | |||
| 5160 | void lockdep_free_task(struct task_struct *task) | ||
| 5161 | { | ||
| 5162 | if (task->xhlocks) { | ||
| 5163 | void *tmp = task->xhlocks; | ||
| 5164 | /* Disable crossrelease for current */ | ||
| 5165 | task->xhlocks = NULL; | ||
| 5166 | kfree(tmp); | ||
| 5167 | } | ||
| 5168 | } | ||
| 5169 | #endif | ||
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 1fd1a7543cdd..936f3d14dd6b 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c | |||
| @@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ | |||
| 66 | break; \ | 66 | break; \ |
| 67 | preempt_enable(); \ | 67 | preempt_enable(); \ |
| 68 | \ | 68 | \ |
| 69 | if (!(lock)->break_lock) \ | 69 | arch_##op##_relax(&lock->raw_lock); \ |
| 70 | (lock)->break_lock = 1; \ | ||
| 71 | while ((lock)->break_lock) \ | ||
| 72 | arch_##op##_relax(&lock->raw_lock); \ | ||
| 73 | } \ | 70 | } \ |
| 74 | (lock)->break_lock = 0; \ | ||
| 75 | } \ | 71 | } \ |
| 76 | \ | 72 | \ |
| 77 | unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ | 73 | unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ |
| @@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ | |||
| 86 | local_irq_restore(flags); \ | 82 | local_irq_restore(flags); \ |
| 87 | preempt_enable(); \ | 83 | preempt_enable(); \ |
| 88 | \ | 84 | \ |
| 89 | if (!(lock)->break_lock) \ | 85 | arch_##op##_relax(&lock->raw_lock); \ |
| 90 | (lock)->break_lock = 1; \ | ||
| 91 | while ((lock)->break_lock) \ | ||
| 92 | arch_##op##_relax(&lock->raw_lock); \ | ||
| 93 | } \ | 86 | } \ |
| 94 | (lock)->break_lock = 0; \ | 87 | \ |
| 95 | return flags; \ | 88 | return flags; \ |
| 96 | } \ | 89 | } \ |
| 97 | \ | 90 | \ |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75554f366fd3..644fa2e3d993 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -5097,17 +5097,6 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) | |||
| 5097 | return ret; | 5097 | return ret; |
| 5098 | } | 5098 | } |
| 5099 | 5099 | ||
| 5100 | /** | ||
| 5101 | * sys_sched_rr_get_interval - return the default timeslice of a process. | ||
| 5102 | * @pid: pid of the process. | ||
| 5103 | * @interval: userspace pointer to the timeslice value. | ||
| 5104 | * | ||
| 5105 | * this syscall writes the default timeslice value of a given process | ||
| 5106 | * into the user-space timespec buffer. A value of '0' means infinity. | ||
| 5107 | * | ||
| 5108 | * Return: On success, 0 and the timeslice is in @interval. Otherwise, | ||
| 5109 | * an error code. | ||
| 5110 | */ | ||
| 5111 | static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) | 5100 | static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) |
| 5112 | { | 5101 | { |
| 5113 | struct task_struct *p; | 5102 | struct task_struct *p; |
| @@ -5144,6 +5133,17 @@ out_unlock: | |||
| 5144 | return retval; | 5133 | return retval; |
| 5145 | } | 5134 | } |
| 5146 | 5135 | ||
| 5136 | /** | ||
| 5137 | * sys_sched_rr_get_interval - return the default timeslice of a process. | ||
| 5138 | * @pid: pid of the process. | ||
| 5139 | * @interval: userspace pointer to the timeslice value. | ||
| 5140 | * | ||
| 5141 | * this syscall writes the default timeslice value of a given process | ||
| 5142 | * into the user-space timespec buffer. A value of '0' means infinity. | ||
| 5143 | * | ||
| 5144 | * Return: On success, 0 and the timeslice is in @interval. Otherwise, | ||
| 5145 | * an error code. | ||
| 5146 | */ | ||
| 5147 | SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | 5147 | SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, |
| 5148 | struct timespec __user *, interval) | 5148 | struct timespec __user *, interval) |
| 5149 | { | 5149 | { |
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2f52ec0f1539..d6717a3331a1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c | |||
| @@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, | |||
| 244 | #ifdef CONFIG_NO_HZ_COMMON | 244 | #ifdef CONFIG_NO_HZ_COMMON |
| 245 | static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) | 245 | static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) |
| 246 | { | 246 | { |
| 247 | unsigned long idle_calls = tick_nohz_get_idle_calls(); | 247 | unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); |
| 248 | bool ret = idle_calls == sg_cpu->saved_idle_calls; | 248 | bool ret = idle_calls == sg_cpu->saved_idle_calls; |
| 249 | 249 | ||
| 250 | sg_cpu->saved_idle_calls = idle_calls; | 250 | sg_cpu->saved_idle_calls = idle_calls; |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4056c19ca3f0..665ace2fc558 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
| @@ -2034,8 +2034,9 @@ static void pull_rt_task(struct rq *this_rq) | |||
| 2034 | bool resched = false; | 2034 | bool resched = false; |
| 2035 | struct task_struct *p; | 2035 | struct task_struct *p; |
| 2036 | struct rq *src_rq; | 2036 | struct rq *src_rq; |
| 2037 | int rt_overload_count = rt_overloaded(this_rq); | ||
| 2037 | 2038 | ||
| 2038 | if (likely(!rt_overloaded(this_rq))) | 2039 | if (likely(!rt_overload_count)) |
| 2039 | return; | 2040 | return; |
| 2040 | 2041 | ||
| 2041 | /* | 2042 | /* |
| @@ -2044,6 +2045,11 @@ static void pull_rt_task(struct rq *this_rq) | |||
| 2044 | */ | 2045 | */ |
| 2045 | smp_rmb(); | 2046 | smp_rmb(); |
| 2046 | 2047 | ||
| 2048 | /* If we are the only overloaded CPU do nothing */ | ||
| 2049 | if (rt_overload_count == 1 && | ||
| 2050 | cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask)) | ||
| 2051 | return; | ||
| 2052 | |||
| 2047 | #ifdef HAVE_RT_PUSH_IPI | 2053 | #ifdef HAVE_RT_PUSH_IPI |
| 2048 | if (sched_feat(RT_PUSH_IPI)) { | 2054 | if (sched_feat(RT_PUSH_IPI)) { |
| 2049 | tell_cpu_to_push(this_rq); | 2055 | tell_cpu_to_push(this_rq); |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index e776fc8cc1df..f6b5f19223d6 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -95,6 +95,7 @@ config NO_HZ_FULL | |||
| 95 | select RCU_NOCB_CPU | 95 | select RCU_NOCB_CPU |
| 96 | select VIRT_CPU_ACCOUNTING_GEN | 96 | select VIRT_CPU_ACCOUNTING_GEN |
| 97 | select IRQ_WORK | 97 | select IRQ_WORK |
| 98 | select CPU_ISOLATION | ||
| 98 | help | 99 | help |
| 99 | Adaptively try to shutdown the tick whenever possible, even when | 100 | Adaptively try to shutdown the tick whenever possible, even when |
| 100 | the CPU is running tasks. Typically this requires running a single | 101 | the CPU is running tasks. Typically this requires running a single |
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 13d6881f908b..ec999f32c840 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c | |||
| @@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event) | |||
| 434 | { | 434 | { |
| 435 | struct task_struct *rtn = current->group_leader; | 435 | struct task_struct *rtn = current->group_leader; |
| 436 | 436 | ||
| 437 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | 437 | switch (event->sigev_notify) { |
| 438 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || | 438 | case SIGEV_SIGNAL | SIGEV_THREAD_ID: |
| 439 | !same_thread_group(rtn, current) || | 439 | rtn = find_task_by_vpid(event->sigev_notify_thread_id); |
| 440 | (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) | 440 | if (!rtn || !same_thread_group(rtn, current)) |
| 441 | return NULL; | ||
| 442 | /* FALLTHRU */ | ||
| 443 | case SIGEV_SIGNAL: | ||
| 444 | case SIGEV_THREAD: | ||
| 445 | if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) | ||
| 446 | return NULL; | ||
| 447 | /* FALLTHRU */ | ||
| 448 | case SIGEV_NONE: | ||
| 449 | return task_pid(rtn); | ||
| 450 | default: | ||
| 441 | return NULL; | 451 | return NULL; |
| 442 | 452 | } | |
| 443 | if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | ||
| 444 | ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | ||
| 445 | return NULL; | ||
| 446 | |||
| 447 | return task_pid(rtn); | ||
| 448 | } | 453 | } |
| 449 | 454 | ||
| 450 | static struct k_itimer * alloc_posix_timer(void) | 455 | static struct k_itimer * alloc_posix_timer(void) |
| @@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) | |||
| 669 | struct timespec64 ts64; | 674 | struct timespec64 ts64; |
| 670 | bool sig_none; | 675 | bool sig_none; |
| 671 | 676 | ||
| 672 | sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; | 677 | sig_none = timr->it_sigev_notify == SIGEV_NONE; |
| 673 | iv = timr->it_interval; | 678 | iv = timr->it_interval; |
| 674 | 679 | ||
| 675 | /* interval timer ? */ | 680 | /* interval timer ? */ |
| @@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags, | |||
| 856 | 861 | ||
| 857 | timr->it_interval = timespec64_to_ktime(new_setting->it_interval); | 862 | timr->it_interval = timespec64_to_ktime(new_setting->it_interval); |
| 858 | expires = timespec64_to_ktime(new_setting->it_value); | 863 | expires = timespec64_to_ktime(new_setting->it_value); |
| 859 | sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; | 864 | sigev_none = timr->it_sigev_notify == SIGEV_NONE; |
| 860 | 865 | ||
| 861 | kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); | 866 | kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); |
| 862 | timr->it_active = !sigev_none; | 867 | timr->it_active = !sigev_none; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99578f06c8d4..f7cc7abfcf25 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
| 650 | ts->next_tick = 0; | 650 | ts->next_tick = 0; |
| 651 | } | 651 | } |
| 652 | 652 | ||
| 653 | static inline bool local_timer_softirq_pending(void) | ||
| 654 | { | ||
| 655 | return local_softirq_pending() & TIMER_SOFTIRQ; | ||
| 656 | } | ||
| 657 | |||
| 653 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | 658 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, |
| 654 | ktime_t now, int cpu) | 659 | ktime_t now, int cpu) |
| 655 | { | 660 | { |
| @@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
| 666 | } while (read_seqretry(&jiffies_lock, seq)); | 671 | } while (read_seqretry(&jiffies_lock, seq)); |
| 667 | ts->last_jiffies = basejiff; | 672 | ts->last_jiffies = basejiff; |
| 668 | 673 | ||
| 669 | if (rcu_needs_cpu(basemono, &next_rcu) || | 674 | /* |
| 670 | arch_needs_cpu() || irq_work_needs_cpu()) { | 675 | * Keep the periodic tick, when RCU, architecture or irq_work |
| 676 | * requests it. | ||
| 677 | * Aside of that check whether the local timer softirq is | ||
| 679 | * pending. If so it's a bad idea to call get_next_timer_interrupt() | ||
| 679 | * because there is an already expired timer, so it will request | ||
| 681 | * immediate expiry, which rearms the hardware timer with a | ||
| 681 | * minimal delta which brings us back to this place | ||
| 682 | * immediately. Lather, rinse and repeat... | ||
| 683 | */ | ||
| 684 | if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || | ||
| 685 | irq_work_needs_cpu() || local_timer_softirq_pending()) { | ||
| 671 | next_tick = basemono + TICK_NSEC; | 686 | next_tick = basemono + TICK_NSEC; |
| 672 | } else { | 687 | } else { |
| 673 | /* | 688 | /* |
| @@ -986,6 +1001,19 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
| 986 | } | 1001 | } |
| 987 | 1002 | ||
| 988 | /** | 1003 | /** |
| 1004 | * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value | ||
| 1005 | * for a particular CPU. | ||
| 1006 | * | ||
| 1007 | * Called from the schedutil frequency scaling governor in scheduler context. | ||
| 1008 | */ | ||
| 1009 | unsigned long tick_nohz_get_idle_calls_cpu(int cpu) | ||
| 1010 | { | ||
| 1011 | struct tick_sched *ts = tick_get_tick_sched(cpu); | ||
| 1012 | |||
| 1013 | return ts->idle_calls; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | /** | ||
| 989 | * tick_nohz_get_idle_calls - return the current idle calls counter value | 1017 | * tick_nohz_get_idle_calls - return the current idle calls counter value |
| 990 | * | 1018 | * |
| 991 | * Called from the schedutil frequency scaling governor in scheduler context. | 1019 | * Called from the schedutil frequency scaling governor in scheduler context. |
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ffebcf878fba..89a9e1b4264a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c | |||
| @@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) | |||
| 823 | struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); | 823 | struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); |
| 824 | 824 | ||
| 825 | /* | 825 | /* |
| 826 | * If the timer is deferrable and nohz is active then we need to use | 826 | * If the timer is deferrable and NO_HZ_COMMON is set then we need |
| 827 | * the deferrable base. | 827 | * to use the deferrable base. |
| 828 | */ | 828 | */ |
| 829 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && | 829 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) |
| 830 | (tflags & TIMER_DEFERRABLE)) | ||
| 831 | base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); | 830 | base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); |
| 832 | return base; | 831 | return base; |
| 833 | } | 832 | } |
| @@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) | |||
| 837 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | 836 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); |
| 838 | 837 | ||
| 839 | /* | 838 | /* |
| 840 | * If the timer is deferrable and nohz is active then we need to use | 839 | * If the timer is deferrable and NO_HZ_COMMON is set then we need |
| 841 | * the deferrable base. | 840 | * to use the deferrable base. |
| 842 | */ | 841 | */ |
| 843 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && | 842 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) |
| 844 | (tflags & TIMER_DEFERRABLE)) | ||
| 845 | base = this_cpu_ptr(&timer_bases[BASE_DEF]); | 843 | base = this_cpu_ptr(&timer_bases[BASE_DEF]); |
| 846 | return base; | 844 | return base; |
| 847 | } | 845 | } |
| @@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option | |||
| 1009 | if (!ret && (options & MOD_TIMER_PENDING_ONLY)) | 1007 | if (!ret && (options & MOD_TIMER_PENDING_ONLY)) |
| 1010 | goto out_unlock; | 1008 | goto out_unlock; |
| 1011 | 1009 | ||
| 1012 | debug_activate(timer, expires); | ||
| 1013 | |||
| 1014 | new_base = get_target_base(base, timer->flags); | 1010 | new_base = get_target_base(base, timer->flags); |
| 1015 | 1011 | ||
| 1016 | if (base != new_base) { | 1012 | if (base != new_base) { |
| @@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option | |||
| 1034 | } | 1030 | } |
| 1035 | } | 1031 | } |
| 1036 | 1032 | ||
| 1033 | debug_activate(timer, expires); | ||
| 1034 | |||
| 1037 | timer->expires = expires; | 1035 | timer->expires = expires; |
| 1038 | /* | 1036 | /* |
| 1039 | * If 'idx' was calculated above and the base time did not advance | 1037 | * If 'idx' was calculated above and the base time did not advance |
| @@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) | |||
| 1684 | base->must_forward_clk = false; | 1682 | base->must_forward_clk = false; |
| 1685 | 1683 | ||
| 1686 | __run_timers(base); | 1684 | __run_timers(base); |
| 1687 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) | 1685 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) |
| 1688 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); | 1686 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); |
| 1689 | } | 1687 | } |
| 1690 | 1688 | ||
| @@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h | |||
| 1855 | } | 1853 | } |
| 1856 | } | 1854 | } |
| 1857 | 1855 | ||
| 1856 | int timers_prepare_cpu(unsigned int cpu) | ||
| 1857 | { | ||
| 1858 | struct timer_base *base; | ||
| 1859 | int b; | ||
| 1860 | |||
| 1861 | for (b = 0; b < NR_BASES; b++) { | ||
| 1862 | base = per_cpu_ptr(&timer_bases[b], cpu); | ||
| 1863 | base->clk = jiffies; | ||
| 1864 | base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; | ||
| 1865 | base->is_idle = false; | ||
| 1866 | base->must_forward_clk = true; | ||
| 1867 | } | ||
| 1868 | return 0; | ||
| 1869 | } | ||
| 1870 | |||
| 1858 | int timers_dead_cpu(unsigned int cpu) | 1871 | int timers_dead_cpu(unsigned int cpu) |
| 1859 | { | 1872 | { |
| 1860 | struct timer_base *old_base; | 1873 | struct timer_base *old_base; |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..904c952ac383 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
| @@ -164,6 +164,7 @@ config PREEMPTIRQ_EVENTS | |||
| 164 | bool "Enable trace events for preempt and irq disable/enable" | 164 | bool "Enable trace events for preempt and irq disable/enable" |
| 165 | select TRACE_IRQFLAGS | 165 | select TRACE_IRQFLAGS |
| 166 | depends on DEBUG_PREEMPT || !PROVE_LOCKING | 166 | depends on DEBUG_PREEMPT || !PROVE_LOCKING |
| 167 | depends on TRACING | ||
| 167 | default n | 168 | default n |
| 168 | help | 169 | help |
| 169 | Enable tracing of disable and enable events for preemption and irqs. | 170 | Enable tracing of disable and enable events for preemption and irqs. |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0ce99c379c30..40207c2a4113 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
| @@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { | |||
| 343 | .arg4_type = ARG_CONST_SIZE, | 343 | .arg4_type = ARG_CONST_SIZE, |
| 344 | }; | 344 | }; |
| 345 | 345 | ||
| 346 | static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); | 346 | static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); |
| 347 | 347 | ||
| 348 | static __always_inline u64 | 348 | static __always_inline u64 |
| 349 | __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | 349 | __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, |
| 350 | u64 flags, struct perf_raw_record *raw) | 350 | u64 flags, struct perf_sample_data *sd) |
| 351 | { | 351 | { |
| 352 | struct bpf_array *array = container_of(map, struct bpf_array, map); | 352 | struct bpf_array *array = container_of(map, struct bpf_array, map); |
| 353 | struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd); | ||
| 354 | unsigned int cpu = smp_processor_id(); | 353 | unsigned int cpu = smp_processor_id(); |
| 355 | u64 index = flags & BPF_F_INDEX_MASK; | 354 | u64 index = flags & BPF_F_INDEX_MASK; |
| 356 | struct bpf_event_entry *ee; | 355 | struct bpf_event_entry *ee; |
| @@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | |||
| 373 | if (unlikely(event->oncpu != cpu)) | 372 | if (unlikely(event->oncpu != cpu)) |
| 374 | return -EOPNOTSUPP; | 373 | return -EOPNOTSUPP; |
| 375 | 374 | ||
| 376 | perf_sample_data_init(sd, 0, 0); | ||
| 377 | sd->raw = raw; | ||
| 378 | perf_event_output(event, sd, regs); | 375 | perf_event_output(event, sd, regs); |
| 379 | return 0; | 376 | return 0; |
| 380 | } | 377 | } |
| @@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | |||
| 382 | BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, | 379 | BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, |
| 383 | u64, flags, void *, data, u64, size) | 380 | u64, flags, void *, data, u64, size) |
| 384 | { | 381 | { |
| 382 | struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); | ||
| 385 | struct perf_raw_record raw = { | 383 | struct perf_raw_record raw = { |
| 386 | .frag = { | 384 | .frag = { |
| 387 | .size = size, | 385 | .size = size, |
| @@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, | |||
| 392 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) | 390 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) |
| 393 | return -EINVAL; | 391 | return -EINVAL; |
| 394 | 392 | ||
| 395 | return __bpf_perf_event_output(regs, map, flags, &raw); | 393 | perf_sample_data_init(sd, 0, 0); |
| 394 | sd->raw = &raw; | ||
| 395 | |||
| 396 | return __bpf_perf_event_output(regs, map, flags, sd); | ||
| 396 | } | 397 | } |
| 397 | 398 | ||
| 398 | static const struct bpf_func_proto bpf_perf_event_output_proto = { | 399 | static const struct bpf_func_proto bpf_perf_event_output_proto = { |
| @@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { | |||
| 407 | }; | 408 | }; |
| 408 | 409 | ||
| 409 | static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); | 410 | static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); |
| 411 | static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); | ||
| 410 | 412 | ||
| 411 | u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, | 413 | u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, |
| 412 | void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) | 414 | void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) |
| 413 | { | 415 | { |
| 416 | struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); | ||
| 414 | struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); | 417 | struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); |
| 415 | struct perf_raw_frag frag = { | 418 | struct perf_raw_frag frag = { |
| 416 | .copy = ctx_copy, | 419 | .copy = ctx_copy, |
| @@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, | |||
| 428 | }; | 431 | }; |
| 429 | 432 | ||
| 430 | perf_fetch_caller_regs(regs); | 433 | perf_fetch_caller_regs(regs); |
| 434 | perf_sample_data_init(sd, 0, 0); | ||
| 435 | sd->raw = &raw; | ||
| 431 | 436 | ||
| 432 | return __bpf_perf_event_output(regs, map, flags, &raw); | 437 | return __bpf_perf_event_output(regs, map, flags, sd); |
| 433 | } | 438 | } |
| 434 | 439 | ||
| 435 | BPF_CALL_0(bpf_get_current_task) | 440 | BPF_CALL_0(bpf_get_current_task) |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 91874a95060d..9ab18995ff1e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
| 280 | /* Missed count stored at end */ | 280 | /* Missed count stored at end */ |
| 281 | #define RB_MISSED_STORED (1 << 30) | 281 | #define RB_MISSED_STORED (1 << 30) |
| 282 | 282 | ||
| 283 | #define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) | ||
| 284 | |||
| 283 | struct buffer_data_page { | 285 | struct buffer_data_page { |
| 284 | u64 time_stamp; /* page time stamp */ | 286 | u64 time_stamp; /* page time stamp */ |
| 285 | local_t commit; /* write committed index */ | 287 | local_t commit; /* write committed index */ |
| @@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage) | |||
| 331 | */ | 333 | */ |
| 332 | size_t ring_buffer_page_len(void *page) | 334 | size_t ring_buffer_page_len(void *page) |
| 333 | { | 335 | { |
| 334 | return local_read(&((struct buffer_data_page *)page)->commit) | 336 | struct buffer_data_page *bpage = page; |
| 337 | |||
| 338 | return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) | ||
| 335 | + BUF_PAGE_HDR_SIZE; | 339 | + BUF_PAGE_HDR_SIZE; |
| 336 | } | 340 | } |
| 337 | 341 | ||
| @@ -1799,12 +1803,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) | |||
| 1799 | } | 1803 | } |
| 1800 | EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); | 1804 | EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); |
| 1801 | 1805 | ||
| 1802 | static __always_inline void * | ||
| 1803 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | ||
| 1804 | { | ||
| 1805 | return bpage->data + index; | ||
| 1806 | } | ||
| 1807 | |||
| 1808 | static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) | 1806 | static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) |
| 1809 | { | 1807 | { |
| 1810 | return bpage->page->data + index; | 1808 | return bpage->page->data + index; |
| @@ -4406,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) | |||
| 4406 | { | 4404 | { |
| 4407 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 4405 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
| 4408 | struct buffer_data_page *bpage = data; | 4406 | struct buffer_data_page *bpage = data; |
| 4407 | struct page *page = virt_to_page(bpage); | ||
| 4409 | unsigned long flags; | 4408 | unsigned long flags; |
| 4410 | 4409 | ||
| 4410 | /* If the page is still in use someplace else, we can't reuse it */ | ||
| 4411 | if (page_ref_count(page) > 1) | ||
| 4412 | goto out; | ||
| 4413 | |||
| 4411 | local_irq_save(flags); | 4414 | local_irq_save(flags); |
| 4412 | arch_spin_lock(&cpu_buffer->lock); | 4415 | arch_spin_lock(&cpu_buffer->lock); |
| 4413 | 4416 | ||
| @@ -4419,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) | |||
| 4419 | arch_spin_unlock(&cpu_buffer->lock); | 4422 | arch_spin_unlock(&cpu_buffer->lock); |
| 4420 | local_irq_restore(flags); | 4423 | local_irq_restore(flags); |
| 4421 | 4424 | ||
| 4425 | out: | ||
| 4422 | free_page((unsigned long)bpage); | 4426 | free_page((unsigned long)bpage); |
| 4423 | } | 4427 | } |
| 4424 | EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); | 4428 | EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73e67b68c53b..2a8d8a294345 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -362,7 +362,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct | |||
| 362 | } | 362 | } |
| 363 | 363 | ||
| 364 | /** | 364 | /** |
| 365 | * trace_pid_filter_add_remove - Add or remove a task from a pid_list | 365 | * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list |
| 366 | * @pid_list: The list to modify | 366 | * @pid_list: The list to modify |
| 367 | * @self: The current task for fork or NULL for exit | 367 | * @self: The current task for fork or NULL for exit |
| 368 | * @task: The task to add or remove | 368 | * @task: The task to add or remove |
| @@ -925,7 +925,7 @@ static void tracing_snapshot_instance(struct trace_array *tr) | |||
| 925 | } | 925 | } |
| 926 | 926 | ||
| 927 | /** | 927 | /** |
| 928 | * trace_snapshot - take a snapshot of the current buffer. | 928 | * tracing_snapshot - take a snapshot of the current buffer. |
| 929 | * | 929 | * |
| 930 | * This causes a swap between the snapshot buffer and the current live | 930 | * This causes a swap between the snapshot buffer and the current live |
| 931 | * tracing buffer. You can use this to take snapshots of the live | 931 | * tracing buffer. You can use this to take snapshots of the live |
| @@ -1004,9 +1004,9 @@ int tracing_alloc_snapshot(void) | |||
| 1004 | EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); | 1004 | EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); |
| 1005 | 1005 | ||
| 1006 | /** | 1006 | /** |
| 1007 | * trace_snapshot_alloc - allocate and take a snapshot of the current buffer. | 1007 | * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. |
| 1008 | * | 1008 | * |
| 1009 | * This is similar to trace_snapshot(), but it will allocate the | 1009 | * This is similar to tracing_snapshot(), but it will allocate the |
| 1010 | * snapshot buffer if it isn't already allocated. Use this only | 1010 | * snapshot buffer if it isn't already allocated. Use this only |
| 1011 | * where it is safe to sleep, as the allocation may sleep. | 1011 | * where it is safe to sleep, as the allocation may sleep. |
| 1012 | * | 1012 | * |
| @@ -1303,7 +1303,7 @@ unsigned long __read_mostly tracing_thresh; | |||
| 1303 | /* | 1303 | /* |
| 1304 | * Copy the new maximum trace into the separate maximum-trace | 1304 | * Copy the new maximum trace into the separate maximum-trace |
| 1305 | * structure. (this way the maximum trace is permanently saved, | 1305 | * structure. (this way the maximum trace is permanently saved, |
| 1306 | * for later retrieval via /sys/kernel/debug/tracing/latency_trace) | 1306 | * for later retrieval via /sys/kernel/tracing/tracing_max_latency) |
| 1307 | */ | 1307 | */ |
| 1308 | static void | 1308 | static void |
| 1309 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | 1309 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) |
| @@ -2415,7 +2415,7 @@ trace_process_export(struct trace_export *export, | |||
| 2415 | 2415 | ||
| 2416 | entry = ring_buffer_event_data(event); | 2416 | entry = ring_buffer_event_data(event); |
| 2417 | size = ring_buffer_event_length(event); | 2417 | size = ring_buffer_event_length(event); |
| 2418 | export->write(entry, size); | 2418 | export->write(export, entry, size); |
| 2419 | } | 2419 | } |
| 2420 | 2420 | ||
| 2421 | static DEFINE_MUTEX(ftrace_export_lock); | 2421 | static DEFINE_MUTEX(ftrace_export_lock); |
| @@ -4178,37 +4178,30 @@ static const struct file_operations show_traces_fops = { | |||
| 4178 | .llseek = seq_lseek, | 4178 | .llseek = seq_lseek, |
| 4179 | }; | 4179 | }; |
| 4180 | 4180 | ||
| 4181 | /* | ||
| 4182 | * The tracer itself will not take this lock, but still we want | ||
| 4183 | * to provide a consistent cpumask to user-space: | ||
| 4184 | */ | ||
| 4185 | static DEFINE_MUTEX(tracing_cpumask_update_lock); | ||
| 4186 | |||
| 4187 | /* | ||
| 4188 | * Temporary storage for the character representation of the | ||
| 4189 | * CPU bitmask (and one more byte for the newline): | ||
| 4190 | */ | ||
| 4191 | static char mask_str[NR_CPUS + 1]; | ||
| 4192 | |||
| 4193 | static ssize_t | 4181 | static ssize_t |
| 4194 | tracing_cpumask_read(struct file *filp, char __user *ubuf, | 4182 | tracing_cpumask_read(struct file *filp, char __user *ubuf, |
| 4195 | size_t count, loff_t *ppos) | 4183 | size_t count, loff_t *ppos) |
| 4196 | { | 4184 | { |
| 4197 | struct trace_array *tr = file_inode(filp)->i_private; | 4185 | struct trace_array *tr = file_inode(filp)->i_private; |
| 4186 | char *mask_str; | ||
| 4198 | int len; | 4187 | int len; |
| 4199 | 4188 | ||
| 4200 | mutex_lock(&tracing_cpumask_update_lock); | 4189 | len = snprintf(NULL, 0, "%*pb\n", |
| 4190 | cpumask_pr_args(tr->tracing_cpumask)) + 1; | ||
| 4191 | mask_str = kmalloc(len, GFP_KERNEL); | ||
| 4192 | if (!mask_str) | ||
| 4193 | return -ENOMEM; | ||
| 4201 | 4194 | ||
| 4202 | len = snprintf(mask_str, count, "%*pb\n", | 4195 | len = snprintf(mask_str, len, "%*pb\n", |
| 4203 | cpumask_pr_args(tr->tracing_cpumask)); | 4196 | cpumask_pr_args(tr->tracing_cpumask)); |
| 4204 | if (len >= count) { | 4197 | if (len >= count) { |
| 4205 | count = -EINVAL; | 4198 | count = -EINVAL; |
| 4206 | goto out_err; | 4199 | goto out_err; |
| 4207 | } | 4200 | } |
| 4208 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); | 4201 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); |
| 4209 | 4202 | ||
| 4210 | out_err: | 4203 | out_err: |
| 4211 | mutex_unlock(&tracing_cpumask_update_lock); | 4204 | kfree(mask_str); |
| 4212 | 4205 | ||
| 4213 | return count; | 4206 | return count; |
| 4214 | } | 4207 | } |
| @@ -4228,8 +4221,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
| 4228 | if (err) | 4221 | if (err) |
| 4229 | goto err_unlock; | 4222 | goto err_unlock; |
| 4230 | 4223 | ||
| 4231 | mutex_lock(&tracing_cpumask_update_lock); | ||
| 4232 | |||
| 4233 | local_irq_disable(); | 4224 | local_irq_disable(); |
| 4234 | arch_spin_lock(&tr->max_lock); | 4225 | arch_spin_lock(&tr->max_lock); |
| 4235 | for_each_tracing_cpu(cpu) { | 4226 | for_each_tracing_cpu(cpu) { |
| @@ -4252,8 +4243,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
| 4252 | local_irq_enable(); | 4243 | local_irq_enable(); |
| 4253 | 4244 | ||
| 4254 | cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); | 4245 | cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); |
| 4255 | |||
| 4256 | mutex_unlock(&tracing_cpumask_update_lock); | ||
| 4257 | free_cpumask_var(tracing_cpumask_new); | 4246 | free_cpumask_var(tracing_cpumask_new); |
| 4258 | 4247 | ||
| 4259 | return count; | 4248 | return count; |
| @@ -6780,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 6780 | .spd_release = buffer_spd_release, | 6769 | .spd_release = buffer_spd_release, |
| 6781 | }; | 6770 | }; |
| 6782 | struct buffer_ref *ref; | 6771 | struct buffer_ref *ref; |
| 6783 | int entries, size, i; | 6772 | int entries, i; |
| 6784 | ssize_t ret = 0; | 6773 | ssize_t ret = 0; |
| 6785 | 6774 | ||
| 6786 | #ifdef CONFIG_TRACER_MAX_TRACE | 6775 | #ifdef CONFIG_TRACER_MAX_TRACE |
| @@ -6834,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 6834 | break; | 6823 | break; |
| 6835 | } | 6824 | } |
| 6836 | 6825 | ||
| 6837 | /* | ||
| 6838 | * zero out any left over data, this is going to | ||
| 6839 | * user land. | ||
| 6840 | */ | ||
| 6841 | size = ring_buffer_page_len(ref->page); | ||
| 6842 | if (size < PAGE_SIZE) | ||
| 6843 | memset(ref->page + size, 0, PAGE_SIZE - size); | ||
| 6844 | |||
| 6845 | page = virt_to_page(ref->page); | 6826 | page = virt_to_page(ref->page); |
| 6846 | 6827 | ||
| 6847 | spd.pages[i] = page; | 6828 | spd.pages[i] = page; |
| @@ -7599,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size | |||
| 7599 | buf->data = alloc_percpu(struct trace_array_cpu); | 7580 | buf->data = alloc_percpu(struct trace_array_cpu); |
| 7600 | if (!buf->data) { | 7581 | if (!buf->data) { |
| 7601 | ring_buffer_free(buf->buffer); | 7582 | ring_buffer_free(buf->buffer); |
| 7583 | buf->buffer = NULL; | ||
| 7602 | return -ENOMEM; | 7584 | return -ENOMEM; |
| 7603 | } | 7585 | } |
| 7604 | 7586 | ||
| @@ -7622,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) | |||
| 7622 | allocate_snapshot ? size : 1); | 7604 | allocate_snapshot ? size : 1); |
| 7623 | if (WARN_ON(ret)) { | 7605 | if (WARN_ON(ret)) { |
| 7624 | ring_buffer_free(tr->trace_buffer.buffer); | 7606 | ring_buffer_free(tr->trace_buffer.buffer); |
| 7607 | tr->trace_buffer.buffer = NULL; | ||
| 7625 | free_percpu(tr->trace_buffer.data); | 7608 | free_percpu(tr->trace_buffer.data); |
| 7609 | tr->trace_buffer.data = NULL; | ||
| 7626 | return -ENOMEM; | 7610 | return -ENOMEM; |
| 7627 | } | 7611 | } |
| 7628 | tr->allocated_snapshot = allocate_snapshot; | 7612 | tr->allocated_snapshot = allocate_snapshot; |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 734accc02418..3c7bfc4bf5e9 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
| @@ -209,6 +209,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, | |||
| 209 | if (__this_cpu_read(disable_stack_tracer) != 1) | 209 | if (__this_cpu_read(disable_stack_tracer) != 1) |
| 210 | goto out; | 210 | goto out; |
| 211 | 211 | ||
| 212 | /* If rcu is not watching, then save stack trace can fail */ | ||
| 213 | if (!rcu_is_watching()) | ||
| 214 | goto out; | ||
| 215 | |||
| 212 | ip += MCOUNT_INSN_SIZE; | 216 | ip += MCOUNT_INSN_SIZE; |
| 213 | 217 | ||
| 214 | check_stack(ip, &stack); | 218 | check_stack(ip, &stack); |
diff --git a/kernel/uid16.c b/kernel/uid16.c index ce74a4901d2b..ef1da2a5f9bd 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c | |||
| @@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) | |||
| 192 | return retval; | 192 | return retval; |
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | groups_sort(group_info); | ||
| 195 | retval = set_current_groups(group_info); | 196 | retval = set_current_groups(group_info); |
| 196 | put_group_info(group_info); | 197 | put_group_info(group_info); |
| 197 | 198 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8fdb710bfdd7..43d18cb46308 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -38,7 +38,6 @@ | |||
| 38 | #include <linux/hardirq.h> | 38 | #include <linux/hardirq.h> |
| 39 | #include <linux/mempolicy.h> | 39 | #include <linux/mempolicy.h> |
| 40 | #include <linux/freezer.h> | 40 | #include <linux/freezer.h> |
| 41 | #include <linux/kallsyms.h> | ||
| 42 | #include <linux/debug_locks.h> | 41 | #include <linux/debug_locks.h> |
| 43 | #include <linux/lockdep.h> | 42 | #include <linux/lockdep.h> |
| 44 | #include <linux/idr.h> | 43 | #include <linux/idr.h> |
| @@ -48,6 +47,7 @@ | |||
| 48 | #include <linux/nodemask.h> | 47 | #include <linux/nodemask.h> |
| 49 | #include <linux/moduleparam.h> | 48 | #include <linux/moduleparam.h> |
| 50 | #include <linux/uaccess.h> | 49 | #include <linux/uaccess.h> |
| 50 | #include <linux/sched/isolation.h> | ||
| 51 | 51 | ||
| 52 | #include "workqueue_internal.h" | 52 | #include "workqueue_internal.h" |
| 53 | 53 | ||
| @@ -1634,7 +1634,7 @@ static void worker_enter_idle(struct worker *worker) | |||
| 1634 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | 1634 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); |
| 1635 | 1635 | ||
| 1636 | /* | 1636 | /* |
| 1637 | * Sanity check nr_running. Because wq_unbind_fn() releases | 1637 | * Sanity check nr_running. Because unbind_workers() releases |
| 1638 | * pool->lock between setting %WORKER_UNBOUND and zapping | 1638 | * pool->lock between setting %WORKER_UNBOUND and zapping |
| 1639 | * nr_running, the warning may trigger spuriously. Check iff | 1639 | * nr_running, the warning may trigger spuriously. Check iff |
| 1640 | * unbind is not in progress. | 1640 | * unbind is not in progress. |
| @@ -4510,9 +4510,8 @@ void show_workqueue_state(void) | |||
| 4510 | * cpu comes back online. | 4510 | * cpu comes back online. |
| 4511 | */ | 4511 | */ |
| 4512 | 4512 | ||
| 4513 | static void wq_unbind_fn(struct work_struct *work) | 4513 | static void unbind_workers(int cpu) |
| 4514 | { | 4514 | { |
| 4515 | int cpu = smp_processor_id(); | ||
| 4516 | struct worker_pool *pool; | 4515 | struct worker_pool *pool; |
| 4517 | struct worker *worker; | 4516 | struct worker *worker; |
| 4518 | 4517 | ||
| @@ -4589,16 +4588,6 @@ static void rebind_workers(struct worker_pool *pool) | |||
| 4589 | 4588 | ||
| 4590 | spin_lock_irq(&pool->lock); | 4589 | spin_lock_irq(&pool->lock); |
| 4591 | 4590 | ||
| 4592 | /* | ||
| 4593 | * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED | ||
| 4594 | * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is | ||
| 4595 | * being reworked and this can go away in time. | ||
| 4596 | */ | ||
| 4597 | if (!(pool->flags & POOL_DISASSOCIATED)) { | ||
| 4598 | spin_unlock_irq(&pool->lock); | ||
| 4599 | return; | ||
| 4600 | } | ||
| 4601 | |||
| 4602 | pool->flags &= ~POOL_DISASSOCIATED; | 4591 | pool->flags &= ~POOL_DISASSOCIATED; |
| 4603 | 4592 | ||
| 4604 | for_each_pool_worker(worker, pool) { | 4593 | for_each_pool_worker(worker, pool) { |
| @@ -4709,12 +4698,13 @@ int workqueue_online_cpu(unsigned int cpu) | |||
| 4709 | 4698 | ||
| 4710 | int workqueue_offline_cpu(unsigned int cpu) | 4699 | int workqueue_offline_cpu(unsigned int cpu) |
| 4711 | { | 4700 | { |
| 4712 | struct work_struct unbind_work; | ||
| 4713 | struct workqueue_struct *wq; | 4701 | struct workqueue_struct *wq; |
| 4714 | 4702 | ||
| 4715 | /* unbinding per-cpu workers should happen on the local CPU */ | 4703 | /* unbinding per-cpu workers should happen on the local CPU */ |
| 4716 | INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); | 4704 | if (WARN_ON(cpu != smp_processor_id())) |
| 4717 | queue_work_on(cpu, system_highpri_wq, &unbind_work); | 4705 | return -1; |
| 4706 | |||
| 4707 | unbind_workers(cpu); | ||
| 4718 | 4708 | ||
| 4719 | /* update NUMA affinity of unbound workqueues */ | 4709 | /* update NUMA affinity of unbound workqueues */ |
| 4720 | mutex_lock(&wq_pool_mutex); | 4710 | mutex_lock(&wq_pool_mutex); |
| @@ -4722,9 +4712,6 @@ int workqueue_offline_cpu(unsigned int cpu) | |||
| 4722 | wq_update_unbound_numa(wq, cpu, false); | 4712 | wq_update_unbound_numa(wq, cpu, false); |
| 4723 | mutex_unlock(&wq_pool_mutex); | 4713 | mutex_unlock(&wq_pool_mutex); |
| 4724 | 4714 | ||
| 4725 | /* wait for per-cpu unbinding to finish */ | ||
| 4726 | flush_work(&unbind_work); | ||
| 4727 | destroy_work_on_stack(&unbind_work); | ||
| 4728 | return 0; | 4715 | return 0; |
| 4729 | } | 4716 | } |
| 4730 | 4717 | ||
| @@ -4957,6 +4944,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) | |||
| 4957 | if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) | 4944 | if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) |
| 4958 | return -ENOMEM; | 4945 | return -ENOMEM; |
| 4959 | 4946 | ||
| 4947 | /* | ||
| 4948 | * Not excluding isolated cpus on purpose. | ||
| 4949 | * If the user wishes to include them, we allow that. | ||
| 4950 | */ | ||
| 4960 | cpumask_and(cpumask, cpumask, cpu_possible_mask); | 4951 | cpumask_and(cpumask, cpumask, cpu_possible_mask); |
| 4961 | if (!cpumask_empty(cpumask)) { | 4952 | if (!cpumask_empty(cpumask)) { |
| 4962 | apply_wqattrs_lock(); | 4953 | apply_wqattrs_lock(); |
| @@ -5555,7 +5546,7 @@ int __init workqueue_init_early(void) | |||
| 5555 | WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); | 5546 | WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); |
| 5556 | 5547 | ||
| 5557 | BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); | 5548 | BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); |
| 5558 | cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); | 5549 | cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN)); |
| 5559 | 5550 | ||
| 5560 | pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); | 5551 | pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); |
| 5561 | 5552 | ||
