aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS18
-rw-r--r--Makefile2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/entry/entry_64_compat.S6
-rw-r--r--arch/x86/include/asm/mmu_context.h18
-rw-r--r--arch/x86/kernel/crash.c1
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c2
-rw-r--r--arch/x86/kernel/tsc.c30
-rw-r--r--arch/x86/lib/kaslr.c4
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c4
-rw-r--r--block/blk-core.c11
-rw-r--r--block/blk-merge.c10
-rw-r--r--drivers/edac/altera_edac.h4
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c25
-rw-r--r--drivers/irqchip/irq-gic-v3-mbi.c2
-rw-r--r--drivers/irqchip/irq-madera.c2
-rw-r--r--drivers/irqchip/irq-stm32-exti.c1
-rw-r--r--drivers/net/caif/caif_serial.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c2
-rw-r--r--drivers/net/ethernet/alteon/acenic.c2
-rw-r--r--drivers/net/ethernet/altera/altera_msgdma.c3
-rw-r--r--drivers/net/ethernet/amd/amd8111e.c2
-rw-r--r--drivers/net/ethernet/apple/bmac.c2
-rw-r--r--drivers/net/ethernet/broadcom/b44.c4
-rw-r--r--drivers/net/ethernet/cadence/macb.h3
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c28
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c5
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_ethtool.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns_mdio.c2
-rw-r--r--drivers/net/ethernet/i825xx/82596.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c12
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.h3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c20
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c10
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_vf.c10
-rw-r--r--drivers/net/ethernet/realtek/8139cp.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c4
-rw-r--r--drivers/net/ethernet/ti/cpmac.c2
-rw-r--r--drivers/vhost/net.c3
-rw-r--r--drivers/vhost/scsi.c2
-rw-r--r--drivers/vhost/vhost.c7
-rw-r--r--drivers/vhost/vhost.h4
-rw-r--r--drivers/vhost/vsock.c2
-rw-r--r--include/linux/interrupt.h1
-rw-r--r--include/linux/sched/wake_q.h6
-rw-r--r--include/net/tls.h2
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/futex.c13
-rw-r--r--kernel/irq/irqdesc.c2
-rw-r--r--kernel/irq/manage.c3
-rw-r--r--kernel/locking/rwsem-xadd.c11
-rw-r--r--kernel/sched/core.c19
-rw-r--r--kernel/time/posix-cpu-timers.c1
-rw-r--r--mm/page_alloc.c12
-rw-r--r--net/bridge/netfilter/ebtables.c9
-rw-r--r--net/core/dev.c3
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/ipv4/ip_vti.c50
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv6/ip6mr.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/nfnetlink_osf.c4
-rw-r--r--net/netfilter/nft_compat.c189
-rw-r--r--net/netrom/nr_timer.c20
-rw-r--r--net/rose/rose_route.c5
-rw-r--r--net/tls/tls_sw.c6
-rw-r--r--net/xfrm/xfrm_policy.c63
-rw-r--r--net/xfrm/xfrm_user.c13
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh153
-rw-r--r--tools/testing/selftests/x86/protection_keys.c41
78 files changed, 755 insertions, 284 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 9f33d9559594..019a2bcfbd09 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16689,6 +16689,24 @@ T: git git://linuxtv.org/media_tree.git
16689S: Maintained 16689S: Maintained
16690F: drivers/media/tuners/tuner-xc2028.* 16690F: drivers/media/tuners/tuner-xc2028.*
16691 16691
16692XDP (eXpress Data Path)
16693M: Alexei Starovoitov <ast@kernel.org>
16694M: Daniel Borkmann <daniel@iogearbox.net>
16695M: David S. Miller <davem@davemloft.net>
16696M: Jakub Kicinski <jakub.kicinski@netronome.com>
16697M: Jesper Dangaard Brouer <hawk@kernel.org>
16698M: John Fastabend <john.fastabend@gmail.com>
16699L: netdev@vger.kernel.org
16700L: xdp-newbies@vger.kernel.org
16701S: Supported
16702F: net/core/xdp.c
16703F: include/net/xdp.h
16704F: kernel/bpf/devmap.c
16705F: kernel/bpf/cpumap.c
16706F: include/trace/events/xdp.h
16707K: xdp
16708N: xdp
16709
16692XDP SOCKETS (AF_XDP) 16710XDP SOCKETS (AF_XDP)
16693M: Björn Töpel <bjorn.topel@intel.com> 16711M: Björn Töpel <bjorn.topel@intel.com>
16694M: Magnus Karlsson <magnus.karlsson@intel.com> 16712M: Magnus Karlsson <magnus.karlsson@intel.com>
diff --git a/Makefile b/Makefile
index f5b1d0d168e0..141653226f3c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 5 2VERSION = 5
3PATCHLEVEL = 0 3PATCHLEVEL = 0
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc3 5EXTRAVERSION = -rc4
6NAME = Shy Crocodile 6NAME = Shy Crocodile
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4b4a7f32b68e..26387c7bf305 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -198,7 +198,7 @@ config X86
198 select IRQ_FORCED_THREADING 198 select IRQ_FORCED_THREADING
199 select NEED_SG_DMA_LENGTH 199 select NEED_SG_DMA_LENGTH
200 select PCI_DOMAINS if PCI 200 select PCI_DOMAINS if PCI
201 select PCI_LOCKLESS_CONFIG 201 select PCI_LOCKLESS_CONFIG if PCI
202 select PERF_EVENTS 202 select PERF_EVENTS
203 select RTC_LIB 203 select RTC_LIB
204 select RTC_MC146818_LIB 204 select RTC_MC146818_LIB
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 8eaf8952c408..39913770a44d 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -361,7 +361,8 @@ ENTRY(entry_INT80_compat)
361 361
362 /* Need to switch before accessing the thread stack. */ 362 /* Need to switch before accessing the thread stack. */
363 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi 363 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
364 movq %rsp, %rdi 364 /* In the Xen PV case we already run on the thread stack. */
365 ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
365 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 366 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
366 367
367 pushq 6*8(%rdi) /* regs->ss */ 368 pushq 6*8(%rdi) /* regs->ss */
@@ -370,8 +371,9 @@ ENTRY(entry_INT80_compat)
370 pushq 3*8(%rdi) /* regs->cs */ 371 pushq 3*8(%rdi) /* regs->cs */
371 pushq 2*8(%rdi) /* regs->ip */ 372 pushq 2*8(%rdi) /* regs->ip */
372 pushq 1*8(%rdi) /* regs->orig_ax */ 373 pushq 1*8(%rdi) /* regs->orig_ax */
373
374 pushq (%rdi) /* pt_regs->di */ 374 pushq (%rdi) /* pt_regs->di */
375.Lint80_keep_stack:
376
375 pushq %rsi /* pt_regs->si */ 377 pushq %rsi /* pt_regs->si */
376 xorl %esi, %esi /* nospec si */ 378 xorl %esi, %esi /* nospec si */
377 pushq %rdx /* pt_regs->dx */ 379 pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 0ca50611e8ce..19d18fae6ec6 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
178 178
179void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); 179void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
180 180
181/*
182 * Init a new mm. Used on mm copies, like at fork()
183 * and on mm's that are brand-new, like at execve().
184 */
181static inline int init_new_context(struct task_struct *tsk, 185static inline int init_new_context(struct task_struct *tsk,
182 struct mm_struct *mm) 186 struct mm_struct *mm)
183{ 187{
@@ -228,8 +232,22 @@ do { \
228} while (0) 232} while (0)
229#endif 233#endif
230 234
235static inline void arch_dup_pkeys(struct mm_struct *oldmm,
236 struct mm_struct *mm)
237{
238#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
239 if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
240 return;
241
242 /* Duplicate the oldmm pkey state in mm: */
243 mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
244 mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
245#endif
246}
247
231static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 248static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
232{ 249{
250 arch_dup_pkeys(oldmm, mm);
233 paravirt_arch_dup_mmap(oldmm, mm); 251 paravirt_arch_dup_mmap(oldmm, mm);
234 return ldt_dup_context(oldmm, mm); 252 return ldt_dup_context(oldmm, mm);
235} 253}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c8b07d8ea5a2..17ffc869cab8 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -470,6 +470,7 @@ int crash_load_segments(struct kimage *image)
470 470
471 kbuf.memsz = kbuf.bufsz; 471 kbuf.memsz = kbuf.bufsz;
472 kbuf.buf_align = ELF_CORE_HEADER_ALIGN; 472 kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
473 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
473 ret = kexec_add_buffer(&kbuf); 474 ret = kexec_add_buffer(&kbuf);
474 if (ret) { 475 if (ret) {
475 vfree((void *)image->arch.elf_headers); 476 vfree((void *)image->arch.elf_headers);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b0acb22e5a46..dfd3aca82c61 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -21,10 +21,6 @@
21 21
22#define HPET_MASK CLOCKSOURCE_MASK(32) 22#define HPET_MASK CLOCKSOURCE_MASK(32)
23 23
24/* FSEC = 10^-15
25 NSEC = 10^-9 */
26#define FSEC_PER_NSEC 1000000L
27
28#define HPET_DEV_USED_BIT 2 24#define HPET_DEV_USED_BIT 2
29#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) 25#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
30#define HPET_DEV_VALID 0x8 26#define HPET_DEV_VALID 0x8
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 278cd07228dd..0d5efa34f359 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
434 kbuf.memsz = PAGE_ALIGN(header->init_size); 434 kbuf.memsz = PAGE_ALIGN(header->init_size);
435 kbuf.buf_align = header->kernel_alignment; 435 kbuf.buf_align = header->kernel_alignment;
436 kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; 436 kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
437 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
437 ret = kexec_add_buffer(&kbuf); 438 ret = kexec_add_buffer(&kbuf);
438 if (ret) 439 if (ret)
439 goto out_free_params; 440 goto out_free_params;
@@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
448 kbuf.bufsz = kbuf.memsz = initrd_len; 449 kbuf.bufsz = kbuf.memsz = initrd_len;
449 kbuf.buf_align = PAGE_SIZE; 450 kbuf.buf_align = PAGE_SIZE;
450 kbuf.buf_min = MIN_INITRD_LOAD_ADDR; 451 kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
452 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
451 ret = kexec_add_buffer(&kbuf); 453 ret = kexec_add_buffer(&kbuf);
452 if (ret) 454 if (ret)
453 goto out_free_params; 455 goto out_free_params;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e9f777bfed40..3fae23834069 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -297,15 +297,16 @@ static int __init tsc_setup(char *str)
297 297
298__setup("tsc=", tsc_setup); 298__setup("tsc=", tsc_setup);
299 299
300#define MAX_RETRIES 5 300#define MAX_RETRIES 5
301#define SMI_TRESHOLD 50000 301#define TSC_DEFAULT_THRESHOLD 0x20000
302 302
303/* 303/*
304 * Read TSC and the reference counters. Take care of SMI disturbance 304 * Read TSC and the reference counters. Take care of any disturbances
305 */ 305 */
306static u64 tsc_read_refs(u64 *p, int hpet) 306static u64 tsc_read_refs(u64 *p, int hpet)
307{ 307{
308 u64 t1, t2; 308 u64 t1, t2;
309 u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
309 int i; 310 int i;
310 311
311 for (i = 0; i < MAX_RETRIES; i++) { 312 for (i = 0; i < MAX_RETRIES; i++) {
@@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet)
315 else 316 else
316 *p = acpi_pm_read_early(); 317 *p = acpi_pm_read_early();
317 t2 = get_cycles(); 318 t2 = get_cycles();
318 if ((t2 - t1) < SMI_TRESHOLD) 319 if ((t2 - t1) < thresh)
319 return t2; 320 return t2;
320 } 321 }
321 return ULLONG_MAX; 322 return ULLONG_MAX;
@@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
703 * zero. In each wait loop iteration we read the TSC and check 704 * zero. In each wait loop iteration we read the TSC and check
704 * the delta to the previous read. We keep track of the min 705 * the delta to the previous read. We keep track of the min
705 * and max values of that delta. The delta is mostly defined 706 * and max values of that delta. The delta is mostly defined
706 * by the IO time of the PIT access, so we can detect when a 707 * by the IO time of the PIT access, so we can detect when
707 * SMI/SMM disturbance happened between the two reads. If the 708 * any disturbance happened between the two reads. If the
708 * maximum time is significantly larger than the minimum time, 709 * maximum time is significantly larger than the minimum time,
709 * then we discard the result and have another try. 710 * then we discard the result and have another try.
710 * 711 *
711 * 2) Reference counter. If available we use the HPET or the 712 * 2) Reference counter. If available we use the HPET or the
712 * PMTIMER as a reference to check the sanity of that value. 713 * PMTIMER as a reference to check the sanity of that value.
713 * We use separate TSC readouts and check inside of the 714 * We use separate TSC readouts and check inside of the
714 * reference read for a SMI/SMM disturbance. We dicard 715 * reference read for any possible disturbance. We dicard
715 * disturbed values here as well. We do that around the PIT 716 * disturbed values here as well. We do that around the PIT
716 * calibration delay loop as we have to wait for a certain 717 * calibration delay loop as we have to wait for a certain
717 * amount of time anyway. 718 * amount of time anyway.
@@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
744 if (ref1 == ref2) 745 if (ref1 == ref2)
745 continue; 746 continue;
746 747
747 /* Check, whether the sampling was disturbed by an SMI */ 748 /* Check, whether the sampling was disturbed */
748 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) 749 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
749 continue; 750 continue;
750 751
@@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
1268 */ 1269 */
1269static void tsc_refine_calibration_work(struct work_struct *work) 1270static void tsc_refine_calibration_work(struct work_struct *work)
1270{ 1271{
1271 static u64 tsc_start = -1, ref_start; 1272 static u64 tsc_start = ULLONG_MAX, ref_start;
1272 static int hpet; 1273 static int hpet;
1273 u64 tsc_stop, ref_stop, delta; 1274 u64 tsc_stop, ref_stop, delta;
1274 unsigned long freq; 1275 unsigned long freq;
@@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work)
1283 * delayed the first time we expire. So set the workqueue 1284 * delayed the first time we expire. So set the workqueue
1284 * again once we know timers are working. 1285 * again once we know timers are working.
1285 */ 1286 */
1286 if (tsc_start == -1) { 1287 if (tsc_start == ULLONG_MAX) {
1288restart:
1287 /* 1289 /*
1288 * Only set hpet once, to avoid mixing hardware 1290 * Only set hpet once, to avoid mixing hardware
1289 * if the hpet becomes enabled later. 1291 * if the hpet becomes enabled later.
1290 */ 1292 */
1291 hpet = is_hpet_enabled(); 1293 hpet = is_hpet_enabled();
1292 schedule_delayed_work(&tsc_irqwork, HZ);
1293 tsc_start = tsc_read_refs(&ref_start, hpet); 1294 tsc_start = tsc_read_refs(&ref_start, hpet);
1295 schedule_delayed_work(&tsc_irqwork, HZ);
1294 return; 1296 return;
1295 } 1297 }
1296 1298
@@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
1300 if (ref_start == ref_stop) 1302 if (ref_start == ref_stop)
1301 goto out; 1303 goto out;
1302 1304
1303 /* Check, whether the sampling was disturbed by an SMI */ 1305 /* Check, whether the sampling was disturbed */
1304 if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) 1306 if (tsc_stop == ULLONG_MAX)
1305 goto out; 1307 goto restart;
1306 1308
1307 delta = tsc_stop - tsc_start; 1309 delta = tsc_stop - tsc_start;
1308 delta *= 1000000LL; 1310 delta *= 1000000LL;
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 79778ab200e4..a53665116458 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -36,8 +36,8 @@ static inline u16 i8254(void)
36 u16 status, timer; 36 u16 status, timer;
37 37
38 do { 38 do {
39 outb(I8254_PORT_CONTROL, 39 outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0,
40 I8254_CMD_READBACK | I8254_SELECT_COUNTER0); 40 I8254_PORT_CONTROL);
41 status = inb(I8254_PORT_COUNTER0); 41 status = inb(I8254_PORT_COUNTER0);
42 timer = inb(I8254_PORT_COUNTER0); 42 timer = inb(I8254_PORT_COUNTER0);
43 timer |= inb(I8254_PORT_COUNTER0) << 8; 43 timer |= inb(I8254_PORT_COUNTER0) << 8;
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index a19ef1a416ff..4aa9b1480866 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -158,8 +158,8 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
158 pmd = pmd_offset(pud, ppd->vaddr); 158 pmd = pmd_offset(pud, ppd->vaddr);
159 if (pmd_none(*pmd)) { 159 if (pmd_none(*pmd)) {
160 pte = ppd->pgtable_area; 160 pte = ppd->pgtable_area;
161 memset(pte, 0, sizeof(pte) * PTRS_PER_PTE); 161 memset(pte, 0, sizeof(*pte) * PTRS_PER_PTE);
162 ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE; 162 ppd->pgtable_area += sizeof(*pte) * PTRS_PER_PTE;
163 set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); 163 set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
164 } 164 }
165 165
diff --git a/block/blk-core.c b/block/blk-core.c
index 1ccec27d20c3..3c5f61ceeb67 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1083,18 +1083,7 @@ blk_qc_t generic_make_request(struct bio *bio)
1083 /* Create a fresh bio_list for all subordinate requests */ 1083 /* Create a fresh bio_list for all subordinate requests */
1084 bio_list_on_stack[1] = bio_list_on_stack[0]; 1084 bio_list_on_stack[1] = bio_list_on_stack[0];
1085 bio_list_init(&bio_list_on_stack[0]); 1085 bio_list_init(&bio_list_on_stack[0]);
1086
1087 /*
1088 * Since we're recursing into make_request here, ensure
1089 * that we mark this bio as already having entered the queue.
1090 * If not, and the queue is going away, we can get stuck
1091 * forever on waiting for the queue reference to drop. But
1092 * that will never happen, as we're already holding a
1093 * reference to it.
1094 */
1095 bio_set_flag(bio, BIO_QUEUE_ENTERED);
1096 ret = q->make_request_fn(q, bio); 1086 ret = q->make_request_fn(q, bio);
1097 bio_clear_flag(bio, BIO_QUEUE_ENTERED);
1098 1087
1099 /* sort new bios into those for a lower level 1088 /* sort new bios into those for a lower level
1100 * and those for the same level 1089 * and those for the same level
diff --git a/block/blk-merge.c b/block/blk-merge.c
index d79a22f111d1..71e9ac03f621 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -272,6 +272,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
272 /* there isn't chance to merge the splitted bio */ 272 /* there isn't chance to merge the splitted bio */
273 split->bi_opf |= REQ_NOMERGE; 273 split->bi_opf |= REQ_NOMERGE;
274 274
275 /*
276 * Since we're recursing into make_request here, ensure
277 * that we mark this bio as already having entered the queue.
278 * If not, and the queue is going away, we can get stuck
279 * forever on waiting for the queue reference to drop. But
280 * that will never happen, as we're already holding a
281 * reference to it.
282 */
283 bio_set_flag(*bio, BIO_QUEUE_ENTERED);
284
275 bio_chain(split, *bio); 285 bio_chain(split, *bio);
276 trace_block_split(q, split, (*bio)->bi_iter.bi_sector); 286 trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
277 generic_make_request(*bio); 287 generic_make_request(*bio);
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 4213cb0bb2a7..f8664bac9fa8 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -295,8 +295,8 @@ struct altr_sdram_mc_data {
295#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 295#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
296 296
297/* Sticky registers for Uncorrected Errors */ 297/* Sticky registers for Uncorrected Errors */
298#define S10_SYSMGR_UE_VAL_OFST 0x120 298#define S10_SYSMGR_UE_VAL_OFST 0x220
299#define S10_SYSMGR_UE_ADDR_OFST 0x124 299#define S10_SYSMGR_UE_ADDR_OFST 0x224
300 300
301#define S10_DDR0_IRQ_MASK BIT(16) 301#define S10_DDR0_IRQ_MASK BIT(16)
302 302
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index db20e992a40f..7f2a45445b00 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2399,13 +2399,14 @@ static void its_free_device(struct its_device *its_dev)
2399 kfree(its_dev); 2399 kfree(its_dev);
2400} 2400}
2401 2401
2402static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) 2402static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq)
2403{ 2403{
2404 int idx; 2404 int idx;
2405 2405
2406 idx = find_first_zero_bit(dev->event_map.lpi_map, 2406 idx = bitmap_find_free_region(dev->event_map.lpi_map,
2407 dev->event_map.nr_lpis); 2407 dev->event_map.nr_lpis,
2408 if (idx == dev->event_map.nr_lpis) 2408 get_count_order(nvecs));
2409 if (idx < 0)
2409 return -ENOSPC; 2410 return -ENOSPC;
2410 2411
2411 *hwirq = dev->event_map.lpi_base + idx; 2412 *hwirq = dev->event_map.lpi_base + idx;
@@ -2501,21 +2502,21 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
2501 int err; 2502 int err;
2502 int i; 2503 int i;
2503 2504
2504 for (i = 0; i < nr_irqs; i++) { 2505 err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq);
2505 err = its_alloc_device_irq(its_dev, &hwirq); 2506 if (err)
2506 if (err) 2507 return err;
2507 return err;
2508 2508
2509 err = its_irq_gic_domain_alloc(domain, virq + i, hwirq); 2509 for (i = 0; i < nr_irqs; i++) {
2510 err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
2510 if (err) 2511 if (err)
2511 return err; 2512 return err;
2512 2513
2513 irq_domain_set_hwirq_and_chip(domain, virq + i, 2514 irq_domain_set_hwirq_and_chip(domain, virq + i,
2514 hwirq, &its_irq_chip, its_dev); 2515 hwirq + i, &its_irq_chip, its_dev);
2515 irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); 2516 irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i)));
2516 pr_debug("ID:%d pID:%d vID:%d\n", 2517 pr_debug("ID:%d pID:%d vID:%d\n",
2517 (int)(hwirq - its_dev->event_map.lpi_base), 2518 (int)(hwirq + i - its_dev->event_map.lpi_base),
2518 (int) hwirq, virq + i); 2519 (int)(hwirq + i), virq + i);
2519 } 2520 }
2520 2521
2521 return 0; 2522 return 0;
diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c
index ad70e7c416e3..fbfa7ff6deb1 100644
--- a/drivers/irqchip/irq-gic-v3-mbi.c
+++ b/drivers/irqchip/irq-gic-v3-mbi.c
@@ -24,7 +24,7 @@ struct mbi_range {
24 unsigned long *bm; 24 unsigned long *bm;
25}; 25};
26 26
27static struct mutex mbi_lock; 27static DEFINE_MUTEX(mbi_lock);
28static phys_addr_t mbi_phys_base; 28static phys_addr_t mbi_phys_base;
29static struct mbi_range *mbi_ranges; 29static struct mbi_range *mbi_ranges;
30static unsigned int mbi_range_nr; 30static unsigned int mbi_range_nr;
diff --git a/drivers/irqchip/irq-madera.c b/drivers/irqchip/irq-madera.c
index e9256dee1a45..8b81271c823c 100644
--- a/drivers/irqchip/irq-madera.c
+++ b/drivers/irqchip/irq-madera.c
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/gpio.h>
11#include <linux/interrupt.h> 10#include <linux/interrupt.h>
12#include <linux/irq.h> 11#include <linux/irq.h>
13#include <linux/irqdomain.h> 12#include <linux/irqdomain.h>
@@ -16,7 +15,6 @@
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include <linux/of.h> 16#include <linux/of.h>
18#include <linux/of_device.h> 17#include <linux/of_device.h>
19#include <linux/of_gpio.h>
20#include <linux/of_irq.h> 18#include <linux/of_irq.h>
21#include <linux/irqchip/irq-madera.h> 19#include <linux/irqchip/irq-madera.h>
22#include <linux/mfd/madera/core.h> 20#include <linux/mfd/madera/core.h>
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 6edfd4bfa169..a93296b9b45d 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -822,6 +822,7 @@ out_unmap:
822static const struct irq_domain_ops stm32_exti_h_domain_ops = { 822static const struct irq_domain_ops stm32_exti_h_domain_ops = {
823 .alloc = stm32_exti_h_domain_alloc, 823 .alloc = stm32_exti_h_domain_alloc,
824 .free = irq_domain_free_irqs_common, 824 .free = irq_domain_free_irqs_common,
825 .xlate = irq_domain_xlate_twocell,
825}; 826};
826 827
827static int 828static int
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index a0f954f36c09..44e6c7b1b222 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -257,10 +257,7 @@ static int handle_tx(struct ser_device *ser)
257 if (skb->len == 0) { 257 if (skb->len == 0) {
258 struct sk_buff *tmp = skb_dequeue(&ser->head); 258 struct sk_buff *tmp = skb_dequeue(&ser->head);
259 WARN_ON(tmp != skb); 259 WARN_ON(tmp != skb);
260 if (in_interrupt()) 260 dev_consume_skb_any(skb);
261 dev_kfree_skb_irq(skb);
262 else
263 kfree_skb(skb);
264 } 261 }
265 } 262 }
266 /* Send flow off if queue is empty */ 263 /* Send flow off if queue is empty */
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 2caa8c8b4b55..1bfc5ff8d81d 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -664,7 +664,7 @@ int mv88e6390_serdes_irq_setup(struct mv88e6xxx_chip *chip, int port)
664 if (port < 9) 664 if (port < 9)
665 return 0; 665 return 0;
666 666
667 return mv88e6390_serdes_irq_setup(chip, port); 667 return mv88e6390x_serdes_irq_setup(chip, port);
668} 668}
669 669
670void mv88e6390x_serdes_irq_free(struct mv88e6xxx_chip *chip, int port) 670void mv88e6390x_serdes_irq_free(struct mv88e6xxx_chip *chip, int port)
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 4f11f98347ed..1827ef1f6d55 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -2059,7 +2059,7 @@ static inline void ace_tx_int(struct net_device *dev,
2059 if (skb) { 2059 if (skb) {
2060 dev->stats.tx_packets++; 2060 dev->stats.tx_packets++;
2061 dev->stats.tx_bytes += skb->len; 2061 dev->stats.tx_bytes += skb->len;
2062 dev_kfree_skb_irq(skb); 2062 dev_consume_skb_irq(skb);
2063 info->skb = NULL; 2063 info->skb = NULL;
2064 } 2064 }
2065 2065
diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c
index 0fb986ba3290..0ae723f75341 100644
--- a/drivers/net/ethernet/altera/altera_msgdma.c
+++ b/drivers/net/ethernet/altera/altera_msgdma.c
@@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv)
145 & 0xffff; 145 & 0xffff;
146 146
147 if (inuse) { /* Tx FIFO is not empty */ 147 if (inuse) { /* Tx FIFO is not empty */
148 ready = priv->tx_prod - priv->tx_cons - inuse - 1; 148 ready = max_t(int,
149 priv->tx_prod - priv->tx_cons - inuse - 1, 0);
149 } else { 150 } else {
150 /* Check for buffered last packet */ 151 /* Check for buffered last packet */
151 status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); 152 status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status));
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index b9632928496e..145fe71fd155 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -666,7 +666,7 @@ static int amd8111e_tx(struct net_device *dev)
666 pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[tx_index], 666 pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[tx_index],
667 lp->tx_skbuff[tx_index]->len, 667 lp->tx_skbuff[tx_index]->len,
668 PCI_DMA_TODEVICE); 668 PCI_DMA_TODEVICE);
669 dev_kfree_skb_irq (lp->tx_skbuff[tx_index]); 669 dev_consume_skb_irq(lp->tx_skbuff[tx_index]);
670 lp->tx_skbuff[tx_index] = NULL; 670 lp->tx_skbuff[tx_index] = NULL;
671 lp->tx_dma_addr[tx_index] = 0; 671 lp->tx_dma_addr[tx_index] = 0;
672 } 672 }
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 6a8e2567f2bd..4d3855ceb500 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -777,7 +777,7 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id)
777 777
778 if (bp->tx_bufs[bp->tx_empty]) { 778 if (bp->tx_bufs[bp->tx_empty]) {
779 ++dev->stats.tx_packets; 779 ++dev->stats.tx_packets;
780 dev_kfree_skb_irq(bp->tx_bufs[bp->tx_empty]); 780 dev_consume_skb_irq(bp->tx_bufs[bp->tx_empty]);
781 } 781 }
782 bp->tx_bufs[bp->tx_empty] = NULL; 782 bp->tx_bufs[bp->tx_empty] = NULL;
783 bp->tx_fullup = 0; 783 bp->tx_fullup = 0;
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index f44808959ff3..97ab0dd25552 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -638,7 +638,7 @@ static void b44_tx(struct b44 *bp)
638 bytes_compl += skb->len; 638 bytes_compl += skb->len;
639 pkts_compl++; 639 pkts_compl++;
640 640
641 dev_kfree_skb_irq(skb); 641 dev_consume_skb_irq(skb);
642 } 642 }
643 643
644 netdev_completed_queue(bp->dev, pkts_compl, bytes_compl); 644 netdev_completed_queue(bp->dev, pkts_compl, bytes_compl);
@@ -1012,7 +1012,7 @@ static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
1012 } 1012 }
1013 1013
1014 skb_copy_from_linear_data(skb, skb_put(bounce_skb, len), len); 1014 skb_copy_from_linear_data(skb, skb_put(bounce_skb, len), len);
1015 dev_kfree_skb_any(skb); 1015 dev_consume_skb_any(skb);
1016 skb = bounce_skb; 1016 skb = bounce_skb;
1017 } 1017 }
1018 1018
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 3d45f4c92cf6..9bbaad9f3d63 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -643,6 +643,7 @@
643#define MACB_CAPS_JUMBO 0x00000020 643#define MACB_CAPS_JUMBO 0x00000020
644#define MACB_CAPS_GEM_HAS_PTP 0x00000040 644#define MACB_CAPS_GEM_HAS_PTP 0x00000040
645#define MACB_CAPS_BD_RD_PREFETCH 0x00000080 645#define MACB_CAPS_BD_RD_PREFETCH 0x00000080
646#define MACB_CAPS_NEEDS_RSTONUBR 0x00000100
646#define MACB_CAPS_FIFO_MODE 0x10000000 647#define MACB_CAPS_FIFO_MODE 0x10000000
647#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 648#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
648#define MACB_CAPS_SG_DISABLED 0x40000000 649#define MACB_CAPS_SG_DISABLED 0x40000000
@@ -1214,6 +1215,8 @@ struct macb {
1214 1215
1215 int rx_bd_rd_prefetch; 1216 int rx_bd_rd_prefetch;
1216 int tx_bd_rd_prefetch; 1217 int tx_bd_rd_prefetch;
1218
1219 u32 rx_intr_mask;
1217}; 1220};
1218 1221
1219#ifdef CONFIG_MACB_USE_HWSTAMP 1222#ifdef CONFIG_MACB_USE_HWSTAMP
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 66cc7927061a..2b2882615e8b 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -56,8 +56,7 @@
56/* level of occupied TX descriptors under which we wake up TX process */ 56/* level of occupied TX descriptors under which we wake up TX process */
57#define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4) 57#define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4)
58 58
59#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ 59#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR))
60 | MACB_BIT(ISR_ROVR))
61#define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ 60#define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \
62 | MACB_BIT(ISR_RLE) \ 61 | MACB_BIT(ISR_RLE) \
63 | MACB_BIT(TXERR)) 62 | MACB_BIT(TXERR))
@@ -1270,7 +1269,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
1270 queue_writel(queue, ISR, MACB_BIT(RCOMP)); 1269 queue_writel(queue, ISR, MACB_BIT(RCOMP));
1271 napi_reschedule(napi); 1270 napi_reschedule(napi);
1272 } else { 1271 } else {
1273 queue_writel(queue, IER, MACB_RX_INT_FLAGS); 1272 queue_writel(queue, IER, bp->rx_intr_mask);
1274 } 1273 }
1275 } 1274 }
1276 1275
@@ -1288,7 +1287,7 @@ static void macb_hresp_error_task(unsigned long data)
1288 u32 ctrl; 1287 u32 ctrl;
1289 1288
1290 for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { 1289 for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
1291 queue_writel(queue, IDR, MACB_RX_INT_FLAGS | 1290 queue_writel(queue, IDR, bp->rx_intr_mask |
1292 MACB_TX_INT_FLAGS | 1291 MACB_TX_INT_FLAGS |
1293 MACB_BIT(HRESP)); 1292 MACB_BIT(HRESP));
1294 } 1293 }
@@ -1318,7 +1317,7 @@ static void macb_hresp_error_task(unsigned long data)
1318 1317
1319 /* Enable interrupts */ 1318 /* Enable interrupts */
1320 queue_writel(queue, IER, 1319 queue_writel(queue, IER,
1321 MACB_RX_INT_FLAGS | 1320 bp->rx_intr_mask |
1322 MACB_TX_INT_FLAGS | 1321 MACB_TX_INT_FLAGS |
1323 MACB_BIT(HRESP)); 1322 MACB_BIT(HRESP));
1324 } 1323 }
@@ -1372,14 +1371,14 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
1372 (unsigned int)(queue - bp->queues), 1371 (unsigned int)(queue - bp->queues),
1373 (unsigned long)status); 1372 (unsigned long)status);
1374 1373
1375 if (status & MACB_RX_INT_FLAGS) { 1374 if (status & bp->rx_intr_mask) {
1376 /* There's no point taking any more interrupts 1375 /* There's no point taking any more interrupts
1377 * until we have processed the buffers. The 1376 * until we have processed the buffers. The
1378 * scheduling call may fail if the poll routine 1377 * scheduling call may fail if the poll routine
1379 * is already scheduled, so disable interrupts 1378 * is already scheduled, so disable interrupts
1380 * now. 1379 * now.
1381 */ 1380 */
1382 queue_writel(queue, IDR, MACB_RX_INT_FLAGS); 1381 queue_writel(queue, IDR, bp->rx_intr_mask);
1383 if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) 1382 if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
1384 queue_writel(queue, ISR, MACB_BIT(RCOMP)); 1383 queue_writel(queue, ISR, MACB_BIT(RCOMP));
1385 1384
@@ -1412,8 +1411,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
1412 /* There is a hardware issue under heavy load where DMA can 1411 /* There is a hardware issue under heavy load where DMA can
1413 * stop, this causes endless "used buffer descriptor read" 1412 * stop, this causes endless "used buffer descriptor read"
1414 * interrupts but it can be cleared by re-enabling RX. See 1413 * interrupts but it can be cleared by re-enabling RX. See
1415 * the at91 manual, section 41.3.1 or the Zynq manual 1414 * the at91rm9200 manual, section 41.3.1 or the Zynq manual
1416 * section 16.7.4 for details. 1415 * section 16.7.4 for details. RXUBR is only enabled for
1416 * these two versions.
1417 */ 1417 */
1418 if (status & MACB_BIT(RXUBR)) { 1418 if (status & MACB_BIT(RXUBR)) {
1419 ctrl = macb_readl(bp, NCR); 1419 ctrl = macb_readl(bp, NCR);
@@ -2259,7 +2259,7 @@ static void macb_init_hw(struct macb *bp)
2259 2259
2260 /* Enable interrupts */ 2260 /* Enable interrupts */
2261 queue_writel(queue, IER, 2261 queue_writel(queue, IER,
2262 MACB_RX_INT_FLAGS | 2262 bp->rx_intr_mask |
2263 MACB_TX_INT_FLAGS | 2263 MACB_TX_INT_FLAGS |
2264 MACB_BIT(HRESP)); 2264 MACB_BIT(HRESP));
2265 } 2265 }
@@ -3907,6 +3907,7 @@ static const struct macb_config sama5d4_config = {
3907}; 3907};
3908 3908
3909static const struct macb_config emac_config = { 3909static const struct macb_config emac_config = {
3910 .caps = MACB_CAPS_NEEDS_RSTONUBR,
3910 .clk_init = at91ether_clk_init, 3911 .clk_init = at91ether_clk_init,
3911 .init = at91ether_init, 3912 .init = at91ether_init,
3912}; 3913};
@@ -3928,7 +3929,8 @@ static const struct macb_config zynqmp_config = {
3928}; 3929};
3929 3930
3930static const struct macb_config zynq_config = { 3931static const struct macb_config zynq_config = {
3931 .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF, 3932 .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF |
3933 MACB_CAPS_NEEDS_RSTONUBR,
3932 .dma_burst_length = 16, 3934 .dma_burst_length = 16,
3933 .clk_init = macb_clk_init, 3935 .clk_init = macb_clk_init,
3934 .init = macb_init, 3936 .init = macb_init,
@@ -4083,6 +4085,10 @@ static int macb_probe(struct platform_device *pdev)
4083 macb_dma_desc_get_size(bp); 4085 macb_dma_desc_get_size(bp);
4084 } 4086 }
4085 4087
4088 bp->rx_intr_mask = MACB_RX_INT_FLAGS;
4089 if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
4090 bp->rx_intr_mask |= MACB_BIT(RXUBR);
4091
4086 mac = of_get_mac_address(np); 4092 mac = of_get_mac_address(np);
4087 if (mac) { 4093 if (mac) {
4088 ether_addr_copy(bp->dev->dev_addr, mac); 4094 ether_addr_copy(bp->dev->dev_addr, mac);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 5b33238c6680..60e7d7ae3787 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2418,6 +2418,8 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
2418out_notify_fail: 2418out_notify_fail:
2419 (void)cancel_work_sync(&priv->service_task); 2419 (void)cancel_work_sync(&priv->service_task);
2420out_read_prop_fail: 2420out_read_prop_fail:
2421 /* safe for ACPI FW */
2422 of_node_put(to_of_node(priv->fwnode));
2421 free_netdev(ndev); 2423 free_netdev(ndev);
2422 return ret; 2424 return ret;
2423} 2425}
@@ -2447,6 +2449,9 @@ static int hns_nic_dev_remove(struct platform_device *pdev)
2447 set_bit(NIC_STATE_REMOVING, &priv->state); 2449 set_bit(NIC_STATE_REMOVING, &priv->state);
2448 (void)cancel_work_sync(&priv->service_task); 2450 (void)cancel_work_sync(&priv->service_task);
2449 2451
2452 /* safe for ACPI FW */
2453 of_node_put(to_of_node(priv->fwnode));
2454
2450 free_netdev(ndev); 2455 free_netdev(ndev);
2451 return 0; 2456 return 0;
2452} 2457}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 8e9b95871d30..ce15d2350db9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -1157,16 +1157,18 @@ static int hns_get_regs_len(struct net_device *net_dev)
1157 */ 1157 */
1158static int hns_nic_nway_reset(struct net_device *netdev) 1158static int hns_nic_nway_reset(struct net_device *netdev)
1159{ 1159{
1160 int ret = 0;
1161 struct phy_device *phy = netdev->phydev; 1160 struct phy_device *phy = netdev->phydev;
1162 1161
1163 if (netif_running(netdev)) { 1162 if (!netif_running(netdev))
1164 /* if autoneg is disabled, don't restart auto-negotiation */ 1163 return 0;
1165 if (phy && phy->autoneg == AUTONEG_ENABLE)
1166 ret = genphy_restart_aneg(phy);
1167 }
1168 1164
1169 return ret; 1165 if (!phy)
1166 return -EOPNOTSUPP;
1167
1168 if (phy->autoneg != AUTONEG_ENABLE)
1169 return -EINVAL;
1170
1171 return genphy_restart_aneg(phy);
1170} 1172}
1171 1173
1172static u32 1174static u32
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 017e08452d8c..baf5cc251f32 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -321,7 +321,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
321 } 321 }
322 322
323 hns_mdio_cmd_write(mdio_dev, is_c45, 323 hns_mdio_cmd_write(mdio_dev, is_c45,
324 MDIO_C45_WRITE_ADDR, phy_id, devad); 324 MDIO_C45_READ, phy_id, devad);
325 } 325 }
326 326
327 /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/ 327 /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index d719668a6684..92929750f832 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -1310,7 +1310,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id)
1310 dev->stats.tx_aborted_errors++; 1310 dev->stats.tx_aborted_errors++;
1311 } 1311 }
1312 1312
1313 dev_kfree_skb_irq(skb); 1313 dev_consume_skb_irq(skb);
1314 1314
1315 tx_cmd->cmd.command = 0; /* Mark free */ 1315 tx_cmd->cmd.command = 0; /* Mark free */
1316 break; 1316 break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 17b6babbed8a..099d307e6f25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -949,7 +949,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
949 if (params->rx_dim_enabled) 949 if (params->rx_dim_enabled)
950 __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); 950 __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
951 951
952 if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) 952 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE))
953 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); 953 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
954 954
955 return 0; 955 return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index edb34b397c53..5d2e0c2f6624 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1114,9 +1114,17 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
1114 struct mlx5e_priv *priv = netdev_priv(dev); 1114 struct mlx5e_priv *priv = netdev_priv(dev);
1115 struct mlx5e_rep_priv *rpriv = priv->ppriv; 1115 struct mlx5e_rep_priv *rpriv = priv->ppriv;
1116 struct mlx5_eswitch_rep *rep = rpriv->rep; 1116 struct mlx5_eswitch_rep *rep = rpriv->rep;
1117 int ret; 1117 int ret, pf_num;
1118
1119 ret = mlx5_lag_get_pf_num(priv->mdev, &pf_num);
1120 if (ret)
1121 return ret;
1122
1123 if (rep->vport == FDB_UPLINK_VPORT)
1124 ret = snprintf(buf, len, "p%d", pf_num);
1125 else
1126 ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1);
1118 1127
1119 ret = snprintf(buf, len, "%d", rep->vport - 1);
1120 if (ret >= len) 1128 if (ret >= len)
1121 return -EOPNOTSUPP; 1129 return -EOPNOTSUPP;
1122 1130
@@ -1264,6 +1272,18 @@ static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
1264 return 0; 1272 return 0;
1265} 1273}
1266 1274
1275static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
1276 __be16 vlan_proto)
1277{
1278 netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
1279
1280 if (vlan != 0)
1281 return -EOPNOTSUPP;
1282
1283 /* allow setting 0-vid for compatibility with libvirt */
1284 return 0;
1285}
1286
1267static const struct switchdev_ops mlx5e_rep_switchdev_ops = { 1287static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
1268 .switchdev_port_attr_get = mlx5e_attr_get, 1288 .switchdev_port_attr_get = mlx5e_attr_get,
1269}; 1289};
@@ -1298,6 +1318,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
1298 .ndo_set_vf_rate = mlx5e_set_vf_rate, 1318 .ndo_set_vf_rate = mlx5e_set_vf_rate,
1299 .ndo_get_vf_config = mlx5e_get_vf_config, 1319 .ndo_get_vf_config = mlx5e_get_vf_config,
1300 .ndo_get_vf_stats = mlx5e_get_vf_stats, 1320 .ndo_get_vf_stats = mlx5e_get_vf_stats,
1321 .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan,
1301}; 1322};
1302 1323
1303bool mlx5e_eswitch_rep(struct net_device *netdev) 1324bool mlx5e_eswitch_rep(struct net_device *netdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index a44ea7b85614..5b492b67f4e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1134,13 +1134,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
1134 int err = 0; 1134 int err = 0;
1135 u8 *smac_v; 1135 u8 *smac_v;
1136 1136
1137 if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
1138 mlx5_core_warn(esw->dev,
1139 "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
1140 vport->vport);
1141 return -EPERM;
1142 }
1143
1144 esw_vport_cleanup_ingress_rules(esw, vport); 1137 esw_vport_cleanup_ingress_rules(esw, vport);
1145 1138
1146 if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { 1139 if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
@@ -1728,7 +1721,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
1728 int vport_num; 1721 int vport_num;
1729 int err; 1722 int err;
1730 1723
1731 if (!MLX5_ESWITCH_MANAGER(dev)) 1724 if (!MLX5_VPORT_MANAGER(dev))
1732 return 0; 1725 return 0;
1733 1726
1734 esw_info(dev, 1727 esw_info(dev,
@@ -1797,7 +1790,7 @@ abort:
1797 1790
1798void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) 1791void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
1799{ 1792{
1800 if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) 1793 if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
1801 return; 1794 return;
1802 1795
1803 esw_info(esw->dev, "cleanup\n"); 1796 esw_info(esw->dev, "cleanup\n");
@@ -1827,13 +1820,10 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
1827 mutex_lock(&esw->state_lock); 1820 mutex_lock(&esw->state_lock);
1828 evport = &esw->vports[vport]; 1821 evport = &esw->vports[vport];
1829 1822
1830 if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { 1823 if (evport->info.spoofchk && !is_valid_ether_addr(mac))
1831 mlx5_core_warn(esw->dev, 1824 mlx5_core_warn(esw->dev,
1832 "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", 1825 "Set invalid MAC while spoofchk is on, vport(%d)\n",
1833 vport); 1826 vport);
1834 err = -EPERM;
1835 goto unlock;
1836 }
1837 1827
1838 err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); 1828 err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
1839 if (err) { 1829 if (err) {
@@ -1979,6 +1969,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
1979 evport = &esw->vports[vport]; 1969 evport = &esw->vports[vport];
1980 pschk = evport->info.spoofchk; 1970 pschk = evport->info.spoofchk;
1981 evport->info.spoofchk = spoofchk; 1971 evport->info.spoofchk = spoofchk;
1972 if (pschk && !is_valid_ether_addr(evport->info.mac))
1973 mlx5_core_warn(esw->dev,
1974 "Spoofchk in set while MAC is invalid, vport(%d)\n",
1975 evport->vport);
1982 if (evport->enabled && esw->mode == SRIOV_LEGACY) 1976 if (evport->enabled && esw->mode == SRIOV_LEGACY)
1983 err = esw_vport_ingress_config(esw, evport); 1977 err = esw_vport_ingress_config(esw, evport);
1984 if (err) 1978 if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 3a6baed722d8..2d223385dc81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -616,6 +616,27 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
616 } 616 }
617} 617}
618 618
619int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num)
620{
621 struct mlx5_lag *ldev;
622 int n;
623
624 ldev = mlx5_lag_dev_get(dev);
625 if (!ldev) {
626 mlx5_core_warn(dev, "no lag device, can't get pf num\n");
627 return -EINVAL;
628 }
629
630 for (n = 0; n < MLX5_MAX_PORTS; n++)
631 if (ldev->pf[n].dev == dev) {
632 *pf_num = n;
633 return 0;
634 }
635
636 mlx5_core_warn(dev, "wasn't able to locate pf in the lag device\n");
637 return -EINVAL;
638}
639
619/* Must be called with intf_mutex held */ 640/* Must be called with intf_mutex held */
620void mlx5_lag_remove(struct mlx5_core_dev *dev) 641void mlx5_lag_remove(struct mlx5_core_dev *dev)
621{ 642{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index c68dcea5985b..5300b0b6d836 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -187,6 +187,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
187 MLX5_CAP_GEN(dev, lag_master); 187 MLX5_CAP_GEN(dev, lag_master);
188} 188}
189 189
190int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num);
191
190void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); 192void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
191void mlx5_lag_update(struct mlx5_core_dev *dev); 193void mlx5_lag_update(struct mlx5_core_dev *dev);
192 194
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 388f205a497f..370ca94b6775 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -44,14 +44,15 @@ static struct mlx5_core_rsc_common *
44mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) 44mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
45{ 45{
46 struct mlx5_core_rsc_common *common; 46 struct mlx5_core_rsc_common *common;
47 unsigned long flags;
47 48
48 spin_lock(&table->lock); 49 spin_lock_irqsave(&table->lock, flags);
49 50
50 common = radix_tree_lookup(&table->tree, rsn); 51 common = radix_tree_lookup(&table->tree, rsn);
51 if (common) 52 if (common)
52 atomic_inc(&common->refcount); 53 atomic_inc(&common->refcount);
53 54
54 spin_unlock(&table->lock); 55 spin_unlock_irqrestore(&table->lock, flags);
55 56
56 return common; 57 return common;
57} 58}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index b17003d9066c..e2cbd77646a2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -795,19 +795,19 @@ static void qed_init_qm_pq(struct qed_hwfn *p_hwfn,
795 795
796/* get pq index according to PQ_FLAGS */ 796/* get pq index according to PQ_FLAGS */
797static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, 797static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
798 u32 pq_flags) 798 unsigned long pq_flags)
799{ 799{
800 struct qed_qm_info *qm_info = &p_hwfn->qm_info; 800 struct qed_qm_info *qm_info = &p_hwfn->qm_info;
801 801
802 /* Can't have multiple flags set here */ 802 /* Can't have multiple flags set here */
803 if (bitmap_weight((unsigned long *)&pq_flags, 803 if (bitmap_weight(&pq_flags,
804 sizeof(pq_flags) * BITS_PER_BYTE) > 1) { 804 sizeof(pq_flags) * BITS_PER_BYTE) > 1) {
805 DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); 805 DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags);
806 goto err; 806 goto err;
807 } 807 }
808 808
809 if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { 809 if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) {
810 DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); 810 DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags);
811 goto err; 811 goto err;
812 } 812 }
813 813
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 67c02ea93906..e68ca83ae915 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -609,6 +609,10 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn,
609 (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && 609 (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) &&
610 !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); 610 !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED)));
611 611
612 SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL,
613 (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) &&
614 !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED)));
615
612 SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, 616 SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL,
613 !!(accept_filter & QED_ACCEPT_BCAST)); 617 !!(accept_filter & QED_ACCEPT_BCAST));
614 618
@@ -744,6 +748,11 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
744 return rc; 748 return rc;
745 } 749 }
746 750
751 if (p_params->update_ctl_frame_check) {
752 p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en;
753 p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en;
754 }
755
747 /* Update mcast bins for VFs, PF doesn't use this functionality */ 756 /* Update mcast bins for VFs, PF doesn't use this functionality */
748 qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params); 757 qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params);
749 758
@@ -2688,7 +2697,8 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev,
2688 if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) { 2697 if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) {
2689 accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | 2698 accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED |
2690 QED_ACCEPT_MCAST_UNMATCHED; 2699 QED_ACCEPT_MCAST_UNMATCHED;
2691 accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; 2700 accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED |
2701 QED_ACCEPT_MCAST_UNMATCHED;
2692 } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) { 2702 } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) {
2693 accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; 2703 accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
2694 accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; 2704 accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 8d80f1095d17..7127d5aaac42 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -219,6 +219,9 @@ struct qed_sp_vport_update_params {
219 struct qed_rss_params *rss_params; 219 struct qed_rss_params *rss_params;
220 struct qed_filter_accept_flags accept_flags; 220 struct qed_filter_accept_flags accept_flags;
221 struct qed_sge_tpa_params *sge_tpa_params; 221 struct qed_sge_tpa_params *sge_tpa_params;
222 u8 update_ctl_frame_check;
223 u8 mac_chk_en;
224 u8 ethtype_chk_en;
222}; 225};
223 226
224int qed_sp_vport_update(struct qed_hwfn *p_hwfn, 227int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index d9237c65a838..b5f419b71287 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -2451,19 +2451,24 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
2451{ 2451{
2452 struct qed_ll2_tx_pkt_info pkt; 2452 struct qed_ll2_tx_pkt_info pkt;
2453 const skb_frag_t *frag; 2453 const skb_frag_t *frag;
2454 u8 flags = 0, nr_frags;
2454 int rc = -EINVAL, i; 2455 int rc = -EINVAL, i;
2455 dma_addr_t mapping; 2456 dma_addr_t mapping;
2456 u16 vlan = 0; 2457 u16 vlan = 0;
2457 u8 flags = 0;
2458 2458
2459 if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { 2459 if (unlikely(skb->ip_summed != CHECKSUM_NONE)) {
2460 DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); 2460 DP_INFO(cdev, "Cannot transmit a checksummed packet\n");
2461 return -EINVAL; 2461 return -EINVAL;
2462 } 2462 }
2463 2463
2464 if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { 2464 /* Cache number of fragments from SKB since SKB may be freed by
2465 * the completion routine after calling qed_ll2_prepare_tx_packet()
2466 */
2467 nr_frags = skb_shinfo(skb)->nr_frags;
2468
2469 if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) {
2465 DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", 2470 DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n",
2466 1 + skb_shinfo(skb)->nr_frags); 2471 1 + nr_frags);
2467 return -EINVAL; 2472 return -EINVAL;
2468 } 2473 }
2469 2474
@@ -2485,7 +2490,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
2485 } 2490 }
2486 2491
2487 memset(&pkt, 0, sizeof(pkt)); 2492 memset(&pkt, 0, sizeof(pkt));
2488 pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags; 2493 pkt.num_of_bds = 1 + nr_frags;
2489 pkt.vlan = vlan; 2494 pkt.vlan = vlan;
2490 pkt.bd_flags = flags; 2495 pkt.bd_flags = flags;
2491 pkt.tx_dest = QED_LL2_TX_DEST_NW; 2496 pkt.tx_dest = QED_LL2_TX_DEST_NW;
@@ -2496,12 +2501,17 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
2496 test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags)) 2501 test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags))
2497 pkt.remove_stag = true; 2502 pkt.remove_stag = true;
2498 2503
2504 /* qed_ll2_prepare_tx_packet() may actually send the packet if
2505 * there are no fragments in the skb and subsequently the completion
2506 * routine may run and free the SKB, so no dereferencing the SKB
2507 * beyond this point unless skb has any fragments.
2508 */
2499 rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, 2509 rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
2500 &pkt, 1); 2510 &pkt, 1);
2501 if (rc) 2511 if (rc)
2502 goto err; 2512 goto err;
2503 2513
2504 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2514 for (i = 0; i < nr_frags; i++) {
2505 frag = &skb_shinfo(skb)->frags[i]; 2515 frag = &skb_shinfo(skb)->frags[i];
2506 2516
2507 mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, 2517 mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 71e28be58102..9faaa6df78ed 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -1969,7 +1969,9 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
1969 params.vport_id = vf->vport_id; 1969 params.vport_id = vf->vport_id;
1970 params.max_buffers_per_cqe = start->max_buffers_per_cqe; 1970 params.max_buffers_per_cqe = start->max_buffers_per_cqe;
1971 params.mtu = vf->mtu; 1971 params.mtu = vf->mtu;
1972 params.check_mac = true; 1972
1973 /* Non trusted VFs should enable control frame filtering */
1974 params.check_mac = !vf->p_vf_info.is_trusted_configured;
1973 1975
1974 rc = qed_sp_eth_vport_start(p_hwfn, &params); 1976 rc = qed_sp_eth_vport_start(p_hwfn, &params);
1975 if (rc) { 1977 if (rc) {
@@ -5137,6 +5139,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
5137 params.opaque_fid = vf->opaque_fid; 5139 params.opaque_fid = vf->opaque_fid;
5138 params.vport_id = vf->vport_id; 5140 params.vport_id = vf->vport_id;
5139 5141
5142 params.update_ctl_frame_check = 1;
5143 params.mac_chk_en = !vf_info->is_trusted_configured;
5144
5140 if (vf_info->rx_accept_mode & mask) { 5145 if (vf_info->rx_accept_mode & mask) {
5141 flags->update_rx_mode_config = 1; 5146 flags->update_rx_mode_config = 1;
5142 flags->rx_accept_filter = vf_info->rx_accept_mode; 5147 flags->rx_accept_filter = vf_info->rx_accept_mode;
@@ -5154,7 +5159,8 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
5154 } 5159 }
5155 5160
5156 if (flags->update_rx_mode_config || 5161 if (flags->update_rx_mode_config ||
5157 flags->update_tx_mode_config) 5162 flags->update_tx_mode_config ||
5163 params.update_ctl_frame_check)
5158 qed_sp_vport_update(hwfn, &params, 5164 qed_sp_vport_update(hwfn, &params,
5159 QED_SPQ_MODE_EBLOCK, NULL); 5165 QED_SPQ_MODE_EBLOCK, NULL);
5160 } 5166 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index b6cccf44bf40..5dda547772c1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -261,6 +261,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
261 struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; 261 struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp;
262 struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; 262 struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info;
263 struct vf_pf_resc_request *p_resc; 263 struct vf_pf_resc_request *p_resc;
264 u8 retry_cnt = VF_ACQUIRE_THRESH;
264 bool resources_acquired = false; 265 bool resources_acquired = false;
265 struct vfpf_acquire_tlv *req; 266 struct vfpf_acquire_tlv *req;
266 int rc = 0, attempts = 0; 267 int rc = 0, attempts = 0;
@@ -314,6 +315,15 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
314 315
315 /* send acquire request */ 316 /* send acquire request */
316 rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); 317 rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
318
319 /* Re-try acquire in case of vf-pf hw channel timeout */
320 if (retry_cnt && rc == -EBUSY) {
321 DP_VERBOSE(p_hwfn, QED_MSG_IOV,
322 "VF retrying to acquire due to VPC timeout\n");
323 retry_cnt--;
324 continue;
325 }
326
317 if (rc) 327 if (rc)
318 goto exit; 328 goto exit;
319 329
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 44f6e4873aad..4f910c4f67b0 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -691,7 +691,7 @@ static void cp_tx (struct cp_private *cp)
691 } 691 }
692 bytes_compl += skb->len; 692 bytes_compl += skb->len;
693 pkts_compl++; 693 pkts_compl++;
694 dev_kfree_skb_irq(skb); 694 dev_consume_skb_irq(skb);
695 } 695 }
696 696
697 cp->tx_skb[tx_tail] = NULL; 697 cp->tx_skb[tx_tail] = NULL;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 7b923362ee55..3b174eae77c1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1342,8 +1342,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
1342 } 1342 }
1343 1343
1344 ret = phy_power_on(bsp_priv, true); 1344 ret = phy_power_on(bsp_priv, true);
1345 if (ret) 1345 if (ret) {
1346 gmac_clk_enable(bsp_priv, false);
1346 return ret; 1347 return ret;
1348 }
1347 1349
1348 pm_runtime_enable(dev); 1350 pm_runtime_enable(dev);
1349 pm_runtime_get_sync(dev); 1351 pm_runtime_get_sync(dev);
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 810dfc7de1f9..e2d47b24a869 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -608,7 +608,7 @@ static void cpmac_end_xmit(struct net_device *dev, int queue)
608 netdev_dbg(dev, "sent 0x%p, len=%d\n", 608 netdev_dbg(dev, "sent 0x%p, len=%d\n",
609 desc->skb, desc->skb->len); 609 desc->skb, desc->skb->len);
610 610
611 dev_kfree_skb_irq(desc->skb); 611 dev_consume_skb_irq(desc->skb);
612 desc->skb = NULL; 612 desc->skb = NULL;
613 if (__netif_subqueue_stopped(dev, queue)) 613 if (__netif_subqueue_stopped(dev, queue))
614 netif_wake_subqueue(dev, queue); 614 netif_wake_subqueue(dev, queue);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index bca86bf7189f..df51a35cf537 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1337,7 +1337,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
1337 n->vqs[i].rx_ring = NULL; 1337 n->vqs[i].rx_ring = NULL;
1338 vhost_net_buf_init(&n->vqs[i].rxq); 1338 vhost_net_buf_init(&n->vqs[i].rxq);
1339 } 1339 }
1340 vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); 1340 vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
1341 UIO_MAXIOV + VHOST_NET_BATCH);
1341 1342
1342 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); 1343 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
1343 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); 1344 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 344684f3e2e4..23593cb23dd0 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1627,7 +1627,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
1627 vqs[i] = &vs->vqs[i].vq; 1627 vqs[i] = &vs->vqs[i].vq;
1628 vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; 1628 vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
1629 } 1629 }
1630 vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); 1630 vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
1631 1631
1632 vhost_scsi_init_inflight(vs, NULL); 1632 vhost_scsi_init_inflight(vs, NULL);
1633 1633
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 15a216cdd507..24a129fcdd61 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -390,9 +390,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
390 vq->indirect = kmalloc_array(UIO_MAXIOV, 390 vq->indirect = kmalloc_array(UIO_MAXIOV,
391 sizeof(*vq->indirect), 391 sizeof(*vq->indirect),
392 GFP_KERNEL); 392 GFP_KERNEL);
393 vq->log = kmalloc_array(UIO_MAXIOV, sizeof(*vq->log), 393 vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
394 GFP_KERNEL); 394 GFP_KERNEL);
395 vq->heads = kmalloc_array(UIO_MAXIOV, sizeof(*vq->heads), 395 vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
396 GFP_KERNEL); 396 GFP_KERNEL);
397 if (!vq->indirect || !vq->log || !vq->heads) 397 if (!vq->indirect || !vq->log || !vq->heads)
398 goto err_nomem; 398 goto err_nomem;
@@ -414,7 +414,7 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
414} 414}
415 415
416void vhost_dev_init(struct vhost_dev *dev, 416void vhost_dev_init(struct vhost_dev *dev,
417 struct vhost_virtqueue **vqs, int nvqs) 417 struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
418{ 418{
419 struct vhost_virtqueue *vq; 419 struct vhost_virtqueue *vq;
420 int i; 420 int i;
@@ -427,6 +427,7 @@ void vhost_dev_init(struct vhost_dev *dev,
427 dev->iotlb = NULL; 427 dev->iotlb = NULL;
428 dev->mm = NULL; 428 dev->mm = NULL;
429 dev->worker = NULL; 429 dev->worker = NULL;
430 dev->iov_limit = iov_limit;
430 init_llist_head(&dev->work_list); 431 init_llist_head(&dev->work_list);
431 init_waitqueue_head(&dev->wait); 432 init_waitqueue_head(&dev->wait);
432 INIT_LIST_HEAD(&dev->read_list); 433 INIT_LIST_HEAD(&dev->read_list);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 1b675dad5e05..9490e7ddb340 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -170,9 +170,11 @@ struct vhost_dev {
170 struct list_head read_list; 170 struct list_head read_list;
171 struct list_head pending_list; 171 struct list_head pending_list;
172 wait_queue_head_t wait; 172 wait_queue_head_t wait;
173 int iov_limit;
173}; 174};
174 175
175void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); 176void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
177 int nvqs, int iov_limit);
176long vhost_dev_set_owner(struct vhost_dev *dev); 178long vhost_dev_set_owner(struct vhost_dev *dev);
177bool vhost_dev_has_owner(struct vhost_dev *dev); 179bool vhost_dev_has_owner(struct vhost_dev *dev);
178long vhost_dev_check_owner(struct vhost_dev *); 180long vhost_dev_check_owner(struct vhost_dev *);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3fbc068eaa9b..bb5fc0e9fbc2 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -531,7 +531,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
531 vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; 531 vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
532 vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; 532 vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
533 533
534 vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); 534 vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV);
535 535
536 file->private_data = vsock; 536 file->private_data = vsock;
537 spin_lock_init(&vsock->send_pkt_list_lock); 537 spin_lock_init(&vsock->send_pkt_list_lock);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c672f34235e7..4a728dba02e2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -260,6 +260,7 @@ struct irq_affinity {
260/** 260/**
261 * struct irq_affinity_desc - Interrupt affinity descriptor 261 * struct irq_affinity_desc - Interrupt affinity descriptor
262 * @mask: cpumask to hold the affinity assignment 262 * @mask: cpumask to hold the affinity assignment
263 * @is_managed: 1 if the interrupt is managed internally
263 */ 264 */
264struct irq_affinity_desc { 265struct irq_affinity_desc {
265 struct cpumask mask; 266 struct cpumask mask;
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 10b19a192b2d..545f37138057 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -24,9 +24,13 @@
24 * called near the end of a function. Otherwise, the list can be 24 * called near the end of a function. Otherwise, the list can be
25 * re-initialized for later re-use by wake_q_init(). 25 * re-initialized for later re-use by wake_q_init().
26 * 26 *
27 * Note that this can cause spurious wakeups. schedule() callers 27 * NOTE that this can cause spurious wakeups. schedule() callers
28 * must ensure the call is done inside a loop, confirming that the 28 * must ensure the call is done inside a loop, confirming that the
29 * wakeup condition has in fact occurred. 29 * wakeup condition has in fact occurred.
30 *
31 * NOTE that there is no guarantee the wakeup will happen any later than the
32 * wake_q_add() location. Therefore task must be ready to be woken at the
33 * location of the wake_q_add().
30 */ 34 */
31 35
32#include <linux/sched.h> 36#include <linux/sched.h>
diff --git a/include/net/tls.h b/include/net/tls.h
index 90bf52db573e..4592606e136a 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -120,6 +120,8 @@ struct tls_rec {
120 struct scatterlist sg_aead_out[2]; 120 struct scatterlist sg_aead_out[2];
121 121
122 char aad_space[TLS_AAD_SPACE_SIZE]; 122 char aad_space[TLS_AAD_SPACE_SIZE];
123 u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE +
124 TLS_CIPHER_AES_GCM_128_SALT_SIZE];
123 struct aead_request aead_req; 125 struct aead_request aead_req;
124 u8 aead_req_ctx[]; 126 u8 aead_req_ctx[];
125}; 127};
diff --git a/kernel/exit.c b/kernel/exit.c
index 284f2fe9a293..3fb7be001964 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -307,7 +307,7 @@ void rcuwait_wake_up(struct rcuwait *w)
307 * MB (A) MB (B) 307 * MB (A) MB (B)
308 * [L] cond [L] tsk 308 * [L] cond [L] tsk
309 */ 309 */
310 smp_rmb(); /* (B) */ 310 smp_mb(); /* (B) */
311 311
312 /* 312 /*
313 * Avoid using task_rcu_dereference() magic as long as we are careful, 313 * Avoid using task_rcu_dereference() magic as long as we are careful,
diff --git a/kernel/futex.c b/kernel/futex.c
index be3bff2315ff..fdd312da0992 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1452,11 +1452,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
1452 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) 1452 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
1453 return; 1453 return;
1454 1454
1455 /* 1455 get_task_struct(p);
1456 * Queue the task for later wakeup for after we've released
1457 * the hb->lock. wake_q_add() grabs reference to p.
1458 */
1459 wake_q_add(wake_q, p);
1460 __unqueue_futex(q); 1456 __unqueue_futex(q);
1461 /* 1457 /*
1462 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL 1458 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -1466,6 +1462,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
1466 * plist_del in __unqueue_futex(). 1462 * plist_del in __unqueue_futex().
1467 */ 1463 */
1468 smp_store_release(&q->lock_ptr, NULL); 1464 smp_store_release(&q->lock_ptr, NULL);
1465
1466 /*
1467 * Queue the task for later wakeup for after we've released
1468 * the hb->lock. wake_q_add() grabs reference to p.
1469 */
1470 wake_q_add(wake_q, p);
1471 put_task_struct(p);
1469} 1472}
1470 1473
1471/* 1474/*
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ee062b7939d3..ef8ad36cadcf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -457,7 +457,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
457 457
458 /* Validate affinity mask(s) */ 458 /* Validate affinity mask(s) */
459 if (affinity) { 459 if (affinity) {
460 for (i = 0; i < cnt; i++, i++) { 460 for (i = 0; i < cnt; i++) {
461 if (cpumask_empty(&affinity[i].mask)) 461 if (cpumask_empty(&affinity[i].mask))
462 return -EINVAL; 462 return -EINVAL;
463 } 463 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4888ce4667a..84b54a17b95d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -393,6 +393,9 @@ int irq_setup_affinity(struct irq_desc *desc)
393 } 393 }
394 394
395 cpumask_and(&mask, cpu_online_mask, set); 395 cpumask_and(&mask, cpu_online_mask, set);
396 if (cpumask_empty(&mask))
397 cpumask_copy(&mask, cpu_online_mask);
398
396 if (node != NUMA_NO_NODE) { 399 if (node != NUMA_NO_NODE) {
397 const struct cpumask *nodemask = cpumask_of_node(node); 400 const struct cpumask *nodemask = cpumask_of_node(node);
398 401
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 09b180063ee1..50d9af615dc4 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
198 woken++; 198 woken++;
199 tsk = waiter->task; 199 tsk = waiter->task;
200 200
201 wake_q_add(wake_q, tsk); 201 get_task_struct(tsk);
202 list_del(&waiter->list); 202 list_del(&waiter->list);
203 /* 203 /*
204 * Ensure that the last operation is setting the reader 204 * Ensure calling get_task_struct() before setting the reader
205 * waiter to nil such that rwsem_down_read_failed() cannot 205 * waiter to nil such that rwsem_down_read_failed() cannot
206 * race with do_exit() by always holding a reference count 206 * race with do_exit() by always holding a reference count
207 * to the task to wakeup. 207 * to the task to wakeup.
208 */ 208 */
209 smp_store_release(&waiter->task, NULL); 209 smp_store_release(&waiter->task, NULL);
210 /*
211 * Ensure issuing the wakeup (either by us or someone else)
212 * after setting the reader waiter to nil.
213 */
214 wake_q_add(wake_q, tsk);
215 /* wake_q_add() already take the task ref */
216 put_task_struct(tsk);
210 } 217 }
211 218
212 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; 219 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..d8d76a65cfdd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p)
396#endif 396#endif
397#endif 397#endif
398 398
399/**
400 * wake_q_add() - queue a wakeup for 'later' waking.
401 * @head: the wake_q_head to add @task to
402 * @task: the task to queue for 'later' wakeup
403 *
404 * Queue a task for later wakeup, most likely by the wake_up_q() call in the
405 * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
406 * instantly.
407 *
408 * This function must be used as-if it were wake_up_process(); IOW the task
409 * must be ready to be woken at this location.
410 */
399void wake_q_add(struct wake_q_head *head, struct task_struct *task) 411void wake_q_add(struct wake_q_head *head, struct task_struct *task)
400{ 412{
401 struct wake_q_node *node = &task->wake_q; 413 struct wake_q_node *node = &task->wake_q;
@@ -405,10 +417,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
405 * its already queued (either by us or someone else) and will get the 417 * its already queued (either by us or someone else) and will get the
406 * wakeup due to that. 418 * wakeup due to that.
407 * 419 *
408 * This cmpxchg() executes a full barrier, which pairs with the full 420 * In order to ensure that a pending wakeup will observe our pending
409 * barrier executed by the wakeup in wake_up_q(). 421 * state, even in the failed case, an explicit smp_mb() must be used.
410 */ 422 */
411 if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) 423 smp_mb__before_atomic();
424 if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
412 return; 425 return;
413 426
414 get_task_struct(task); 427 get_task_struct(task);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8f0644af40be..80f955210861 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
685 * set up the signal and overrun bookkeeping. 685 * set up the signal and overrun bookkeeping.
686 */ 686 */
687 timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); 687 timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);
688 timer->it_interval = ns_to_ktime(timer->it.cpu.incr);
688 689
689 /* 690 /*
690 * This acts as a modification timestamp for the timer, 691 * This acts as a modification timestamp for the timer,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d295c9bc01a8..35fdde041f5c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5701,18 +5701,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
5701 cond_resched(); 5701 cond_resched();
5702 } 5702 }
5703 } 5703 }
5704#ifdef CONFIG_SPARSEMEM
5705 /*
5706 * If the zone does not span the rest of the section then
5707 * we should at least initialize those pages. Otherwise we
5708 * could blow up on a poisoned page in some paths which depend
5709 * on full sections being initialized (e.g. memory hotplug).
5710 */
5711 while (end_pfn % PAGES_PER_SECTION) {
5712 __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid);
5713 end_pfn++;
5714 }
5715#endif
5716} 5704}
5717 5705
5718#ifdef CONFIG_ZONE_DEVICE 5706#ifdef CONFIG_ZONE_DEVICE
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5e55cef0cec3..6693e209efe8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2293,9 +2293,12 @@ static int compat_do_replace(struct net *net, void __user *user,
2293 2293
2294 xt_compat_lock(NFPROTO_BRIDGE); 2294 xt_compat_lock(NFPROTO_BRIDGE);
2295 2295
2296 ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); 2296 if (tmp.nentries) {
2297 if (ret < 0) 2297 ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
2298 goto out_unlock; 2298 if (ret < 0)
2299 goto out_unlock;
2300 }
2301
2299 ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); 2302 ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
2300 if (ret < 0) 2303 if (ret < 0)
2301 goto out_unlock; 2304 goto out_unlock;
diff --git a/net/core/dev.c b/net/core/dev.c
index 82f20022259d..8e276e0192a1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8712,6 +8712,9 @@ int init_dummy_netdev(struct net_device *dev)
8712 set_bit(__LINK_STATE_PRESENT, &dev->state); 8712 set_bit(__LINK_STATE_PRESENT, &dev->state);
8713 set_bit(__LINK_STATE_START, &dev->state); 8713 set_bit(__LINK_STATE_START, &dev->state);
8714 8714
8715 /* napi_busy_loop stats accounting wants this */
8716 dev_net_set(dev, &init_net);
8717
8715 /* Note : We dont allocate pcpu_refcnt for dummy devices, 8718 /* Note : We dont allocate pcpu_refcnt for dummy devices,
8716 * because users of this 'device' dont need to change 8719 * because users of this 'device' dont need to change
8717 * its refcount. 8720 * its refcount.
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index d0b3e69c6b39..0962f9201baa 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -56,7 +56,7 @@
56#include <net/dn_neigh.h> 56#include <net/dn_neigh.h>
57#include <net/dn_fib.h> 57#include <net/dn_fib.h>
58 58
59#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) 59#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
60 60
61static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; 61static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
62static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; 62static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index d7b43e700023..68a21bf75dd0 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -74,6 +74,33 @@ drop:
74 return 0; 74 return 0;
75} 75}
76 76
77static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
78 int encap_type)
79{
80 struct ip_tunnel *tunnel;
81 const struct iphdr *iph = ip_hdr(skb);
82 struct net *net = dev_net(skb->dev);
83 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
84
85 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
86 iph->saddr, iph->daddr, 0);
87 if (tunnel) {
88 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
89 goto drop;
90
91 XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
92
93 skb->dev = tunnel->dev;
94
95 return xfrm_input(skb, nexthdr, spi, encap_type);
96 }
97
98 return -EINVAL;
99drop:
100 kfree_skb(skb);
101 return 0;
102}
103
77static int vti_rcv(struct sk_buff *skb) 104static int vti_rcv(struct sk_buff *skb)
78{ 105{
79 XFRM_SPI_SKB_CB(skb)->family = AF_INET; 106 XFRM_SPI_SKB_CB(skb)->family = AF_INET;
@@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb)
82 return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); 109 return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
83} 110}
84 111
112static int vti_rcv_ipip(struct sk_buff *skb)
113{
114 XFRM_SPI_SKB_CB(skb)->family = AF_INET;
115 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
116
117 return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0);
118}
119
85static int vti_rcv_cb(struct sk_buff *skb, int err) 120static int vti_rcv_cb(struct sk_buff *skb, int err)
86{ 121{
87 unsigned short family; 122 unsigned short family;
@@ -435,6 +470,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
435 .priority = 100, 470 .priority = 100,
436}; 471};
437 472
473static struct xfrm_tunnel ipip_handler __read_mostly = {
474 .handler = vti_rcv_ipip,
475 .err_handler = vti4_err,
476 .priority = 0,
477};
478
438static int __net_init vti_init_net(struct net *net) 479static int __net_init vti_init_net(struct net *net)
439{ 480{
440 int err; 481 int err;
@@ -603,6 +644,13 @@ static int __init vti_init(void)
603 if (err < 0) 644 if (err < 0)
604 goto xfrm_proto_comp_failed; 645 goto xfrm_proto_comp_failed;
605 646
647 msg = "ipip tunnel";
648 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
649 if (err < 0) {
650 pr_info("%s: cant't register tunnel\n",__func__);
651 goto xfrm_tunnel_failed;
652 }
653
606 msg = "netlink interface"; 654 msg = "netlink interface";
607 err = rtnl_link_register(&vti_link_ops); 655 err = rtnl_link_register(&vti_link_ops);
608 if (err < 0) 656 if (err < 0)
@@ -612,6 +660,8 @@ static int __init vti_init(void)
612 660
613rtnl_link_failed: 661rtnl_link_failed:
614 xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); 662 xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
663xfrm_tunnel_failed:
664 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
615xfrm_proto_comp_failed: 665xfrm_proto_comp_failed:
616 xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); 666 xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
617xfrm_proto_ah_failed: 667xfrm_proto_ah_failed:
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index b61977db9b7f..2a909e5f9ba0 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -846,9 +846,9 @@ static int clusterip_net_init(struct net *net)
846 846
847static void clusterip_net_exit(struct net *net) 847static void clusterip_net_exit(struct net *net)
848{ 848{
849#ifdef CONFIG_PROC_FS
849 struct clusterip_net *cn = clusterip_pernet(net); 850 struct clusterip_net *cn = clusterip_pernet(net);
850 851
851#ifdef CONFIG_PROC_FS
852 mutex_lock(&cn->mutex); 852 mutex_lock(&cn->mutex);
853 proc_remove(cn->procdir); 853 proc_remove(cn->procdir);
854 cn->procdir = NULL; 854 cn->procdir = NULL;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 30337b38274b..cc01aa3f2b5e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1516,6 +1516,9 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
1516 continue; 1516 continue;
1517 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1517 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1518 list_del_rcu(&c->list); 1518 list_del_rcu(&c->list);
1519 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1520 FIB_EVENT_ENTRY_DEL,
1521 (struct mfc6_cache *)c, mrt->id);
1519 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1522 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1520 mr_cache_put(c); 1523 mr_cache_put(c);
1521 } 1524 }
@@ -1524,10 +1527,6 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
1524 spin_lock_bh(&mfc_unres_lock); 1527 spin_lock_bh(&mfc_unres_lock);
1525 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1528 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1526 list_del(&c->list); 1529 list_del(&c->list);
1527 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1528 FIB_EVENT_ENTRY_DEL,
1529 (struct mfc6_cache *)c,
1530 mrt->id);
1531 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1530 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1532 RTM_DELROUTE); 1531 RTM_DELROUTE);
1533 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1532 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 432141f04af3..7d6318664eb2 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2221,6 +2221,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user
2221 u->udp_timeout); 2221 u->udp_timeout);
2222 2222
2223#ifdef CONFIG_IP_VS_PROTO_TCP 2223#ifdef CONFIG_IP_VS_PROTO_TCP
2224 if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
2225 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
2226 return -EINVAL;
2227 }
2228#endif
2229
2230#ifdef CONFIG_IP_VS_PROTO_UDP
2231 if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
2232 return -EINVAL;
2233#endif
2234
2235#ifdef CONFIG_IP_VS_PROTO_TCP
2224 if (u->tcp_timeout) { 2236 if (u->tcp_timeout) {
2225 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2237 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2226 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2238 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 6f41dd74729d..1f1d90c1716b 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -66,6 +66,7 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
66 int ttl_check, 66 int ttl_check,
67 struct nf_osf_hdr_ctx *ctx) 67 struct nf_osf_hdr_ctx *ctx)
68{ 68{
69 const __u8 *optpinit = ctx->optp;
69 unsigned int check_WSS = 0; 70 unsigned int check_WSS = 0;
70 int fmatch = FMATCH_WRONG; 71 int fmatch = FMATCH_WRONG;
71 int foptsize, optnum; 72 int foptsize, optnum;
@@ -155,6 +156,9 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
155 } 156 }
156 } 157 }
157 158
159 if (fmatch != FMATCH_OK)
160 ctx->optp = optpinit;
161
158 return fmatch == FMATCH_OK; 162 return fmatch == FMATCH_OK;
159} 163}
160 164
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7334e0b80a5e..5eb269428832 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -22,11 +22,15 @@
22#include <linux/netfilter_bridge/ebtables.h> 22#include <linux/netfilter_bridge/ebtables.h>
23#include <linux/netfilter_arp/arp_tables.h> 23#include <linux/netfilter_arp/arp_tables.h>
24#include <net/netfilter/nf_tables.h> 24#include <net/netfilter/nf_tables.h>
25#include <net/netns/generic.h>
25 26
26struct nft_xt { 27struct nft_xt {
27 struct list_head head; 28 struct list_head head;
28 struct nft_expr_ops ops; 29 struct nft_expr_ops ops;
29 unsigned int refcnt; 30 refcount_t refcnt;
31
32 /* used only when transaction mutex is locked */
33 unsigned int listcnt;
30 34
31 /* Unlike other expressions, ops doesn't have static storage duration. 35 /* Unlike other expressions, ops doesn't have static storage duration.
32 * nft core assumes they do. We use kfree_rcu so that nft core can 36 * nft core assumes they do. We use kfree_rcu so that nft core can
@@ -43,10 +47,24 @@ struct nft_xt_match_priv {
43 void *info; 47 void *info;
44}; 48};
45 49
50struct nft_compat_net {
51 struct list_head nft_target_list;
52 struct list_head nft_match_list;
53};
54
55static unsigned int nft_compat_net_id __read_mostly;
56static struct nft_expr_type nft_match_type;
57static struct nft_expr_type nft_target_type;
58
59static struct nft_compat_net *nft_compat_pernet(struct net *net)
60{
61 return net_generic(net, nft_compat_net_id);
62}
63
46static bool nft_xt_put(struct nft_xt *xt) 64static bool nft_xt_put(struct nft_xt *xt)
47{ 65{
48 if (--xt->refcnt == 0) { 66 if (refcount_dec_and_test(&xt->refcnt)) {
49 list_del(&xt->head); 67 WARN_ON_ONCE(!list_empty(&xt->head));
50 kfree_rcu(xt, rcu_head); 68 kfree_rcu(xt, rcu_head);
51 return true; 69 return true;
52 } 70 }
@@ -273,7 +291,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
273 return -EINVAL; 291 return -EINVAL;
274 292
275 nft_xt = container_of(expr->ops, struct nft_xt, ops); 293 nft_xt = container_of(expr->ops, struct nft_xt, ops);
276 nft_xt->refcnt++; 294 refcount_inc(&nft_xt->refcnt);
277 return 0; 295 return 0;
278} 296}
279 297
@@ -486,7 +504,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
486 return ret; 504 return ret;
487 505
488 nft_xt = container_of(expr->ops, struct nft_xt, ops); 506 nft_xt = container_of(expr->ops, struct nft_xt, ops);
489 nft_xt->refcnt++; 507 refcount_inc(&nft_xt->refcnt);
490 return 0; 508 return 0;
491} 509}
492 510
@@ -540,6 +558,43 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
540 __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); 558 __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
541} 559}
542 560
561static void nft_compat_activate(const struct nft_ctx *ctx,
562 const struct nft_expr *expr,
563 struct list_head *h)
564{
565 struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
566
567 if (xt->listcnt == 0)
568 list_add(&xt->head, h);
569
570 xt->listcnt++;
571}
572
573static void nft_compat_activate_mt(const struct nft_ctx *ctx,
574 const struct nft_expr *expr)
575{
576 struct nft_compat_net *cn = nft_compat_pernet(ctx->net);
577
578 nft_compat_activate(ctx, expr, &cn->nft_match_list);
579}
580
581static void nft_compat_activate_tg(const struct nft_ctx *ctx,
582 const struct nft_expr *expr)
583{
584 struct nft_compat_net *cn = nft_compat_pernet(ctx->net);
585
586 nft_compat_activate(ctx, expr, &cn->nft_target_list);
587}
588
589static void nft_compat_deactivate(const struct nft_ctx *ctx,
590 const struct nft_expr *expr)
591{
592 struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
593
594 if (--xt->listcnt == 0)
595 list_del_init(&xt->head);
596}
597
543static void 598static void
544nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) 599nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
545{ 600{
@@ -734,10 +789,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
734 .cb = nfnl_nft_compat_cb, 789 .cb = nfnl_nft_compat_cb,
735}; 790};
736 791
737static LIST_HEAD(nft_match_list);
738
739static struct nft_expr_type nft_match_type;
740
741static bool nft_match_cmp(const struct xt_match *match, 792static bool nft_match_cmp(const struct xt_match *match,
742 const char *name, u32 rev, u32 family) 793 const char *name, u32 rev, u32 family)
743{ 794{
@@ -749,6 +800,7 @@ static const struct nft_expr_ops *
749nft_match_select_ops(const struct nft_ctx *ctx, 800nft_match_select_ops(const struct nft_ctx *ctx,
750 const struct nlattr * const tb[]) 801 const struct nlattr * const tb[])
751{ 802{
803 struct nft_compat_net *cn;
752 struct nft_xt *nft_match; 804 struct nft_xt *nft_match;
753 struct xt_match *match; 805 struct xt_match *match;
754 unsigned int matchsize; 806 unsigned int matchsize;
@@ -765,8 +817,10 @@ nft_match_select_ops(const struct nft_ctx *ctx,
765 rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); 817 rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
766 family = ctx->family; 818 family = ctx->family;
767 819
820 cn = nft_compat_pernet(ctx->net);
821
768 /* Re-use the existing match if it's already loaded. */ 822 /* Re-use the existing match if it's already loaded. */
769 list_for_each_entry(nft_match, &nft_match_list, head) { 823 list_for_each_entry(nft_match, &cn->nft_match_list, head) {
770 struct xt_match *match = nft_match->ops.data; 824 struct xt_match *match = nft_match->ops.data;
771 825
772 if (nft_match_cmp(match, mt_name, rev, family)) 826 if (nft_match_cmp(match, mt_name, rev, family))
@@ -789,11 +843,13 @@ nft_match_select_ops(const struct nft_ctx *ctx,
789 goto err; 843 goto err;
790 } 844 }
791 845
792 nft_match->refcnt = 0; 846 refcount_set(&nft_match->refcnt, 0);
793 nft_match->ops.type = &nft_match_type; 847 nft_match->ops.type = &nft_match_type;
794 nft_match->ops.eval = nft_match_eval; 848 nft_match->ops.eval = nft_match_eval;
795 nft_match->ops.init = nft_match_init; 849 nft_match->ops.init = nft_match_init;
796 nft_match->ops.destroy = nft_match_destroy; 850 nft_match->ops.destroy = nft_match_destroy;
851 nft_match->ops.activate = nft_compat_activate_mt;
852 nft_match->ops.deactivate = nft_compat_deactivate;
797 nft_match->ops.dump = nft_match_dump; 853 nft_match->ops.dump = nft_match_dump;
798 nft_match->ops.validate = nft_match_validate; 854 nft_match->ops.validate = nft_match_validate;
799 nft_match->ops.data = match; 855 nft_match->ops.data = match;
@@ -810,7 +866,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
810 866
811 nft_match->ops.size = matchsize; 867 nft_match->ops.size = matchsize;
812 868
813 list_add(&nft_match->head, &nft_match_list); 869 nft_match->listcnt = 1;
870 list_add(&nft_match->head, &cn->nft_match_list);
814 871
815 return &nft_match->ops; 872 return &nft_match->ops;
816err: 873err:
@@ -826,10 +883,6 @@ static struct nft_expr_type nft_match_type __read_mostly = {
826 .owner = THIS_MODULE, 883 .owner = THIS_MODULE,
827}; 884};
828 885
829static LIST_HEAD(nft_target_list);
830
831static struct nft_expr_type nft_target_type;
832
833static bool nft_target_cmp(const struct xt_target *tg, 886static bool nft_target_cmp(const struct xt_target *tg,
834 const char *name, u32 rev, u32 family) 887 const char *name, u32 rev, u32 family)
835{ 888{
@@ -841,6 +894,7 @@ static const struct nft_expr_ops *
841nft_target_select_ops(const struct nft_ctx *ctx, 894nft_target_select_ops(const struct nft_ctx *ctx,
842 const struct nlattr * const tb[]) 895 const struct nlattr * const tb[])
843{ 896{
897 struct nft_compat_net *cn;
844 struct nft_xt *nft_target; 898 struct nft_xt *nft_target;
845 struct xt_target *target; 899 struct xt_target *target;
846 char *tg_name; 900 char *tg_name;
@@ -861,8 +915,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
861 strcmp(tg_name, "standard") == 0) 915 strcmp(tg_name, "standard") == 0)
862 return ERR_PTR(-EINVAL); 916 return ERR_PTR(-EINVAL);
863 917
918 cn = nft_compat_pernet(ctx->net);
864 /* Re-use the existing target if it's already loaded. */ 919 /* Re-use the existing target if it's already loaded. */
865 list_for_each_entry(nft_target, &nft_target_list, head) { 920 list_for_each_entry(nft_target, &cn->nft_target_list, head) {
866 struct xt_target *target = nft_target->ops.data; 921 struct xt_target *target = nft_target->ops.data;
867 922
868 if (!target->target) 923 if (!target->target)
@@ -893,11 +948,13 @@ nft_target_select_ops(const struct nft_ctx *ctx,
893 goto err; 948 goto err;
894 } 949 }
895 950
896 nft_target->refcnt = 0; 951 refcount_set(&nft_target->refcnt, 0);
897 nft_target->ops.type = &nft_target_type; 952 nft_target->ops.type = &nft_target_type;
898 nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); 953 nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
899 nft_target->ops.init = nft_target_init; 954 nft_target->ops.init = nft_target_init;
900 nft_target->ops.destroy = nft_target_destroy; 955 nft_target->ops.destroy = nft_target_destroy;
956 nft_target->ops.activate = nft_compat_activate_tg;
957 nft_target->ops.deactivate = nft_compat_deactivate;
901 nft_target->ops.dump = nft_target_dump; 958 nft_target->ops.dump = nft_target_dump;
902 nft_target->ops.validate = nft_target_validate; 959 nft_target->ops.validate = nft_target_validate;
903 nft_target->ops.data = target; 960 nft_target->ops.data = target;
@@ -907,7 +964,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
907 else 964 else
908 nft_target->ops.eval = nft_target_eval_xt; 965 nft_target->ops.eval = nft_target_eval_xt;
909 966
910 list_add(&nft_target->head, &nft_target_list); 967 nft_target->listcnt = 1;
968 list_add(&nft_target->head, &cn->nft_target_list);
911 969
912 return &nft_target->ops; 970 return &nft_target->ops;
913err: 971err:
@@ -923,13 +981,74 @@ static struct nft_expr_type nft_target_type __read_mostly = {
923 .owner = THIS_MODULE, 981 .owner = THIS_MODULE,
924}; 982};
925 983
984static int __net_init nft_compat_init_net(struct net *net)
985{
986 struct nft_compat_net *cn = nft_compat_pernet(net);
987
988 INIT_LIST_HEAD(&cn->nft_target_list);
989 INIT_LIST_HEAD(&cn->nft_match_list);
990
991 return 0;
992}
993
994static void __net_exit nft_compat_exit_net(struct net *net)
995{
996 struct nft_compat_net *cn = nft_compat_pernet(net);
997 struct nft_xt *xt, *next;
998
999 if (list_empty(&cn->nft_match_list) &&
1000 list_empty(&cn->nft_target_list))
1001 return;
1002
1003 /* If there was an error that caused nft_xt expr to not be initialized
1004 * fully and noone else requested the same expression later, the lists
1005 * contain 0-refcount entries that still hold module reference.
1006 *
1007 * Clean them here.
1008 */
1009 mutex_lock(&net->nft.commit_mutex);
1010 list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) {
1011 struct xt_target *target = xt->ops.data;
1012
1013 list_del_init(&xt->head);
1014
1015 if (refcount_read(&xt->refcnt))
1016 continue;
1017 module_put(target->me);
1018 kfree(xt);
1019 }
1020
1021 list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) {
1022 struct xt_match *match = xt->ops.data;
1023
1024 list_del_init(&xt->head);
1025
1026 if (refcount_read(&xt->refcnt))
1027 continue;
1028 module_put(match->me);
1029 kfree(xt);
1030 }
1031 mutex_unlock(&net->nft.commit_mutex);
1032}
1033
1034static struct pernet_operations nft_compat_net_ops = {
1035 .init = nft_compat_init_net,
1036 .exit = nft_compat_exit_net,
1037 .id = &nft_compat_net_id,
1038 .size = sizeof(struct nft_compat_net),
1039};
1040
926static int __init nft_compat_module_init(void) 1041static int __init nft_compat_module_init(void)
927{ 1042{
928 int ret; 1043 int ret;
929 1044
1045 ret = register_pernet_subsys(&nft_compat_net_ops);
1046 if (ret < 0)
1047 goto err_target;
1048
930 ret = nft_register_expr(&nft_match_type); 1049 ret = nft_register_expr(&nft_match_type);
931 if (ret < 0) 1050 if (ret < 0)
932 return ret; 1051 goto err_pernet;
933 1052
934 ret = nft_register_expr(&nft_target_type); 1053 ret = nft_register_expr(&nft_target_type);
935 if (ret < 0) 1054 if (ret < 0)
@@ -942,45 +1061,21 @@ static int __init nft_compat_module_init(void)
942 } 1061 }
943 1062
944 return ret; 1063 return ret;
945
946err_target: 1064err_target:
947 nft_unregister_expr(&nft_target_type); 1065 nft_unregister_expr(&nft_target_type);
948err_match: 1066err_match:
949 nft_unregister_expr(&nft_match_type); 1067 nft_unregister_expr(&nft_match_type);
1068err_pernet:
1069 unregister_pernet_subsys(&nft_compat_net_ops);
950 return ret; 1070 return ret;
951} 1071}
952 1072
953static void __exit nft_compat_module_exit(void) 1073static void __exit nft_compat_module_exit(void)
954{ 1074{
955 struct nft_xt *xt, *next;
956
957 /* list should be empty here, it can be non-empty only in case there
958 * was an error that caused nft_xt expr to not be initialized fully
959 * and noone else requested the same expression later.
960 *
961 * In this case, the lists contain 0-refcount entries that still
962 * hold module reference.
963 */
964 list_for_each_entry_safe(xt, next, &nft_target_list, head) {
965 struct xt_target *target = xt->ops.data;
966
967 if (WARN_ON_ONCE(xt->refcnt))
968 continue;
969 module_put(target->me);
970 kfree(xt);
971 }
972
973 list_for_each_entry_safe(xt, next, &nft_match_list, head) {
974 struct xt_match *match = xt->ops.data;
975
976 if (WARN_ON_ONCE(xt->refcnt))
977 continue;
978 module_put(match->me);
979 kfree(xt);
980 }
981 nfnetlink_subsys_unregister(&nfnl_compat_subsys); 1075 nfnetlink_subsys_unregister(&nfnl_compat_subsys);
982 nft_unregister_expr(&nft_target_type); 1076 nft_unregister_expr(&nft_target_type);
983 nft_unregister_expr(&nft_match_type); 1077 nft_unregister_expr(&nft_match_type);
1078 unregister_pernet_subsys(&nft_compat_net_ops);
984} 1079}
985 1080
986MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); 1081MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index cbd51ed5a2d7..908e53ab47a4 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk)
52{ 52{
53 struct nr_sock *nr = nr_sk(sk); 53 struct nr_sock *nr = nr_sk(sk);
54 54
55 mod_timer(&nr->t1timer, jiffies + nr->t1); 55 sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
56} 56}
57 57
58void nr_start_t2timer(struct sock *sk) 58void nr_start_t2timer(struct sock *sk)
59{ 59{
60 struct nr_sock *nr = nr_sk(sk); 60 struct nr_sock *nr = nr_sk(sk);
61 61
62 mod_timer(&nr->t2timer, jiffies + nr->t2); 62 sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
63} 63}
64 64
65void nr_start_t4timer(struct sock *sk) 65void nr_start_t4timer(struct sock *sk)
66{ 66{
67 struct nr_sock *nr = nr_sk(sk); 67 struct nr_sock *nr = nr_sk(sk);
68 68
69 mod_timer(&nr->t4timer, jiffies + nr->t4); 69 sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
70} 70}
71 71
72void nr_start_idletimer(struct sock *sk) 72void nr_start_idletimer(struct sock *sk)
@@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk)
74 struct nr_sock *nr = nr_sk(sk); 74 struct nr_sock *nr = nr_sk(sk);
75 75
76 if (nr->idle > 0) 76 if (nr->idle > 0)
77 mod_timer(&nr->idletimer, jiffies + nr->idle); 77 sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
78} 78}
79 79
80void nr_start_heartbeat(struct sock *sk) 80void nr_start_heartbeat(struct sock *sk)
81{ 81{
82 mod_timer(&sk->sk_timer, jiffies + 5 * HZ); 82 sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
83} 83}
84 84
85void nr_stop_t1timer(struct sock *sk) 85void nr_stop_t1timer(struct sock *sk)
86{ 86{
87 del_timer(&nr_sk(sk)->t1timer); 87 sk_stop_timer(sk, &nr_sk(sk)->t1timer);
88} 88}
89 89
90void nr_stop_t2timer(struct sock *sk) 90void nr_stop_t2timer(struct sock *sk)
91{ 91{
92 del_timer(&nr_sk(sk)->t2timer); 92 sk_stop_timer(sk, &nr_sk(sk)->t2timer);
93} 93}
94 94
95void nr_stop_t4timer(struct sock *sk) 95void nr_stop_t4timer(struct sock *sk)
96{ 96{
97 del_timer(&nr_sk(sk)->t4timer); 97 sk_stop_timer(sk, &nr_sk(sk)->t4timer);
98} 98}
99 99
100void nr_stop_idletimer(struct sock *sk) 100void nr_stop_idletimer(struct sock *sk)
101{ 101{
102 del_timer(&nr_sk(sk)->idletimer); 102 sk_stop_timer(sk, &nr_sk(sk)->idletimer);
103} 103}
104 104
105void nr_stop_heartbeat(struct sock *sk) 105void nr_stop_heartbeat(struct sock *sk)
106{ 106{
107 del_timer(&sk->sk_timer); 107 sk_stop_timer(sk, &sk->sk_timer);
108} 108}
109 109
110int nr_t1timer_running(struct sock *sk) 110int nr_t1timer_running(struct sock *sk)
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 77e9f85a2c92..f2ff21d7df08 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -850,6 +850,7 @@ void rose_link_device_down(struct net_device *dev)
850 850
851/* 851/*
852 * Route a frame to an appropriate AX.25 connection. 852 * Route a frame to an appropriate AX.25 connection.
853 * A NULL ax25_cb indicates an internally generated frame.
853 */ 854 */
854int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) 855int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
855{ 856{
@@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
867 868
868 if (skb->len < ROSE_MIN_LEN) 869 if (skb->len < ROSE_MIN_LEN)
869 return res; 870 return res;
871
872 if (!ax25)
873 return rose_loopback_queue(skb, NULL);
874
870 frametype = skb->data[2]; 875 frametype = skb->data[2];
871 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); 876 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
872 if (frametype == ROSE_CALL_REQUEST && 877 if (frametype == ROSE_CALL_REQUEST &&
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 86b9527c4826..3f2a6af27e62 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -447,6 +447,8 @@ static int tls_do_encryption(struct sock *sk,
447 struct scatterlist *sge = sk_msg_elem(msg_en, start); 447 struct scatterlist *sge = sk_msg_elem(msg_en, start);
448 int rc; 448 int rc;
449 449
450 memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data));
451
450 sge->offset += tls_ctx->tx.prepend_size; 452 sge->offset += tls_ctx->tx.prepend_size;
451 sge->length -= tls_ctx->tx.prepend_size; 453 sge->length -= tls_ctx->tx.prepend_size;
452 454
@@ -456,7 +458,7 @@ static int tls_do_encryption(struct sock *sk,
456 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); 458 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
457 aead_request_set_crypt(aead_req, rec->sg_aead_in, 459 aead_request_set_crypt(aead_req, rec->sg_aead_in,
458 rec->sg_aead_out, 460 rec->sg_aead_out,
459 data_len, tls_ctx->tx.iv); 461 data_len, rec->iv_data);
460 462
461 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, 463 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
462 tls_encrypt_done, sk); 464 tls_encrypt_done, sk);
@@ -1901,7 +1903,9 @@ void tls_sw_free_resources_tx(struct sock *sk)
1901 if (atomic_read(&ctx->encrypt_pending)) 1903 if (atomic_read(&ctx->encrypt_pending))
1902 crypto_wait_req(-EINPROGRESS, &ctx->async_wait); 1904 crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
1903 1905
1906 release_sock(sk);
1904 cancel_delayed_work_sync(&ctx->tx_work.work); 1907 cancel_delayed_work_sync(&ctx->tx_work.work);
1908 lock_sock(sk);
1905 1909
1906 /* Tx whatever records we can transmit and abandon the rest */ 1910 /* Tx whatever records we can transmit and abandon the rest */
1907 tls_tx_records(sk, -1); 1911 tls_tx_records(sk, -1);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 934492bad8e0..ba0a4048c846 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -680,16 +680,6 @@ static void xfrm_hash_resize(struct work_struct *work)
680 mutex_unlock(&hash_resize_mutex); 680 mutex_unlock(&hash_resize_mutex);
681} 681}
682 682
683static void xfrm_hash_reset_inexact_table(struct net *net)
684{
685 struct xfrm_pol_inexact_bin *b;
686
687 lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
688
689 list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins)
690 INIT_HLIST_HEAD(&b->hhead);
691}
692
693/* Make sure *pol can be inserted into fastbin. 683/* Make sure *pol can be inserted into fastbin.
694 * Useful to check that later insert requests will be sucessful 684 * Useful to check that later insert requests will be sucessful
695 * (provided xfrm_policy_lock is held throughout). 685 * (provided xfrm_policy_lock is held throughout).
@@ -833,13 +823,13 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
833 u16 family) 823 u16 family)
834{ 824{
835 unsigned int matched_s, matched_d; 825 unsigned int matched_s, matched_d;
836 struct hlist_node *newpos = NULL;
837 struct xfrm_policy *policy, *p; 826 struct xfrm_policy *policy, *p;
838 827
839 matched_s = 0; 828 matched_s = 0;
840 matched_d = 0; 829 matched_d = 0;
841 830
842 list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { 831 list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
832 struct hlist_node *newpos = NULL;
843 bool matches_s, matches_d; 833 bool matches_s, matches_d;
844 834
845 if (!policy->bydst_reinsert) 835 if (!policy->bydst_reinsert)
@@ -849,16 +839,19 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
849 839
850 policy->bydst_reinsert = false; 840 policy->bydst_reinsert = false;
851 hlist_for_each_entry(p, &n->hhead, bydst) { 841 hlist_for_each_entry(p, &n->hhead, bydst) {
852 if (policy->priority >= p->priority) 842 if (policy->priority > p->priority)
843 newpos = &p->bydst;
844 else if (policy->priority == p->priority &&
845 policy->pos > p->pos)
853 newpos = &p->bydst; 846 newpos = &p->bydst;
854 else 847 else
855 break; 848 break;
856 } 849 }
857 850
858 if (newpos) 851 if (newpos)
859 hlist_add_behind(&policy->bydst, newpos); 852 hlist_add_behind_rcu(&policy->bydst, newpos);
860 else 853 else
861 hlist_add_head(&policy->bydst, &n->hhead); 854 hlist_add_head_rcu(&policy->bydst, &n->hhead);
862 855
863 /* paranoia checks follow. 856 /* paranoia checks follow.
864 * Check that the reinserted policy matches at least 857 * Check that the reinserted policy matches at least
@@ -893,12 +886,13 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
893 struct rb_root *new, 886 struct rb_root *new,
894 u16 family) 887 u16 family)
895{ 888{
896 struct rb_node **p, *parent = NULL;
897 struct xfrm_pol_inexact_node *node; 889 struct xfrm_pol_inexact_node *node;
890 struct rb_node **p, *parent;
898 891
899 /* we should not have another subtree here */ 892 /* we should not have another subtree here */
900 WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root)); 893 WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
901 894restart:
895 parent = NULL;
902 p = &new->rb_node; 896 p = &new->rb_node;
903 while (*p) { 897 while (*p) {
904 u8 prefixlen; 898 u8 prefixlen;
@@ -918,12 +912,11 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
918 } else { 912 } else {
919 struct xfrm_policy *tmp; 913 struct xfrm_policy *tmp;
920 914
921 hlist_for_each_entry(tmp, &node->hhead, bydst) 915 hlist_for_each_entry(tmp, &n->hhead, bydst) {
922 tmp->bydst_reinsert = true;
923 hlist_for_each_entry(tmp, &n->hhead, bydst)
924 tmp->bydst_reinsert = true; 916 tmp->bydst_reinsert = true;
917 hlist_del_rcu(&tmp->bydst);
918 }
925 919
926 INIT_HLIST_HEAD(&node->hhead);
927 xfrm_policy_inexact_list_reinsert(net, node, family); 920 xfrm_policy_inexact_list_reinsert(net, node, family);
928 921
929 if (node->prefixlen == n->prefixlen) { 922 if (node->prefixlen == n->prefixlen) {
@@ -935,8 +928,7 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
935 kfree_rcu(n, rcu); 928 kfree_rcu(n, rcu);
936 n = node; 929 n = node;
937 n->prefixlen = prefixlen; 930 n->prefixlen = prefixlen;
938 *p = new->rb_node; 931 goto restart;
939 parent = NULL;
940 } 932 }
941 } 933 }
942 934
@@ -965,12 +957,11 @@ static void xfrm_policy_inexact_node_merge(struct net *net,
965 family); 957 family);
966 } 958 }
967 959
968 hlist_for_each_entry(tmp, &v->hhead, bydst) 960 hlist_for_each_entry(tmp, &v->hhead, bydst) {
969 tmp->bydst_reinsert = true;
970 hlist_for_each_entry(tmp, &n->hhead, bydst)
971 tmp->bydst_reinsert = true; 961 tmp->bydst_reinsert = true;
962 hlist_del_rcu(&tmp->bydst);
963 }
972 964
973 INIT_HLIST_HEAD(&n->hhead);
974 xfrm_policy_inexact_list_reinsert(net, n, family); 965 xfrm_policy_inexact_list_reinsert(net, n, family);
975} 966}
976 967
@@ -1235,6 +1226,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
1235 } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); 1226 } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
1236 1227
1237 spin_lock_bh(&net->xfrm.xfrm_policy_lock); 1228 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
1229 write_seqcount_begin(&xfrm_policy_hash_generation);
1238 1230
1239 /* make sure that we can insert the indirect policies again before 1231 /* make sure that we can insert the indirect policies again before
1240 * we start with destructive action. 1232 * we start with destructive action.
@@ -1278,10 +1270,14 @@ static void xfrm_hash_rebuild(struct work_struct *work)
1278 } 1270 }
1279 1271
1280 /* reset the bydst and inexact table in all directions */ 1272 /* reset the bydst and inexact table in all directions */
1281 xfrm_hash_reset_inexact_table(net);
1282
1283 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 1273 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
1284 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); 1274 struct hlist_node *n;
1275
1276 hlist_for_each_entry_safe(policy, n,
1277 &net->xfrm.policy_inexact[dir],
1278 bydst_inexact_list)
1279 hlist_del_init(&policy->bydst_inexact_list);
1280
1285 hmask = net->xfrm.policy_bydst[dir].hmask; 1281 hmask = net->xfrm.policy_bydst[dir].hmask;
1286 odst = net->xfrm.policy_bydst[dir].table; 1282 odst = net->xfrm.policy_bydst[dir].table;
1287 for (i = hmask; i >= 0; i--) 1283 for (i = hmask; i >= 0; i--)
@@ -1313,6 +1309,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
1313 newpos = NULL; 1309 newpos = NULL;
1314 chain = policy_hash_bysel(net, &policy->selector, 1310 chain = policy_hash_bysel(net, &policy->selector,
1315 policy->family, dir); 1311 policy->family, dir);
1312
1313 hlist_del_rcu(&policy->bydst);
1314
1316 if (!chain) { 1315 if (!chain) {
1317 void *p = xfrm_policy_inexact_insert(policy, dir, 0); 1316 void *p = xfrm_policy_inexact_insert(policy, dir, 0);
1318 1317
@@ -1334,6 +1333,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
1334 1333
1335out_unlock: 1334out_unlock:
1336 __xfrm_policy_inexact_flush(net); 1335 __xfrm_policy_inexact_flush(net);
1336 write_seqcount_end(&xfrm_policy_hash_generation);
1337 spin_unlock_bh(&net->xfrm.xfrm_policy_lock); 1337 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
1338 1338
1339 mutex_unlock(&hash_resize_mutex); 1339 mutex_unlock(&hash_resize_mutex);
@@ -2600,7 +2600,10 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
2600 dst_copy_metrics(dst1, dst); 2600 dst_copy_metrics(dst1, dst);
2601 2601
2602 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { 2602 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
2603 __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); 2603 __u32 mark = 0;
2604
2605 if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
2606 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
2604 2607
2605 family = xfrm[i]->props.family; 2608 family = xfrm[i]->props.family;
2606 dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, 2609 dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 277c1c46fe94..c6d26afcf89d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1488,10 +1488,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1488 if (!ut[i].family) 1488 if (!ut[i].family)
1489 ut[i].family = family; 1489 ut[i].family = family;
1490 1490
1491 if ((ut[i].mode == XFRM_MODE_TRANSPORT) && 1491 switch (ut[i].mode) {
1492 (ut[i].family != prev_family)) 1492 case XFRM_MODE_TUNNEL:
1493 return -EINVAL; 1493 case XFRM_MODE_BEET:
1494 1494 break;
1495 default:
1496 if (ut[i].family != prev_family)
1497 return -EINVAL;
1498 break;
1499 }
1495 if (ut[i].mode >= XFRM_MODE_MAX) 1500 if (ut[i].mode >= XFRM_MODE_MAX)
1496 return -EINVAL; 1501 return -EINVAL;
1497 1502
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 8db35b99457c..71d7fdc513c1 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -28,6 +28,19 @@ KEY_AES=0x0123456789abcdef0123456789012345
28SPI1=0x1 28SPI1=0x1
29SPI2=0x2 29SPI2=0x2
30 30
31do_esp_policy() {
32 local ns=$1
33 local me=$2
34 local remote=$3
35 local lnet=$4
36 local rnet=$5
37
38 # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
39 ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
40 # to fwd decrypted packets after esp processing:
41 ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
42}
43
31do_esp() { 44do_esp() {
32 local ns=$1 45 local ns=$1
33 local me=$2 46 local me=$2
@@ -40,10 +53,59 @@ do_esp() {
40 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet 53 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
41 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet 54 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
42 55
43 # to encrypt packets as they go out (includes forwarded packets that need encapsulation) 56 do_esp_policy $ns $me $remote $lnet $rnet
44 ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow 57}
45 # to fwd decrypted packets after esp processing: 58
46 ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow 59# add policies with different netmasks, to make sure kernel carries
60# the policies contained within new netmask over when search tree is
61# re-built.
62# peer netns that are supposed to be encapsulated via esp have addresses
63# in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively.
64#
65# Adding a policy for '10.0.1.0/23' will make it necessary to
66# alter the prefix of 10.0.1.0 subnet.
67# In case new prefix overlaps with existing node, the node and all
68# policies it carries need to be merged with the existing one(s).
69#
70# Do that here.
71do_overlap()
72{
73 local ns=$1
74
75 # adds new nodes to tree (neither network exists yet in policy database).
76 ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block
77
78 # adds a new node in the 10.0.0.0/24 tree (dst node exists).
79 ip -net $ns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block
80
81 # adds a 10.2.0.0/23 node, but for different dst.
82 ip -net $ns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block
83
84 # dst now overlaps with the 10.0.1.0/24 ESP policy in fwd.
85 # kernel must 'promote' existing one (10.0.0.0/24) to 10.0.0.0/23.
86 # But 10.0.0.0/23 also includes existing 10.0.1.0/24, so that node
87 # also has to be merged too, including source-sorted subtrees.
88 # old:
89 # 10.0.0.0/24 (node 1 in dst tree of the bin)
90 # 10.1.0.0/24 (node in src tree of dst node 1)
91 # 10.2.0.0/24 (node in src tree of dst node 1)
92 # 10.0.1.0/24 (node 2 in dst tree of the bin)
93 # 10.0.2.0/24 (node in src tree of dst node 2)
94 # 10.2.0.0/24 (node in src tree of dst node 2)
95 #
96 # The next 'policy add' adds dst '10.0.0.0/23', which means
97 # that dst node 1 and dst node 2 have to be merged including
98 # the sub-tree. As no duplicates are allowed, policies in
99 # the two '10.0.2.0/24' are also merged.
100 #
101 # after the 'add', internal search tree should look like this:
102 # 10.0.0.0/23 (node in dst tree of bin)
103 # 10.0.2.0/24 (node in src tree of dst node)
104 # 10.1.0.0/24 (node in src tree of dst node)
105 # 10.2.0.0/24 (node in src tree of dst node)
106 #
107 # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23.
108 ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block
47} 109}
48 110
49do_esp_policy_get_check() { 111do_esp_policy_get_check() {
@@ -160,6 +222,41 @@ check_xfrm() {
160 return $lret 222 return $lret
161} 223}
162 224
225check_exceptions()
226{
227 logpostfix="$1"
228 local lret=0
229
230 # ping to .254 should be excluded from the tunnel (exception is in place).
231 check_xfrm 0 254
232 if [ $? -ne 0 ]; then
233 echo "FAIL: expected ping to .254 to fail ($logpostfix)"
234 lret=1
235 else
236 echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)"
237 fi
238
239 # ping to .253 should use use ipsec due to direct policy exception.
240 check_xfrm 1 253
241 if [ $? -ne 0 ]; then
242 echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)"
243 lret=1
244 else
245 echo "PASS: direct policy matches ($logpostfix)"
246 fi
247
248 # ping to .2 should use ipsec.
249 check_xfrm 1 2
250 if [ $? -ne 0 ]; then
251 echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)"
252 lret=1
253 else
254 echo "PASS: policy matches ($logpostfix)"
255 fi
256
257 return $lret
258}
259
163#check for needed privileges 260#check for needed privileges
164if [ "$(id -u)" -ne 0 ];then 261if [ "$(id -u)" -ne 0 ];then
165 echo "SKIP: Need root privileges" 262 echo "SKIP: Need root privileges"
@@ -270,33 +367,45 @@ do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
270do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 367do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
271do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 368do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
272 369
273# ping to .254 should now be excluded from the tunnel 370check_exceptions "exceptions"
274check_xfrm 0 254
275if [ $? -ne 0 ]; then 371if [ $? -ne 0 ]; then
276 echo "FAIL: expected ping to .254 to fail"
277 ret=1 372 ret=1
278else
279 echo "PASS: ping to .254 bypassed ipsec tunnel"
280fi 373fi
281 374
282# ping to .253 should use use ipsec due to direct policy exception. 375# insert block policies with adjacent/overlapping netmasks
283check_xfrm 1 253 376do_overlap ns3
284if [ $? -ne 0 ]; then
285 echo "FAIL: expected ping to .253 to use ipsec tunnel"
286 ret=1
287else
288 echo "PASS: direct policy matches"
289fi
290 377
291# ping to .2 should use ipsec. 378check_exceptions "exceptions and block policies"
292check_xfrm 1 2
293if [ $? -ne 0 ]; then 379if [ $? -ne 0 ]; then
294 echo "FAIL: expected ping to .2 to use ipsec tunnel"
295 ret=1 380 ret=1
296else
297 echo "PASS: policy matches"
298fi 381fi
299 382
383for n in ns3 ns4;do
384 ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125
385 sleep $((RANDOM%5))
386done
387
388check_exceptions "exceptions and block policies after hresh changes"
389
390# full flush of policy db, check everything gets freed incl. internal meta data
391ip -net ns3 xfrm policy flush
392
393do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
394do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
395
396# move inexact policies to hash table
397ip -net ns3 xfrm policy set hthresh4 16 16
398
399sleep $((RANDOM%5))
400check_exceptions "exceptions and block policies after hthresh change in ns3"
401
402# restore original hthresh settings -- move policies back to tables
403for n in ns3 ns4;do
404 ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
405 sleep $((RANDOM%5))
406done
407check_exceptions "exceptions and block policies after hresh change to normal"
408
300for i in 1 2 3 4;do ip netns del ns$i;done 409for i in 1 2 3 4;do ip netns del ns$i;done
301 410
302exit $ret 411exit $ret
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 460b4bdf4c1e..5d546dcdbc80 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -1133,6 +1133,21 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
1133 pkey_assert(err); 1133 pkey_assert(err);
1134} 1134}
1135 1135
1136void become_child(void)
1137{
1138 pid_t forkret;
1139
1140 forkret = fork();
1141 pkey_assert(forkret >= 0);
1142 dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
1143
1144 if (!forkret) {
1145 /* in the child */
1146 return;
1147 }
1148 exit(0);
1149}
1150
1136/* Assumes that all pkeys other than 'pkey' are unallocated */ 1151/* Assumes that all pkeys other than 'pkey' are unallocated */
1137void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1152void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1138{ 1153{
@@ -1141,7 +1156,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1141 int nr_allocated_pkeys = 0; 1156 int nr_allocated_pkeys = 0;
1142 int i; 1157 int i;
1143 1158
1144 for (i = 0; i < NR_PKEYS*2; i++) { 1159 for (i = 0; i < NR_PKEYS*3; i++) {
1145 int new_pkey; 1160 int new_pkey;
1146 dprintf1("%s() alloc loop: %d\n", __func__, i); 1161 dprintf1("%s() alloc loop: %d\n", __func__, i);
1147 new_pkey = alloc_pkey(); 1162 new_pkey = alloc_pkey();
@@ -1152,21 +1167,27 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1152 if ((new_pkey == -1) && (errno == ENOSPC)) { 1167 if ((new_pkey == -1) && (errno == ENOSPC)) {
1153 dprintf2("%s() failed to allocate pkey after %d tries\n", 1168 dprintf2("%s() failed to allocate pkey after %d tries\n",
1154 __func__, nr_allocated_pkeys); 1169 __func__, nr_allocated_pkeys);
1155 break; 1170 } else {
1171 /*
1172 * Ensure the number of successes never
1173 * exceeds the number of keys supported
1174 * in the hardware.
1175 */
1176 pkey_assert(nr_allocated_pkeys < NR_PKEYS);
1177 allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
1156 } 1178 }
1157 pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1179
1158 allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1180 /*
1181 * Make sure that allocation state is properly
1182 * preserved across fork().
1183 */
1184 if (i == NR_PKEYS*2)
1185 become_child();
1159 } 1186 }
1160 1187
1161 dprintf3("%s()::%d\n", __func__, __LINE__); 1188 dprintf3("%s()::%d\n", __func__, __LINE__);
1162 1189
1163 /* 1190 /*
1164 * ensure it did not reach the end of the loop without
1165 * failure:
1166 */
1167 pkey_assert(i < NR_PKEYS*2);
1168
1169 /*
1170 * There are 16 pkeys supported in hardware. Three are 1191 * There are 16 pkeys supported in hardware. Three are
1171 * allocated by the time we get here: 1192 * allocated by the time we get here:
1172 * 1. The default key (0) 1193 * 1. The default key (0)