path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                    |    1
-rw-r--r--  kernel/bpf/core.c                  |   69
-rw-r--r--  kernel/bpf/devmap.c                |   14
-rw-r--r--  kernel/bpf/syscall.c               |   12
-rw-r--r--  kernel/dma/Kconfig                 |   50
-rw-r--r--  kernel/dma/Makefile                |   11
-rw-r--r--  kernel/dma/coherent.c              |  434
-rw-r--r--  kernel/dma/contiguous.c            |  278
-rw-r--r--  kernel/dma/debug.c                 | 1773
-rw-r--r--  kernel/dma/direct.c                |  204
-rw-r--r--  kernel/dma/mapping.c               |  345
-rw-r--r--  kernel/dma/noncoherent.c           |  102
-rw-r--r--  kernel/dma/swiotlb.c               | 1088
-rw-r--r--  kernel/dma/virt.c                  |   59
-rw-r--r--  kernel/events/core.c               |    2
-rw-r--r--  kernel/events/ring_buffer.c        |    6
-rw-r--r--  kernel/irq/debugfs.c               |    1
-rw-r--r--  kernel/locking/lockdep.c           |   12
-rw-r--r--  kernel/locking/rwsem.c             |    1
-rw-r--r--  kernel/rseq.c                      |    7
-rw-r--r--  kernel/softirq.c                   |    6
-rw-r--r--  kernel/time/hrtimer.c              |    2
-rw-r--r--  kernel/time/posix-cpu-timers.c     |    2
-rw-r--r--  kernel/time/time.c                 |    6
-rw-r--r--  kernel/trace/trace.c               |    6
-rw-r--r--  kernel/trace/trace_events_filter.c |   10
26 files changed, 4460 insertions, 41 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d2001624fe7a..04bc07c2b42a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,6 +41,7 @@ obj-y += printk/
 obj-y += irq/
 obj-y += rcu/
 obj-y += livepatch/
+obj-y += dma/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9f1493705f40..a9e6c04d0f4a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	return prog_adj;
 }
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+{
+	int i;
+
+	for (i = 0; i < fp->aux->func_cnt; i++)
+		bpf_prog_kallsyms_del(fp->aux->func[i]);
+}
+
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
+{
+	bpf_prog_kallsyms_del_subprogs(fp);
+	bpf_prog_kallsyms_del(fp);
+}
+
 #ifdef CONFIG_BPF_JIT
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
@@ -584,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
+	hdr->locked = 0;
+
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1434,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
+static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
+{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	int i, err;
+
+	for (i = 0; i < fp->aux->func_cnt; i++) {
+		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
+		if (err)
+			return err;
+	}
+
+	return bpf_prog_check_pages_ro_single(fp);
+#endif
+	return 0;
+}
+
+static void bpf_prog_select_func(struct bpf_prog *fp)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+	fp->bpf_func = __bpf_prog_ret0_warn;
+#endif
+}
+
 /**
  * bpf_prog_select_runtime - select exec runtime for BPF program
  * @fp: bpf_prog populated with internal BPF program
@@ -1444,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+	/* In case of BPF to BPF calls, verifier did all the prep
+	 * work with regards to JITing, etc.
+	 */
+	if (fp->bpf_func)
+		goto finalize;
 
-	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
-#else
-	fp->bpf_func = __bpf_prog_ret0_warn;
-#endif
+	bpf_prog_select_func(fp);
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1471,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 	}
+
+finalize:
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
@@ -1479,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-
+	if (*err)
+		return fp;
+
+	/* Checkpoint: at this point onwards any cBPF -> eBPF or
+	 * native eBPF program is read-only. If we failed to change
+	 * the page attributes (e.g. allocation failure from
+	 * splitting large pages), then reject the whole program
+	 * in order to guarantee not ending up with any W+X pages
+	 * from BPF side in kernel.
+	 */
+	*err = bpf_prog_check_pages_ro_locked(fp);
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
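The hunks above make bpf_prog_select_runtime() skip straight to the finalize label for programs whose subprogs the verifier already JITed, and reject any program whose image could not be locked read-only. The standalone sketch below (not kernel code; every name in it is an illustrative stand-in) only models that decision flow:

/*
 * Model of the new select-runtime flow: pick a runtime only if one was
 * not already chosen, then refuse to ship an image that is not read-only.
 * Types and helpers here are made up for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_prog {
	bool jited;		/* verifier already installed bpf_func */
	bool ro_lock_ok;	/* read-only page attributes took effect */
};

static int select_runtime(struct fake_prog *p)
{
	if (!p->jited) {
		/* choose interpreter or JIT image here */
	}
	/* finalize: lock the image and verify it is really read-only */
	if (!p->ro_lock_ok)
		return -1;	/* reject rather than leave W+X pages */
	return 0;
}

int main(void)
{
	struct fake_prog ok = { .jited = true, .ro_lock_ok = true };
	struct fake_prog bad = { .jited = false, .ro_lock_ok = false };

	printf("ok: %d, bad: %d\n", select_runtime(&ok), select_runtime(&bad));
	return 0;
}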
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a7cc7b3494a9..642c97f6d1b8 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return bq_enqueue(dst, xdpf, dev_rx);
 }
 
+int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
+			     struct bpf_prog *xdp_prog)
+{
+	int err;
+
+	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	if (unlikely(err))
+		return err;
+	skb->dev = dst->dev;
+	generic_xdp_tx(skb, xdp_prog);
+
+	return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fa20624707f..35dc466641f2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
-		int i;
-
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
-
-		for (i = 0; i < prog->aux->func_cnt; i++)
-			bpf_prog_kallsyms_del(prog->aux->func[i]);
-		bpf_prog_kallsyms_del(prog);
+		bpf_prog_kallsyms_del_all(prog);
 
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
@@ -1358,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (err < 0)
 		goto free_used_maps;
 
-	/* eBPF program is ready to be JITed */
-	if (!prog->bpf_func)
-		prog = bpf_prog_select_runtime(prog, &err);
+	prog = bpf_prog_select_runtime(prog, &err);
 	if (err < 0)
 		goto free_used_maps;
 
@@ -1384,6 +1377,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	return err;
 
 free_used_maps:
+	bpf_prog_kallsyms_del_subprogs(prog);
 	free_used_maps(prog->aux);
 free_prog:
 	bpf_prog_uncharge_memlock(prog);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
new file mode 100644
index 000000000000..9bd54304446f
--- /dev/null
+++ b/kernel/dma/Kconfig
@@ -0,0 +1,50 @@
+
+config HAS_DMA
+	bool
+	depends on !NO_DMA
+	default y
+
+config NEED_SG_DMA_LENGTH
+	bool
+
+config NEED_DMA_MAP_STATE
+	bool
+
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool 64BIT || PHYS_ADDR_T_64BIT
+
+config HAVE_GENERIC_DMA_COHERENT
+	bool
+
+config ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	bool
+
+config ARCH_HAS_SYNC_DMA_FOR_CPU
+	bool
+	select NEED_DMA_MAP_STATE
+
+config DMA_DIRECT_OPS
+	bool
+	depends on HAS_DMA
+
+config DMA_NONCOHERENT_OPS
+	bool
+	depends on HAS_DMA
+	select DMA_DIRECT_OPS
+
+config DMA_NONCOHERENT_MMAP
+	bool
+	depends on DMA_NONCOHERENT_OPS
+
+config DMA_NONCOHERENT_CACHE_SYNC
+	bool
+	depends on DMA_NONCOHERENT_OPS
+
+config DMA_VIRT_OPS
+	bool
+	depends on HAS_DMA
+
+config SWIOTLB
+	bool
+	select DMA_DIRECT_OPS
+	select NEED_DMA_MAP_STATE
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
new file mode 100644
index 000000000000..6de44e4eb454
--- /dev/null
+++ b/kernel/dma/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_HAS_DMA) += mapping.o
+obj-$(CONFIG_DMA_CMA) += contiguous.o
+obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o
+obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o
+obj-$(CONFIG_DMA_NONCOHERENT_OPS) += noncoherent.o
+obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
+obj-$(CONFIG_DMA_API_DEBUG) += debug.o
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
new file mode 100644
index 000000000000..597d40893862
--- /dev/null
+++ b/kernel/dma/coherent.c
@@ -0,0 +1,434 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Coherent per-device memory handling.
4 * Borrowed from i386
5 */
6#include <linux/io.h>
7#include <linux/slab.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/dma-mapping.h>
11
12struct dma_coherent_mem {
13 void *virt_base;
14 dma_addr_t device_base;
15 unsigned long pfn_base;
16 int size;
17 int flags;
18 unsigned long *bitmap;
19 spinlock_t spinlock;
20 bool use_dev_dma_pfn_offset;
21};
22
23static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
24
25static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev)
26{
27 if (dev && dev->dma_mem)
28 return dev->dma_mem;
29 return NULL;
30}
31
32static inline dma_addr_t dma_get_device_base(struct device *dev,
33 struct dma_coherent_mem * mem)
34{
35 if (mem->use_dev_dma_pfn_offset)
36 return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
37 else
38 return mem->device_base;
39}
40
41static int dma_init_coherent_memory(
42 phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
43 struct dma_coherent_mem **mem)
44{
45 struct dma_coherent_mem *dma_mem = NULL;
46 void __iomem *mem_base = NULL;
47 int pages = size >> PAGE_SHIFT;
48 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
49 int ret;
50
51 if (!size) {
52 ret = -EINVAL;
53 goto out;
54 }
55
56 mem_base = memremap(phys_addr, size, MEMREMAP_WC);
57 if (!mem_base) {
58 ret = -EINVAL;
59 goto out;
60 }
61 dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
62 if (!dma_mem) {
63 ret = -ENOMEM;
64 goto out;
65 }
66 dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
67 if (!dma_mem->bitmap) {
68 ret = -ENOMEM;
69 goto out;
70 }
71
72 dma_mem->virt_base = mem_base;
73 dma_mem->device_base = device_addr;
74 dma_mem->pfn_base = PFN_DOWN(phys_addr);
75 dma_mem->size = pages;
76 dma_mem->flags = flags;
77 spin_lock_init(&dma_mem->spinlock);
78
79 *mem = dma_mem;
80 return 0;
81
82out:
83 kfree(dma_mem);
84 if (mem_base)
85 memunmap(mem_base);
86 return ret;
87}
88
89static void dma_release_coherent_memory(struct dma_coherent_mem *mem)
90{
91 if (!mem)
92 return;
93
94 memunmap(mem->virt_base);
95 kfree(mem->bitmap);
96 kfree(mem);
97}
98
99static int dma_assign_coherent_memory(struct device *dev,
100 struct dma_coherent_mem *mem)
101{
102 if (!dev)
103 return -ENODEV;
104
105 if (dev->dma_mem)
106 return -EBUSY;
107
108 dev->dma_mem = mem;
109 return 0;
110}
111
112int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
113 dma_addr_t device_addr, size_t size, int flags)
114{
115 struct dma_coherent_mem *mem;
116 int ret;
117
118 ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem);
119 if (ret)
120 return ret;
121
122 ret = dma_assign_coherent_memory(dev, mem);
123 if (ret)
124 dma_release_coherent_memory(mem);
125 return ret;
126}
127EXPORT_SYMBOL(dma_declare_coherent_memory);
128
129void dma_release_declared_memory(struct device *dev)
130{
131 struct dma_coherent_mem *mem = dev->dma_mem;
132
133 if (!mem)
134 return;
135 dma_release_coherent_memory(mem);
136 dev->dma_mem = NULL;
137}
138EXPORT_SYMBOL(dma_release_declared_memory);
139
140void *dma_mark_declared_memory_occupied(struct device *dev,
141 dma_addr_t device_addr, size_t size)
142{
143 struct dma_coherent_mem *mem = dev->dma_mem;
144 unsigned long flags;
145 int pos, err;
146
147 size += device_addr & ~PAGE_MASK;
148
149 if (!mem)
150 return ERR_PTR(-EINVAL);
151
152 spin_lock_irqsave(&mem->spinlock, flags);
153 pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
154 err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
155 spin_unlock_irqrestore(&mem->spinlock, flags);
156
157 if (err != 0)
158 return ERR_PTR(err);
159 return mem->virt_base + (pos << PAGE_SHIFT);
160}
161EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
162
163static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
164 ssize_t size, dma_addr_t *dma_handle)
165{
166 int order = get_order(size);
167 unsigned long flags;
168 int pageno;
169 void *ret;
170
171 spin_lock_irqsave(&mem->spinlock, flags);
172
173 if (unlikely(size > (mem->size << PAGE_SHIFT)))
174 goto err;
175
176 pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
177 if (unlikely(pageno < 0))
178 goto err;
179
180 /*
181 * Memory was found in the coherent area.
182 */
183 *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
184 ret = mem->virt_base + (pageno << PAGE_SHIFT);
185 spin_unlock_irqrestore(&mem->spinlock, flags);
186 memset(ret, 0, size);
187 return ret;
188err:
189 spin_unlock_irqrestore(&mem->spinlock, flags);
190 return NULL;
191}
192
193/**
194 * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool
195 * @dev: device from which we allocate memory
196 * @size: size of requested memory area
197 * @dma_handle: This will be filled with the correct dma handle
198 * @ret: This pointer will be filled with the virtual address
199 * to allocated area.
200 *
201 * This function should be only called from per-arch dma_alloc_coherent()
202 * to support allocation from per-device coherent memory pools.
203 *
204 * Returns 0 if dma_alloc_coherent should continue with allocating from
205 * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
206 */
207int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
208 dma_addr_t *dma_handle, void **ret)
209{
210 struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
211
212 if (!mem)
213 return 0;
214
215 *ret = __dma_alloc_from_coherent(mem, size, dma_handle);
216 if (*ret)
217 return 1;
218
219 /*
220 * In the case where the allocation can not be satisfied from the
221 * per-device area, try to fall back to generic memory if the
222 * constraints allow it.
223 */
224 return mem->flags & DMA_MEMORY_EXCLUSIVE;
225}
226EXPORT_SYMBOL(dma_alloc_from_dev_coherent);
227
228void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
229{
230 if (!dma_coherent_default_memory)
231 return NULL;
232
233 return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
234 dma_handle);
235}
236
237static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
238 int order, void *vaddr)
239{
240 if (mem && vaddr >= mem->virt_base && vaddr <
241 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
242 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
243 unsigned long flags;
244
245 spin_lock_irqsave(&mem->spinlock, flags);
246 bitmap_release_region(mem->bitmap, page, order);
247 spin_unlock_irqrestore(&mem->spinlock, flags);
248 return 1;
249 }
250 return 0;
251}
252
253/**
254 * dma_release_from_dev_coherent() - free memory to device coherent memory pool
255 * @dev: device from which the memory was allocated
256 * @order: the order of pages allocated
257 * @vaddr: virtual address of allocated pages
258 *
259 * This checks whether the memory was allocated from the per-device
260 * coherent memory pool and if so, releases that memory.
261 *
262 * Returns 1 if we correctly released the memory, or 0 if the caller should
263 * proceed with releasing memory from generic pools.
264 */
265int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
266{
267 struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
268
269 return __dma_release_from_coherent(mem, order, vaddr);
270}
271EXPORT_SYMBOL(dma_release_from_dev_coherent);
272
273int dma_release_from_global_coherent(int order, void *vaddr)
274{
275 if (!dma_coherent_default_memory)
276 return 0;
277
278 return __dma_release_from_coherent(dma_coherent_default_memory, order,
279 vaddr);
280}
281
282static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
283 struct vm_area_struct *vma, void *vaddr, size_t size, int *ret)
284{
285 if (mem && vaddr >= mem->virt_base && vaddr + size <=
286 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
287 unsigned long off = vma->vm_pgoff;
288 int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
289 int user_count = vma_pages(vma);
290 int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
291
292 *ret = -ENXIO;
293 if (off < count && user_count <= count - off) {
294 unsigned long pfn = mem->pfn_base + start + off;
295 *ret = remap_pfn_range(vma, vma->vm_start, pfn,
296 user_count << PAGE_SHIFT,
297 vma->vm_page_prot);
298 }
299 return 1;
300 }
301 return 0;
302}
303
304/**
305 * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool
306 * @dev: device from which the memory was allocated
307 * @vma: vm_area for the userspace memory
308 * @vaddr: cpu address returned by dma_alloc_from_dev_coherent
309 * @size: size of the memory buffer allocated
310 * @ret: result from remap_pfn_range()
311 *
312 * This checks whether the memory was allocated from the per-device
313 * coherent memory pool and if so, maps that memory to the provided vma.
314 *
315 * Returns 1 if @vaddr belongs to the device coherent pool and the caller
316 * should return @ret, or 0 if they should proceed with mapping memory from
317 * generic areas.
318 */
319int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
320 void *vaddr, size_t size, int *ret)
321{
322 struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
323
324 return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
325}
326EXPORT_SYMBOL(dma_mmap_from_dev_coherent);
327
328int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
329 size_t size, int *ret)
330{
331 if (!dma_coherent_default_memory)
332 return 0;
333
334 return __dma_mmap_from_coherent(dma_coherent_default_memory, vma,
335 vaddr, size, ret);
336}
337
338/*
339 * Support for reserved memory regions defined in device tree
340 */
341#ifdef CONFIG_OF_RESERVED_MEM
342#include <linux/of.h>
343#include <linux/of_fdt.h>
344#include <linux/of_reserved_mem.h>
345
346static struct reserved_mem *dma_reserved_default_memory __initdata;
347
348static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
349{
350 struct dma_coherent_mem *mem = rmem->priv;
351 int ret;
352
353 if (!mem) {
354 ret = dma_init_coherent_memory(rmem->base, rmem->base,
355 rmem->size,
356 DMA_MEMORY_EXCLUSIVE, &mem);
357 if (ret) {
358 pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
359 &rmem->base, (unsigned long)rmem->size / SZ_1M);
360 return ret;
361 }
362 }
363 mem->use_dev_dma_pfn_offset = true;
364 rmem->priv = mem;
365 dma_assign_coherent_memory(dev, mem);
366 return 0;
367}
368
369static void rmem_dma_device_release(struct reserved_mem *rmem,
370 struct device *dev)
371{
372 if (dev)
373 dev->dma_mem = NULL;
374}
375
376static const struct reserved_mem_ops rmem_dma_ops = {
377 .device_init = rmem_dma_device_init,
378 .device_release = rmem_dma_device_release,
379};
380
381static int __init rmem_dma_setup(struct reserved_mem *rmem)
382{
383 unsigned long node = rmem->fdt_node;
384
385 if (of_get_flat_dt_prop(node, "reusable", NULL))
386 return -EINVAL;
387
388#ifdef CONFIG_ARM
389 if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
390 pr_err("Reserved memory: regions without no-map are not yet supported\n");
391 return -EINVAL;
392 }
393
394 if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) {
395 WARN(dma_reserved_default_memory,
396 "Reserved memory: region for default DMA coherent area is redefined\n");
397 dma_reserved_default_memory = rmem;
398 }
399#endif
400
401 rmem->ops = &rmem_dma_ops;
402 pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
403 &rmem->base, (unsigned long)rmem->size / SZ_1M);
404 return 0;
405}
406
407static int __init dma_init_reserved_memory(void)
408{
409 const struct reserved_mem_ops *ops;
410 int ret;
411
412 if (!dma_reserved_default_memory)
413 return -ENOMEM;
414
415 ops = dma_reserved_default_memory->ops;
416
417 /*
418 * We rely on rmem_dma_device_init() does not propagate error of
419 * dma_assign_coherent_memory() for "NULL" device.
420 */
421 ret = ops->device_init(dma_reserved_default_memory, NULL);
422
423 if (!ret) {
424 dma_coherent_default_memory = dma_reserved_default_memory->priv;
425 pr_info("DMA: default coherent area is set\n");
426 }
427
428 return ret;
429}
430
431core_initcall(dma_init_reserved_memory);
432
433RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
434#endif
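For context on how the per-device pool set up in coherent.c is consumed, the sketch below shows a driver-side usage path. It is not part of this commit: the probe function, physical addresses, and sizes are made-up examples, and drivers normally reach this code only indirectly through dma_alloc_coherent().

/*
 * Illustrative only: hand a dedicated 1 MiB device-local window to the
 * coherent pool, then allocate from it through the regular DMA API.
 */
#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int example_probe(struct device *dev)
{
	dma_addr_t dma_handle;
	void *cpu_addr;
	int ret;

	/* Register the window as this device's coherent pool. */
	ret = dma_declare_coherent_memory(dev, 0x90000000, 0x90000000,
					  SZ_1M, DMA_MEMORY_EXCLUSIVE);
	if (ret)
		return ret;

	/* dma_alloc_coherent() now tries dma_alloc_from_dev_coherent() first. */
	cpu_addr = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle, GFP_KERNEL);
	if (!cpu_addr) {
		dma_release_declared_memory(dev);
		return -ENOMEM;
	}

	/* ... use cpu_addr / dma_handle ... */
	dma_free_coherent(dev, PAGE_SIZE, cpu_addr, dma_handle);
	dma_release_declared_memory(dev);
	return 0;
}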
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
new file mode 100644
index 000000000000..d987dcd1bd56
--- /dev/null
+++ b/kernel/dma/contiguous.c
@@ -0,0 +1,278 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Contiguous Memory Allocator for DMA mapping framework
4 * Copyright (c) 2010-2011 by Samsung Electronics.
5 * Written by:
6 * Marek Szyprowski <m.szyprowski@samsung.com>
7 * Michal Nazarewicz <mina86@mina86.com>
8 */
9
10#define pr_fmt(fmt) "cma: " fmt
11
12#ifdef CONFIG_CMA_DEBUG
13#ifndef DEBUG
14# define DEBUG
15#endif
16#endif
17
18#include <asm/page.h>
19#include <asm/dma-contiguous.h>
20
21#include <linux/memblock.h>
22#include <linux/err.h>
23#include <linux/sizes.h>
24#include <linux/dma-contiguous.h>
25#include <linux/cma.h>
26
27#ifdef CONFIG_CMA_SIZE_MBYTES
28#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
29#else
30#define CMA_SIZE_MBYTES 0
31#endif
32
33struct cma *dma_contiguous_default_area;
34
35/*
36 * Default global CMA area size can be defined in kernel's .config.
37 * This is useful mainly for distro maintainers to create a kernel
38 * that works correctly for most supported systems.
39 * The size can be set in bytes or as a percentage of the total memory
40 * in the system.
41 *
42 * Users, who want to set the size of global CMA area for their system
43 * should use cma= kernel parameter.
44 */
45static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
46static phys_addr_t size_cmdline = -1;
47static phys_addr_t base_cmdline;
48static phys_addr_t limit_cmdline;
49
50static int __init early_cma(char *p)
51{
52 pr_debug("%s(%s)\n", __func__, p);
53 size_cmdline = memparse(p, &p);
54 if (*p != '@')
55 return 0;
56 base_cmdline = memparse(p + 1, &p);
57 if (*p != '-') {
58 limit_cmdline = base_cmdline + size_cmdline;
59 return 0;
60 }
61 limit_cmdline = memparse(p + 1, &p);
62
63 return 0;
64}
65early_param("cma", early_cma);
66
67#ifdef CONFIG_CMA_SIZE_PERCENTAGE
68
69static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
70{
71 struct memblock_region *reg;
72 unsigned long total_pages = 0;
73
74 /*
75 * We cannot use memblock_phys_mem_size() here, because
76 * memblock_analyze() has not been called yet.
77 */
78 for_each_memblock(memory, reg)
79 total_pages += memblock_region_memory_end_pfn(reg) -
80 memblock_region_memory_base_pfn(reg);
81
82 return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
83}
84
85#else
86
87static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
88{
89 return 0;
90}
91
92#endif
93
94/**
95 * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
96 * @limit: End address of the reserved memory (optional, 0 for any).
97 *
98 * This function reserves memory from early allocator. It should be
99 * called by arch specific code once the early allocator (memblock or bootmem)
100 * has been activated and all other subsystems have already allocated/reserved
101 * memory.
102 */
103void __init dma_contiguous_reserve(phys_addr_t limit)
104{
105 phys_addr_t selected_size = 0;
106 phys_addr_t selected_base = 0;
107 phys_addr_t selected_limit = limit;
108 bool fixed = false;
109
110 pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
111
112 if (size_cmdline != -1) {
113 selected_size = size_cmdline;
114 selected_base = base_cmdline;
115 selected_limit = min_not_zero(limit_cmdline, limit);
116 if (base_cmdline + size_cmdline == limit_cmdline)
117 fixed = true;
118 } else {
119#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
120 selected_size = size_bytes;
121#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
122 selected_size = cma_early_percent_memory();
123#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
124 selected_size = min(size_bytes, cma_early_percent_memory());
125#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
126 selected_size = max(size_bytes, cma_early_percent_memory());
127#endif
128 }
129
130 if (selected_size && !dma_contiguous_default_area) {
131 pr_debug("%s: reserving %ld MiB for global area\n", __func__,
132 (unsigned long)selected_size / SZ_1M);
133
134 dma_contiguous_reserve_area(selected_size, selected_base,
135 selected_limit,
136 &dma_contiguous_default_area,
137 fixed);
138 }
139}
140
141/**
142 * dma_contiguous_reserve_area() - reserve custom contiguous area
143 * @size: Size of the reserved area (in bytes),
144 * @base: Base address of the reserved area optional, use 0 for any
145 * @limit: End address of the reserved memory (optional, 0 for any).
146 * @res_cma: Pointer to store the created cma region.
147 * @fixed: hint about where to place the reserved area
148 *
149 * This function reserves memory from early allocator. It should be
150 * called by arch specific code once the early allocator (memblock or bootmem)
151 * has been activated and all other subsystems have already allocated/reserved
152 * memory. This function allows to create custom reserved areas for specific
153 * devices.
154 *
155 * If @fixed is true, reserve contiguous area at exactly @base. If false,
156 * reserve in range from @base to @limit.
157 */
158int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
159 phys_addr_t limit, struct cma **res_cma,
160 bool fixed)
161{
162 int ret;
163
164 ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed,
165 "reserved", res_cma);
166 if (ret)
167 return ret;
168
169 /* Architecture specific contiguous memory fixup. */
170 dma_contiguous_early_fixup(cma_get_base(*res_cma),
171 cma_get_size(*res_cma));
172
173 return 0;
174}
175
176/**
177 * dma_alloc_from_contiguous() - allocate pages from contiguous area
178 * @dev: Pointer to device for which the allocation is performed.
179 * @count: Requested number of pages.
180 * @align: Requested alignment of pages (in PAGE_SIZE order).
181 * @gfp_mask: GFP flags to use for this allocation.
182 *
183 * This function allocates memory buffer for specified device. It uses
184 * device specific contiguous memory area if available or the default
185 * global one. Requires architecture specific dev_get_cma_area() helper
186 * function.
187 */
188struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
189 unsigned int align, gfp_t gfp_mask)
190{
191 if (align > CONFIG_CMA_ALIGNMENT)
192 align = CONFIG_CMA_ALIGNMENT;
193
194 return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
195}
196
197/**
198 * dma_release_from_contiguous() - release allocated pages
199 * @dev: Pointer to device for which the pages were allocated.
200 * @pages: Allocated pages.
201 * @count: Number of allocated pages.
202 *
203 * This function releases memory allocated by dma_alloc_from_contiguous().
204 * It returns false when provided pages do not belong to contiguous area and
205 * true otherwise.
206 */
207bool dma_release_from_contiguous(struct device *dev, struct page *pages,
208 int count)
209{
210 return cma_release(dev_get_cma_area(dev), pages, count);
211}
212
213/*
214 * Support for reserved memory regions defined in device tree
215 */
216#ifdef CONFIG_OF_RESERVED_MEM
217#include <linux/of.h>
218#include <linux/of_fdt.h>
219#include <linux/of_reserved_mem.h>
220
221#undef pr_fmt
222#define pr_fmt(fmt) fmt
223
224static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
225{
226 dev_set_cma_area(dev, rmem->priv);
227 return 0;
228}
229
230static void rmem_cma_device_release(struct reserved_mem *rmem,
231 struct device *dev)
232{
233 dev_set_cma_area(dev, NULL);
234}
235
236static const struct reserved_mem_ops rmem_cma_ops = {
237 .device_init = rmem_cma_device_init,
238 .device_release = rmem_cma_device_release,
239};
240
241static int __init rmem_cma_setup(struct reserved_mem *rmem)
242{
243 phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
244 phys_addr_t mask = align - 1;
245 unsigned long node = rmem->fdt_node;
246 struct cma *cma;
247 int err;
248
249 if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
250 of_get_flat_dt_prop(node, "no-map", NULL))
251 return -EINVAL;
252
253 if ((rmem->base & mask) || (rmem->size & mask)) {
254 pr_err("Reserved memory: incorrect alignment of CMA region\n");
255 return -EINVAL;
256 }
257
258 err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
259 if (err) {
260 pr_err("Reserved memory: unable to setup CMA region\n");
261 return err;
262 }
263 /* Architecture specific contiguous memory fixup. */
264 dma_contiguous_early_fixup(rmem->base, rmem->size);
265
266 if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
267 dma_contiguous_set_default(cma);
268
269 rmem->ops = &rmem_cma_ops;
270 rmem->priv = cma;
271
272 pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
273 &rmem->base, (unsigned long)rmem->size / SZ_1M);
274
275 return 0;
276}
277RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
278#endif
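As a usage note, arch-level DMA allocators typically drive the two contiguous.c helpers above as a CMA-first path with a page-allocator fallback. The sketch below is illustrative only and not from this commit; the function names and the fallback policy are assumptions modelled on common arch code.

/*
 * Illustrative only: try the device's (or the default) CMA area first,
 * then fall back to the normal page allocator; release symmetrically.
 */
#include <linux/dma-contiguous.h>
#include <linux/gfp.h>

static struct page *example_alloc_pages(struct device *dev, size_t size)
{
	size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct page *page;

	page = dma_alloc_from_contiguous(dev, count, get_order(size),
					 GFP_KERNEL);
	if (!page)
		page = alloc_pages(GFP_KERNEL, get_order(size));
	return page;
}

static void example_free_pages(struct device *dev, struct page *page,
			       size_t size)
{
	size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;

	/* Returns false if the pages did not come from a CMA area. */
	if (!dma_release_from_contiguous(dev, page, count))
		__free_pages(page, get_order(size));
}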
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
new file mode 100644
index 000000000000..c007d25bee09
--- /dev/null
+++ b/kernel/dma/debug.c
@@ -0,0 +1,1773 @@
1/*
2 * Copyright (C) 2008 Advanced Micro Devices, Inc.
3 *
4 * Author: Joerg Roedel <joerg.roedel@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/sched/task_stack.h>
21#include <linux/scatterlist.h>
22#include <linux/dma-mapping.h>
23#include <linux/sched/task.h>
24#include <linux/stacktrace.h>
25#include <linux/dma-debug.h>
26#include <linux/spinlock.h>
27#include <linux/vmalloc.h>
28#include <linux/debugfs.h>
29#include <linux/uaccess.h>
30#include <linux/export.h>
31#include <linux/device.h>
32#include <linux/types.h>
33#include <linux/sched.h>
34#include <linux/ctype.h>
35#include <linux/list.h>
36#include <linux/slab.h>
37
38#include <asm/sections.h>
39
40#define HASH_SIZE 1024ULL
41#define HASH_FN_SHIFT 13
42#define HASH_FN_MASK (HASH_SIZE - 1)
43
44/* allow architectures to override this if absolutely required */
45#ifndef PREALLOC_DMA_DEBUG_ENTRIES
46#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
47#endif
48
49enum {
50 dma_debug_single,
51 dma_debug_page,
52 dma_debug_sg,
53 dma_debug_coherent,
54 dma_debug_resource,
55};
56
57enum map_err_types {
58 MAP_ERR_CHECK_NOT_APPLICABLE,
59 MAP_ERR_NOT_CHECKED,
60 MAP_ERR_CHECKED,
61};
62
63#define DMA_DEBUG_STACKTRACE_ENTRIES 5
64
65/**
66 * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
67 * @list: node on pre-allocated free_entries list
68 * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
69 * @type: single, page, sg, coherent
70 * @pfn: page frame of the start address
71 * @offset: offset of mapping relative to pfn
72 * @size: length of the mapping
73 * @direction: enum dma_data_direction
74 * @sg_call_ents: 'nents' from dma_map_sg
75 * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
76 * @map_err_type: track whether dma_mapping_error() was checked
77 * @stacktrace: support backtraces when a violation is detected
78 */
79struct dma_debug_entry {
80 struct list_head list;
81 struct device *dev;
82 int type;
83 unsigned long pfn;
84 size_t offset;
85 u64 dev_addr;
86 u64 size;
87 int direction;
88 int sg_call_ents;
89 int sg_mapped_ents;
90 enum map_err_types map_err_type;
91#ifdef CONFIG_STACKTRACE
92 struct stack_trace stacktrace;
93 unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
94#endif
95};
96
97typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
98
99struct hash_bucket {
100 struct list_head list;
101 spinlock_t lock;
102} ____cacheline_aligned_in_smp;
103
104/* Hash list to save the allocated dma addresses */
105static struct hash_bucket dma_entry_hash[HASH_SIZE];
106/* List of pre-allocated dma_debug_entry's */
107static LIST_HEAD(free_entries);
108/* Lock for the list above */
109static DEFINE_SPINLOCK(free_entries_lock);
110
111/* Global disable flag - will be set in case of an error */
112static bool global_disable __read_mostly;
113
114/* Early initialization disable flag, set at the end of dma_debug_init */
115static bool dma_debug_initialized __read_mostly;
116
117static inline bool dma_debug_disabled(void)
118{
119 return global_disable || !dma_debug_initialized;
120}
121
122/* Global error count */
123static u32 error_count;
124
125/* Global error show enable*/
126static u32 show_all_errors __read_mostly;
127/* Number of errors to show */
128static u32 show_num_errors = 1;
129
130static u32 num_free_entries;
131static u32 min_free_entries;
132static u32 nr_total_entries;
133
134/* number of preallocated entries requested by kernel cmdline */
135static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
136
137/* debugfs dentry's for the stuff above */
138static struct dentry *dma_debug_dent __read_mostly;
139static struct dentry *global_disable_dent __read_mostly;
140static struct dentry *error_count_dent __read_mostly;
141static struct dentry *show_all_errors_dent __read_mostly;
142static struct dentry *show_num_errors_dent __read_mostly;
143static struct dentry *num_free_entries_dent __read_mostly;
144static struct dentry *min_free_entries_dent __read_mostly;
145static struct dentry *filter_dent __read_mostly;
146
147/* per-driver filter related state */
148
149#define NAME_MAX_LEN 64
150
151static char current_driver_name[NAME_MAX_LEN] __read_mostly;
152static struct device_driver *current_driver __read_mostly;
153
154static DEFINE_RWLOCK(driver_name_lock);
155
156static const char *const maperr2str[] = {
157 [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
158 [MAP_ERR_NOT_CHECKED] = "dma map error not checked",
159 [MAP_ERR_CHECKED] = "dma map error checked",
160};
161
162static const char *type2name[5] = { "single", "page",
163 "scather-gather", "coherent",
164 "resource" };
165
166static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
167 "DMA_FROM_DEVICE", "DMA_NONE" };
168
169/*
170 * The access to some variables in this macro is racy. We can't use atomic_t
171 * here because all these variables are exported to debugfs. Some of them even
172 * writeable. This is also the reason why a lock won't help much. But anyway,
173 * the races are no big deal. Here is why:
174 *
175 * error_count: the addition is racy, but the worst thing that can happen is
176 * that we don't count some errors
177 * show_num_errors: the subtraction is racy. Also no big deal because in
178 * worst case this will result in one warning more in the
179 * system log than the user configured. This variable is
180 * writeable via debugfs.
181 */
182static inline void dump_entry_trace(struct dma_debug_entry *entry)
183{
184#ifdef CONFIG_STACKTRACE
185 if (entry) {
186 pr_warning("Mapped at:\n");
187 print_stack_trace(&entry->stacktrace, 0);
188 }
189#endif
190}
191
192static bool driver_filter(struct device *dev)
193{
194 struct device_driver *drv;
195 unsigned long flags;
196 bool ret;
197
198 /* driver filter off */
199 if (likely(!current_driver_name[0]))
200 return true;
201
202 /* driver filter on and initialized */
203 if (current_driver && dev && dev->driver == current_driver)
204 return true;
205
206 /* driver filter on, but we can't filter on a NULL device... */
207 if (!dev)
208 return false;
209
210 if (current_driver || !current_driver_name[0])
211 return false;
212
213 /* driver filter on but not yet initialized */
214 drv = dev->driver;
215 if (!drv)
216 return false;
217
218 /* lock to protect against change of current_driver_name */
219 read_lock_irqsave(&driver_name_lock, flags);
220
221 ret = false;
222 if (drv->name &&
223 strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
224 current_driver = drv;
225 ret = true;
226 }
227
228 read_unlock_irqrestore(&driver_name_lock, flags);
229
230 return ret;
231}
232
233#define err_printk(dev, entry, format, arg...) do { \
234 error_count += 1; \
235 if (driver_filter(dev) && \
236 (show_all_errors || show_num_errors > 0)) { \
237 WARN(1, "%s %s: " format, \
238 dev ? dev_driver_string(dev) : "NULL", \
239 dev ? dev_name(dev) : "NULL", ## arg); \
240 dump_entry_trace(entry); \
241 } \
242 if (!show_all_errors && show_num_errors > 0) \
243 show_num_errors -= 1; \
244 } while (0);
245
246/*
247 * Hash related functions
248 *
249 * Every DMA-API request is saved into a struct dma_debug_entry. To
250 * have quick access to these structs they are stored into a hash.
251 */
252static int hash_fn(struct dma_debug_entry *entry)
253{
254 /*
255 * Hash function is based on the dma address.
256 * We use bits 20-27 here as the index into the hash
257 */
258 return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
259}
260
261/*
262 * Request exclusive access to a hash bucket for a given dma_debug_entry.
263 */
264static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
265 unsigned long *flags)
266 __acquires(&dma_entry_hash[idx].lock)
267{
268 int idx = hash_fn(entry);
269 unsigned long __flags;
270
271 spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
272 *flags = __flags;
273 return &dma_entry_hash[idx];
274}
275
276/*
277 * Give up exclusive access to the hash bucket
278 */
279static void put_hash_bucket(struct hash_bucket *bucket,
280 unsigned long *flags)
281 __releases(&bucket->lock)
282{
283 unsigned long __flags = *flags;
284
285 spin_unlock_irqrestore(&bucket->lock, __flags);
286}
287
288static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
289{
290 return ((a->dev_addr == b->dev_addr) &&
291 (a->dev == b->dev)) ? true : false;
292}
293
294static bool containing_match(struct dma_debug_entry *a,
295 struct dma_debug_entry *b)
296{
297 if (a->dev != b->dev)
298 return false;
299
300 if ((b->dev_addr <= a->dev_addr) &&
301 ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
302 return true;
303
304 return false;
305}
306
307/*
308 * Search a given entry in the hash bucket list
309 */
310static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
311 struct dma_debug_entry *ref,
312 match_fn match)
313{
314 struct dma_debug_entry *entry, *ret = NULL;
315 int matches = 0, match_lvl, last_lvl = -1;
316
317 list_for_each_entry(entry, &bucket->list, list) {
318 if (!match(ref, entry))
319 continue;
320
321 /*
322 * Some drivers map the same physical address multiple
323 * times. Without a hardware IOMMU this results in the
324 * same device addresses being put into the dma-debug
325 * hash multiple times too. This can result in false
326 * positives being reported. Therefore we implement a
327 * best-fit algorithm here which returns the entry from
328 * the hash which fits best to the reference value
329 * instead of the first-fit.
330 */
331 matches += 1;
332 match_lvl = 0;
333 entry->size == ref->size ? ++match_lvl : 0;
334 entry->type == ref->type ? ++match_lvl : 0;
335 entry->direction == ref->direction ? ++match_lvl : 0;
336 entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
337
338 if (match_lvl == 4) {
339 /* perfect-fit - return the result */
340 return entry;
341 } else if (match_lvl > last_lvl) {
342 /*
343 * We found an entry that fits better then the
344 * previous one or it is the 1st match.
345 */
346 last_lvl = match_lvl;
347 ret = entry;
348 }
349 }
350
351 /*
352 * If we have multiple matches but no perfect-fit, just return
353 * NULL.
354 */
355 ret = (matches == 1) ? ret : NULL;
356
357 return ret;
358}
359
360static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
361 struct dma_debug_entry *ref)
362{
363 return __hash_bucket_find(bucket, ref, exact_match);
364}
365
366static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
367 struct dma_debug_entry *ref,
368 unsigned long *flags)
369{
370
371 unsigned int max_range = dma_get_max_seg_size(ref->dev);
372 struct dma_debug_entry *entry, index = *ref;
373 unsigned int range = 0;
374
375 while (range <= max_range) {
376 entry = __hash_bucket_find(*bucket, ref, containing_match);
377
378 if (entry)
379 return entry;
380
381 /*
382 * Nothing found, go back a hash bucket
383 */
384 put_hash_bucket(*bucket, flags);
385 range += (1 << HASH_FN_SHIFT);
386 index.dev_addr -= (1 << HASH_FN_SHIFT);
387 *bucket = get_hash_bucket(&index, flags);
388 }
389
390 return NULL;
391}
392
393/*
394 * Add an entry to a hash bucket
395 */
396static void hash_bucket_add(struct hash_bucket *bucket,
397 struct dma_debug_entry *entry)
398{
399 list_add_tail(&entry->list, &bucket->list);
400}
401
402/*
403 * Remove entry from a hash bucket list
404 */
405static void hash_bucket_del(struct dma_debug_entry *entry)
406{
407 list_del(&entry->list);
408}
409
410static unsigned long long phys_addr(struct dma_debug_entry *entry)
411{
412 if (entry->type == dma_debug_resource)
413 return __pfn_to_phys(entry->pfn) + entry->offset;
414
415 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
416}
417
418/*
419 * Dump mapping entries for debugging purposes
420 */
421void debug_dma_dump_mappings(struct device *dev)
422{
423 int idx;
424
425 for (idx = 0; idx < HASH_SIZE; idx++) {
426 struct hash_bucket *bucket = &dma_entry_hash[idx];
427 struct dma_debug_entry *entry;
428 unsigned long flags;
429
430 spin_lock_irqsave(&bucket->lock, flags);
431
432 list_for_each_entry(entry, &bucket->list, list) {
433 if (!dev || dev == entry->dev) {
434 dev_info(entry->dev,
435 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
436 type2name[entry->type], idx,
437 phys_addr(entry), entry->pfn,
438 entry->dev_addr, entry->size,
439 dir2name[entry->direction],
440 maperr2str[entry->map_err_type]);
441 }
442 }
443
444 spin_unlock_irqrestore(&bucket->lock, flags);
445 }
446}
447
448/*
449 * For each mapping (initial cacheline in the case of
450 * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
451 * scatterlist, or the cacheline specified in dma_map_single) insert
452 * into this tree using the cacheline as the key. At
453 * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
454 * the entry already exists at insertion time add a tag as a reference
455 * count for the overlapping mappings. For now, the overlap tracking
456 * just ensures that 'unmaps' balance 'maps' before marking the
457 * cacheline idle, but we should also be flagging overlaps as an API
458 * violation.
459 *
460 * Memory usage is mostly constrained by the maximum number of available
461 * dma-debug entries in that we need a free dma_debug_entry before
462 * inserting into the tree. In the case of dma_map_page and
463 * dma_alloc_coherent there is only one dma_debug_entry and one
464 * dma_active_cacheline entry to track per event. dma_map_sg(), on the
465 * other hand, consumes a single dma_debug_entry, but inserts 'nents'
466 * entries into the tree.
467 *
468 * At any time debug_dma_assert_idle() can be called to trigger a
469 * warning if any cachelines in the given page are in the active set.
470 */
471static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
472static DEFINE_SPINLOCK(radix_lock);
473#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
474#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
475#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
476
477static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
478{
479 return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
480 (entry->offset >> L1_CACHE_SHIFT);
481}
482
483static int active_cacheline_read_overlap(phys_addr_t cln)
484{
485 int overlap = 0, i;
486
487 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
488 if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
489 overlap |= 1 << i;
490 return overlap;
491}
492
493static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
494{
495 int i;
496
497 if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
498 return overlap;
499
500 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
501 if (overlap & 1 << i)
502 radix_tree_tag_set(&dma_active_cacheline, cln, i);
503 else
504 radix_tree_tag_clear(&dma_active_cacheline, cln, i);
505
506 return overlap;
507}
508
509static void active_cacheline_inc_overlap(phys_addr_t cln)
510{
511 int overlap = active_cacheline_read_overlap(cln);
512
513 overlap = active_cacheline_set_overlap(cln, ++overlap);
514
515 /* If we overflowed the overlap counter then we're potentially
516 * leaking dma-mappings. Otherwise, if maps and unmaps are
517 * balanced then this overflow may cause false negatives in
518 * debug_dma_assert_idle() as the cacheline may be marked idle
519 * prematurely.
520 */
521 WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
522 "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
523 ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
524}
525
526static int active_cacheline_dec_overlap(phys_addr_t cln)
527{
528 int overlap = active_cacheline_read_overlap(cln);
529
530 return active_cacheline_set_overlap(cln, --overlap);
531}
532
533static int active_cacheline_insert(struct dma_debug_entry *entry)
534{
535 phys_addr_t cln = to_cacheline_number(entry);
536 unsigned long flags;
537 int rc;
538
539 /* If the device is not writing memory then we don't have any
540 * concerns about the cpu consuming stale data. This mitigates
541 * legitimate usages of overlapping mappings.
542 */
543 if (entry->direction == DMA_TO_DEVICE)
544 return 0;
545
546 spin_lock_irqsave(&radix_lock, flags);
547 rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
548 if (rc == -EEXIST)
549 active_cacheline_inc_overlap(cln);
550 spin_unlock_irqrestore(&radix_lock, flags);
551
552 return rc;
553}
554
555static void active_cacheline_remove(struct dma_debug_entry *entry)
556{
557 phys_addr_t cln = to_cacheline_number(entry);
558 unsigned long flags;
559
560 /* ...mirror the insert case */
561 if (entry->direction == DMA_TO_DEVICE)
562 return;
563
564 spin_lock_irqsave(&radix_lock, flags);
565 /* since we are counting overlaps the final put of the
566 * cacheline will occur when the overlap count is 0.
567 * active_cacheline_dec_overlap() returns -1 in that case
568 */
569 if (active_cacheline_dec_overlap(cln) < 0)
570 radix_tree_delete(&dma_active_cacheline, cln);
571 spin_unlock_irqrestore(&radix_lock, flags);
572}
573
574/**
575 * debug_dma_assert_idle() - assert that a page is not undergoing dma
576 * @page: page to lookup in the dma_active_cacheline tree
577 *
578 * Place a call to this routine in cases where the cpu touching the page
579 * before the dma completes (page is dma_unmapped) will lead to data
580 * corruption.
581 */
582void debug_dma_assert_idle(struct page *page)
583{
584 static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
585 struct dma_debug_entry *entry = NULL;
586 void **results = (void **) &ents;
587 unsigned int nents, i;
588 unsigned long flags;
589 phys_addr_t cln;
590
591 if (dma_debug_disabled())
592 return;
593
594 if (!page)
595 return;
596
597 cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
598 spin_lock_irqsave(&radix_lock, flags);
599 nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
600 CACHELINES_PER_PAGE);
601 for (i = 0; i < nents; i++) {
602 phys_addr_t ent_cln = to_cacheline_number(ents[i]);
603
604 if (ent_cln == cln) {
605 entry = ents[i];
606 break;
607 } else if (ent_cln >= cln + CACHELINES_PER_PAGE)
608 break;
609 }
610 spin_unlock_irqrestore(&radix_lock, flags);
611
612 if (!entry)
613 return;
614
615 cln = to_cacheline_number(entry);
616 err_printk(entry->dev, entry,
617 "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
618 &cln);
619}
620
621/*
622 * Wrapper function for adding an entry to the hash.
623 * This function takes care of locking itself.
624 */
625static void add_dma_entry(struct dma_debug_entry *entry)
626{
627 struct hash_bucket *bucket;
628 unsigned long flags;
629 int rc;
630
631 bucket = get_hash_bucket(entry, &flags);
632 hash_bucket_add(bucket, entry);
633 put_hash_bucket(bucket, &flags);
634
635 rc = active_cacheline_insert(entry);
636 if (rc == -ENOMEM) {
637 pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
638 global_disable = true;
639 }
640
641 /* TODO: report -EEXIST errors here as overlapping mappings are
642 * not supported by the DMA API
643 */
644}
645
646static struct dma_debug_entry *__dma_entry_alloc(void)
647{
648 struct dma_debug_entry *entry;
649
650 entry = list_entry(free_entries.next, struct dma_debug_entry, list);
651 list_del(&entry->list);
652 memset(entry, 0, sizeof(*entry));
653
654 num_free_entries -= 1;
655 if (num_free_entries < min_free_entries)
656 min_free_entries = num_free_entries;
657
658 return entry;
659}
660
661/* struct dma_entry allocator
662 *
663 * The next two functions implement the allocator for
664 * struct dma_debug_entries.
665 */
666static struct dma_debug_entry *dma_entry_alloc(void)
667{
668 struct dma_debug_entry *entry;
669 unsigned long flags;
670
671 spin_lock_irqsave(&free_entries_lock, flags);
672
673 if (list_empty(&free_entries)) {
674 global_disable = true;
675 spin_unlock_irqrestore(&free_entries_lock, flags);
676 pr_err("DMA-API: debugging out of memory - disabling\n");
677 return NULL;
678 }
679
680 entry = __dma_entry_alloc();
681
682 spin_unlock_irqrestore(&free_entries_lock, flags);
683
684#ifdef CONFIG_STACKTRACE
685 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
686 entry->stacktrace.entries = entry->st_entries;
687 entry->stacktrace.skip = 2;
688 save_stack_trace(&entry->stacktrace);
689#endif
690
691 return entry;
692}
693
694static void dma_entry_free(struct dma_debug_entry *entry)
695{
696 unsigned long flags;
697
698 active_cacheline_remove(entry);
699
700 /*
701 * add to beginning of the list - this way the entries are
702 * more likely cache hot when they are reallocated.
703 */
704 spin_lock_irqsave(&free_entries_lock, flags);
705 list_add(&entry->list, &free_entries);
706 num_free_entries += 1;
707 spin_unlock_irqrestore(&free_entries_lock, flags);
708}
709
710int dma_debug_resize_entries(u32 num_entries)
711{
712 int i, delta, ret = 0;
713 unsigned long flags;
714 struct dma_debug_entry *entry;
715 LIST_HEAD(tmp);
716
717 spin_lock_irqsave(&free_entries_lock, flags);
718
719 if (nr_total_entries < num_entries) {
720 delta = num_entries - nr_total_entries;
721
722 spin_unlock_irqrestore(&free_entries_lock, flags);
723
724 for (i = 0; i < delta; i++) {
725 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
726 if (!entry)
727 break;
728
729 list_add_tail(&entry->list, &tmp);
730 }
731
732 spin_lock_irqsave(&free_entries_lock, flags);
733
734 list_splice(&tmp, &free_entries);
735 nr_total_entries += i;
736 num_free_entries += i;
737 } else {
738 delta = nr_total_entries - num_entries;
739
740 for (i = 0; i < delta && !list_empty(&free_entries); i++) {
741 entry = __dma_entry_alloc();
742 kfree(entry);
743 }
744
745 nr_total_entries -= i;
746 }
747
748 if (nr_total_entries != num_entries)
749 ret = 1;
750
751 spin_unlock_irqrestore(&free_entries_lock, flags);
752
753 return ret;
754}
755
756/*
757 * DMA-API debugging init code
758 *
759 * The init code does two things:
760 * 1. Initialize core data structures
761 * 2. Preallocate a given number of dma_debug_entry structs
762 */
763
764static int prealloc_memory(u32 num_entries)
765{
766 struct dma_debug_entry *entry, *next_entry;
767 int i;
768
769 for (i = 0; i < num_entries; ++i) {
770 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
771 if (!entry)
772 goto out_err;
773
774 list_add_tail(&entry->list, &free_entries);
775 }
776
777 num_free_entries = num_entries;
778 min_free_entries = num_entries;
779
780 pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
781
782 return 0;
783
784out_err:
785
786 list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
787 list_del(&entry->list);
788 kfree(entry);
789 }
790
791 return -ENOMEM;
792}
793
794static ssize_t filter_read(struct file *file, char __user *user_buf,
795 size_t count, loff_t *ppos)
796{
797 char buf[NAME_MAX_LEN + 1];
798 unsigned long flags;
799 int len;
800
801 if (!current_driver_name[0])
802 return 0;
803
804 /*
805 * We can't copy to userspace directly because current_driver_name can
806 * only be read under the driver_name_lock with irqs disabled. So
807 * create a temporary copy first.
808 */
809 read_lock_irqsave(&driver_name_lock, flags);
810 len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
811 read_unlock_irqrestore(&driver_name_lock, flags);
812
813 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
814}
815
816static ssize_t filter_write(struct file *file, const char __user *userbuf,
817 size_t count, loff_t *ppos)
818{
819 char buf[NAME_MAX_LEN];
820 unsigned long flags;
821 size_t len;
822 int i;
823
824 /*
825 * We can't copy from userspace directly. Access to
826 * current_driver_name is protected with a write_lock with irqs
827 * disabled. Since copy_from_user can fault and may sleep we
828 * need to copy to temporary buffer first
829 */
830 len = min(count, (size_t)(NAME_MAX_LEN - 1));
831 if (copy_from_user(buf, userbuf, len))
832 return -EFAULT;
833
834 buf[len] = 0;
835
836 write_lock_irqsave(&driver_name_lock, flags);
837
838 /*
839 * Now handle the string we got from userspace very carefully.
840 * The rules are:
841 * - only use the first token we got
842 * - token delimiter is everything looking like a space
843 * character (' ', '\n', '\t' ...)
844 *
845 */
846 if (!isalnum(buf[0])) {
847 /*
848 * If the first character userspace gave us is not
849 * alphanumerical then assume the filter should be
850 * switched off.
851 */
852 if (current_driver_name[0])
853 pr_info("DMA-API: switching off dma-debug driver filter\n");
854 current_driver_name[0] = 0;
855 current_driver = NULL;
856 goto out_unlock;
857 }
858
859 /*
860 * Now parse out the first token and use it as the name for the
861 * driver to filter for.
862 */
863 for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
864 current_driver_name[i] = buf[i];
865 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
866 break;
867 }
868 current_driver_name[i] = 0;
869 current_driver = NULL;
870
871 pr_info("DMA-API: enable driver filter for driver [%s]\n",
872 current_driver_name);
873
874out_unlock:
875 write_unlock_irqrestore(&driver_name_lock, flags);
876
877 return count;
878}
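
The parsing loop above keeps only the first whitespace-delimited token from the write and NUL-terminates it in place of the delimiter. A rough user-space sketch of the same extraction, with NAME_MAX_LEN redefined locally for illustration (this is not part of the patch):

	#include <ctype.h>
	#include <stdio.h>

	#define NAME_MAX_LEN 64	/* assumed; the real value lives in dma-debug */

	/* Copy the first whitespace-delimited token of src into dst, mirroring
	 * the filter_write() loop above: the delimiter is overwritten by NUL. */
	static void first_token(char *dst, const char *src)
	{
		int i;

		for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
			dst[i] = src[i];
			if (isspace((unsigned char)src[i]) || src[i] == 0)
				break;
		}
		dst[i] = 0;
	}

	int main(void)
	{
		char name[NAME_MAX_LEN];

		first_token(name, "e1000e some trailing junk\n");
		printf("[%s]\n", name);	/* prints [e1000e] */
		return 0;
	}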
879
880static const struct file_operations filter_fops = {
881 .read = filter_read,
882 .write = filter_write,
883 .llseek = default_llseek,
884};
885
886static int dma_debug_fs_init(void)
887{
888 dma_debug_dent = debugfs_create_dir("dma-api", NULL);
889 if (!dma_debug_dent) {
890 pr_err("DMA-API: can not create debugfs directory\n");
891 return -ENOMEM;
892 }
893
894 global_disable_dent = debugfs_create_bool("disabled", 0444,
895 dma_debug_dent,
896 &global_disable);
897 if (!global_disable_dent)
898 goto out_err;
899
900 error_count_dent = debugfs_create_u32("error_count", 0444,
901 dma_debug_dent, &error_count);
902 if (!error_count_dent)
903 goto out_err;
904
905 show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
906 dma_debug_dent,
907 &show_all_errors);
908 if (!show_all_errors_dent)
909 goto out_err;
910
911 show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
912 dma_debug_dent,
913 &show_num_errors);
914 if (!show_num_errors_dent)
915 goto out_err;
916
917 num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
918 dma_debug_dent,
919 &num_free_entries);
920 if (!num_free_entries_dent)
921 goto out_err;
922
923 min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
924 dma_debug_dent,
925 &min_free_entries);
926 if (!min_free_entries_dent)
927 goto out_err;
928
929 filter_dent = debugfs_create_file("driver_filter", 0644,
930 dma_debug_dent, NULL, &filter_fops);
931 if (!filter_dent)
932 goto out_err;
933
934 return 0;
935
936out_err:
937 debugfs_remove_recursive(dma_debug_dent);
938
939 return -ENOMEM;
940}
941
942static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
943{
944 struct dma_debug_entry *entry;
945 unsigned long flags;
946 int count = 0, i;
947
948 for (i = 0; i < HASH_SIZE; ++i) {
949 spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
950 list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
951 if (entry->dev == dev) {
952 count += 1;
953 *out_entry = entry;
954 }
955 }
956 spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
957 }
958
959 return count;
960}
961
962static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
963{
964 struct device *dev = data;
965 struct dma_debug_entry *uninitialized_var(entry);
966 int count;
967
968 if (dma_debug_disabled())
969 return 0;
970
971 switch (action) {
972 case BUS_NOTIFY_UNBOUND_DRIVER:
973 count = device_dma_allocations(dev, &entry);
974 if (count == 0)
975 break;
976 err_printk(dev, entry, "DMA-API: device driver has pending "
977 "DMA allocations while released from device "
978 "[count=%d]\n"
979			   "Details of one of the leaked entries: "
980 "[device address=0x%016llx] [size=%llu bytes] "
981 "[mapped with %s] [mapped as %s]\n",
982 count, entry->dev_addr, entry->size,
983 dir2name[entry->direction], type2name[entry->type]);
984 break;
985 default:
986 break;
987 }
988
989 return 0;
990}
991
992void dma_debug_add_bus(struct bus_type *bus)
993{
994 struct notifier_block *nb;
995
996 if (dma_debug_disabled())
997 return;
998
999 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
1000 if (nb == NULL) {
1001 pr_err("dma_debug_add_bus: out of memory\n");
1002 return;
1003 }
1004
1005 nb->notifier_call = dma_debug_device_change;
1006
1007 bus_register_notifier(bus, nb);
1008}
1009
1010static int dma_debug_init(void)
1011{
1012 int i;
1013
1014	/* Do not check dma_debug_initialized here, since we really want this
1015	 * function to be called so that it can set dma_debug_initialized
1016 */
1017 if (global_disable)
1018 return 0;
1019
1020 for (i = 0; i < HASH_SIZE; ++i) {
1021 INIT_LIST_HEAD(&dma_entry_hash[i].list);
1022 spin_lock_init(&dma_entry_hash[i].lock);
1023 }
1024
1025 if (dma_debug_fs_init() != 0) {
1026 pr_err("DMA-API: error creating debugfs entries - disabling\n");
1027 global_disable = true;
1028
1029 return 0;
1030 }
1031
1032 if (prealloc_memory(nr_prealloc_entries) != 0) {
1033 pr_err("DMA-API: debugging out of memory error - disabled\n");
1034 global_disable = true;
1035
1036 return 0;
1037 }
1038
1039 nr_total_entries = num_free_entries;
1040
1041 dma_debug_initialized = true;
1042
1043 pr_info("DMA-API: debugging enabled by kernel config\n");
1044 return 0;
1045}
1046core_initcall(dma_debug_init);
1047
1048static __init int dma_debug_cmdline(char *str)
1049{
1050 if (!str)
1051 return -EINVAL;
1052
1053 if (strncmp(str, "off", 3) == 0) {
1054 pr_info("DMA-API: debugging disabled on kernel command line\n");
1055 global_disable = true;
1056 }
1057
1058 return 0;
1059}
1060
1061static __init int dma_debug_entries_cmdline(char *str)
1062{
1063 if (!str)
1064 return -EINVAL;
1065 if (!get_option(&str, &nr_prealloc_entries))
1066 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
1067 return 0;
1068}
1069
1070__setup("dma_debug=", dma_debug_cmdline);
1071__setup("dma_debug_entries=", dma_debug_entries_cmdline);
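
Both handlers hook kernel boot parameters: dma_debug=off disables the checks entirely, and dma_debug_entries= overrides the number of preallocated tracking entries (an unparsable value falls back to PREALLOC_DMA_DEBUG_ENTRIES, as the handler above shows). Example command-line usage, with an illustrative count:

	dma_debug=off
	dma_debug_entries=65536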
1072
1073static void check_unmap(struct dma_debug_entry *ref)
1074{
1075 struct dma_debug_entry *entry;
1076 struct hash_bucket *bucket;
1077 unsigned long flags;
1078
1079 bucket = get_hash_bucket(ref, &flags);
1080 entry = bucket_find_exact(bucket, ref);
1081
1082 if (!entry) {
1083 /* must drop lock before calling dma_mapping_error */
1084 put_hash_bucket(bucket, &flags);
1085
1086 if (dma_mapping_error(ref->dev, ref->dev_addr)) {
1087 err_printk(ref->dev, NULL,
1088 "DMA-API: device driver tries to free an "
1089 "invalid DMA memory address\n");
1090 } else {
1091 err_printk(ref->dev, NULL,
1092 "DMA-API: device driver tries to free DMA "
1093 "memory it has not allocated [device "
1094 "address=0x%016llx] [size=%llu bytes]\n",
1095 ref->dev_addr, ref->size);
1096 }
1097 return;
1098 }
1099
1100 if (ref->size != entry->size) {
1101 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1102 "DMA memory with different size "
1103 "[device address=0x%016llx] [map size=%llu bytes] "
1104 "[unmap size=%llu bytes]\n",
1105 ref->dev_addr, entry->size, ref->size);
1106 }
1107
1108 if (ref->type != entry->type) {
1109 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1110 "DMA memory with wrong function "
1111 "[device address=0x%016llx] [size=%llu bytes] "
1112 "[mapped as %s] [unmapped as %s]\n",
1113 ref->dev_addr, ref->size,
1114 type2name[entry->type], type2name[ref->type]);
1115 } else if ((entry->type == dma_debug_coherent) &&
1116 (phys_addr(ref) != phys_addr(entry))) {
1117 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1118 "DMA memory with different CPU address "
1119 "[device address=0x%016llx] [size=%llu bytes] "
1120 "[cpu alloc address=0x%016llx] "
1121 "[cpu free address=0x%016llx]",
1122 ref->dev_addr, ref->size,
1123 phys_addr(entry),
1124 phys_addr(ref));
1125 }
1126
1127 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
1128 ref->sg_call_ents != entry->sg_call_ents) {
1129 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1130 "DMA sg list with different entry count "
1131 "[map count=%d] [unmap count=%d]\n",
1132 entry->sg_call_ents, ref->sg_call_ents);
1133 }
1134
1135 /*
1136	 * This may not be a bug in reality - but most implementations of the
1137 * DMA API don't handle this properly, so check for it here
1138 */
1139 if (ref->direction != entry->direction) {
1140 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1141 "DMA memory with different direction "
1142 "[device address=0x%016llx] [size=%llu bytes] "
1143 "[mapped with %s] [unmapped with %s]\n",
1144 ref->dev_addr, ref->size,
1145 dir2name[entry->direction],
1146 dir2name[ref->direction]);
1147 }
1148
1149 /*
1150 * Drivers should use dma_mapping_error() to check the returned
1151 * addresses of dma_map_single() and dma_map_page().
1152 * If not, print this warning message. See Documentation/DMA-API.txt.
1153 */
1154 if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
1155 err_printk(ref->dev, entry,
1156			   "DMA-API: device driver failed to check map error "
1157 "[device address=0x%016llx] [size=%llu bytes] "
1158 "[mapped as %s]",
1159 ref->dev_addr, ref->size,
1160 type2name[entry->type]);
1161 }
1162
1163 hash_bucket_del(entry);
1164 dma_entry_free(entry);
1165
1166 put_hash_bucket(bucket, &flags);
1167}
1168
1169static void check_for_stack(struct device *dev,
1170 struct page *page, size_t offset)
1171{
1172 void *addr;
1173 struct vm_struct *stack_vm_area = task_stack_vm_area(current);
1174
1175 if (!stack_vm_area) {
1176 /* Stack is direct-mapped. */
1177 if (PageHighMem(page))
1178 return;
1179 addr = page_address(page) + offset;
1180 if (object_is_on_stack(addr))
1181 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
1182 } else {
1183 /* Stack is vmalloced. */
1184 int i;
1185
1186 for (i = 0; i < stack_vm_area->nr_pages; i++) {
1187 if (page != stack_vm_area->pages[i])
1188 continue;
1189
1190 addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
1191 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
1192 break;
1193 }
1194 }
1195}
1196
1197static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
1198{
1199 unsigned long a1 = (unsigned long)addr;
1200 unsigned long b1 = a1 + len;
1201 unsigned long a2 = (unsigned long)start;
1202 unsigned long b2 = (unsigned long)end;
1203
1204 return !(b1 <= a2 || a1 >= b2);
1205}
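
The test above treats [addr, addr+len) and [start, end) as half-open ranges: they overlap unless one ends at or before the point where the other begins. A minimal user-space sketch of the same predicate (illustrative only, not part of the patch):

	#include <stdbool.h>
	#include <stdio.h>

	/* Same half-open interval test as overlap() above, on plain integers. */
	static bool ranges_overlap(unsigned long a1, unsigned long len,
				   unsigned long a2, unsigned long b2)
	{
		unsigned long b1 = a1 + len;

		return !(b1 <= a2 || a1 >= b2);
	}

	int main(void)
	{
		/* A 0x100-byte buffer at 0x1000 vs. the region [0x10c0, 0x1100). */
		printf("%d\n", ranges_overlap(0x1000, 0x100, 0x10c0, 0x1100)); /* 1 */
		/* Adjacent ranges do not count as overlapping. */
		printf("%d\n", ranges_overlap(0x1000, 0x100, 0x1100, 0x1200)); /* 0 */
		return 0;
	}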
1206
1207static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
1208{
1209 if (overlap(addr, len, _stext, _etext) ||
1210 overlap(addr, len, __start_rodata, __end_rodata))
1211 err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
1212}
1213
1214static void check_sync(struct device *dev,
1215 struct dma_debug_entry *ref,
1216 bool to_cpu)
1217{
1218 struct dma_debug_entry *entry;
1219 struct hash_bucket *bucket;
1220 unsigned long flags;
1221
1222 bucket = get_hash_bucket(ref, &flags);
1223
1224 entry = bucket_find_contain(&bucket, ref, &flags);
1225
1226 if (!entry) {
1227 err_printk(dev, NULL, "DMA-API: device driver tries "
1228 "to sync DMA memory it has not allocated "
1229 "[device address=0x%016llx] [size=%llu bytes]\n",
1230 (unsigned long long)ref->dev_addr, ref->size);
1231 goto out;
1232 }
1233
1234 if (ref->size > entry->size) {
1235 err_printk(dev, entry, "DMA-API: device driver syncs"
1236 " DMA memory outside allocated range "
1237 "[device address=0x%016llx] "
1238 "[allocation size=%llu bytes] "
1239 "[sync offset+size=%llu]\n",
1240 entry->dev_addr, entry->size,
1241 ref->size);
1242 }
1243
1244 if (entry->direction == DMA_BIDIRECTIONAL)
1245 goto out;
1246
1247 if (ref->direction != entry->direction) {
1248 err_printk(dev, entry, "DMA-API: device driver syncs "
1249 "DMA memory with different direction "
1250 "[device address=0x%016llx] [size=%llu bytes] "
1251 "[mapped with %s] [synced with %s]\n",
1252 (unsigned long long)ref->dev_addr, entry->size,
1253 dir2name[entry->direction],
1254 dir2name[ref->direction]);
1255 }
1256
1257 if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
1258 !(ref->direction == DMA_TO_DEVICE))
1259 err_printk(dev, entry, "DMA-API: device driver syncs "
1260 "device read-only DMA memory for cpu "
1261 "[device address=0x%016llx] [size=%llu bytes] "
1262 "[mapped with %s] [synced with %s]\n",
1263 (unsigned long long)ref->dev_addr, entry->size,
1264 dir2name[entry->direction],
1265 dir2name[ref->direction]);
1266
1267 if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
1268 !(ref->direction == DMA_FROM_DEVICE))
1269 err_printk(dev, entry, "DMA-API: device driver syncs "
1270 "device write-only DMA memory to device "
1271 "[device address=0x%016llx] [size=%llu bytes] "
1272 "[mapped with %s] [synced with %s]\n",
1273 (unsigned long long)ref->dev_addr, entry->size,
1274 dir2name[entry->direction],
1275 dir2name[ref->direction]);
1276
1277 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
1278 ref->sg_call_ents != entry->sg_call_ents) {
1279 err_printk(ref->dev, entry, "DMA-API: device driver syncs "
1280 "DMA sg list with different entry count "
1281 "[map count=%d] [sync count=%d]\n",
1282 entry->sg_call_ents, ref->sg_call_ents);
1283 }
1284
1285out:
1286 put_hash_bucket(bucket, &flags);
1287}
1288
1289static void check_sg_segment(struct device *dev, struct scatterlist *sg)
1290{
1291#ifdef CONFIG_DMA_API_DEBUG_SG
1292 unsigned int max_seg = dma_get_max_seg_size(dev);
1293 u64 start, end, boundary = dma_get_seg_boundary(dev);
1294
1295 /*
1296 * Either the driver forgot to set dma_parms appropriately, or
1297 * whoever generated the list forgot to check them.
1298 */
1299 if (sg->length > max_seg)
1300 err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
1301 sg->length, max_seg);
1302 /*
1303 * In some cases this could potentially be the DMA API
1304 * implementation's fault, but it would usually imply that
1305 * the scatterlist was built inappropriately to begin with.
1306 */
1307 start = sg_dma_address(sg);
1308 end = start + sg_dma_len(sg) - 1;
1309 if ((start ^ end) & ~boundary)
1310 err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
1311 start, end, boundary);
1312#endif
1313}
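
The (start ^ end) & ~boundary test flags a segment whose first and last bytes fall into different boundary-sized windows: XOR leaves bits set only where the two addresses differ, and masking off the bits inside one window leaves something nonzero exactly when a window boundary was crossed. A small stand-alone sketch with made-up addresses (not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	/* Nonzero if [start, end] crosses a (boundary + 1)-sized window, using
	 * the same XOR trick as check_sg_segment() above. The boundary mask is
	 * of the form 2^n - 1, e.g. 0xffff for 64 KiB windows. */
	static uint64_t crosses_boundary(uint64_t start, uint64_t end,
					 uint64_t boundary)
	{
		return (start ^ end) & ~boundary;
	}

	int main(void)
	{
		uint64_t boundary = 0xffff;	/* 64 KiB segment boundary */

		/* Segment fully inside one 64 KiB window: no crossing. */
		printf("%d\n", crosses_boundary(0x10000, 0x1ffff, boundary) != 0); /* 0 */
		/* Segment straddling the 0x20000 boundary: crossing detected. */
		printf("%d\n", crosses_boundary(0x1ff00, 0x200ff, boundary) != 0); /* 1 */
		return 0;
	}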
1314
1315void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
1316 size_t size, int direction, dma_addr_t dma_addr,
1317 bool map_single)
1318{
1319 struct dma_debug_entry *entry;
1320
1321 if (unlikely(dma_debug_disabled()))
1322 return;
1323
1324 if (dma_mapping_error(dev, dma_addr))
1325 return;
1326
1327 entry = dma_entry_alloc();
1328 if (!entry)
1329 return;
1330
1331 entry->dev = dev;
1332 entry->type = dma_debug_page;
1333 entry->pfn = page_to_pfn(page);
1334	entry->offset	  = offset;
1335 entry->dev_addr = dma_addr;
1336 entry->size = size;
1337 entry->direction = direction;
1338 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1339
1340 if (map_single)
1341 entry->type = dma_debug_single;
1342
1343 check_for_stack(dev, page, offset);
1344
1345 if (!PageHighMem(page)) {
1346 void *addr = page_address(page) + offset;
1347
1348 check_for_illegal_area(dev, addr, size);
1349 }
1350
1351 add_dma_entry(entry);
1352}
1353EXPORT_SYMBOL(debug_dma_map_page);
1354
1355void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
1356{
1357 struct dma_debug_entry ref;
1358 struct dma_debug_entry *entry;
1359 struct hash_bucket *bucket;
1360 unsigned long flags;
1361
1362 if (unlikely(dma_debug_disabled()))
1363 return;
1364
1365 ref.dev = dev;
1366 ref.dev_addr = dma_addr;
1367 bucket = get_hash_bucket(&ref, &flags);
1368
1369 list_for_each_entry(entry, &bucket->list, list) {
1370 if (!exact_match(&ref, entry))
1371 continue;
1372
1373 /*
1374 * The same physical address can be mapped multiple
1375 * times. Without a hardware IOMMU this results in the
1376 * same device addresses being put into the dma-debug
1377 * hash multiple times too. This can result in false
1378 * positives being reported. Therefore we implement a
1379 * best-fit algorithm here which updates the first entry
1380 * from the hash which fits the reference value and is
1381 * not currently listed as being checked.
1382 */
1383 if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
1384 entry->map_err_type = MAP_ERR_CHECKED;
1385 break;
1386 }
1387 }
1388
1389 put_hash_bucket(bucket, &flags);
1390}
1391EXPORT_SYMBOL(debug_dma_mapping_error);
1392
1393void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
1394 size_t size, int direction, bool map_single)
1395{
1396 struct dma_debug_entry ref = {
1397 .type = dma_debug_page,
1398 .dev = dev,
1399 .dev_addr = addr,
1400 .size = size,
1401 .direction = direction,
1402 };
1403
1404 if (unlikely(dma_debug_disabled()))
1405 return;
1406
1407 if (map_single)
1408 ref.type = dma_debug_single;
1409
1410 check_unmap(&ref);
1411}
1412EXPORT_SYMBOL(debug_dma_unmap_page);
1413
1414void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
1415 int nents, int mapped_ents, int direction)
1416{
1417 struct dma_debug_entry *entry;
1418 struct scatterlist *s;
1419 int i;
1420
1421 if (unlikely(dma_debug_disabled()))
1422 return;
1423
1424 for_each_sg(sg, s, mapped_ents, i) {
1425 entry = dma_entry_alloc();
1426 if (!entry)
1427 return;
1428
1429 entry->type = dma_debug_sg;
1430 entry->dev = dev;
1431 entry->pfn = page_to_pfn(sg_page(s));
1432		entry->offset		= s->offset;
1433 entry->size = sg_dma_len(s);
1434 entry->dev_addr = sg_dma_address(s);
1435 entry->direction = direction;
1436 entry->sg_call_ents = nents;
1437 entry->sg_mapped_ents = mapped_ents;
1438
1439 check_for_stack(dev, sg_page(s), s->offset);
1440
1441 if (!PageHighMem(sg_page(s))) {
1442 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
1443 }
1444
1445 check_sg_segment(dev, s);
1446
1447 add_dma_entry(entry);
1448 }
1449}
1450EXPORT_SYMBOL(debug_dma_map_sg);
1451
1452static int get_nr_mapped_entries(struct device *dev,
1453 struct dma_debug_entry *ref)
1454{
1455 struct dma_debug_entry *entry;
1456 struct hash_bucket *bucket;
1457 unsigned long flags;
1458 int mapped_ents;
1459
1460 bucket = get_hash_bucket(ref, &flags);
1461 entry = bucket_find_exact(bucket, ref);
1462 mapped_ents = 0;
1463
1464 if (entry)
1465 mapped_ents = entry->sg_mapped_ents;
1466 put_hash_bucket(bucket, &flags);
1467
1468 return mapped_ents;
1469}
1470
1471void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
1472 int nelems, int dir)
1473{
1474 struct scatterlist *s;
1475 int mapped_ents = 0, i;
1476
1477 if (unlikely(dma_debug_disabled()))
1478 return;
1479
1480 for_each_sg(sglist, s, nelems, i) {
1481
1482 struct dma_debug_entry ref = {
1483 .type = dma_debug_sg,
1484 .dev = dev,
1485 .pfn = page_to_pfn(sg_page(s)),
1486 .offset = s->offset,
1487 .dev_addr = sg_dma_address(s),
1488 .size = sg_dma_len(s),
1489 .direction = dir,
1490 .sg_call_ents = nelems,
1491 };
1492
1493 if (mapped_ents && i >= mapped_ents)
1494 break;
1495
1496 if (!i)
1497 mapped_ents = get_nr_mapped_entries(dev, &ref);
1498
1499 check_unmap(&ref);
1500 }
1501}
1502EXPORT_SYMBOL(debug_dma_unmap_sg);
1503
1504void debug_dma_alloc_coherent(struct device *dev, size_t size,
1505 dma_addr_t dma_addr, void *virt)
1506{
1507 struct dma_debug_entry *entry;
1508
1509 if (unlikely(dma_debug_disabled()))
1510 return;
1511
1512 if (unlikely(virt == NULL))
1513 return;
1514
1515 /* handle vmalloc and linear addresses */
1516 if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
1517 return;
1518
1519 entry = dma_entry_alloc();
1520 if (!entry)
1521 return;
1522
1523 entry->type = dma_debug_coherent;
1524 entry->dev = dev;
1525 entry->offset = offset_in_page(virt);
1526 entry->size = size;
1527 entry->dev_addr = dma_addr;
1528 entry->direction = DMA_BIDIRECTIONAL;
1529
1530 if (is_vmalloc_addr(virt))
1531 entry->pfn = vmalloc_to_pfn(virt);
1532 else
1533 entry->pfn = page_to_pfn(virt_to_page(virt));
1534
1535 add_dma_entry(entry);
1536}
1537EXPORT_SYMBOL(debug_dma_alloc_coherent);
1538
1539void debug_dma_free_coherent(struct device *dev, size_t size,
1540 void *virt, dma_addr_t addr)
1541{
1542 struct dma_debug_entry ref = {
1543 .type = dma_debug_coherent,
1544 .dev = dev,
1545 .offset = offset_in_page(virt),
1546 .dev_addr = addr,
1547 .size = size,
1548 .direction = DMA_BIDIRECTIONAL,
1549 };
1550
1551 /* handle vmalloc and linear addresses */
1552 if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
1553 return;
1554
1555 if (is_vmalloc_addr(virt))
1556 ref.pfn = vmalloc_to_pfn(virt);
1557 else
1558 ref.pfn = page_to_pfn(virt_to_page(virt));
1559
1560 if (unlikely(dma_debug_disabled()))
1561 return;
1562
1563 check_unmap(&ref);
1564}
1565EXPORT_SYMBOL(debug_dma_free_coherent);
1566
1567void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
1568 int direction, dma_addr_t dma_addr)
1569{
1570 struct dma_debug_entry *entry;
1571
1572 if (unlikely(dma_debug_disabled()))
1573 return;
1574
1575 entry = dma_entry_alloc();
1576 if (!entry)
1577 return;
1578
1579 entry->type = dma_debug_resource;
1580 entry->dev = dev;
1581 entry->pfn = PHYS_PFN(addr);
1582 entry->offset = offset_in_page(addr);
1583 entry->size = size;
1584 entry->dev_addr = dma_addr;
1585 entry->direction = direction;
1586 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1587
1588 add_dma_entry(entry);
1589}
1590EXPORT_SYMBOL(debug_dma_map_resource);
1591
1592void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
1593 size_t size, int direction)
1594{
1595 struct dma_debug_entry ref = {
1596 .type = dma_debug_resource,
1597 .dev = dev,
1598 .dev_addr = dma_addr,
1599 .size = size,
1600 .direction = direction,
1601 };
1602
1603 if (unlikely(dma_debug_disabled()))
1604 return;
1605
1606 check_unmap(&ref);
1607}
1608EXPORT_SYMBOL(debug_dma_unmap_resource);
1609
1610void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
1611 size_t size, int direction)
1612{
1613 struct dma_debug_entry ref;
1614
1615 if (unlikely(dma_debug_disabled()))
1616 return;
1617
1618 ref.type = dma_debug_single;
1619 ref.dev = dev;
1620 ref.dev_addr = dma_handle;
1621 ref.size = size;
1622 ref.direction = direction;
1623 ref.sg_call_ents = 0;
1624
1625 check_sync(dev, &ref, true);
1626}
1627EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
1628
1629void debug_dma_sync_single_for_device(struct device *dev,
1630 dma_addr_t dma_handle, size_t size,
1631 int direction)
1632{
1633 struct dma_debug_entry ref;
1634
1635 if (unlikely(dma_debug_disabled()))
1636 return;
1637
1638 ref.type = dma_debug_single;
1639 ref.dev = dev;
1640 ref.dev_addr = dma_handle;
1641 ref.size = size;
1642 ref.direction = direction;
1643 ref.sg_call_ents = 0;
1644
1645 check_sync(dev, &ref, false);
1646}
1647EXPORT_SYMBOL(debug_dma_sync_single_for_device);
1648
1649void debug_dma_sync_single_range_for_cpu(struct device *dev,
1650 dma_addr_t dma_handle,
1651 unsigned long offset, size_t size,
1652 int direction)
1653{
1654 struct dma_debug_entry ref;
1655
1656 if (unlikely(dma_debug_disabled()))
1657 return;
1658
1659 ref.type = dma_debug_single;
1660 ref.dev = dev;
1661 ref.dev_addr = dma_handle;
1662 ref.size = offset + size;
1663 ref.direction = direction;
1664 ref.sg_call_ents = 0;
1665
1666 check_sync(dev, &ref, true);
1667}
1668EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
1669
1670void debug_dma_sync_single_range_for_device(struct device *dev,
1671 dma_addr_t dma_handle,
1672 unsigned long offset,
1673 size_t size, int direction)
1674{
1675 struct dma_debug_entry ref;
1676
1677 if (unlikely(dma_debug_disabled()))
1678 return;
1679
1680 ref.type = dma_debug_single;
1681 ref.dev = dev;
1682 ref.dev_addr = dma_handle;
1683 ref.size = offset + size;
1684 ref.direction = direction;
1685 ref.sg_call_ents = 0;
1686
1687 check_sync(dev, &ref, false);
1688}
1689EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
1690
1691void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
1692 int nelems, int direction)
1693{
1694 struct scatterlist *s;
1695 int mapped_ents = 0, i;
1696
1697 if (unlikely(dma_debug_disabled()))
1698 return;
1699
1700 for_each_sg(sg, s, nelems, i) {
1701
1702 struct dma_debug_entry ref = {
1703 .type = dma_debug_sg,
1704 .dev = dev,
1705 .pfn = page_to_pfn(sg_page(s)),
1706 .offset = s->offset,
1707 .dev_addr = sg_dma_address(s),
1708 .size = sg_dma_len(s),
1709 .direction = direction,
1710 .sg_call_ents = nelems,
1711 };
1712
1713 if (!i)
1714 mapped_ents = get_nr_mapped_entries(dev, &ref);
1715
1716 if (i >= mapped_ents)
1717 break;
1718
1719 check_sync(dev, &ref, true);
1720 }
1721}
1722EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
1723
1724void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
1725 int nelems, int direction)
1726{
1727 struct scatterlist *s;
1728 int mapped_ents = 0, i;
1729
1730 if (unlikely(dma_debug_disabled()))
1731 return;
1732
1733 for_each_sg(sg, s, nelems, i) {
1734
1735 struct dma_debug_entry ref = {
1736 .type = dma_debug_sg,
1737 .dev = dev,
1738 .pfn = page_to_pfn(sg_page(s)),
1739 .offset = s->offset,
1740 .dev_addr = sg_dma_address(s),
1741 .size = sg_dma_len(s),
1742 .direction = direction,
1743 .sg_call_ents = nelems,
1744 };
1745 if (!i)
1746 mapped_ents = get_nr_mapped_entries(dev, &ref);
1747
1748 if (i >= mapped_ents)
1749 break;
1750
1751 check_sync(dev, &ref, false);
1752 }
1753}
1754EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
1755
1756static int __init dma_debug_driver_setup(char *str)
1757{
1758 int i;
1759
1760 for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
1761 current_driver_name[i] = *str;
1762 if (*str == 0)
1763 break;
1764 }
1765
1766 if (current_driver_name[0])
1767 pr_info("DMA-API: enable driver filter for driver [%s]\n",
1768 current_driver_name);
1769
1770
1771 return 1;
1772}
1773__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
new file mode 100644
index 000000000000..8be8106270c2
--- /dev/null
+++ b/kernel/dma/direct.c
@@ -0,0 +1,204 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * DMA operations that map physical memory directly without using an IOMMU or
4 * flushing caches.
5 */
6#include <linux/export.h>
7#include <linux/mm.h>
8#include <linux/dma-direct.h>
9#include <linux/scatterlist.h>
10#include <linux/dma-contiguous.h>
11#include <linux/pfn.h>
12#include <linux/set_memory.h>
13
14#define DIRECT_MAPPING_ERROR 0
15
16/*
17 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
18 * some use it for entirely different regions:
19 */
20#ifndef ARCH_ZONE_DMA_BITS
21#define ARCH_ZONE_DMA_BITS 24
22#endif
23
24/*
25 * For AMD SEV all DMA must be to unencrypted addresses.
26 */
27static inline bool force_dma_unencrypted(void)
28{
29 return sev_active();
30}
31
32static bool
33check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
34 const char *caller)
35{
36 if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
37 if (!dev->dma_mask) {
38 dev_err(dev,
39 "%s: call on device without dma_mask\n",
40 caller);
41 return false;
42 }
43
44 if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
45 dev_err(dev,
46 "%s: overflow %pad+%zu of device mask %llx\n",
47 caller, &dma_addr, size, *dev->dma_mask);
48 }
49 return false;
50 }
51 return true;
52}
53
54static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
55{
56 dma_addr_t addr = force_dma_unencrypted() ?
57 __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
58 return addr + size - 1 <= dev->coherent_dma_mask;
59}
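
dma_coherent_ok() simply checks that the last byte of the allocation is addressable under the device's coherent DMA mask. A user-space sketch of that arithmetic, assuming the bus address equals the physical address and using the usual mask definition (illustrative values only):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* A mask with the low n bits set, like the kernel's DMA_BIT_MASK(). */
	#define BIT_MASK64(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

	/* Mirrors dma_coherent_ok(): the buffer's last byte must fit the mask. */
	static bool coherent_ok(uint64_t bus_addr, size_t size, uint64_t mask)
	{
		return bus_addr + size - 1 <= mask;
	}

	int main(void)
	{
		uint64_t mask32 = BIT_MASK64(32);

		/* 4 KiB ending exactly at 4 GiB - 1: fits a 32-bit mask. */
		printf("%d\n", coherent_ok(0xfffff000ULL, 0x1000, mask32)); /* 1 */
		/* Same buffer one byte higher: last byte crosses 4 GiB, rejected. */
		printf("%d\n", coherent_ok(0xfffff001ULL, 0x1000, mask32)); /* 0 */
		return 0;
	}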
60
61void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
62 gfp_t gfp, unsigned long attrs)
63{
64 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
65 int page_order = get_order(size);
66 struct page *page = NULL;
67 void *ret;
68
69 /* we always manually zero the memory once we are done: */
70 gfp &= ~__GFP_ZERO;
71
72 /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
73 if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
74 gfp |= GFP_DMA;
75 if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
76 gfp |= GFP_DMA32;
77
78again:
79 /* CMA can be used only in the context which permits sleeping */
80 if (gfpflags_allow_blocking(gfp)) {
81 page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
82 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
83 dma_release_from_contiguous(dev, page, count);
84 page = NULL;
85 }
86 }
87 if (!page)
88 page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
89
90 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
91 __free_pages(page, page_order);
92 page = NULL;
93
94 if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
95 dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
96 !(gfp & (GFP_DMA32 | GFP_DMA))) {
97 gfp |= GFP_DMA32;
98 goto again;
99 }
100
101 if (IS_ENABLED(CONFIG_ZONE_DMA) &&
102 dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
103 !(gfp & GFP_DMA)) {
104 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
105 goto again;
106 }
107 }
108
109 if (!page)
110 return NULL;
111 ret = page_address(page);
112 if (force_dma_unencrypted()) {
113 set_memory_decrypted((unsigned long)ret, 1 << page_order);
114 *dma_handle = __phys_to_dma(dev, page_to_phys(page));
115 } else {
116 *dma_handle = phys_to_dma(dev, page_to_phys(page));
117 }
118 memset(ret, 0, size);
119 return ret;
120}
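
The allocator above first picks a GFP zone from the coherent mask and then, if the returned page is still not addressable, retries with a more restrictive zone. A user-space sketch of just the initial zone choice, with BIT_MASK64 and ZONE_DMA_BITS standing in for the kernel macros (illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	#define BIT_MASK64(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))
	#define ZONE_DMA_BITS	24	/* the ARCH_ZONE_DMA_BITS default above */

	/* Mirrors the initial zone choice in dma_direct_alloc(): ZONE_DMA for
	 * very small coherent masks, ZONE_DMA32 for 32-bit masks, no modifier
	 * otherwise. (The real code then falls back to stricter zones if the
	 * page that comes back still does not satisfy the mask.) */
	static const char *initial_zone(uint64_t coherent_mask)
	{
		if (coherent_mask <= BIT_MASK64(ZONE_DMA_BITS))
			return "GFP_DMA";
		if (coherent_mask <= BIT_MASK64(32))
			return "GFP_DMA32";
		return "no zone modifier";
	}

	int main(void)
	{
		printf("%s\n", initial_zone(BIT_MASK64(24)));	/* GFP_DMA */
		printf("%s\n", initial_zone(BIT_MASK64(32)));	/* GFP_DMA32 */
		printf("%s\n", initial_zone(BIT_MASK64(64)));	/* no zone modifier */
		return 0;
	}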
121
122/*
123 * NOTE: this function must never look at the dma_addr argument, because we want
124 * to be able to use it as a helper for iommu implementations as well.
125 */
126void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
127 dma_addr_t dma_addr, unsigned long attrs)
128{
129 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
130 unsigned int page_order = get_order(size);
131
132 if (force_dma_unencrypted())
133 set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
134 if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
135 free_pages((unsigned long)cpu_addr, page_order);
136}
137
138dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
139 unsigned long offset, size_t size, enum dma_data_direction dir,
140 unsigned long attrs)
141{
142 dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
143
144 if (!check_addr(dev, dma_addr, size, __func__))
145 return DIRECT_MAPPING_ERROR;
146 return dma_addr;
147}
148
149int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
150 enum dma_data_direction dir, unsigned long attrs)
151{
152 int i;
153 struct scatterlist *sg;
154
155 for_each_sg(sgl, sg, nents, i) {
156 BUG_ON(!sg_page(sg));
157
158 sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
159 if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
160 return 0;
161 sg_dma_len(sg) = sg->length;
162 }
163
164 return nents;
165}
166
167int dma_direct_supported(struct device *dev, u64 mask)
168{
169#ifdef CONFIG_ZONE_DMA
170 if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
171 return 0;
172#else
173 /*
174 * Because 32-bit DMA masks are so common we expect every architecture
175 * to be able to satisfy them - either by not supporting more physical
176 * memory, or by providing a ZONE_DMA32. If neither is the case, the
177 * architecture needs to use an IOMMU instead of the direct mapping.
178 */
179 if (mask < DMA_BIT_MASK(32))
180 return 0;
181#endif
182 /*
183 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
184 * if the device itself might support it.
185 */
186 if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
187 return 0;
188 return 1;
189}
190
191int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
192{
193 return dma_addr == DIRECT_MAPPING_ERROR;
194}
195
196const struct dma_map_ops dma_direct_ops = {
197 .alloc = dma_direct_alloc,
198 .free = dma_direct_free,
199 .map_page = dma_direct_map_page,
200 .map_sg = dma_direct_map_sg,
201 .dma_supported = dma_direct_supported,
202 .mapping_error = dma_direct_mapping_error,
203};
204EXPORT_SYMBOL(dma_direct_ops);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
new file mode 100644
index 000000000000..d2a92ddaac4d
--- /dev/null
+++ b/kernel/dma/mapping.c
@@ -0,0 +1,345 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * arch-independent dma-mapping routines
4 *
5 * Copyright (c) 2006 SUSE Linux Products GmbH
6 * Copyright (c) 2006 Tejun Heo <teheo@suse.de>
7 */
8
9#include <linux/acpi.h>
10#include <linux/dma-mapping.h>
11#include <linux/export.h>
12#include <linux/gfp.h>
13#include <linux/of_device.h>
14#include <linux/slab.h>
15#include <linux/vmalloc.h>
16
17/*
18 * Managed DMA API
19 */
20struct dma_devres {
21 size_t size;
22 void *vaddr;
23 dma_addr_t dma_handle;
24 unsigned long attrs;
25};
26
27static void dmam_release(struct device *dev, void *res)
28{
29 struct dma_devres *this = res;
30
31 dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle,
32 this->attrs);
33}
34
35static int dmam_match(struct device *dev, void *res, void *match_data)
36{
37 struct dma_devres *this = res, *match = match_data;
38
39 if (this->vaddr == match->vaddr) {
40 WARN_ON(this->size != match->size ||
41 this->dma_handle != match->dma_handle);
42 return 1;
43 }
44 return 0;
45}
46
47/**
48 * dmam_alloc_coherent - Managed dma_alloc_coherent()
49 * @dev: Device to allocate coherent memory for
50 * @size: Size of allocation
51 * @dma_handle: Out argument for allocated DMA handle
52 * @gfp: Allocation flags
53 *
54 * Managed dma_alloc_coherent(). Memory allocated using this function
55 * will be automatically released on driver detach.
56 *
57 * RETURNS:
58 * Pointer to allocated memory on success, NULL on failure.
59 */
60void *dmam_alloc_coherent(struct device *dev, size_t size,
61 dma_addr_t *dma_handle, gfp_t gfp)
62{
63 struct dma_devres *dr;
64 void *vaddr;
65
66 dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
67 if (!dr)
68 return NULL;
69
70 vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp);
71 if (!vaddr) {
72 devres_free(dr);
73 return NULL;
74 }
75
76 dr->vaddr = vaddr;
77 dr->dma_handle = *dma_handle;
78 dr->size = size;
79
80 devres_add(dev, dr);
81
82 return vaddr;
83}
84EXPORT_SYMBOL(dmam_alloc_coherent);
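
As with other devres helpers, a driver typically calls this from its probe path and never frees the buffer explicitly; devres releases it when the driver detaches. A hypothetical driver fragment (the foo_* names and size are made up for illustration, not taken from any real driver):

	#include <linux/dma-mapping.h>

	#define FOO_RING_BYTES	4096	/* illustrative buffer size */

	/* Hypothetical probe step: the buffer lives as long as the binding. */
	static int foo_probe_dma(struct device *dev)
	{
		dma_addr_t ring_dma;
		void *ring;

		ring = dmam_alloc_coherent(dev, FOO_RING_BYTES, &ring_dma,
					   GFP_KERNEL);
		if (!ring)
			return -ENOMEM;

		/* ... hand ring_dma to the hardware, use ring from the CPU ... */
		return 0;
		/* No dmam_free_coherent() needed; devres frees it on detach. */
	}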
85
86/**
87 * dmam_free_coherent - Managed dma_free_coherent()
88 * @dev: Device to free coherent memory for
89 * @size: Size of allocation
90 * @vaddr: Virtual address of the memory to free
91 * @dma_handle: DMA handle of the memory to free
92 *
93 * Managed dma_free_coherent().
94 */
95void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
96 dma_addr_t dma_handle)
97{
98 struct dma_devres match_data = { size, vaddr, dma_handle };
99
100 dma_free_coherent(dev, size, vaddr, dma_handle);
101 WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data));
102}
103EXPORT_SYMBOL(dmam_free_coherent);
104
105/**
106 * dmam_alloc_attrs - Managed dma_alloc_attrs()
107 * @dev: Device to allocate non_coherent memory for
108 * @size: Size of allocation
109 * @dma_handle: Out argument for allocated DMA handle
110 * @gfp: Allocation flags
111 * @attrs: Flags in the DMA_ATTR_* namespace.
112 *
113 * Managed dma_alloc_attrs(). Memory allocated using this function will be
114 * automatically released on driver detach.
115 *
116 * RETURNS:
117 * Pointer to allocated memory on success, NULL on failure.
118 */
119void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
120 gfp_t gfp, unsigned long attrs)
121{
122 struct dma_devres *dr;
123 void *vaddr;
124
125 dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
126 if (!dr)
127 return NULL;
128
129 vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs);
130 if (!vaddr) {
131 devres_free(dr);
132 return NULL;
133 }
134
135 dr->vaddr = vaddr;
136 dr->dma_handle = *dma_handle;
137 dr->size = size;
138 dr->attrs = attrs;
139
140 devres_add(dev, dr);
141
142 return vaddr;
143}
144EXPORT_SYMBOL(dmam_alloc_attrs);
145
146#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
147
148static void dmam_coherent_decl_release(struct device *dev, void *res)
149{
150 dma_release_declared_memory(dev);
151}
152
153/**
154 * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
155 * @dev: Device to declare coherent memory for
156 * @phys_addr: Physical address of coherent memory to be declared
157 * @device_addr: Device address of coherent memory to be declared
158 * @size: Size of coherent memory to be declared
159 * @flags: Flags
160 *
161 * Managed dma_declare_coherent_memory().
162 *
163 * RETURNS:
164 * 0 on success, -errno on failure.
165 */
166int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
167 dma_addr_t device_addr, size_t size, int flags)
168{
169 void *res;
170 int rc;
171
172 res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL);
173 if (!res)
174 return -ENOMEM;
175
176 rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
177 flags);
178 if (!rc)
179 devres_add(dev, res);
180 else
181 devres_free(res);
182
183 return rc;
184}
185EXPORT_SYMBOL(dmam_declare_coherent_memory);
186
187/**
188 * dmam_release_declared_memory - Managed dma_release_declared_memory().
189 * @dev: Device to release declared coherent memory for
190 *
191 * Managed dma_release_declared_memory().
192 */
193void dmam_release_declared_memory(struct device *dev)
194{
195 WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL));
196}
197EXPORT_SYMBOL(dmam_release_declared_memory);
198
199#endif
200
201/*
202 * Create scatter-list for the already allocated DMA buffer.
203 */
204int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
205 void *cpu_addr, dma_addr_t handle, size_t size)
206{
207 struct page *page = virt_to_page(cpu_addr);
208 int ret;
209
210 ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
211 if (unlikely(ret))
212 return ret;
213
214 sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
215 return 0;
216}
217EXPORT_SYMBOL(dma_common_get_sgtable);
218
219/*
220 * Create userspace mapping for the DMA-coherent memory.
221 */
222int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
223 void *cpu_addr, dma_addr_t dma_addr, size_t size)
224{
225 int ret = -ENXIO;
226#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
227 unsigned long user_count = vma_pages(vma);
228 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
229 unsigned long off = vma->vm_pgoff;
230
231 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
232
233 if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
234 return ret;
235
236 if (off < count && user_count <= (count - off))
237 ret = remap_pfn_range(vma, vma->vm_start,
238 page_to_pfn(virt_to_page(cpu_addr)) + off,
239 user_count << PAGE_SHIFT,
240 vma->vm_page_prot);
241#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
242
243 return ret;
244}
245EXPORT_SYMBOL(dma_common_mmap);
246
247#ifdef CONFIG_MMU
248static struct vm_struct *__dma_common_pages_remap(struct page **pages,
249 size_t size, unsigned long vm_flags, pgprot_t prot,
250 const void *caller)
251{
252 struct vm_struct *area;
253
254 area = get_vm_area_caller(size, vm_flags, caller);
255 if (!area)
256 return NULL;
257
258 if (map_vm_area(area, prot, pages)) {
259 vunmap(area->addr);
260 return NULL;
261 }
262
263 return area;
264}
265
266/*
267 * remaps an array of PAGE_SIZE pages into another vm_area
268 * Cannot be used in non-sleeping contexts
269 */
270void *dma_common_pages_remap(struct page **pages, size_t size,
271 unsigned long vm_flags, pgprot_t prot,
272 const void *caller)
273{
274 struct vm_struct *area;
275
276 area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
277 if (!area)
278 return NULL;
279
280 area->pages = pages;
281
282 return area->addr;
283}
284
285/*
286 * remaps an allocated contiguous region into another vm_area.
287 * Cannot be used in non-sleeping contexts
288 */
289
290void *dma_common_contiguous_remap(struct page *page, size_t size,
291 unsigned long vm_flags,
292 pgprot_t prot, const void *caller)
293{
294 int i;
295 struct page **pages;
296 struct vm_struct *area;
297
298 pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
299 if (!pages)
300 return NULL;
301
302 for (i = 0; i < (size >> PAGE_SHIFT); i++)
303 pages[i] = nth_page(page, i);
304
305 area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
306
307 kfree(pages);
308
309 if (!area)
310 return NULL;
311 return area->addr;
312}
313
314/*
315 * unmaps a range previously mapped by dma_common_*_remap
316 */
317void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
318{
319 struct vm_struct *area = find_vm_area(cpu_addr);
320
321 if (!area || (area->flags & vm_flags) != vm_flags) {
322 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
323 return;
324 }
325
326 unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
327 vunmap(cpu_addr);
328}
329#endif
330
331/*
332 * enables DMA API use for a device
333 */
334int dma_configure(struct device *dev)
335{
336 if (dev->bus->dma_configure)
337 return dev->bus->dma_configure(dev);
338 return 0;
339}
340
341void dma_deconfigure(struct device *dev)
342{
343 of_dma_deconfigure(dev);
344 acpi_dma_deconfigure(dev);
345}
diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c
new file mode 100644
index 000000000000..79e9a757387f
--- /dev/null
+++ b/kernel/dma/noncoherent.c
@@ -0,0 +1,102 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Christoph Hellwig.
4 *
5 * DMA operations that map physical memory directly without providing cache
6 * coherence.
7 */
8#include <linux/export.h>
9#include <linux/mm.h>
10#include <linux/dma-direct.h>
11#include <linux/dma-noncoherent.h>
12#include <linux/scatterlist.h>
13
14static void dma_noncoherent_sync_single_for_device(struct device *dev,
15 dma_addr_t addr, size_t size, enum dma_data_direction dir)
16{
17 arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
18}
19
20static void dma_noncoherent_sync_sg_for_device(struct device *dev,
21 struct scatterlist *sgl, int nents, enum dma_data_direction dir)
22{
23 struct scatterlist *sg;
24 int i;
25
26 for_each_sg(sgl, sg, nents, i)
27 arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
28}
29
30static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
31 unsigned long offset, size_t size, enum dma_data_direction dir,
32 unsigned long attrs)
33{
34 dma_addr_t addr;
35
36 addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
37 if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
38 arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
39 size, dir);
40 return addr;
41}
42
43static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
44 int nents, enum dma_data_direction dir, unsigned long attrs)
45{
46 nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
47 if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
48 dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
49 return nents;
50}
51
52#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
53static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
54 dma_addr_t addr, size_t size, enum dma_data_direction dir)
55{
56 arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
57}
58
59static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
60 struct scatterlist *sgl, int nents, enum dma_data_direction dir)
61{
62 struct scatterlist *sg;
63 int i;
64
65 for_each_sg(sgl, sg, nents, i)
66 arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
67}
68
69static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
70 size_t size, enum dma_data_direction dir, unsigned long attrs)
71{
72 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
73 dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
74}
75
76static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
77 int nents, enum dma_data_direction dir, unsigned long attrs)
78{
79 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
80 dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
81}
82#endif
83
84const struct dma_map_ops dma_noncoherent_ops = {
85 .alloc = arch_dma_alloc,
86 .free = arch_dma_free,
87 .mmap = arch_dma_mmap,
88 .sync_single_for_device = dma_noncoherent_sync_single_for_device,
89 .sync_sg_for_device = dma_noncoherent_sync_sg_for_device,
90 .map_page = dma_noncoherent_map_page,
91 .map_sg = dma_noncoherent_map_sg,
92#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
93 .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu,
94 .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu,
95 .unmap_page = dma_noncoherent_unmap_page,
96 .unmap_sg = dma_noncoherent_unmap_sg,
97#endif
98 .dma_supported = dma_direct_supported,
99 .mapping_error = dma_direct_mapping_error,
100 .cache_sync = arch_dma_cache_sync,
101};
102EXPORT_SYMBOL(dma_noncoherent_ops);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
new file mode 100644
index 000000000000..904541055792
--- /dev/null
+++ b/kernel/dma/swiotlb.c
@@ -0,0 +1,1088 @@
1/*
2 * Dynamic DMA mapping support.
3 *
4 * This implementation is a fallback for platforms that do not support
5 * I/O TLBs (aka DMA address translation hardware).
6 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
7 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
8 * Copyright (C) 2000, 2003 Hewlett-Packard Co
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 *
11 * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
12 * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
13 * unnecessary i-cache flushing.
14 * 04/07/.. ak Better overflow handling. Assorted fixes.
15 * 05/09/10 linville Add support for syncing ranges, support syncing for
16 * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
17 * 08/12/11 beckyb Add highmem support
18 */
19
20#include <linux/cache.h>
21#include <linux/dma-direct.h>
22#include <linux/mm.h>
23#include <linux/export.h>
24#include <linux/spinlock.h>
25#include <linux/string.h>
26#include <linux/swiotlb.h>
27#include <linux/pfn.h>
28#include <linux/types.h>
29#include <linux/ctype.h>
30#include <linux/highmem.h>
31#include <linux/gfp.h>
32#include <linux/scatterlist.h>
33#include <linux/mem_encrypt.h>
34#include <linux/set_memory.h>
35
36#include <asm/io.h>
37#include <asm/dma.h>
38
39#include <linux/init.h>
40#include <linux/bootmem.h>
41#include <linux/iommu-helper.h>
42
43#define CREATE_TRACE_POINTS
44#include <trace/events/swiotlb.h>
45
46#define OFFSET(val,align) ((unsigned long) \
47 ( (val) & ( (align) - 1)))
48
49#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
50
51/*
52 * Minimum IO TLB size to bother booting with. Systems with mainly
53 * 64bit capable cards will only lightly use the swiotlb. If we can't
54 * allocate a contiguous 1MB, we're probably in trouble anyway.
55 */
56#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
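
The constants above all count IO TLB slabs rather than bytes. Assuming the IO_TLB_SHIFT value of 11 (2 KiB slabs) from <linux/swiotlb.h> and 4 KiB pages, a quick user-space check of the resulting numbers (illustrative only):

	#include <stdio.h>

	#define IO_TLB_SHIFT	11	/* assumed: 2 KiB slabs */
	#define PAGE_SHIFT	12	/* assumed: 4 KiB pages */

	#define SLABS_PER_PAGE	 (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
	#define IO_TLB_MIN_SLABS ((1 << 20) >> IO_TLB_SHIFT)

	int main(void)
	{
		unsigned long default_bytes = 64UL << 20; /* IO_TLB_DEFAULT_SIZE */

		printf("slabs per page:  %d\n", SLABS_PER_PAGE);	/* 2 */
		printf("min slabs (1MB): %d\n", IO_TLB_MIN_SLABS);	/* 512 */
		printf("default nslabs:  %lu\n",
		       default_bytes >> IO_TLB_SHIFT);			/* 32768 */
		return 0;
	}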
57
58enum swiotlb_force swiotlb_force;
59
60/*
61 * Used to do a quick range check in swiotlb_tbl_unmap_single and
62 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
63 * API.
64 */
65static phys_addr_t io_tlb_start, io_tlb_end;
66
67/*
68 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
69 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
70 */
71static unsigned long io_tlb_nslabs;
72
73/*
74 * When the IOMMU overflows we return a fallback buffer. This sets the size.
75 */
76static unsigned long io_tlb_overflow = 32*1024;
77
78static phys_addr_t io_tlb_overflow_buffer;
79
80/*
81 * This is a free list describing the number of free entries available from
82 * each index
83 */
84static unsigned int *io_tlb_list;
85static unsigned int io_tlb_index;
86
87/*
 88 * Max segment that we can provide which (if pages are contiguous) will
 89 * not be bounced (unless SWIOTLB_FORCE is set).
90 */
91unsigned int max_segment;
92
93/*
94 * We need to save away the original address corresponding to a mapped entry
95 * for the sync operations.
96 */
97#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
98static phys_addr_t *io_tlb_orig_addr;
99
100/*
101 * Protect the above data structures in the map and unmap calls
102 */
103static DEFINE_SPINLOCK(io_tlb_lock);
104
105static int late_alloc;
106
107static int __init
108setup_io_tlb_npages(char *str)
109{
110 if (isdigit(*str)) {
111 io_tlb_nslabs = simple_strtoul(str, &str, 0);
112 /* avoid tail segment of size < IO_TLB_SEGSIZE */
113 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
114 }
115 if (*str == ',')
116 ++str;
117 if (!strcmp(str, "force")) {
118 swiotlb_force = SWIOTLB_FORCE;
119 } else if (!strcmp(str, "noforce")) {
120 swiotlb_force = SWIOTLB_NO_FORCE;
121 io_tlb_nslabs = 1;
122 }
123
124 return 0;
125}
126early_param("swiotlb", setup_io_tlb_npages);
127/* make io_tlb_overflow tunable too? */
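
setup_io_tlb_npages() accepts an optional slab count followed by an optional force/noforce keyword. Example boot parameters, with sizes assuming the usual 2 KiB IO TLB slab (values illustrative):

	swiotlb=65536          reserve 65536 slabs (about 128 MB of bounce buffers)
	swiotlb=65536,force    same reservation, and bounce every DMA mapping
	swiotlb=noforce        never bounce, and shrink the reservation to a minimum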
128
129unsigned long swiotlb_nr_tbl(void)
130{
131 return io_tlb_nslabs;
132}
133EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
134
135unsigned int swiotlb_max_segment(void)
136{
137 return max_segment;
138}
139EXPORT_SYMBOL_GPL(swiotlb_max_segment);
140
141void swiotlb_set_max_segment(unsigned int val)
142{
143 if (swiotlb_force == SWIOTLB_FORCE)
144 max_segment = 1;
145 else
146 max_segment = rounddown(val, PAGE_SIZE);
147}
148
149/* default to 64MB */
150#define IO_TLB_DEFAULT_SIZE (64UL<<20)
151unsigned long swiotlb_size_or_default(void)
152{
153 unsigned long size;
154
155 size = io_tlb_nslabs << IO_TLB_SHIFT;
156
157 return size ? size : (IO_TLB_DEFAULT_SIZE);
158}
159
160static bool no_iotlb_memory;
161
162void swiotlb_print_info(void)
163{
164 unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
165 unsigned char *vstart, *vend;
166
167 if (no_iotlb_memory) {
168 pr_warn("software IO TLB: No low mem\n");
169 return;
170 }
171
172 vstart = phys_to_virt(io_tlb_start);
173 vend = phys_to_virt(io_tlb_end);
174
175 printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
176 (unsigned long long)io_tlb_start,
177 (unsigned long long)io_tlb_end,
178 bytes >> 20, vstart, vend - 1);
179}
180
181/*
182 * Early SWIOTLB allocation may be too early to allow an architecture to
183 * perform the desired operations. This function allows the architecture to
184 * call SWIOTLB when the operations are possible. It needs to be called
185 * before the SWIOTLB memory is used.
186 */
187void __init swiotlb_update_mem_attributes(void)
188{
189 void *vaddr;
190 unsigned long bytes;
191
192 if (no_iotlb_memory || late_alloc)
193 return;
194
195 vaddr = phys_to_virt(io_tlb_start);
196 bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
197 set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
198 memset(vaddr, 0, bytes);
199
200 vaddr = phys_to_virt(io_tlb_overflow_buffer);
201 bytes = PAGE_ALIGN(io_tlb_overflow);
202 set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
203 memset(vaddr, 0, bytes);
204}
205
206int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
207{
208 void *v_overflow_buffer;
209 unsigned long i, bytes;
210
211 bytes = nslabs << IO_TLB_SHIFT;
212
213 io_tlb_nslabs = nslabs;
214 io_tlb_start = __pa(tlb);
215 io_tlb_end = io_tlb_start + bytes;
216
217 /*
218 * Get the overflow emergency buffer
219 */
220 v_overflow_buffer = memblock_virt_alloc_low_nopanic(
221 PAGE_ALIGN(io_tlb_overflow),
222 PAGE_SIZE);
223 if (!v_overflow_buffer)
224 return -ENOMEM;
225
226 io_tlb_overflow_buffer = __pa(v_overflow_buffer);
227
228 /*
229 * Allocate and initialize the free list array. This array is used
230 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
231 * between io_tlb_start and io_tlb_end.
232 */
233 io_tlb_list = memblock_virt_alloc(
234 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
235 PAGE_SIZE);
236 io_tlb_orig_addr = memblock_virt_alloc(
237 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
238 PAGE_SIZE);
239 for (i = 0; i < io_tlb_nslabs; i++) {
240 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
241 io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
242 }
243 io_tlb_index = 0;
244
245 if (verbose)
246 swiotlb_print_info();
247
248 swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
249 return 0;
250}
251
252/*
253 * Statically reserve bounce buffer space and initialize bounce buffer data
254 * structures for the software IO TLB used to implement the DMA API.
255 */
256void __init
257swiotlb_init(int verbose)
258{
259 size_t default_size = IO_TLB_DEFAULT_SIZE;
260 unsigned char *vstart;
261 unsigned long bytes;
262
263 if (!io_tlb_nslabs) {
264 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
265 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
266 }
267
268 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
269
270 /* Get IO TLB memory from the low pages */
271 vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
272 if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
273 return;
274
275 if (io_tlb_start)
276 memblock_free_early(io_tlb_start,
277 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
278 pr_warn("Cannot allocate SWIOTLB buffer");
279 no_iotlb_memory = true;
280}
281
282/*
283 * Systems with larger DMA zones (those that don't support ISA) can
284 * initialize the swiotlb later using the slab allocator if needed.
285 * This should be just like above, but with some error catching.
286 */
287int
288swiotlb_late_init_with_default_size(size_t default_size)
289{
290 unsigned long bytes, req_nslabs = io_tlb_nslabs;
291 unsigned char *vstart = NULL;
292 unsigned int order;
293 int rc = 0;
294
295 if (!io_tlb_nslabs) {
296 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
297 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
298 }
299
300 /*
301 * Get IO TLB memory from the low pages
302 */
303 order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
304 io_tlb_nslabs = SLABS_PER_PAGE << order;
305 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
306
307 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
308 vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
309 order);
310 if (vstart)
311 break;
312 order--;
313 }
314
315 if (!vstart) {
316 io_tlb_nslabs = req_nslabs;
317 return -ENOMEM;
318 }
319 if (order != get_order(bytes)) {
320 printk(KERN_WARNING "Warning: only able to allocate %ld MB "
321 "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
322 io_tlb_nslabs = SLABS_PER_PAGE << order;
323 }
324 rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
325 if (rc)
326 free_pages((unsigned long)vstart, order);
327
328 return rc;
329}
330
331int
332swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
333{
334 unsigned long i, bytes;
335 unsigned char *v_overflow_buffer;
336
337 bytes = nslabs << IO_TLB_SHIFT;
338
339 io_tlb_nslabs = nslabs;
340 io_tlb_start = virt_to_phys(tlb);
341 io_tlb_end = io_tlb_start + bytes;
342
343 set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
344 memset(tlb, 0, bytes);
345
346 /*
347 * Get the overflow emergency buffer
348 */
349 v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
350 get_order(io_tlb_overflow));
351 if (!v_overflow_buffer)
352 goto cleanup2;
353
354 set_memory_decrypted((unsigned long)v_overflow_buffer,
355 io_tlb_overflow >> PAGE_SHIFT);
356 memset(v_overflow_buffer, 0, io_tlb_overflow);
357 io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
358
359 /*
360 * Allocate and initialize the free list array. This array is used
361 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
362 * between io_tlb_start and io_tlb_end.
363 */
364 io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
365 get_order(io_tlb_nslabs * sizeof(int)));
366 if (!io_tlb_list)
367 goto cleanup3;
368
369 io_tlb_orig_addr = (phys_addr_t *)
370 __get_free_pages(GFP_KERNEL,
371 get_order(io_tlb_nslabs *
372 sizeof(phys_addr_t)));
373 if (!io_tlb_orig_addr)
374 goto cleanup4;
375
376 for (i = 0; i < io_tlb_nslabs; i++) {
377 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
378 io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
379 }
380 io_tlb_index = 0;
381
382 swiotlb_print_info();
383
384 late_alloc = 1;
385
386 swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
387
388 return 0;
389
390cleanup4:
391 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
392 sizeof(int)));
393 io_tlb_list = NULL;
394cleanup3:
395 free_pages((unsigned long)v_overflow_buffer,
396 get_order(io_tlb_overflow));
397 io_tlb_overflow_buffer = 0;
398cleanup2:
399 io_tlb_end = 0;
400 io_tlb_start = 0;
401 io_tlb_nslabs = 0;
402 max_segment = 0;
403 return -ENOMEM;
404}
405
406void __init swiotlb_exit(void)
407{
408 if (!io_tlb_orig_addr)
409 return;
410
411 if (late_alloc) {
412 free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
413 get_order(io_tlb_overflow));
414 free_pages((unsigned long)io_tlb_orig_addr,
415 get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
416 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
417 sizeof(int)));
418 free_pages((unsigned long)phys_to_virt(io_tlb_start),
419 get_order(io_tlb_nslabs << IO_TLB_SHIFT));
420 } else {
421 memblock_free_late(io_tlb_overflow_buffer,
422 PAGE_ALIGN(io_tlb_overflow));
423 memblock_free_late(__pa(io_tlb_orig_addr),
424 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
425 memblock_free_late(__pa(io_tlb_list),
426 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
427 memblock_free_late(io_tlb_start,
428 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
429 }
430 io_tlb_nslabs = 0;
431 max_segment = 0;
432}
433
434int is_swiotlb_buffer(phys_addr_t paddr)
435{
436 return paddr >= io_tlb_start && paddr < io_tlb_end;
437}
438
439/*
440 * Bounce: copy the swiotlb buffer from or back to the original dma location
441 */
442static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
443 size_t size, enum dma_data_direction dir)
444{
445 unsigned long pfn = PFN_DOWN(orig_addr);
446 unsigned char *vaddr = phys_to_virt(tlb_addr);
447
448 if (PageHighMem(pfn_to_page(pfn))) {
449 /* The buffer does not have a mapping. Map it in and copy */
450 unsigned int offset = orig_addr & ~PAGE_MASK;
451 char *buffer;
452 unsigned int sz = 0;
453 unsigned long flags;
454
455 while (size) {
456 sz = min_t(size_t, PAGE_SIZE - offset, size);
457
458 local_irq_save(flags);
459 buffer = kmap_atomic(pfn_to_page(pfn));
460 if (dir == DMA_TO_DEVICE)
461 memcpy(vaddr, buffer + offset, sz);
462 else
463 memcpy(buffer + offset, vaddr, sz);
464 kunmap_atomic(buffer);
465 local_irq_restore(flags);
466
467 size -= sz;
468 pfn++;
469 vaddr += sz;
470 offset = 0;
471 }
472 } else if (dir == DMA_TO_DEVICE) {
473 memcpy(vaddr, phys_to_virt(orig_addr), size);
474 } else {
475 memcpy(phys_to_virt(orig_addr), vaddr, size);
476 }
477}
478
479phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
480 dma_addr_t tbl_dma_addr,
481 phys_addr_t orig_addr, size_t size,
482 enum dma_data_direction dir,
483 unsigned long attrs)
484{
485 unsigned long flags;
486 phys_addr_t tlb_addr;
487 unsigned int nslots, stride, index, wrap;
488 int i;
489 unsigned long mask;
490 unsigned long offset_slots;
491 unsigned long max_slots;
492
493 if (no_iotlb_memory)
494 panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
495
496 if (mem_encrypt_active())
497 pr_warn_once("%s is active and system is using DMA bounce buffers\n",
498 sme_active() ? "SME" : "SEV");
499
500 mask = dma_get_seg_boundary(hwdev);
501
502 tbl_dma_addr &= mask;
503
504 offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
505
506 /*
507 * Carefully handle integer overflow which can occur when mask == ~0UL.
508 */
509 max_slots = mask + 1
510 ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
511 : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
512
513 /*
514 * For mappings greater than or equal to a page, we limit the stride
515 * (and hence alignment) to a page size.
516 */
517 nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
518 if (size >= PAGE_SIZE)
519 stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
520 else
521 stride = 1;
522
523 BUG_ON(!nslots);
524
525 /*
526	 * Find a suitable number of IO TLB entries that will fit this
527 * request and allocate a buffer from that IO TLB pool.
528 */
529 spin_lock_irqsave(&io_tlb_lock, flags);
530 index = ALIGN(io_tlb_index, stride);
531 if (index >= io_tlb_nslabs)
532 index = 0;
533 wrap = index;
534
535 do {
536 while (iommu_is_span_boundary(index, nslots, offset_slots,
537 max_slots)) {
538 index += stride;
539 if (index >= io_tlb_nslabs)
540 index = 0;
541 if (index == wrap)
542 goto not_found;
543 }
544
545 /*
546 * If we find a slot that indicates we have 'nslots' number of
547 * contiguous buffers, we allocate the buffers from that slot
548 * and mark the entries as '0' indicating unavailable.
549 */
550 if (io_tlb_list[index] >= nslots) {
551 int count = 0;
552
553 for (i = index; i < (int) (index + nslots); i++)
554 io_tlb_list[i] = 0;
555 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
556 io_tlb_list[i] = ++count;
557 tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
558
559 /*
560 * Update the indices to avoid searching in the next
561 * round.
562 */
563 io_tlb_index = ((index + nslots) < io_tlb_nslabs
564 ? (index + nslots) : 0);
565
566 goto found;
567 }
568 index += stride;
569 if (index >= io_tlb_nslabs)
570 index = 0;
571 } while (index != wrap);
572
573not_found:
574 spin_unlock_irqrestore(&io_tlb_lock, flags);
575 if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
576 dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
577 return SWIOTLB_MAP_ERROR;
578found:
579 spin_unlock_irqrestore(&io_tlb_lock, flags);
580
581 /*
582 * Save away the mapping from the original address to the DMA address.
583 * This is needed when we sync the memory. Then we sync the buffer if
584 * needed.
585 */
586 for (i = 0; i < nslots; i++)
587 io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
588 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
589 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
590 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
591
592 return tlb_addr;
593}
594
595/*
596 * Allocates bounce buffer and returns its physical address.
597 */
598static phys_addr_t
599map_single(struct device *hwdev, phys_addr_t phys, size_t size,
600 enum dma_data_direction dir, unsigned long attrs)
601{
602 dma_addr_t start_dma_addr;
603
604 if (swiotlb_force == SWIOTLB_NO_FORCE) {
605 dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
606 &phys);
607 return SWIOTLB_MAP_ERROR;
608 }
609
610 start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
611 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
612 dir, attrs);
613}
614
615/*
616 * tlb_addr is the physical address of the bounce buffer to unmap.
617 */
618void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
619 size_t size, enum dma_data_direction dir,
620 unsigned long attrs)
621{
622 unsigned long flags;
623 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
624 int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
625 phys_addr_t orig_addr = io_tlb_orig_addr[index];
626
627 /*
628 * First, sync the memory before unmapping the entry
629 */
630 if (orig_addr != INVALID_PHYS_ADDR &&
631 !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
632 ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
633 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
634
635 /*
636 * Return the buffer to the free list by setting the corresponding
637 * entries to indicate the number of contiguous entries available.
638 * While returning the entries to the free list, we merge the entries
639 * with slots below and above the pool being returned.
640 */
641 spin_lock_irqsave(&io_tlb_lock, flags);
642 {
643 count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
644 io_tlb_list[index + nslots] : 0);
645 /*
646 * Step 1: return the slots to the free list, merging the
647		 * slots with succeeding slots
648 */
649 for (i = index + nslots - 1; i >= index; i--) {
650 io_tlb_list[i] = ++count;
651 io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
652 }
653 /*
654 * Step 2: merge the returned slots with the preceding slots,
655 * if available (non zero)
656 */
657		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
658 io_tlb_list[i] = ++count;
659 }
660 spin_unlock_irqrestore(&io_tlb_lock, flags);
661}
662
663void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
664 size_t size, enum dma_data_direction dir,
665 enum dma_sync_target target)
666{
667 int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
668 phys_addr_t orig_addr = io_tlb_orig_addr[index];
669
670 if (orig_addr == INVALID_PHYS_ADDR)
671 return;
672 orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
673
674 switch (target) {
675 case SYNC_FOR_CPU:
676 if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
677 swiotlb_bounce(orig_addr, tlb_addr,
678 size, DMA_FROM_DEVICE);
679 else
680 BUG_ON(dir != DMA_TO_DEVICE);
681 break;
682 case SYNC_FOR_DEVICE:
683 if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
684 swiotlb_bounce(orig_addr, tlb_addr,
685 size, DMA_TO_DEVICE);
686 else
687 BUG_ON(dir != DMA_FROM_DEVICE);
688 break;
689 default:
690 BUG();
691 }
692}
693
694static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
695 size_t size)
696{
697 u64 mask = DMA_BIT_MASK(32);
698
699 if (dev && dev->coherent_dma_mask)
700 mask = dev->coherent_dma_mask;
701 return addr + size - 1 <= mask;
702}
703
704static void *
705swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
706 unsigned long attrs)
707{
708 phys_addr_t phys_addr;
709
710 if (swiotlb_force == SWIOTLB_NO_FORCE)
711 goto out_warn;
712
713 phys_addr = swiotlb_tbl_map_single(dev,
714 __phys_to_dma(dev, io_tlb_start),
715 0, size, DMA_FROM_DEVICE, attrs);
716 if (phys_addr == SWIOTLB_MAP_ERROR)
717 goto out_warn;
718
719 *dma_handle = __phys_to_dma(dev, phys_addr);
720 if (!dma_coherent_ok(dev, *dma_handle, size))
721 goto out_unmap;
722
723 memset(phys_to_virt(phys_addr), 0, size);
724 return phys_to_virt(phys_addr);
725
726out_unmap:
727 dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
728 (unsigned long long)dev->coherent_dma_mask,
729 (unsigned long long)*dma_handle);
730
731 /*
732 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
733 * DMA_ATTR_SKIP_CPU_SYNC is optional.
734 */
735 swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
736 DMA_ATTR_SKIP_CPU_SYNC);
737out_warn:
738 if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
739 dev_warn(dev,
740 "swiotlb: coherent allocation failed, size=%zu\n",
741 size);
742 dump_stack();
743 }
744 return NULL;
745}
746
747static bool swiotlb_free_buffer(struct device *dev, size_t size,
748 dma_addr_t dma_addr)
749{
750 phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
751
752 WARN_ON_ONCE(irqs_disabled());
753
754 if (!is_swiotlb_buffer(phys_addr))
755 return false;
756
757 /*
758 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
759 * DMA_ATTR_SKIP_CPU_SYNC is optional.
760 */
761 swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
762 DMA_ATTR_SKIP_CPU_SYNC);
763 return true;
764}
765
766static void
767swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
768 int do_panic)
769{
770 if (swiotlb_force == SWIOTLB_NO_FORCE)
771 return;
772
773 /*
774 * Ran out of IOMMU space for this operation. This is very bad.
775	 * Unfortunately the drivers cannot handle this operation properly
776	 * unless they check for dma_mapping_error (most don't).
777	 * When the mapping is small enough, return a static buffer to limit
778 * the damage, or panic when the transfer is too big.
779 */
780 dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
781 size);
782
783 if (size <= io_tlb_overflow || !do_panic)
784 return;
785
786 if (dir == DMA_BIDIRECTIONAL)
787 panic("DMA: Random memory could be DMA accessed\n");
788 if (dir == DMA_FROM_DEVICE)
789 panic("DMA: Random memory could be DMA written\n");
790 if (dir == DMA_TO_DEVICE)
791 panic("DMA: Random memory could be DMA read\n");
792}
793
794/*
795 * Map a single buffer of the indicated size for DMA in streaming mode. The
796 * DMA address to use is returned.
797 *
798 * Once the device is given the dma address, the device owns this memory until
799 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
800 */
801dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
802 unsigned long offset, size_t size,
803 enum dma_data_direction dir,
804 unsigned long attrs)
805{
806 phys_addr_t map, phys = page_to_phys(page) + offset;
807 dma_addr_t dev_addr = phys_to_dma(dev, phys);
808
809 BUG_ON(dir == DMA_NONE);
810 /*
811 * If the address happens to be in the device's DMA window,
812 * we can safely return the device addr and not worry about bounce
813 * buffering it.
814 */
815 if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
816 return dev_addr;
817
818 trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
819
820 /* Oh well, have to allocate and map a bounce buffer. */
821 map = map_single(dev, phys, size, dir, attrs);
822 if (map == SWIOTLB_MAP_ERROR) {
823 swiotlb_full(dev, size, dir, 1);
824 return __phys_to_dma(dev, io_tlb_overflow_buffer);
825 }
826
827 dev_addr = __phys_to_dma(dev, map);
828
829 /* Ensure that the address returned is DMA'ble */
830 if (dma_capable(dev, dev_addr, size))
831 return dev_addr;
832
833 attrs |= DMA_ATTR_SKIP_CPU_SYNC;
834 swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
835
836 return __phys_to_dma(dev, io_tlb_overflow_buffer);
837}
838
839/*
840 * Unmap a single streaming mode DMA translation. The dma_addr and size must
841 * match what was provided in a previous swiotlb_map_page call. All
842 * other usages are undefined.
843 *
844 * After this call, reads by the cpu to the buffer are guaranteed to see
845 * whatever the device wrote there.
846 */
847static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
848 size_t size, enum dma_data_direction dir,
849 unsigned long attrs)
850{
851 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
852
853 BUG_ON(dir == DMA_NONE);
854
855 if (is_swiotlb_buffer(paddr)) {
856 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
857 return;
858 }
859
860 if (dir != DMA_FROM_DEVICE)
861 return;
862
863 /*
864	 * phys_to_virt doesn't work with highmem pages, but we could
865	 * call dma_mark_clean() with a highmem page here. However, we
866	 * are fine since dma_mark_clean() is a no-op on POWERPC. We can
867 * make dma_mark_clean() take a physical address if necessary.
868 */
869 dma_mark_clean(phys_to_virt(paddr), size);
870}
871
872void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
873 size_t size, enum dma_data_direction dir,
874 unsigned long attrs)
875{
876 unmap_single(hwdev, dev_addr, size, dir, attrs);
877}
878
879/*
880 * Make physical memory consistent for a single streaming mode DMA translation
881 * after a transfer.
882 *
883 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
884 * using the cpu, yet do not wish to tear down the dma mapping, you must
885 * call this function before doing so. At the next point you give the dma
886 * address back to the card, you must first perform a
887 * swiotlb_dma_sync_for_device, and then the device again owns the buffer.
888 */
889static void
890swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
891 size_t size, enum dma_data_direction dir,
892 enum dma_sync_target target)
893{
894 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
895
896 BUG_ON(dir == DMA_NONE);
897
898 if (is_swiotlb_buffer(paddr)) {
899 swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
900 return;
901 }
902
903 if (dir != DMA_FROM_DEVICE)
904 return;
905
906 dma_mark_clean(phys_to_virt(paddr), size);
907}
908
909void
910swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
911 size_t size, enum dma_data_direction dir)
912{
913 swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
914}
915
916void
917swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
918 size_t size, enum dma_data_direction dir)
919{
920 swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
921}
922
923/*
924 * Map a set of buffers described by scatterlist in streaming mode for DMA.
925 * This is the scatter-gather version of the above swiotlb_map_page
926 * interface. Here the scatter gather list elements are each tagged with the
927 * appropriate dma address and length. They are obtained via
928 * sg_dma_{address,length}(SG).
929 *
930 * NOTE: An implementation may be able to use a smaller number of
931 * DMA address/length pairs than there are SG table elements
932 * (for example via virtual mapping capabilities).
933 * The routine returns the number of addr/length pairs actually
934 * used, at most nents.
935 *
936 * Device ownership issues as mentioned above for swiotlb_map_page are the
937 * same here.
938 */
939int
940swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
941 enum dma_data_direction dir, unsigned long attrs)
942{
943 struct scatterlist *sg;
944 int i;
945
946 BUG_ON(dir == DMA_NONE);
947
948 for_each_sg(sgl, sg, nelems, i) {
949 phys_addr_t paddr = sg_phys(sg);
950 dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
951
952 if (swiotlb_force == SWIOTLB_FORCE ||
953 !dma_capable(hwdev, dev_addr, sg->length)) {
954 phys_addr_t map = map_single(hwdev, sg_phys(sg),
955 sg->length, dir, attrs);
956 if (map == SWIOTLB_MAP_ERROR) {
957 /* Don't panic here, we expect map_sg users
958 to do proper error handling. */
959 swiotlb_full(hwdev, sg->length, dir, 0);
960 attrs |= DMA_ATTR_SKIP_CPU_SYNC;
961 swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
962 attrs);
963 sg_dma_len(sgl) = 0;
964 return 0;
965 }
966 sg->dma_address = __phys_to_dma(hwdev, map);
967 } else
968 sg->dma_address = dev_addr;
969 sg_dma_len(sg) = sg->length;
970 }
971 return nelems;
972}
973
974/*
975 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
976 * concerning calls here are the same as for swiotlb_unmap_page() above.
977 */
978void
979swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
980 int nelems, enum dma_data_direction dir,
981 unsigned long attrs)
982{
983 struct scatterlist *sg;
984 int i;
985
986 BUG_ON(dir == DMA_NONE);
987
988 for_each_sg(sgl, sg, nelems, i)
989 unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
990 attrs);
991}
992
993/*
994 * Make physical memory consistent for a set of streaming mode DMA translations
995 * after a transfer.
996 *
997 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
998 * and usage.
999 */
1000static void
1001swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
1002 int nelems, enum dma_data_direction dir,
1003 enum dma_sync_target target)
1004{
1005 struct scatterlist *sg;
1006 int i;
1007
1008 for_each_sg(sgl, sg, nelems, i)
1009 swiotlb_sync_single(hwdev, sg->dma_address,
1010 sg_dma_len(sg), dir, target);
1011}
1012
1013void
1014swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
1015 int nelems, enum dma_data_direction dir)
1016{
1017 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
1018}
1019
1020void
1021swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
1022 int nelems, enum dma_data_direction dir)
1023{
1024 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
1025}
1026
1027int
1028swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
1029{
1030 return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
1031}
1032
1033/*
1034 * Return whether the given device DMA address mask can be supported
1035 * properly. For example, if your device can only drive the low 24 bits
1036 * during bus mastering, then you would pass 0x00ffffff as the mask to
1037 * this function.
1038 */
1039int
1040swiotlb_dma_supported(struct device *hwdev, u64 mask)
1041{
1042 return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
1043}
1044
1045void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
1046 gfp_t gfp, unsigned long attrs)
1047{
1048 void *vaddr;
1049
1050 /* temporary workaround: */
1051 if (gfp & __GFP_NOWARN)
1052 attrs |= DMA_ATTR_NO_WARN;
1053
1054 /*
1055 * Don't print a warning when the first allocation attempt fails.
1056 * swiotlb_alloc_coherent() will print a warning when the DMA memory
1057 * allocation ultimately fails.
1058 */
1059 gfp |= __GFP_NOWARN;
1060
1061 vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
1062 if (!vaddr)
1063 vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
1064 return vaddr;
1065}
1066
1067void swiotlb_free(struct device *dev, size_t size, void *vaddr,
1068 dma_addr_t dma_addr, unsigned long attrs)
1069{
1070 if (!swiotlb_free_buffer(dev, size, dma_addr))
1071 dma_direct_free(dev, size, vaddr, dma_addr, attrs);
1072}
1073
1074const struct dma_map_ops swiotlb_dma_ops = {
1075 .mapping_error = swiotlb_dma_mapping_error,
1076 .alloc = swiotlb_alloc,
1077 .free = swiotlb_free,
1078 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
1079 .sync_single_for_device = swiotlb_sync_single_for_device,
1080 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
1081 .sync_sg_for_device = swiotlb_sync_sg_for_device,
1082 .map_sg = swiotlb_map_sg_attrs,
1083 .unmap_sg = swiotlb_unmap_sg_attrs,
1084 .map_page = swiotlb_map_page,
1085 .unmap_page = swiotlb_unmap_page,
1086 .dma_supported = dma_direct_supported,
1087};
1088EXPORT_SYMBOL(swiotlb_dma_ops);
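With swiotlb_dma_ops installed as a device's dma_map_ops, the streaming helpers above are reached through the generic DMA API rather than called directly. A minimal sketch of that calling pattern follows; my_send_one_page(), my_dev and my_page are illustrative names and are not part of this patch.

/*
 * Sketch only: generic streaming-DMA usage that ends up in
 * swiotlb_map_page()/swiotlb_unmap_page() when swiotlb_dma_ops
 * is the device's dma_map_ops table.
 */
#include <linux/dma-mapping.h>

static int my_send_one_page(struct device *my_dev, struct page *my_page,
			    size_t len)
{
	dma_addr_t handle;

	/* May bounce through the IO TLB if my_page is not device-addressable. */
	handle = dma_map_page(my_dev, my_page, 0, len, DMA_TO_DEVICE);
	if (dma_mapping_error(my_dev, handle))
		return -ENOMEM;	/* swiotlb handed back the overflow buffer */

	/* ... hand "handle" to the hardware and wait for completion ... */

	dma_unmap_page(my_dev, handle, len, DMA_TO_DEVICE);
	return 0;
}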
diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c
new file mode 100644
index 000000000000..631ddec4b60a
--- /dev/null
+++ b/kernel/dma/virt.c
@@ -0,0 +1,59 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * DMA operations that map to virtual addresses without flushing memory.
4 */
5#include <linux/export.h>
6#include <linux/mm.h>
7#include <linux/dma-mapping.h>
8#include <linux/scatterlist.h>
9
10static void *dma_virt_alloc(struct device *dev, size_t size,
11 dma_addr_t *dma_handle, gfp_t gfp,
12 unsigned long attrs)
13{
14 void *ret;
15
16 ret = (void *)__get_free_pages(gfp, get_order(size));
17 if (ret)
18 *dma_handle = (uintptr_t)ret;
19 return ret;
20}
21
22static void dma_virt_free(struct device *dev, size_t size,
23 void *cpu_addr, dma_addr_t dma_addr,
24 unsigned long attrs)
25{
26 free_pages((unsigned long)cpu_addr, get_order(size));
27}
28
29static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
30 unsigned long offset, size_t size,
31 enum dma_data_direction dir,
32 unsigned long attrs)
33{
34 return (uintptr_t)(page_address(page) + offset);
35}
36
37static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
38 int nents, enum dma_data_direction dir,
39 unsigned long attrs)
40{
41 int i;
42 struct scatterlist *sg;
43
44 for_each_sg(sgl, sg, nents, i) {
45 BUG_ON(!sg_page(sg));
46 sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
47 sg_dma_len(sg) = sg->length;
48 }
49
50 return nents;
51}
52
53const struct dma_map_ops dma_virt_ops = {
54 .alloc = dma_virt_alloc,
55 .free = dma_virt_free,
56 .map_page = dma_virt_map_page,
57 .map_sg = dma_virt_map_sg,
58};
59EXPORT_SYMBOL(dma_virt_ops);
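dma_virt_ops has no hardware behind it: the returned "DMA address" is simply the kernel virtual address, so it only suits purely software consumers of the DMA API. A hedged sketch of how such a device might adopt it is below; soft_dev and the helper names are assumptions, not taken from this patch, while set_dma_ops() is the generic helper for installing an ops table on a device.

#include <linux/dma-mapping.h>

/* Sketch: a software-only device routing the DMA API to dma_virt_ops. */
static void soft_dev_setup(struct device *soft_dev)
{
	set_dma_ops(soft_dev, &dma_virt_ops);
}

static dma_addr_t soft_dev_map(struct device *soft_dev, void *buf, size_t len)
{
	/* Resolves to dma_virt_map_page(): the handle is the virtual address. */
	return dma_map_single(soft_dev, buf, len, DMA_BIDIRECTIONAL);
}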
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80cca2b30c4f..8f0434a9951a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6482,7 +6482,7 @@ void perf_prepare_sample(struct perf_event_header *header,
6482 data->phys_addr = perf_virt_to_phys(data->addr); 6482 data->phys_addr = perf_virt_to_phys(data->addr);
6483} 6483}
6484 6484
6485static void __always_inline 6485static __always_inline void
6486__perf_event_output(struct perf_event *event, 6486__perf_event_output(struct perf_event *event,
6487 struct perf_sample_data *data, 6487 struct perf_sample_data *data,
6488 struct pt_regs *regs, 6488 struct pt_regs *regs,
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 045a37e9ddee..5d3cf407e374 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -103,7 +103,7 @@ out:
103 preempt_enable(); 103 preempt_enable();
104} 104}
105 105
106static bool __always_inline 106static __always_inline bool
107ring_buffer_has_space(unsigned long head, unsigned long tail, 107ring_buffer_has_space(unsigned long head, unsigned long tail,
108 unsigned long data_size, unsigned int size, 108 unsigned long data_size, unsigned int size,
109 bool backward) 109 bool backward)
@@ -114,7 +114,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
114 return CIRC_SPACE(tail, head, data_size) >= size; 114 return CIRC_SPACE(tail, head, data_size) >= size;
115} 115}
116 116
117static int __always_inline 117static __always_inline int
118__perf_output_begin(struct perf_output_handle *handle, 118__perf_output_begin(struct perf_output_handle *handle,
119 struct perf_event *event, unsigned int size, 119 struct perf_event *event, unsigned int size,
120 bool backward) 120 bool backward)
@@ -414,7 +414,7 @@ err:
414} 414}
415EXPORT_SYMBOL_GPL(perf_aux_output_begin); 415EXPORT_SYMBOL_GPL(perf_aux_output_begin);
416 416
417static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb) 417static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb)
418{ 418{
419 if (rb->aux_overwrite) 419 if (rb->aux_overwrite)
420 return false; 420 return false;
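The core.c and ring_buffer.c hunks only reorder declaration specifiers: with __always_inline (which wraps the inline keyword) placed after the return type, gcc's W=1/-Wextra builds typically warn that 'inline' is not at the beginning of the declaration, so the attribute is moved in front of the type. A short illustration, where foo() is a hypothetical function:

/* Old ordering, typically flagged by gcc's -Wold-style-declaration: */
static bool __always_inline foo(void);

/* New ordering, as used in the hunks above: */
static __always_inline bool foo(void);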
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 4dadeb3d6666..6f636136cccc 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -55,6 +55,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
55 BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE), 55 BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE),
56 BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), 56 BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
57 BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), 57 BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
58 BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
58}; 59};
59 60
60static void 61static void
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index edcac5de7ebc..5fa4d3138bf1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1265,11 +1265,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
1265 this.parent = NULL; 1265 this.parent = NULL;
1266 this.class = class; 1266 this.class = class;
1267 1267
1268 local_irq_save(flags); 1268 raw_local_irq_save(flags);
1269 arch_spin_lock(&lockdep_lock); 1269 arch_spin_lock(&lockdep_lock);
1270 ret = __lockdep_count_forward_deps(&this); 1270 ret = __lockdep_count_forward_deps(&this);
1271 arch_spin_unlock(&lockdep_lock); 1271 arch_spin_unlock(&lockdep_lock);
1272 local_irq_restore(flags); 1272 raw_local_irq_restore(flags);
1273 1273
1274 return ret; 1274 return ret;
1275} 1275}
@@ -1292,11 +1292,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
1292 this.parent = NULL; 1292 this.parent = NULL;
1293 this.class = class; 1293 this.class = class;
1294 1294
1295 local_irq_save(flags); 1295 raw_local_irq_save(flags);
1296 arch_spin_lock(&lockdep_lock); 1296 arch_spin_lock(&lockdep_lock);
1297 ret = __lockdep_count_backward_deps(&this); 1297 ret = __lockdep_count_backward_deps(&this);
1298 arch_spin_unlock(&lockdep_lock); 1298 arch_spin_unlock(&lockdep_lock);
1299 local_irq_restore(flags); 1299 raw_local_irq_restore(flags);
1300 1300
1301 return ret; 1301 return ret;
1302} 1302}
@@ -4411,7 +4411,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
4411 if (unlikely(!debug_locks)) 4411 if (unlikely(!debug_locks))
4412 return; 4412 return;
4413 4413
4414 local_irq_save(flags); 4414 raw_local_irq_save(flags);
4415 for (i = 0; i < curr->lockdep_depth; i++) { 4415 for (i = 0; i < curr->lockdep_depth; i++) {
4416 hlock = curr->held_locks + i; 4416 hlock = curr->held_locks + i;
4417 4417
@@ -4422,7 +4422,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
4422 print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); 4422 print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
4423 break; 4423 break;
4424 } 4424 }
4425 local_irq_restore(flags); 4425 raw_local_irq_restore(flags);
4426} 4426}
4427EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); 4427EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
4428 4428
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bc1e507be9ff..776308d2fa9e 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -181,6 +181,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
181 might_sleep(); 181 might_sleep();
182 182
183 __down_read(sem); 183 __down_read(sem);
184 rwsem_set_reader_owned(sem);
184} 185}
185 186
186EXPORT_SYMBOL(down_read_non_owner); 187EXPORT_SYMBOL(down_read_non_owner);
diff --git a/kernel/rseq.c b/kernel/rseq.c
index ae306f90c514..22b6acf1ad63 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -251,10 +251,10 @@ static int rseq_ip_fixup(struct pt_regs *regs)
251 * respect to other threads scheduled on the same CPU, and with respect 251 * respect to other threads scheduled on the same CPU, and with respect
252 * to signal handlers. 252 * to signal handlers.
253 */ 253 */
254void __rseq_handle_notify_resume(struct pt_regs *regs) 254void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
255{ 255{
256 struct task_struct *t = current; 256 struct task_struct *t = current;
257 int ret; 257 int ret, sig;
258 258
259 if (unlikely(t->flags & PF_EXITING)) 259 if (unlikely(t->flags & PF_EXITING))
260 return; 260 return;
@@ -268,7 +268,8 @@ void __rseq_handle_notify_resume(struct pt_regs *regs)
268 return; 268 return;
269 269
270error: 270error:
271 force_sig(SIGSEGV, t); 271 sig = ksig ? ksig->sig : 0;
272 force_sigsegv(sig, t);
272} 273}
273 274
274#ifdef CONFIG_DEBUG_RSEQ 275#ifdef CONFIG_DEBUG_RSEQ
diff --git a/kernel/softirq.c b/kernel/softirq.c
index de2f57fddc04..900dcfee542c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -139,9 +139,13 @@ static void __local_bh_enable(unsigned int cnt)
139{ 139{
140 lockdep_assert_irqs_disabled(); 140 lockdep_assert_irqs_disabled();
141 141
142 if (preempt_count() == cnt)
143 trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
144
142 if (softirq_count() == (cnt & SOFTIRQ_MASK)) 145 if (softirq_count() == (cnt & SOFTIRQ_MASK))
143 trace_softirqs_on(_RET_IP_); 146 trace_softirqs_on(_RET_IP_);
144 preempt_count_sub(cnt); 147
148 __preempt_count_sub(cnt);
145} 149}
146 150
147/* 151/*
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 055a4a728c00..3e93c54bd3a1 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1659,7 +1659,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
1659int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) 1659int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
1660{ 1660{
1661 switch(restart->nanosleep.type) { 1661 switch(restart->nanosleep.type) {
1662#ifdef CONFIG_COMPAT 1662#ifdef CONFIG_COMPAT_32BIT_TIME
1663 case TT_COMPAT: 1663 case TT_COMPAT:
1664 if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp)) 1664 if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
1665 return -EFAULT; 1665 return -EFAULT;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6f7a..9cdf54b04ca8 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -604,7 +604,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
604 /* 604 /*
605 * Disarm any old timer after extracting its expiry time. 605 * Disarm any old timer after extracting its expiry time.
606 */ 606 */
607 lockdep_assert_irqs_disabled();
608 607
609 ret = 0; 608 ret = 0;
610 old_incr = timer->it.cpu.incr; 609 old_incr = timer->it.cpu.incr;
@@ -1049,7 +1048,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
1049 /* 1048 /*
1050 * Now re-arm for the new expiry time. 1049 * Now re-arm for the new expiry time.
1051 */ 1050 */
1052 lockdep_assert_irqs_disabled();
1053 arm_timer(timer); 1051 arm_timer(timer);
1054unlock: 1052unlock:
1055 unlock_task_sighand(p, &flags); 1053 unlock_task_sighand(p, &flags);
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 6fa99213fc72..2b41e8e2d31d 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -28,6 +28,7 @@
28 */ 28 */
29 29
30#include <linux/export.h> 30#include <linux/export.h>
31#include <linux/kernel.h>
31#include <linux/timex.h> 32#include <linux/timex.h>
32#include <linux/capability.h> 33#include <linux/capability.h>
33#include <linux/timekeeper_internal.h> 34#include <linux/timekeeper_internal.h>
@@ -314,9 +315,10 @@ unsigned int jiffies_to_msecs(const unsigned long j)
314 return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); 315 return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
315#else 316#else
316# if BITS_PER_LONG == 32 317# if BITS_PER_LONG == 32
317 return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; 318 return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >>
319 HZ_TO_MSEC_SHR32;
318# else 320# else
319 return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN; 321 return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
320# endif 322# endif
321#endif 323#endif
322} 324}
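The jiffies_to_msecs() hunk switches the generic fallback branches (used when HZ does not divide MSEC_PER_SEC evenly, e.g. HZ=300) from truncating division to rounding up, so a non-zero jiffies count can no longer be reported as 0 ms. A small worked sketch of the 64-bit branch in plain C with illustrative values; DIV_ROUND_UP is reproduced locally here, in the kernel it comes from <linux/kernel.h>.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* HZ = 300 gives HZ_TO_MSEC_NUM = 10, HZ_TO_MSEC_DEN = 3 (10/3 ms per jiffy). */
	unsigned long j = 1;

	printf("old, truncating: %lu ms\n", (j * 10) / 3);            /* 3 */
	printf("new, rounded up: %lu ms\n", DIV_ROUND_UP(j * 10, 3)); /* 4 */
	return 0;
}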
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c9336e98ac59..a0079b4c7a49 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1360,8 +1360,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1360void 1360void
1361update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1361update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362{ 1362{
1363 struct ring_buffer *buf;
1364
1365 if (tr->stop_count) 1363 if (tr->stop_count)
1366 return; 1364 return;
1367 1365
@@ -1375,9 +1373,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1375 1373
1376 arch_spin_lock(&tr->max_lock); 1374 arch_spin_lock(&tr->max_lock);
1377 1375
1378 buf = tr->trace_buffer.buffer; 1376 swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1379 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 tr->max_buffer.buffer = buf;
1381 1377
1382 __update_max_tr(tr, tsk, cpu); 1378 __update_max_tr(tr, tsk, cpu);
1383 arch_spin_unlock(&tr->max_lock); 1379 arch_spin_unlock(&tr->max_lock);
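The update_max_tr() hunk drops the open-coded three-statement exchange in favour of the swap() helper from <linux/kernel.h>, which expands to essentially the same temporary-variable dance. The sketch below (variable names are illustrative) shows the equivalence:

/* swap() from <linux/kernel.h> is essentially:
 *   #define swap(a, b) \
 *	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
 */
struct ring_buffer *trace_buf, *max_buf, *tmp;

/* Open-coded, as removed by the hunk: */
tmp = trace_buf;
trace_buf = max_buf;
max_buf = tmp;

/* Equivalent, as added by the hunk: */
swap(trace_buf, max_buf);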
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e1c818dbc0d7..0dceb77d1d42 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -78,7 +78,8 @@ static const char * ops[] = { OPS };
78 C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ 78 C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \
79 C(INVALID_FILTER, "Meaningless filter expression"), \ 79 C(INVALID_FILTER, "Meaningless filter expression"), \
80 C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ 80 C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
81 C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), 81 C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
82 C(NO_FILTER, "No filter found"),
82 83
83#undef C 84#undef C
84#define C(a, b) FILT_ERR_##a 85#define C(a, b) FILT_ERR_##a
@@ -550,6 +551,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
550 goto out_free; 551 goto out_free;
551 } 552 }
552 553
554 if (!N) {
555 /* No program? */
556 ret = -EINVAL;
557 parse_error(pe, FILT_ERR_NO_FILTER, ptr - str);
558 goto out_free;
559 }
560
553 prog[N].pred = NULL; /* #13 */ 561 prog[N].pred = NULL; /* #13 */
554 prog[N].target = 1; /* TRUE */ 562 prog[N].target = 1; /* TRUE */
555 prog[N+1].pred = NULL; 563 prog[N+1].pred = NULL;