path: root/lib
author     Herbert Xu <herbert@gondor.apana.org.au>	2018-08-03 05:55:12 -0400
committer  Herbert Xu <herbert@gondor.apana.org.au>	2018-08-03 05:55:12 -0400
commit     c5f5aeef9b55b362ad5a0e04e4b41cd63b208842 (patch)
tree       0ee2bb561e879db0a990d998359f6516dff3393b /lib
parent     a4789089937941959be6c18fa53e1fc0189257fd (diff)
parent     c7513c2a2714204d3588ecaa170ae628fd0d217e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
Merge mainline to pick up c7513c2a2714 ("crypto/arm64: aes-ce-gcm - add missing kernel_neon_begin/end pair").
Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig             47
-rw-r--r--  lib/Kconfig.kasan        3
-rw-r--r--  lib/Makefile            12
-rw-r--r--  lib/dec_and_lock.c      16
-rw-r--r--  lib/dma-debug.c       1773
-rw-r--r--  lib/dma-direct.c       204
-rw-r--r--  lib/dma-noncoherent.c  102
-rw-r--r--  lib/dma-virt.c          61
-rw-r--r--  lib/iov_iter.c          77
-rw-r--r--  lib/percpu_ida.c         2
-rw-r--r--  lib/refcount.c          28
-rw-r--r--  lib/rhashtable.c        27
-rw-r--r--  lib/scatterlist.c        6
-rw-r--r--  lib/swiotlb.c         1087
-rw-r--r--  lib/test_bpf.c          20
-rw-r--r--  lib/test_printf.c        7
16 files changed, 161 insertions, 3311 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index e34b04b56057..706836ec314d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -420,60 +420,15 @@ config HAS_IOPORT_MAP
 	depends on HAS_IOMEM && !NO_IOPORT_MAP
 	default y
 
-config HAS_DMA
-	bool
-	depends on !NO_DMA
-	default y
+source "kernel/dma/Kconfig"
 
 config SGL_ALLOC
 	bool
 	default n
 
-config NEED_SG_DMA_LENGTH
-	bool
-
-config NEED_DMA_MAP_STATE
-	bool
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool 64BIT || PHYS_ADDR_T_64BIT
-
 config IOMMU_HELPER
 	bool
 
-config ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	bool
-
-config ARCH_HAS_SYNC_DMA_FOR_CPU
-	bool
-	select NEED_DMA_MAP_STATE
-
-config DMA_DIRECT_OPS
-	bool
-	depends on HAS_DMA
-
-config DMA_NONCOHERENT_OPS
-	bool
-	depends on HAS_DMA
-	select DMA_DIRECT_OPS
-
-config DMA_NONCOHERENT_MMAP
-	bool
-	depends on DMA_NONCOHERENT_OPS
-
-config DMA_NONCOHERENT_CACHE_SYNC
-	bool
-	depends on DMA_NONCOHERENT_OPS
-
-config DMA_VIRT_OPS
-	bool
-	depends on HAS_DMA
-
-config SWIOTLB
-	bool
-	select DMA_DIRECT_OPS
-	select NEED_DMA_MAP_STATE
-
 config CHECK_SIGNATURE
 	bool
 
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 3d35d062970d..befb127507c0 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,7 +5,8 @@ if HAVE_ARCH_KASAN
 
 config KASAN
 	bool "KASan: runtime memory debugger"
-	depends on SLUB || (SLAB && !DEBUG_SLAB)
+	depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
+	select SLUB_DEBUG if SLUB
 	select CONSTRUCTORS
 	select STACKDEPOT
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 956b320292fe..90dc5520b784 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,15 +23,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o siphash.o \
+	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
-lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
-lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y += kobject.o klist.o
 obj-y += lockref.o
@@ -98,10 +95,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
-  lib-y += dec_and_lock.o
-endif
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_RATIONAL) += rational.o
 obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
@@ -148,7 +141,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
@@ -169,8 +161,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_LRU_CACHE) += lru_cache.o
 
-obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
-
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 347fa7ac2e8a..9555b68bb774 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
+
+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+				 unsigned long *flags)
+{
+	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+	if (atomic_add_unless(atomic, -1, 1))
+		return 0;
+
+	/* Otherwise do it the slow way */
+	spin_lock_irqsave(lock, *flags);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock_irqrestore(lock, *flags);
+	return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
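The new _atomic_dec_and_lock_irqsave() follows the same contract as _atomic_dec_and_lock(): it returns nonzero only when the count actually dropped to zero, and in that case the caller holds the lock with the previous interrupt state saved in *flags. A minimal sketch of a caller follows; the obj type, obj_put() and obj_list_lock names are hypothetical and not part of this patch.

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct obj {
	atomic_t refcount;
	struct list_head node;
};

/* Assumed to also be taken from hard-IRQ context, hence the irqsave variant. */
static DEFINE_SPINLOCK(obj_list_lock);

static void obj_put(struct obj *obj)
{
	unsigned long flags;

	/* Fast path: the count did not reach zero, nothing else to do. */
	if (!_atomic_dec_and_lock_irqsave(&obj->refcount, &obj_list_lock, &flags))
		return;

	/* Count hit zero: the lock is held and IRQ state is in 'flags'. */
	list_del(&obj->node);
	spin_unlock_irqrestore(&obj_list_lock, flags);
	kfree(obj);
}

Callers would normally reach this helper through the atomic_dec_and_lock_irqsave() wrapper added alongside it in linux/spinlock.h; the exported symbol is called directly here only to match the function shown in the hunk above.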
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
deleted file mode 100644
index c007d25bee09..000000000000
--- a/lib/dma-debug.c
+++ /dev/null
@@ -1,1773 +0,0 @@
1/*
2 * Copyright (C) 2008 Advanced Micro Devices, Inc.
3 *
4 * Author: Joerg Roedel <joerg.roedel@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/sched/task_stack.h>
21#include <linux/scatterlist.h>
22#include <linux/dma-mapping.h>
23#include <linux/sched/task.h>
24#include <linux/stacktrace.h>
25#include <linux/dma-debug.h>
26#include <linux/spinlock.h>
27#include <linux/vmalloc.h>
28#include <linux/debugfs.h>
29#include <linux/uaccess.h>
30#include <linux/export.h>
31#include <linux/device.h>
32#include <linux/types.h>
33#include <linux/sched.h>
34#include <linux/ctype.h>
35#include <linux/list.h>
36#include <linux/slab.h>
37
38#include <asm/sections.h>
39
40#define HASH_SIZE 1024ULL
41#define HASH_FN_SHIFT 13
42#define HASH_FN_MASK (HASH_SIZE - 1)
43
44/* allow architectures to override this if absolutely required */
45#ifndef PREALLOC_DMA_DEBUG_ENTRIES
46#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
47#endif
48
49enum {
50 dma_debug_single,
51 dma_debug_page,
52 dma_debug_sg,
53 dma_debug_coherent,
54 dma_debug_resource,
55};
56
57enum map_err_types {
58 MAP_ERR_CHECK_NOT_APPLICABLE,
59 MAP_ERR_NOT_CHECKED,
60 MAP_ERR_CHECKED,
61};
62
63#define DMA_DEBUG_STACKTRACE_ENTRIES 5
64
65/**
66 * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
67 * @list: node on pre-allocated free_entries list
68 * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
69 * @type: single, page, sg, coherent
70 * @pfn: page frame of the start address
71 * @offset: offset of mapping relative to pfn
72 * @size: length of the mapping
73 * @direction: enum dma_data_direction
74 * @sg_call_ents: 'nents' from dma_map_sg
75 * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
76 * @map_err_type: track whether dma_mapping_error() was checked
77 * @stacktrace: support backtraces when a violation is detected
78 */
79struct dma_debug_entry {
80 struct list_head list;
81 struct device *dev;
82 int type;
83 unsigned long pfn;
84 size_t offset;
85 u64 dev_addr;
86 u64 size;
87 int direction;
88 int sg_call_ents;
89 int sg_mapped_ents;
90 enum map_err_types map_err_type;
91#ifdef CONFIG_STACKTRACE
92 struct stack_trace stacktrace;
93 unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
94#endif
95};
96
97typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
98
99struct hash_bucket {
100 struct list_head list;
101 spinlock_t lock;
102} ____cacheline_aligned_in_smp;
103
104/* Hash list to save the allocated dma addresses */
105static struct hash_bucket dma_entry_hash[HASH_SIZE];
106/* List of pre-allocated dma_debug_entry's */
107static LIST_HEAD(free_entries);
108/* Lock for the list above */
109static DEFINE_SPINLOCK(free_entries_lock);
110
111/* Global disable flag - will be set in case of an error */
112static bool global_disable __read_mostly;
113
114/* Early initialization disable flag, set at the end of dma_debug_init */
115static bool dma_debug_initialized __read_mostly;
116
117static inline bool dma_debug_disabled(void)
118{
119 return global_disable || !dma_debug_initialized;
120}
121
122/* Global error count */
123static u32 error_count;
124
125/* Global error show enable*/
126static u32 show_all_errors __read_mostly;
127/* Number of errors to show */
128static u32 show_num_errors = 1;
129
130static u32 num_free_entries;
131static u32 min_free_entries;
132static u32 nr_total_entries;
133
134/* number of preallocated entries requested by kernel cmdline */
135static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
136
137/* debugfs dentry's for the stuff above */
138static struct dentry *dma_debug_dent __read_mostly;
139static struct dentry *global_disable_dent __read_mostly;
140static struct dentry *error_count_dent __read_mostly;
141static struct dentry *show_all_errors_dent __read_mostly;
142static struct dentry *show_num_errors_dent __read_mostly;
143static struct dentry *num_free_entries_dent __read_mostly;
144static struct dentry *min_free_entries_dent __read_mostly;
145static struct dentry *filter_dent __read_mostly;
146
147/* per-driver filter related state */
148
149#define NAME_MAX_LEN 64
150
151static char current_driver_name[NAME_MAX_LEN] __read_mostly;
152static struct device_driver *current_driver __read_mostly;
153
154static DEFINE_RWLOCK(driver_name_lock);
155
156static const char *const maperr2str[] = {
157 [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
158 [MAP_ERR_NOT_CHECKED] = "dma map error not checked",
159 [MAP_ERR_CHECKED] = "dma map error checked",
160};
161
162static const char *type2name[5] = { "single", "page",
163 "scather-gather", "coherent",
164 "resource" };
165
166static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
167 "DMA_FROM_DEVICE", "DMA_NONE" };
168
169/*
170 * The access to some variables in this macro is racy. We can't use atomic_t
171 * here because all these variables are exported to debugfs. Some of them even
172 * writeable. This is also the reason why a lock won't help much. But anyway,
173 * the races are no big deal. Here is why:
174 *
175 * error_count: the addition is racy, but the worst thing that can happen is
176 * that we don't count some errors
177 * show_num_errors: the subtraction is racy. Also no big deal because in
178 * worst case this will result in one warning more in the
179 * system log than the user configured. This variable is
180 * writeable via debugfs.
181 */
182static inline void dump_entry_trace(struct dma_debug_entry *entry)
183{
184#ifdef CONFIG_STACKTRACE
185 if (entry) {
186 pr_warning("Mapped at:\n");
187 print_stack_trace(&entry->stacktrace, 0);
188 }
189#endif
190}
191
192static bool driver_filter(struct device *dev)
193{
194 struct device_driver *drv;
195 unsigned long flags;
196 bool ret;
197
198 /* driver filter off */
199 if (likely(!current_driver_name[0]))
200 return true;
201
202 /* driver filter on and initialized */
203 if (current_driver && dev && dev->driver == current_driver)
204 return true;
205
206 /* driver filter on, but we can't filter on a NULL device... */
207 if (!dev)
208 return false;
209
210 if (current_driver || !current_driver_name[0])
211 return false;
212
213 /* driver filter on but not yet initialized */
214 drv = dev->driver;
215 if (!drv)
216 return false;
217
218 /* lock to protect against change of current_driver_name */
219 read_lock_irqsave(&driver_name_lock, flags);
220
221 ret = false;
222 if (drv->name &&
223 strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
224 current_driver = drv;
225 ret = true;
226 }
227
228 read_unlock_irqrestore(&driver_name_lock, flags);
229
230 return ret;
231}
232
233#define err_printk(dev, entry, format, arg...) do { \
234 error_count += 1; \
235 if (driver_filter(dev) && \
236 (show_all_errors || show_num_errors > 0)) { \
237 WARN(1, "%s %s: " format, \
238 dev ? dev_driver_string(dev) : "NULL", \
239 dev ? dev_name(dev) : "NULL", ## arg); \
240 dump_entry_trace(entry); \
241 } \
242 if (!show_all_errors && show_num_errors > 0) \
243 show_num_errors -= 1; \
244 } while (0);
245
246/*
247 * Hash related functions
248 *
249 * Every DMA-API request is saved into a struct dma_debug_entry. To
250 * have quick access to these structs they are stored into a hash.
251 */
252static int hash_fn(struct dma_debug_entry *entry)
253{
254 /*
255 * Hash function is based on the dma address.
256 * We use bits 20-27 here as the index into the hash
257 */
258 return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
259}
260
261/*
262 * Request exclusive access to a hash bucket for a given dma_debug_entry.
263 */
264static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
265 unsigned long *flags)
266 __acquires(&dma_entry_hash[idx].lock)
267{
268 int idx = hash_fn(entry);
269 unsigned long __flags;
270
271 spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
272 *flags = __flags;
273 return &dma_entry_hash[idx];
274}
275
276/*
277 * Give up exclusive access to the hash bucket
278 */
279static void put_hash_bucket(struct hash_bucket *bucket,
280 unsigned long *flags)
281 __releases(&bucket->lock)
282{
283 unsigned long __flags = *flags;
284
285 spin_unlock_irqrestore(&bucket->lock, __flags);
286}
287
288static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
289{
290 return ((a->dev_addr == b->dev_addr) &&
291 (a->dev == b->dev)) ? true : false;
292}
293
294static bool containing_match(struct dma_debug_entry *a,
295 struct dma_debug_entry *b)
296{
297 if (a->dev != b->dev)
298 return false;
299
300 if ((b->dev_addr <= a->dev_addr) &&
301 ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
302 return true;
303
304 return false;
305}
306
307/*
308 * Search a given entry in the hash bucket list
309 */
310static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
311 struct dma_debug_entry *ref,
312 match_fn match)
313{
314 struct dma_debug_entry *entry, *ret = NULL;
315 int matches = 0, match_lvl, last_lvl = -1;
316
317 list_for_each_entry(entry, &bucket->list, list) {
318 if (!match(ref, entry))
319 continue;
320
321 /*
322 * Some drivers map the same physical address multiple
323 * times. Without a hardware IOMMU this results in the
324 * same device addresses being put into the dma-debug
325 * hash multiple times too. This can result in false
326 * positives being reported. Therefore we implement a
327 * best-fit algorithm here which returns the entry from
328 * the hash which fits best to the reference value
329 * instead of the first-fit.
330 */
331 matches += 1;
332 match_lvl = 0;
333 entry->size == ref->size ? ++match_lvl : 0;
334 entry->type == ref->type ? ++match_lvl : 0;
335 entry->direction == ref->direction ? ++match_lvl : 0;
336 entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
337
338 if (match_lvl == 4) {
339 /* perfect-fit - return the result */
340 return entry;
341 } else if (match_lvl > last_lvl) {
342 /*
343 * We found an entry that fits better then the
344 * previous one or it is the 1st match.
345 */
346 last_lvl = match_lvl;
347 ret = entry;
348 }
349 }
350
351 /*
352 * If we have multiple matches but no perfect-fit, just return
353 * NULL.
354 */
355 ret = (matches == 1) ? ret : NULL;
356
357 return ret;
358}
359
360static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
361 struct dma_debug_entry *ref)
362{
363 return __hash_bucket_find(bucket, ref, exact_match);
364}
365
366static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
367 struct dma_debug_entry *ref,
368 unsigned long *flags)
369{
370
371 unsigned int max_range = dma_get_max_seg_size(ref->dev);
372 struct dma_debug_entry *entry, index = *ref;
373 unsigned int range = 0;
374
375 while (range <= max_range) {
376 entry = __hash_bucket_find(*bucket, ref, containing_match);
377
378 if (entry)
379 return entry;
380
381 /*
382 * Nothing found, go back a hash bucket
383 */
384 put_hash_bucket(*bucket, flags);
385 range += (1 << HASH_FN_SHIFT);
386 index.dev_addr -= (1 << HASH_FN_SHIFT);
387 *bucket = get_hash_bucket(&index, flags);
388 }
389
390 return NULL;
391}
392
393/*
394 * Add an entry to a hash bucket
395 */
396static void hash_bucket_add(struct hash_bucket *bucket,
397 struct dma_debug_entry *entry)
398{
399 list_add_tail(&entry->list, &bucket->list);
400}
401
402/*
403 * Remove entry from a hash bucket list
404 */
405static void hash_bucket_del(struct dma_debug_entry *entry)
406{
407 list_del(&entry->list);
408}
409
410static unsigned long long phys_addr(struct dma_debug_entry *entry)
411{
412 if (entry->type == dma_debug_resource)
413 return __pfn_to_phys(entry->pfn) + entry->offset;
414
415 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
416}
417
418/*
419 * Dump mapping entries for debugging purposes
420 */
421void debug_dma_dump_mappings(struct device *dev)
422{
423 int idx;
424
425 for (idx = 0; idx < HASH_SIZE; idx++) {
426 struct hash_bucket *bucket = &dma_entry_hash[idx];
427 struct dma_debug_entry *entry;
428 unsigned long flags;
429
430 spin_lock_irqsave(&bucket->lock, flags);
431
432 list_for_each_entry(entry, &bucket->list, list) {
433 if (!dev || dev == entry->dev) {
434 dev_info(entry->dev,
435 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
436 type2name[entry->type], idx,
437 phys_addr(entry), entry->pfn,
438 entry->dev_addr, entry->size,
439 dir2name[entry->direction],
440 maperr2str[entry->map_err_type]);
441 }
442 }
443
444 spin_unlock_irqrestore(&bucket->lock, flags);
445 }
446}
447
448/*
449 * For each mapping (initial cacheline in the case of
450 * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
451 * scatterlist, or the cacheline specified in dma_map_single) insert
452 * into this tree using the cacheline as the key. At
453 * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
454 * the entry already exists at insertion time add a tag as a reference
455 * count for the overlapping mappings. For now, the overlap tracking
456 * just ensures that 'unmaps' balance 'maps' before marking the
457 * cacheline idle, but we should also be flagging overlaps as an API
458 * violation.
459 *
460 * Memory usage is mostly constrained by the maximum number of available
461 * dma-debug entries in that we need a free dma_debug_entry before
462 * inserting into the tree. In the case of dma_map_page and
463 * dma_alloc_coherent there is only one dma_debug_entry and one
464 * dma_active_cacheline entry to track per event. dma_map_sg(), on the
465 * other hand, consumes a single dma_debug_entry, but inserts 'nents'
466 * entries into the tree.
467 *
468 * At any time debug_dma_assert_idle() can be called to trigger a
469 * warning if any cachelines in the given page are in the active set.
470 */
471static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
472static DEFINE_SPINLOCK(radix_lock);
473#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
474#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
475#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
476
477static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
478{
479 return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
480 (entry->offset >> L1_CACHE_SHIFT);
481}
482
483static int active_cacheline_read_overlap(phys_addr_t cln)
484{
485 int overlap = 0, i;
486
487 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
488 if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
489 overlap |= 1 << i;
490 return overlap;
491}
492
493static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
494{
495 int i;
496
497 if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
498 return overlap;
499
500 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
501 if (overlap & 1 << i)
502 radix_tree_tag_set(&dma_active_cacheline, cln, i);
503 else
504 radix_tree_tag_clear(&dma_active_cacheline, cln, i);
505
506 return overlap;
507}
508
509static void active_cacheline_inc_overlap(phys_addr_t cln)
510{
511 int overlap = active_cacheline_read_overlap(cln);
512
513 overlap = active_cacheline_set_overlap(cln, ++overlap);
514
515 /* If we overflowed the overlap counter then we're potentially
516 * leaking dma-mappings. Otherwise, if maps and unmaps are
517 * balanced then this overflow may cause false negatives in
518 * debug_dma_assert_idle() as the cacheline may be marked idle
519 * prematurely.
520 */
521 WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
522 "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
523 ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
524}
525
526static int active_cacheline_dec_overlap(phys_addr_t cln)
527{
528 int overlap = active_cacheline_read_overlap(cln);
529
530 return active_cacheline_set_overlap(cln, --overlap);
531}
532
533static int active_cacheline_insert(struct dma_debug_entry *entry)
534{
535 phys_addr_t cln = to_cacheline_number(entry);
536 unsigned long flags;
537 int rc;
538
539 /* If the device is not writing memory then we don't have any
540 * concerns about the cpu consuming stale data. This mitigates
541 * legitimate usages of overlapping mappings.
542 */
543 if (entry->direction == DMA_TO_DEVICE)
544 return 0;
545
546 spin_lock_irqsave(&radix_lock, flags);
547 rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
548 if (rc == -EEXIST)
549 active_cacheline_inc_overlap(cln);
550 spin_unlock_irqrestore(&radix_lock, flags);
551
552 return rc;
553}
554
555static void active_cacheline_remove(struct dma_debug_entry *entry)
556{
557 phys_addr_t cln = to_cacheline_number(entry);
558 unsigned long flags;
559
560 /* ...mirror the insert case */
561 if (entry->direction == DMA_TO_DEVICE)
562 return;
563
564 spin_lock_irqsave(&radix_lock, flags);
565 /* since we are counting overlaps the final put of the
566 * cacheline will occur when the overlap count is 0.
567 * active_cacheline_dec_overlap() returns -1 in that case
568 */
569 if (active_cacheline_dec_overlap(cln) < 0)
570 radix_tree_delete(&dma_active_cacheline, cln);
571 spin_unlock_irqrestore(&radix_lock, flags);
572}
573
574/**
575 * debug_dma_assert_idle() - assert that a page is not undergoing dma
576 * @page: page to lookup in the dma_active_cacheline tree
577 *
578 * Place a call to this routine in cases where the cpu touching the page
579 * before the dma completes (page is dma_unmapped) will lead to data
580 * corruption.
581 */
582void debug_dma_assert_idle(struct page *page)
583{
584 static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
585 struct dma_debug_entry *entry = NULL;
586 void **results = (void **) &ents;
587 unsigned int nents, i;
588 unsigned long flags;
589 phys_addr_t cln;
590
591 if (dma_debug_disabled())
592 return;
593
594 if (!page)
595 return;
596
597 cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
598 spin_lock_irqsave(&radix_lock, flags);
599 nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
600 CACHELINES_PER_PAGE);
601 for (i = 0; i < nents; i++) {
602 phys_addr_t ent_cln = to_cacheline_number(ents[i]);
603
604 if (ent_cln == cln) {
605 entry = ents[i];
606 break;
607 } else if (ent_cln >= cln + CACHELINES_PER_PAGE)
608 break;
609 }
610 spin_unlock_irqrestore(&radix_lock, flags);
611
612 if (!entry)
613 return;
614
615 cln = to_cacheline_number(entry);
616 err_printk(entry->dev, entry,
617 "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
618 &cln);
619}
620
621/*
622 * Wrapper function for adding an entry to the hash.
623 * This function takes care of locking itself.
624 */
625static void add_dma_entry(struct dma_debug_entry *entry)
626{
627 struct hash_bucket *bucket;
628 unsigned long flags;
629 int rc;
630
631 bucket = get_hash_bucket(entry, &flags);
632 hash_bucket_add(bucket, entry);
633 put_hash_bucket(bucket, &flags);
634
635 rc = active_cacheline_insert(entry);
636 if (rc == -ENOMEM) {
637 pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
638 global_disable = true;
639 }
640
641 /* TODO: report -EEXIST errors here as overlapping mappings are
642 * not supported by the DMA API
643 */
644}
645
646static struct dma_debug_entry *__dma_entry_alloc(void)
647{
648 struct dma_debug_entry *entry;
649
650 entry = list_entry(free_entries.next, struct dma_debug_entry, list);
651 list_del(&entry->list);
652 memset(entry, 0, sizeof(*entry));
653
654 num_free_entries -= 1;
655 if (num_free_entries < min_free_entries)
656 min_free_entries = num_free_entries;
657
658 return entry;
659}
660
661/* struct dma_entry allocator
662 *
663 * The next two functions implement the allocator for
664 * struct dma_debug_entries.
665 */
666static struct dma_debug_entry *dma_entry_alloc(void)
667{
668 struct dma_debug_entry *entry;
669 unsigned long flags;
670
671 spin_lock_irqsave(&free_entries_lock, flags);
672
673 if (list_empty(&free_entries)) {
674 global_disable = true;
675 spin_unlock_irqrestore(&free_entries_lock, flags);
676 pr_err("DMA-API: debugging out of memory - disabling\n");
677 return NULL;
678 }
679
680 entry = __dma_entry_alloc();
681
682 spin_unlock_irqrestore(&free_entries_lock, flags);
683
684#ifdef CONFIG_STACKTRACE
685 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
686 entry->stacktrace.entries = entry->st_entries;
687 entry->stacktrace.skip = 2;
688 save_stack_trace(&entry->stacktrace);
689#endif
690
691 return entry;
692}
693
694static void dma_entry_free(struct dma_debug_entry *entry)
695{
696 unsigned long flags;
697
698 active_cacheline_remove(entry);
699
700 /*
701 * add to beginning of the list - this way the entries are
702 * more likely cache hot when they are reallocated.
703 */
704 spin_lock_irqsave(&free_entries_lock, flags);
705 list_add(&entry->list, &free_entries);
706 num_free_entries += 1;
707 spin_unlock_irqrestore(&free_entries_lock, flags);
708}
709
710int dma_debug_resize_entries(u32 num_entries)
711{
712 int i, delta, ret = 0;
713 unsigned long flags;
714 struct dma_debug_entry *entry;
715 LIST_HEAD(tmp);
716
717 spin_lock_irqsave(&free_entries_lock, flags);
718
719 if (nr_total_entries < num_entries) {
720 delta = num_entries - nr_total_entries;
721
722 spin_unlock_irqrestore(&free_entries_lock, flags);
723
724 for (i = 0; i < delta; i++) {
725 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
726 if (!entry)
727 break;
728
729 list_add_tail(&entry->list, &tmp);
730 }
731
732 spin_lock_irqsave(&free_entries_lock, flags);
733
734 list_splice(&tmp, &free_entries);
735 nr_total_entries += i;
736 num_free_entries += i;
737 } else {
738 delta = nr_total_entries - num_entries;
739
740 for (i = 0; i < delta && !list_empty(&free_entries); i++) {
741 entry = __dma_entry_alloc();
742 kfree(entry);
743 }
744
745 nr_total_entries -= i;
746 }
747
748 if (nr_total_entries != num_entries)
749 ret = 1;
750
751 spin_unlock_irqrestore(&free_entries_lock, flags);
752
753 return ret;
754}
755
756/*
757 * DMA-API debugging init code
758 *
759 * The init code does two things:
760 * 1. Initialize core data structures
761 * 2. Preallocate a given number of dma_debug_entry structs
762 */
763
764static int prealloc_memory(u32 num_entries)
765{
766 struct dma_debug_entry *entry, *next_entry;
767 int i;
768
769 for (i = 0; i < num_entries; ++i) {
770 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
771 if (!entry)
772 goto out_err;
773
774 list_add_tail(&entry->list, &free_entries);
775 }
776
777 num_free_entries = num_entries;
778 min_free_entries = num_entries;
779
780 pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
781
782 return 0;
783
784out_err:
785
786 list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
787 list_del(&entry->list);
788 kfree(entry);
789 }
790
791 return -ENOMEM;
792}
793
794static ssize_t filter_read(struct file *file, char __user *user_buf,
795 size_t count, loff_t *ppos)
796{
797 char buf[NAME_MAX_LEN + 1];
798 unsigned long flags;
799 int len;
800
801 if (!current_driver_name[0])
802 return 0;
803
804 /*
805 * We can't copy to userspace directly because current_driver_name can
806 * only be read under the driver_name_lock with irqs disabled. So
807 * create a temporary copy first.
808 */
809 read_lock_irqsave(&driver_name_lock, flags);
810 len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
811 read_unlock_irqrestore(&driver_name_lock, flags);
812
813 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
814}
815
816static ssize_t filter_write(struct file *file, const char __user *userbuf,
817 size_t count, loff_t *ppos)
818{
819 char buf[NAME_MAX_LEN];
820 unsigned long flags;
821 size_t len;
822 int i;
823
824 /*
825 * We can't copy from userspace directly. Access to
826 * current_driver_name is protected with a write_lock with irqs
827 * disabled. Since copy_from_user can fault and may sleep we
828 * need to copy to temporary buffer first
829 */
830 len = min(count, (size_t)(NAME_MAX_LEN - 1));
831 if (copy_from_user(buf, userbuf, len))
832 return -EFAULT;
833
834 buf[len] = 0;
835
836 write_lock_irqsave(&driver_name_lock, flags);
837
838 /*
839 * Now handle the string we got from userspace very carefully.
840 * The rules are:
841 * - only use the first token we got
842 * - token delimiter is everything looking like a space
843 * character (' ', '\n', '\t' ...)
844 *
845 */
846 if (!isalnum(buf[0])) {
847 /*
848 * If the first character userspace gave us is not
849 * alphanumerical then assume the filter should be
850 * switched off.
851 */
852 if (current_driver_name[0])
853 pr_info("DMA-API: switching off dma-debug driver filter\n");
854 current_driver_name[0] = 0;
855 current_driver = NULL;
856 goto out_unlock;
857 }
858
859 /*
860 * Now parse out the first token and use it as the name for the
861 * driver to filter for.
862 */
863 for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
864 current_driver_name[i] = buf[i];
865 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
866 break;
867 }
868 current_driver_name[i] = 0;
869 current_driver = NULL;
870
871 pr_info("DMA-API: enable driver filter for driver [%s]\n",
872 current_driver_name);
873
874out_unlock:
875 write_unlock_irqrestore(&driver_name_lock, flags);
876
877 return count;
878}
879
880static const struct file_operations filter_fops = {
881 .read = filter_read,
882 .write = filter_write,
883 .llseek = default_llseek,
884};
885
886static int dma_debug_fs_init(void)
887{
888 dma_debug_dent = debugfs_create_dir("dma-api", NULL);
889 if (!dma_debug_dent) {
890 pr_err("DMA-API: can not create debugfs directory\n");
891 return -ENOMEM;
892 }
893
894 global_disable_dent = debugfs_create_bool("disabled", 0444,
895 dma_debug_dent,
896 &global_disable);
897 if (!global_disable_dent)
898 goto out_err;
899
900 error_count_dent = debugfs_create_u32("error_count", 0444,
901 dma_debug_dent, &error_count);
902 if (!error_count_dent)
903 goto out_err;
904
905 show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
906 dma_debug_dent,
907 &show_all_errors);
908 if (!show_all_errors_dent)
909 goto out_err;
910
911 show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
912 dma_debug_dent,
913 &show_num_errors);
914 if (!show_num_errors_dent)
915 goto out_err;
916
917 num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
918 dma_debug_dent,
919 &num_free_entries);
920 if (!num_free_entries_dent)
921 goto out_err;
922
923 min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
924 dma_debug_dent,
925 &min_free_entries);
926 if (!min_free_entries_dent)
927 goto out_err;
928
929 filter_dent = debugfs_create_file("driver_filter", 0644,
930 dma_debug_dent, NULL, &filter_fops);
931 if (!filter_dent)
932 goto out_err;
933
934 return 0;
935
936out_err:
937 debugfs_remove_recursive(dma_debug_dent);
938
939 return -ENOMEM;
940}
941
942static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
943{
944 struct dma_debug_entry *entry;
945 unsigned long flags;
946 int count = 0, i;
947
948 for (i = 0; i < HASH_SIZE; ++i) {
949 spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
950 list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
951 if (entry->dev == dev) {
952 count += 1;
953 *out_entry = entry;
954 }
955 }
956 spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
957 }
958
959 return count;
960}
961
962static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
963{
964 struct device *dev = data;
965 struct dma_debug_entry *uninitialized_var(entry);
966 int count;
967
968 if (dma_debug_disabled())
969 return 0;
970
971 switch (action) {
972 case BUS_NOTIFY_UNBOUND_DRIVER:
973 count = device_dma_allocations(dev, &entry);
974 if (count == 0)
975 break;
976 err_printk(dev, entry, "DMA-API: device driver has pending "
977 "DMA allocations while released from device "
978 "[count=%d]\n"
979 "One of leaked entries details: "
980 "[device address=0x%016llx] [size=%llu bytes] "
981 "[mapped with %s] [mapped as %s]\n",
982 count, entry->dev_addr, entry->size,
983 dir2name[entry->direction], type2name[entry->type]);
984 break;
985 default:
986 break;
987 }
988
989 return 0;
990}
991
992void dma_debug_add_bus(struct bus_type *bus)
993{
994 struct notifier_block *nb;
995
996 if (dma_debug_disabled())
997 return;
998
999 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
1000 if (nb == NULL) {
1001 pr_err("dma_debug_add_bus: out of memory\n");
1002 return;
1003 }
1004
1005 nb->notifier_call = dma_debug_device_change;
1006
1007 bus_register_notifier(bus, nb);
1008}
1009
1010static int dma_debug_init(void)
1011{
1012 int i;
1013
1014 /* Do not use dma_debug_initialized here, since we really want to be
1015 * called to set dma_debug_initialized
1016 */
1017 if (global_disable)
1018 return 0;
1019
1020 for (i = 0; i < HASH_SIZE; ++i) {
1021 INIT_LIST_HEAD(&dma_entry_hash[i].list);
1022 spin_lock_init(&dma_entry_hash[i].lock);
1023 }
1024
1025 if (dma_debug_fs_init() != 0) {
1026 pr_err("DMA-API: error creating debugfs entries - disabling\n");
1027 global_disable = true;
1028
1029 return 0;
1030 }
1031
1032 if (prealloc_memory(nr_prealloc_entries) != 0) {
1033 pr_err("DMA-API: debugging out of memory error - disabled\n");
1034 global_disable = true;
1035
1036 return 0;
1037 }
1038
1039 nr_total_entries = num_free_entries;
1040
1041 dma_debug_initialized = true;
1042
1043 pr_info("DMA-API: debugging enabled by kernel config\n");
1044 return 0;
1045}
1046core_initcall(dma_debug_init);
1047
1048static __init int dma_debug_cmdline(char *str)
1049{
1050 if (!str)
1051 return -EINVAL;
1052
1053 if (strncmp(str, "off", 3) == 0) {
1054 pr_info("DMA-API: debugging disabled on kernel command line\n");
1055 global_disable = true;
1056 }
1057
1058 return 0;
1059}
1060
1061static __init int dma_debug_entries_cmdline(char *str)
1062{
1063 if (!str)
1064 return -EINVAL;
1065 if (!get_option(&str, &nr_prealloc_entries))
1066 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
1067 return 0;
1068}
1069
1070__setup("dma_debug=", dma_debug_cmdline);
1071__setup("dma_debug_entries=", dma_debug_entries_cmdline);
1072
1073static void check_unmap(struct dma_debug_entry *ref)
1074{
1075 struct dma_debug_entry *entry;
1076 struct hash_bucket *bucket;
1077 unsigned long flags;
1078
1079 bucket = get_hash_bucket(ref, &flags);
1080 entry = bucket_find_exact(bucket, ref);
1081
1082 if (!entry) {
1083 /* must drop lock before calling dma_mapping_error */
1084 put_hash_bucket(bucket, &flags);
1085
1086 if (dma_mapping_error(ref->dev, ref->dev_addr)) {
1087 err_printk(ref->dev, NULL,
1088 "DMA-API: device driver tries to free an "
1089 "invalid DMA memory address\n");
1090 } else {
1091 err_printk(ref->dev, NULL,
1092 "DMA-API: device driver tries to free DMA "
1093 "memory it has not allocated [device "
1094 "address=0x%016llx] [size=%llu bytes]\n",
1095 ref->dev_addr, ref->size);
1096 }
1097 return;
1098 }
1099
1100 if (ref->size != entry->size) {
1101 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1102 "DMA memory with different size "
1103 "[device address=0x%016llx] [map size=%llu bytes] "
1104 "[unmap size=%llu bytes]\n",
1105 ref->dev_addr, entry->size, ref->size);
1106 }
1107
1108 if (ref->type != entry->type) {
1109 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1110 "DMA memory with wrong function "
1111 "[device address=0x%016llx] [size=%llu bytes] "
1112 "[mapped as %s] [unmapped as %s]\n",
1113 ref->dev_addr, ref->size,
1114 type2name[entry->type], type2name[ref->type]);
1115 } else if ((entry->type == dma_debug_coherent) &&
1116 (phys_addr(ref) != phys_addr(entry))) {
1117 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1118 "DMA memory with different CPU address "
1119 "[device address=0x%016llx] [size=%llu bytes] "
1120 "[cpu alloc address=0x%016llx] "
1121 "[cpu free address=0x%016llx]",
1122 ref->dev_addr, ref->size,
1123 phys_addr(entry),
1124 phys_addr(ref));
1125 }
1126
1127 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
1128 ref->sg_call_ents != entry->sg_call_ents) {
1129 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1130 "DMA sg list with different entry count "
1131 "[map count=%d] [unmap count=%d]\n",
1132 entry->sg_call_ents, ref->sg_call_ents);
1133 }
1134
1135 /*
1136 * This may be no bug in reality - but most implementations of the
1137 * DMA API don't handle this properly, so check for it here
1138 */
1139 if (ref->direction != entry->direction) {
1140 err_printk(ref->dev, entry, "DMA-API: device driver frees "
1141 "DMA memory with different direction "
1142 "[device address=0x%016llx] [size=%llu bytes] "
1143 "[mapped with %s] [unmapped with %s]\n",
1144 ref->dev_addr, ref->size,
1145 dir2name[entry->direction],
1146 dir2name[ref->direction]);
1147 }
1148
1149 /*
1150 * Drivers should use dma_mapping_error() to check the returned
1151 * addresses of dma_map_single() and dma_map_page().
1152 * If not, print this warning message. See Documentation/DMA-API.txt.
1153 */
1154 if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
1155 err_printk(ref->dev, entry,
1156 "DMA-API: device driver failed to check map error"
1157 "[device address=0x%016llx] [size=%llu bytes] "
1158 "[mapped as %s]",
1159 ref->dev_addr, ref->size,
1160 type2name[entry->type]);
1161 }
1162
1163 hash_bucket_del(entry);
1164 dma_entry_free(entry);
1165
1166 put_hash_bucket(bucket, &flags);
1167}
1168
1169static void check_for_stack(struct device *dev,
1170 struct page *page, size_t offset)
1171{
1172 void *addr;
1173 struct vm_struct *stack_vm_area = task_stack_vm_area(current);
1174
1175 if (!stack_vm_area) {
1176 /* Stack is direct-mapped. */
1177 if (PageHighMem(page))
1178 return;
1179 addr = page_address(page) + offset;
1180 if (object_is_on_stack(addr))
1181 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
1182 } else {
1183 /* Stack is vmalloced. */
1184 int i;
1185
1186 for (i = 0; i < stack_vm_area->nr_pages; i++) {
1187 if (page != stack_vm_area->pages[i])
1188 continue;
1189
1190 addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
1191 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
1192 break;
1193 }
1194 }
1195}
1196
1197static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
1198{
1199 unsigned long a1 = (unsigned long)addr;
1200 unsigned long b1 = a1 + len;
1201 unsigned long a2 = (unsigned long)start;
1202 unsigned long b2 = (unsigned long)end;
1203
1204 return !(b1 <= a2 || a1 >= b2);
1205}
1206
1207static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
1208{
1209 if (overlap(addr, len, _stext, _etext) ||
1210 overlap(addr, len, __start_rodata, __end_rodata))
1211 err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
1212}
1213
1214static void check_sync(struct device *dev,
1215 struct dma_debug_entry *ref,
1216 bool to_cpu)
1217{
1218 struct dma_debug_entry *entry;
1219 struct hash_bucket *bucket;
1220 unsigned long flags;
1221
1222 bucket = get_hash_bucket(ref, &flags);
1223
1224 entry = bucket_find_contain(&bucket, ref, &flags);
1225
1226 if (!entry) {
1227 err_printk(dev, NULL, "DMA-API: device driver tries "
1228 "to sync DMA memory it has not allocated "
1229 "[device address=0x%016llx] [size=%llu bytes]\n",
1230 (unsigned long long)ref->dev_addr, ref->size);
1231 goto out;
1232 }
1233
1234 if (ref->size > entry->size) {
1235 err_printk(dev, entry, "DMA-API: device driver syncs"
1236 " DMA memory outside allocated range "
1237 "[device address=0x%016llx] "
1238 "[allocation size=%llu bytes] "
1239 "[sync offset+size=%llu]\n",
1240 entry->dev_addr, entry->size,
1241 ref->size);
1242 }
1243
1244 if (entry->direction == DMA_BIDIRECTIONAL)
1245 goto out;
1246
1247 if (ref->direction != entry->direction) {
1248 err_printk(dev, entry, "DMA-API: device driver syncs "
1249 "DMA memory with different direction "
1250 "[device address=0x%016llx] [size=%llu bytes] "
1251 "[mapped with %s] [synced with %s]\n",
1252 (unsigned long long)ref->dev_addr, entry->size,
1253 dir2name[entry->direction],
1254 dir2name[ref->direction]);
1255 }
1256
1257 if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
1258 !(ref->direction == DMA_TO_DEVICE))
1259 err_printk(dev, entry, "DMA-API: device driver syncs "
1260 "device read-only DMA memory for cpu "
1261 "[device address=0x%016llx] [size=%llu bytes] "
1262 "[mapped with %s] [synced with %s]\n",
1263 (unsigned long long)ref->dev_addr, entry->size,
1264 dir2name[entry->direction],
1265 dir2name[ref->direction]);
1266
1267 if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
1268 !(ref->direction == DMA_FROM_DEVICE))
1269 err_printk(dev, entry, "DMA-API: device driver syncs "
1270 "device write-only DMA memory to device "
1271 "[device address=0x%016llx] [size=%llu bytes] "
1272 "[mapped with %s] [synced with %s]\n",
1273 (unsigned long long)ref->dev_addr, entry->size,
1274 dir2name[entry->direction],
1275 dir2name[ref->direction]);
1276
1277 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
1278 ref->sg_call_ents != entry->sg_call_ents) {
1279 err_printk(ref->dev, entry, "DMA-API: device driver syncs "
1280 "DMA sg list with different entry count "
1281 "[map count=%d] [sync count=%d]\n",
1282 entry->sg_call_ents, ref->sg_call_ents);
1283 }
1284
1285out:
1286 put_hash_bucket(bucket, &flags);
1287}
1288
1289static void check_sg_segment(struct device *dev, struct scatterlist *sg)
1290{
1291#ifdef CONFIG_DMA_API_DEBUG_SG
1292 unsigned int max_seg = dma_get_max_seg_size(dev);
1293 u64 start, end, boundary = dma_get_seg_boundary(dev);
1294
1295 /*
1296 * Either the driver forgot to set dma_parms appropriately, or
1297 * whoever generated the list forgot to check them.
1298 */
1299 if (sg->length > max_seg)
1300 err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
1301 sg->length, max_seg);
1302 /*
1303 * In some cases this could potentially be the DMA API
1304 * implementation's fault, but it would usually imply that
1305 * the scatterlist was built inappropriately to begin with.
1306 */
1307 start = sg_dma_address(sg);
1308 end = start + sg_dma_len(sg) - 1;
1309 if ((start ^ end) & ~boundary)
1310 err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
1311 start, end, boundary);
1312#endif
1313}
1314
1315void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
1316 size_t size, int direction, dma_addr_t dma_addr,
1317 bool map_single)
1318{
1319 struct dma_debug_entry *entry;
1320
1321 if (unlikely(dma_debug_disabled()))
1322 return;
1323
1324 if (dma_mapping_error(dev, dma_addr))
1325 return;
1326
1327 entry = dma_entry_alloc();
1328 if (!entry)
1329 return;
1330
1331 entry->dev = dev;
1332 entry->type = dma_debug_page;
1333 entry->pfn = page_to_pfn(page);
1334 entry->offset = offset,
1335 entry->dev_addr = dma_addr;
1336 entry->size = size;
1337 entry->direction = direction;
1338 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1339
1340 if (map_single)
1341 entry->type = dma_debug_single;
1342
1343 check_for_stack(dev, page, offset);
1344
1345 if (!PageHighMem(page)) {
1346 void *addr = page_address(page) + offset;
1347
1348 check_for_illegal_area(dev, addr, size);
1349 }
1350
1351 add_dma_entry(entry);
1352}
1353EXPORT_SYMBOL(debug_dma_map_page);
1354
1355void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
1356{
1357 struct dma_debug_entry ref;
1358 struct dma_debug_entry *entry;
1359 struct hash_bucket *bucket;
1360 unsigned long flags;
1361
1362 if (unlikely(dma_debug_disabled()))
1363 return;
1364
1365 ref.dev = dev;
1366 ref.dev_addr = dma_addr;
1367 bucket = get_hash_bucket(&ref, &flags);
1368
1369 list_for_each_entry(entry, &bucket->list, list) {
1370 if (!exact_match(&ref, entry))
1371 continue;
1372
1373 /*
1374 * The same physical address can be mapped multiple
1375 * times. Without a hardware IOMMU this results in the
1376 * same device addresses being put into the dma-debug
1377 * hash multiple times too. This can result in false
1378 * positives being reported. Therefore we implement a
1379 * best-fit algorithm here which updates the first entry
1380 * from the hash which fits the reference value and is
1381 * not currently listed as being checked.
1382 */
1383 if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
1384 entry->map_err_type = MAP_ERR_CHECKED;
1385 break;
1386 }
1387 }
1388
1389 put_hash_bucket(bucket, &flags);
1390}
1391EXPORT_SYMBOL(debug_dma_mapping_error);
1392
1393void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
1394 size_t size, int direction, bool map_single)
1395{
1396 struct dma_debug_entry ref = {
1397 .type = dma_debug_page,
1398 .dev = dev,
1399 .dev_addr = addr,
1400 .size = size,
1401 .direction = direction,
1402 };
1403
1404 if (unlikely(dma_debug_disabled()))
1405 return;
1406
1407 if (map_single)
1408 ref.type = dma_debug_single;
1409
1410 check_unmap(&ref);
1411}
1412EXPORT_SYMBOL(debug_dma_unmap_page);
1413
1414void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
1415 int nents, int mapped_ents, int direction)
1416{
1417 struct dma_debug_entry *entry;
1418 struct scatterlist *s;
1419 int i;
1420
1421 if (unlikely(dma_debug_disabled()))
1422 return;
1423
1424 for_each_sg(sg, s, mapped_ents, i) {
1425 entry = dma_entry_alloc();
1426 if (!entry)
1427 return;
1428
1429 entry->type = dma_debug_sg;
1430 entry->dev = dev;
1431 entry->pfn = page_to_pfn(sg_page(s));
1432 entry->offset = s->offset,
1433 entry->size = sg_dma_len(s);
1434 entry->dev_addr = sg_dma_address(s);
1435 entry->direction = direction;
1436 entry->sg_call_ents = nents;
1437 entry->sg_mapped_ents = mapped_ents;
1438
1439 check_for_stack(dev, sg_page(s), s->offset);
1440
1441 if (!PageHighMem(sg_page(s))) {
1442 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
1443 }
1444
1445 check_sg_segment(dev, s);
1446
1447 add_dma_entry(entry);
1448 }
1449}
1450EXPORT_SYMBOL(debug_dma_map_sg);
1451
1452static int get_nr_mapped_entries(struct device *dev,
1453 struct dma_debug_entry *ref)
1454{
1455 struct dma_debug_entry *entry;
1456 struct hash_bucket *bucket;
1457 unsigned long flags;
1458 int mapped_ents;
1459
1460 bucket = get_hash_bucket(ref, &flags);
1461 entry = bucket_find_exact(bucket, ref);
1462 mapped_ents = 0;
1463
1464 if (entry)
1465 mapped_ents = entry->sg_mapped_ents;
1466 put_hash_bucket(bucket, &flags);
1467
1468 return mapped_ents;
1469}
1470
1471void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
1472 int nelems, int dir)
1473{
1474 struct scatterlist *s;
1475 int mapped_ents = 0, i;
1476
1477 if (unlikely(dma_debug_disabled()))
1478 return;
1479
1480 for_each_sg(sglist, s, nelems, i) {
1481
1482 struct dma_debug_entry ref = {
1483 .type = dma_debug_sg,
1484 .dev = dev,
1485 .pfn = page_to_pfn(sg_page(s)),
1486 .offset = s->offset,
1487 .dev_addr = sg_dma_address(s),
1488 .size = sg_dma_len(s),
1489 .direction = dir,
1490 .sg_call_ents = nelems,
1491 };
1492
1493 if (mapped_ents && i >= mapped_ents)
1494 break;
1495
1496 if (!i)
1497 mapped_ents = get_nr_mapped_entries(dev, &ref);
1498
1499 check_unmap(&ref);
1500 }
1501}
1502EXPORT_SYMBOL(debug_dma_unmap_sg);
1503
1504void debug_dma_alloc_coherent(struct device *dev, size_t size,
1505 dma_addr_t dma_addr, void *virt)
1506{
1507 struct dma_debug_entry *entry;
1508
1509 if (unlikely(dma_debug_disabled()))
1510 return;
1511
1512 if (unlikely(virt == NULL))
1513 return;
1514
1515 /* handle vmalloc and linear addresses */
1516 if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
1517 return;
1518
1519 entry = dma_entry_alloc();
1520 if (!entry)
1521 return;
1522
1523 entry->type = dma_debug_coherent;
1524 entry->dev = dev;
1525 entry->offset = offset_in_page(virt);
1526 entry->size = size;
1527 entry->dev_addr = dma_addr;
1528 entry->direction = DMA_BIDIRECTIONAL;
1529
1530 if (is_vmalloc_addr(virt))
1531 entry->pfn = vmalloc_to_pfn(virt);
1532 else
1533 entry->pfn = page_to_pfn(virt_to_page(virt));
1534
1535 add_dma_entry(entry);
1536}
1537EXPORT_SYMBOL(debug_dma_alloc_coherent);
1538
1539void debug_dma_free_coherent(struct device *dev, size_t size,
1540 void *virt, dma_addr_t addr)
1541{
1542 struct dma_debug_entry ref = {
1543 .type = dma_debug_coherent,
1544 .dev = dev,
1545 .offset = offset_in_page(virt),
1546 .dev_addr = addr,
1547 .size = size,
1548 .direction = DMA_BIDIRECTIONAL,
1549 };
1550
1551 /* handle vmalloc and linear addresses */
1552 if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
1553 return;
1554
1555 if (is_vmalloc_addr(virt))
1556 ref.pfn = vmalloc_to_pfn(virt);
1557 else
1558 ref.pfn = page_to_pfn(virt_to_page(virt));
1559
1560 if (unlikely(dma_debug_disabled()))
1561 return;
1562
1563 check_unmap(&ref);
1564}
1565EXPORT_SYMBOL(debug_dma_free_coherent);
1566
1567void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
1568 int direction, dma_addr_t dma_addr)
1569{
1570 struct dma_debug_entry *entry;
1571
1572 if (unlikely(dma_debug_disabled()))
1573 return;
1574
1575 entry = dma_entry_alloc();
1576 if (!entry)
1577 return;
1578
1579 entry->type = dma_debug_resource;
1580 entry->dev = dev;
1581 entry->pfn = PHYS_PFN(addr);
1582 entry->offset = offset_in_page(addr);
1583 entry->size = size;
1584 entry->dev_addr = dma_addr;
1585 entry->direction = direction;
1586 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1587
1588 add_dma_entry(entry);
1589}
1590EXPORT_SYMBOL(debug_dma_map_resource);
1591
1592void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
1593 size_t size, int direction)
1594{
1595 struct dma_debug_entry ref = {
1596 .type = dma_debug_resource,
1597 .dev = dev,
1598 .dev_addr = dma_addr,
1599 .size = size,
1600 .direction = direction,
1601 };
1602
1603 if (unlikely(dma_debug_disabled()))
1604 return;
1605
1606 check_unmap(&ref);
1607}
1608EXPORT_SYMBOL(debug_dma_unmap_resource);
1609
1610void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
1611 size_t size, int direction)
1612{
1613 struct dma_debug_entry ref;
1614
1615 if (unlikely(dma_debug_disabled()))
1616 return;
1617
1618 ref.type = dma_debug_single;
1619 ref.dev = dev;
1620 ref.dev_addr = dma_handle;
1621 ref.size = size;
1622 ref.direction = direction;
1623 ref.sg_call_ents = 0;
1624
1625 check_sync(dev, &ref, true);
1626}
1627EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
1628
1629void debug_dma_sync_single_for_device(struct device *dev,
1630 dma_addr_t dma_handle, size_t size,
1631 int direction)
1632{
1633 struct dma_debug_entry ref;
1634
1635 if (unlikely(dma_debug_disabled()))
1636 return;
1637
1638 ref.type = dma_debug_single;
1639 ref.dev = dev;
1640 ref.dev_addr = dma_handle;
1641 ref.size = size;
1642 ref.direction = direction;
1643 ref.sg_call_ents = 0;
1644
1645 check_sync(dev, &ref, false);
1646}
1647EXPORT_SYMBOL(debug_dma_sync_single_for_device);
1648
1649void debug_dma_sync_single_range_for_cpu(struct device *dev,
1650 dma_addr_t dma_handle,
1651 unsigned long offset, size_t size,
1652 int direction)
1653{
1654 struct dma_debug_entry ref;
1655
1656 if (unlikely(dma_debug_disabled()))
1657 return;
1658
1659 ref.type = dma_debug_single;
1660 ref.dev = dev;
1661 ref.dev_addr = dma_handle;
1662 ref.size = offset + size;
1663 ref.direction = direction;
1664 ref.sg_call_ents = 0;
1665
1666 check_sync(dev, &ref, true);
1667}
1668EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
1669
1670void debug_dma_sync_single_range_for_device(struct device *dev,
1671 dma_addr_t dma_handle,
1672 unsigned long offset,
1673 size_t size, int direction)
1674{
1675 struct dma_debug_entry ref;
1676
1677 if (unlikely(dma_debug_disabled()))
1678 return;
1679
1680 ref.type = dma_debug_single;
1681 ref.dev = dev;
1682 ref.dev_addr = dma_handle;
1683 ref.size = offset + size;
1684 ref.direction = direction;
1685 ref.sg_call_ents = 0;
1686
1687 check_sync(dev, &ref, false);
1688}
1689EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
1690
1691void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
1692 int nelems, int direction)
1693{
1694 struct scatterlist *s;
1695 int mapped_ents = 0, i;
1696
1697 if (unlikely(dma_debug_disabled()))
1698 return;
1699
1700 for_each_sg(sg, s, nelems, i) {
1701
1702 struct dma_debug_entry ref = {
1703 .type = dma_debug_sg,
1704 .dev = dev,
1705 .pfn = page_to_pfn(sg_page(s)),
1706 .offset = s->offset,
1707 .dev_addr = sg_dma_address(s),
1708 .size = sg_dma_len(s),
1709 .direction = direction,
1710 .sg_call_ents = nelems,
1711 };
1712
1713 if (!i)
1714 mapped_ents = get_nr_mapped_entries(dev, &ref);
1715
1716 if (i >= mapped_ents)
1717 break;
1718
1719 check_sync(dev, &ref, true);
1720 }
1721}
1722EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
1723
1724void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
1725 int nelems, int direction)
1726{
1727 struct scatterlist *s;
1728 int mapped_ents = 0, i;
1729
1730 if (unlikely(dma_debug_disabled()))
1731 return;
1732
1733 for_each_sg(sg, s, nelems, i) {
1734
1735 struct dma_debug_entry ref = {
1736 .type = dma_debug_sg,
1737 .dev = dev,
1738 .pfn = page_to_pfn(sg_page(s)),
1739 .offset = s->offset,
1740 .dev_addr = sg_dma_address(s),
1741 .size = sg_dma_len(s),
1742 .direction = direction,
1743 .sg_call_ents = nelems,
1744 };
1745 if (!i)
1746 mapped_ents = get_nr_mapped_entries(dev, &ref);
1747
1748 if (i >= mapped_ents)
1749 break;
1750
1751 check_sync(dev, &ref, false);
1752 }
1753}
1754EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
1755
1756static int __init dma_debug_driver_setup(char *str)
1757{
1758 int i;
1759
1760 for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
1761 current_driver_name[i] = *str;
1762 if (*str == 0)
1763 break;
1764 }
1765
1766 if (current_driver_name[0])
1767 pr_info("DMA-API: enable driver filter for driver [%s]\n",
1768 current_driver_name);
1769
1770
1771 return 1;
1772}
1773__setup("dma_debug_driver=", dma_debug_driver_setup);
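
For reference, the filter installed by dma_debug_driver_setup() above is normally fed from the kernel command line; a hedged example (the driver name is made up for illustration):

	dma_debug_driver=e1000e

With a name set, dma-debug limits its error reports to that driver, as the pr_info() above indicates.
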
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
deleted file mode 100644
index 8be8106270c2..000000000000
--- a/lib/dma-direct.c
+++ /dev/null
@@ -1,204 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * DMA operations that map physical memory directly without using an IOMMU or
4 * flushing caches.
5 */
6#include <linux/export.h>
7#include <linux/mm.h>
8#include <linux/dma-direct.h>
9#include <linux/scatterlist.h>
10#include <linux/dma-contiguous.h>
11#include <linux/pfn.h>
12#include <linux/set_memory.h>
13
14#define DIRECT_MAPPING_ERROR 0
15
16/*
17 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
18 * some use it for entirely different regions:
19 */
20#ifndef ARCH_ZONE_DMA_BITS
21#define ARCH_ZONE_DMA_BITS 24
22#endif
23
24/*
25 * For AMD SEV all DMA must be to unencrypted addresses.
26 */
27static inline bool force_dma_unencrypted(void)
28{
29 return sev_active();
30}
31
32static bool
33check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
34 const char *caller)
35{
36 if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
37 if (!dev->dma_mask) {
38 dev_err(dev,
39 "%s: call on device without dma_mask\n",
40 caller);
41 return false;
42 }
43
44 if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
45 dev_err(dev,
46 "%s: overflow %pad+%zu of device mask %llx\n",
47 caller, &dma_addr, size, *dev->dma_mask);
48 }
49 return false;
50 }
51 return true;
52}
53
54static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
55{
56 dma_addr_t addr = force_dma_unencrypted() ?
57 __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
58 return addr + size - 1 <= dev->coherent_dma_mask;
59}
60
61void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
62 gfp_t gfp, unsigned long attrs)
63{
64 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
65 int page_order = get_order(size);
66 struct page *page = NULL;
67 void *ret;
68
69 /* we always manually zero the memory once we are done: */
70 gfp &= ~__GFP_ZERO;
71
72 /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
73 if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
74 gfp |= GFP_DMA;
75 if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
76 gfp |= GFP_DMA32;
77
78again:
79 /* CMA can be used only in the context which permits sleeping */
80 if (gfpflags_allow_blocking(gfp)) {
81 page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
82 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
83 dma_release_from_contiguous(dev, page, count);
84 page = NULL;
85 }
86 }
87 if (!page)
88 page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
89
90 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
91 __free_pages(page, page_order);
92 page = NULL;
93
94 if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
95 dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
96 !(gfp & (GFP_DMA32 | GFP_DMA))) {
97 gfp |= GFP_DMA32;
98 goto again;
99 }
100
101 if (IS_ENABLED(CONFIG_ZONE_DMA) &&
102 dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
103 !(gfp & GFP_DMA)) {
104 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
105 goto again;
106 }
107 }
108
109 if (!page)
110 return NULL;
111 ret = page_address(page);
112 if (force_dma_unencrypted()) {
113 set_memory_decrypted((unsigned long)ret, 1 << page_order);
114 *dma_handle = __phys_to_dma(dev, page_to_phys(page));
115 } else {
116 *dma_handle = phys_to_dma(dev, page_to_phys(page));
117 }
118 memset(ret, 0, size);
119 return ret;
120}
121
122/*
123 * NOTE: this function must never look at the dma_addr argument, because we want
124 * to be able to use it as a helper for iommu implementations as well.
125 */
126void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
127 dma_addr_t dma_addr, unsigned long attrs)
128{
129 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
130 unsigned int page_order = get_order(size);
131
132 if (force_dma_unencrypted())
133 set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
134 if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
135 free_pages((unsigned long)cpu_addr, page_order);
136}
137
138dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
139 unsigned long offset, size_t size, enum dma_data_direction dir,
140 unsigned long attrs)
141{
142 dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
143
144 if (!check_addr(dev, dma_addr, size, __func__))
145 return DIRECT_MAPPING_ERROR;
146 return dma_addr;
147}
148
149int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
150 enum dma_data_direction dir, unsigned long attrs)
151{
152 int i;
153 struct scatterlist *sg;
154
155 for_each_sg(sgl, sg, nents, i) {
156 BUG_ON(!sg_page(sg));
157
158 sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
159 if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
160 return 0;
161 sg_dma_len(sg) = sg->length;
162 }
163
164 return nents;
165}
166
167int dma_direct_supported(struct device *dev, u64 mask)
168{
169#ifdef CONFIG_ZONE_DMA
170 if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
171 return 0;
172#else
173 /*
174 * Because 32-bit DMA masks are so common we expect every architecture
175 * to be able to satisfy them - either by not supporting more physical
176 * memory, or by providing a ZONE_DMA32. If neither is the case, the
177 * architecture needs to use an IOMMU instead of the direct mapping.
178 */
179 if (mask < DMA_BIT_MASK(32))
180 return 0;
181#endif
182 /*
183 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
184 * if the device itself might support it.
185 */
186 if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
187 return 0;
188 return 1;
189}
190
191int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
192{
193 return dma_addr == DIRECT_MAPPING_ERROR;
194}
195
196const struct dma_map_ops dma_direct_ops = {
197 .alloc = dma_direct_alloc,
198 .free = dma_direct_free,
199 .map_page = dma_direct_map_page,
200 .map_sg = dma_direct_map_sg,
201 .dma_supported = dma_direct_supported,
202 .mapping_error = dma_direct_mapping_error,
203};
204EXPORT_SYMBOL(dma_direct_ops);
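
The interesting part of the deleted dma_direct_alloc() is how it derives an initial GFP zone from the device's coherent DMA mask and only widens it after a failed attempt. A standalone user-space sketch of just the zone-selection rule (illustration only, not kernel code; the constants mirror the ones in the file):

/*
 * User-space sketch of dma_direct_alloc()'s zone selection: pick the
 * narrowest zone the coherent mask requires, widen only on failure.
 */
#include <stdint.h>
#include <stdio.h>

#define ARCH_ZONE_DMA_BITS	24
#define DMA_BIT_MASK(n)		(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

static const char *first_zone_for(uint64_t coherent_mask)
{
	if (coherent_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return "GFP_DMA";
	if (coherent_mask <= DMA_BIT_MASK(32))
		return "GFP_DMA32";
	return "no zone restriction";
}

int main(void)
{
	printf("%s\n", first_zone_for(DMA_BIT_MASK(24)));	/* GFP_DMA */
	printf("%s\n", first_zone_for(DMA_BIT_MASK(32)));	/* GFP_DMA32 */
	printf("%s\n", first_zone_for(~0ULL));			/* unrestricted */
	return 0;
}

The retry loop in the real function additionally falls back to GFP_DMA32 and then GFP_DMA when the page that did come back is not addressable under the coherent mask.
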
diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c
deleted file mode 100644
index 79e9a757387f..000000000000
--- a/lib/dma-noncoherent.c
+++ /dev/null
@@ -1,102 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Christoph Hellwig.
4 *
5 * DMA operations that map physical memory directly without providing cache
6 * coherence.
7 */
8#include <linux/export.h>
9#include <linux/mm.h>
10#include <linux/dma-direct.h>
11#include <linux/dma-noncoherent.h>
12#include <linux/scatterlist.h>
13
14static void dma_noncoherent_sync_single_for_device(struct device *dev,
15 dma_addr_t addr, size_t size, enum dma_data_direction dir)
16{
17 arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
18}
19
20static void dma_noncoherent_sync_sg_for_device(struct device *dev,
21 struct scatterlist *sgl, int nents, enum dma_data_direction dir)
22{
23 struct scatterlist *sg;
24 int i;
25
26 for_each_sg(sgl, sg, nents, i)
27 arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
28}
29
30static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
31 unsigned long offset, size_t size, enum dma_data_direction dir,
32 unsigned long attrs)
33{
34 dma_addr_t addr;
35
36 addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
37 if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
38 arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
39 size, dir);
40 return addr;
41}
42
43static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
44 int nents, enum dma_data_direction dir, unsigned long attrs)
45{
46 nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
47 if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
48 dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
49 return nents;
50}
51
52#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
53static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
54 dma_addr_t addr, size_t size, enum dma_data_direction dir)
55{
56 arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
57}
58
59static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
60 struct scatterlist *sgl, int nents, enum dma_data_direction dir)
61{
62 struct scatterlist *sg;
63 int i;
64
65 for_each_sg(sgl, sg, nents, i)
66 arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
67}
68
69static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
70 size_t size, enum dma_data_direction dir, unsigned long attrs)
71{
72 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
73 dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
74}
75
76static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
77 int nents, enum dma_data_direction dir, unsigned long attrs)
78{
79 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
80 dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
81}
82#endif
83
84const struct dma_map_ops dma_noncoherent_ops = {
85 .alloc = arch_dma_alloc,
86 .free = arch_dma_free,
87 .mmap = arch_dma_mmap,
88 .sync_single_for_device = dma_noncoherent_sync_single_for_device,
89 .sync_sg_for_device = dma_noncoherent_sync_sg_for_device,
90 .map_page = dma_noncoherent_map_page,
91 .map_sg = dma_noncoherent_map_sg,
92#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
93 .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu,
94 .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu,
95 .unmap_page = dma_noncoherent_unmap_page,
96 .unmap_sg = dma_noncoherent_unmap_sg,
97#endif
98 .dma_supported = dma_direct_supported,
99 .mapping_error = dma_direct_mapping_error,
100 .cache_sync = arch_dma_cache_sync,
101};
102EXPORT_SYMBOL(dma_noncoherent_ops);
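
dma_noncoherent_ops above leaves all cache maintenance to arch_sync_dma_for_device()/arch_sync_dma_for_cpu(). A hedged sketch of the device-direction hook an architecture might supply; the cache helpers are hypothetical stand-ins, not real kernel APIs:

#include <linux/dma-noncoherent.h>

void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	if (dir == DMA_FROM_DEVICE)
		my_dcache_inv_range(paddr, paddr + size);	/* hypothetical helper */
	else
		my_dcache_wb_range(paddr, paddr + size);	/* hypothetical helper */
}
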
diff --git a/lib/dma-virt.c b/lib/dma-virt.c
deleted file mode 100644
index 8e61a02ef9ca..000000000000
--- a/lib/dma-virt.c
+++ /dev/null
@@ -1,61 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * lib/dma-virt.c
4 *
5 * DMA operations that map to virtual addresses without flushing memory.
6 */
7#include <linux/export.h>
8#include <linux/mm.h>
9#include <linux/dma-mapping.h>
10#include <linux/scatterlist.h>
11
12static void *dma_virt_alloc(struct device *dev, size_t size,
13 dma_addr_t *dma_handle, gfp_t gfp,
14 unsigned long attrs)
15{
16 void *ret;
17
18 ret = (void *)__get_free_pages(gfp, get_order(size));
19 if (ret)
20 *dma_handle = (uintptr_t)ret;
21 return ret;
22}
23
24static void dma_virt_free(struct device *dev, size_t size,
25 void *cpu_addr, dma_addr_t dma_addr,
26 unsigned long attrs)
27{
28 free_pages((unsigned long)cpu_addr, get_order(size));
29}
30
31static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
32 unsigned long offset, size_t size,
33 enum dma_data_direction dir,
34 unsigned long attrs)
35{
36 return (uintptr_t)(page_address(page) + offset);
37}
38
39static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
40 int nents, enum dma_data_direction dir,
41 unsigned long attrs)
42{
43 int i;
44 struct scatterlist *sg;
45
46 for_each_sg(sgl, sg, nents, i) {
47 BUG_ON(!sg_page(sg));
48 sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
49 sg_dma_len(sg) = sg->length;
50 }
51
52 return nents;
53}
54
55const struct dma_map_ops dma_virt_ops = {
56 .alloc = dma_virt_alloc,
57 .free = dma_virt_free,
58 .map_page = dma_virt_map_page,
59 .map_sg = dma_virt_map_sg,
60};
61EXPORT_SYMBOL(dma_virt_ops);
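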
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e43cd54c84c..8be175df3075 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -596,15 +596,70 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
596 return ret; 596 return ret;
597} 597}
598 598
599static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
600 struct iov_iter *i)
601{
602 struct pipe_inode_info *pipe = i->pipe;
603 size_t n, off, xfer = 0;
604 int idx;
605
606 if (!sanity(i))
607 return 0;
608
609 bytes = n = push_pipe(i, bytes, &idx, &off);
610 if (unlikely(!n))
611 return 0;
612 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
613 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
614 unsigned long rem;
615
616 rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
617 chunk);
618 i->idx = idx;
619 i->iov_offset = off + chunk - rem;
620 xfer += chunk - rem;
621 if (rem)
622 break;
623 n -= chunk;
624 addr += chunk;
625 }
626 i->count -= xfer;
627 return xfer;
628}
629
630/**
631 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
632 * @addr: source kernel address
633 * @bytes: total transfer length
634 * @iter: destination iterator
635 *
636 * The pmem driver arranges for filesystem-dax to use this facility via
637 * dax_copy_to_iter() for protecting read/write to persistent memory.
638 * Unless / until an architecture can guarantee identical performance
639 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
640 * performance regression to switch more users to the mcsafe version.
641 *
642 * Otherwise, the main differences between this and the typical _copy_to_iter() are:
643 *
644 * * Typical tail/residue handling after a fault retries the copy
645 * byte-by-byte until the fault happens again. Re-triggering machine
646 * checks is potentially fatal so the implementation uses source
647 * alignment and poison alignment assumptions to avoid re-triggering
648 * hardware exceptions.
649 *
650 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
651 * Compare to copy_to_iter() where only ITER_IOVEC attempts might return
652 * a short copy.
653 *
654 * See MCSAFE_TEST for self-test.
655 */
599size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) 656size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
600{ 657{
601 const char *from = addr; 658 const char *from = addr;
602 unsigned long rem, curr_addr, s_addr = (unsigned long) addr; 659 unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
603 660
604 if (unlikely(i->type & ITER_PIPE)) { 661 if (unlikely(i->type & ITER_PIPE))
605 WARN_ON(1); 662 return copy_pipe_to_iter_mcsafe(addr, bytes, i);
606 return 0;
607 }
608 if (iter_is_iovec(i)) 663 if (iter_is_iovec(i))
609 might_fault(); 664 might_fault();
610 iterate_and_advance(i, bytes, v, 665 iterate_and_advance(i, bytes, v,
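
As the new kernel-doc above stresses, every iterator type can now come back short when poison is consumed. A hedged sketch, not taken from this patch, of how a pmem-style caller might turn a short copy into an I/O error (the wrapper name is invented):

#include <linux/uio.h>

static ssize_t read_from_pmem(void *kaddr, size_t len, struct iov_iter *i)
{
	size_t copied = _copy_to_iter_mcsafe(kaddr, len, i);

	if (copied != len)
		return -EIO;	/* source poison: partial transfer */
	return copied;
}
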
@@ -701,6 +756,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
701EXPORT_SYMBOL(_copy_from_iter_nocache); 756EXPORT_SYMBOL(_copy_from_iter_nocache);
702 757
703#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 758#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
759/**
760 * _copy_from_iter_flushcache - write destination through cpu cache
761 * @addr: destination kernel address
762 * @bytes: total transfer length
763 * @iter: source iterator
764 *
765 * The pmem driver arranges for filesystem-dax to use this facility via
766 * dax_copy_from_iter() for ensuring that writes to persistent memory
767 * are flushed through the CPU cache. It is differentiated from
768 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
769 * all iterator types. _copy_from_iter_nocache() only attempts to
770 * bypass the cache for the ITER_IOVEC case, and on some archs may use
771 * instructions that strand dirty-data in the cache.
772 */
704size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) 773size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
705{ 774{
706 char *to = addr; 775 char *to = addr;
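
A hedged sketch of how a pmem-style driver might pick between the two non-temporal copy variants, keyed off the same config symbol that guards the definition above (the wrapper name is invented):

#include <linux/uio.h>

static size_t copy_into_pmem(void *pmem_addr, size_t bytes, struct iov_iter *i)
{
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
	/* guaranteed to reach the media for every iterator type */
	return _copy_from_iter_flushcache(pmem_addr, bytes, i);
#else
	/* best effort: only ITER_IOVEC bypasses the cache */
	return _copy_from_iter_nocache(pmem_addr, bytes, i);
#endif
}
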
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 9bbd9c5d375a..beb14839b41a 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -141,7 +141,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
141 spin_lock_irqsave(&tags->lock, flags); 141 spin_lock_irqsave(&tags->lock, flags);
142 142
143 /* Fastpath */ 143 /* Fastpath */
144 if (likely(tags->nr_free >= 0)) { 144 if (likely(tags->nr_free)) {
145 tag = tags->freelist[--tags->nr_free]; 145 tag = tags->freelist[--tags->nr_free];
146 spin_unlock_irqrestore(&tags->lock, flags); 146 spin_unlock_irqrestore(&tags->lock, flags);
147 return tag; 147 return tag;
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..d3b81cefce91 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
350} 350}
351EXPORT_SYMBOL(refcount_dec_and_lock); 351EXPORT_SYMBOL(refcount_dec_and_lock);
352 352
353/**
354 * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
355 * interrupts if able to decrement refcount to 0
356 * @r: the refcount
357 * @lock: the spinlock to be locked
358 * @flags: saved IRQ-flags if the lock is acquired
359 *
360 * Same as refcount_dec_and_lock() above except that the spinlock is acquired
361 * with disabled interrupts.
362 *
363 * Return: true and hold spinlock if able to decrement refcount to 0, false
364 * otherwise
365 */
366bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
367 unsigned long *flags)
368{
369 if (refcount_dec_not_one(r))
370 return false;
371
372 spin_lock_irqsave(lock, *flags);
373 if (!refcount_dec_and_test(r)) {
374 spin_unlock_irqrestore(lock, *flags);
375 return false;
376 }
377
378 return true;
379}
380EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
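
A hedged usage sketch for the new helper: dropping what may be the last reference to an object that sits on a list protected by an IRQ-safe spinlock. struct foo, foo_put() and the locking scheme are invented for illustration:

#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	refcount_t ref;
	struct list_head node;
};

static void foo_put(struct foo *f, spinlock_t *list_lock)
{
	unsigned long flags;

	if (!refcount_dec_and_lock_irqsave(&f->ref, list_lock, &flags))
		return;

	/* last reference: the list lock is now held with IRQs disabled */
	list_del(&f->node);
	spin_unlock_irqrestore(list_lock, flags);
	kfree(f);
}
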
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9427b5766134..e5c8586cf717 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -774,7 +774,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
774 skip++; 774 skip++;
775 if (list == iter->list) { 775 if (list == iter->list) {
776 iter->p = p; 776 iter->p = p;
777 skip = skip; 777 iter->skip = skip;
778 goto found; 778 goto found;
779 } 779 }
780 } 780 }
@@ -964,8 +964,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
964 964
965static size_t rounded_hashtable_size(const struct rhashtable_params *params) 965static size_t rounded_hashtable_size(const struct rhashtable_params *params)
966{ 966{
967 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), 967 size_t retsize;
968 (unsigned long)params->min_size); 968
969 if (params->nelem_hint)
970 retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
971 (unsigned long)params->min_size);
972 else
973 retsize = max(HASH_DEFAULT_SIZE,
974 (unsigned long)params->min_size);
975
976 return retsize;
969} 977}
970 978
971static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) 979static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
@@ -1022,8 +1030,6 @@ int rhashtable_init(struct rhashtable *ht,
1022 struct bucket_table *tbl; 1030 struct bucket_table *tbl;
1023 size_t size; 1031 size_t size;
1024 1032
1025 size = HASH_DEFAULT_SIZE;
1026
1027 if ((!params->key_len && !params->obj_hashfn) || 1033 if ((!params->key_len && !params->obj_hashfn) ||
1028 (params->obj_hashfn && !params->obj_cmpfn)) 1034 (params->obj_hashfn && !params->obj_cmpfn))
1029 return -EINVAL; 1035 return -EINVAL;
@@ -1050,8 +1056,7 @@ int rhashtable_init(struct rhashtable *ht,
1050 1056
1051 ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); 1057 ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1052 1058
1053 if (params->nelem_hint) 1059 size = rounded_hashtable_size(&ht->p);
1054 size = rounded_hashtable_size(&ht->p);
1055 1060
1056 if (params->locks_mul) 1061 if (params->locks_mul)
1057 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); 1062 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
@@ -1143,13 +1148,14 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
1143 void (*free_fn)(void *ptr, void *arg), 1148 void (*free_fn)(void *ptr, void *arg),
1144 void *arg) 1149 void *arg)
1145{ 1150{
1146 struct bucket_table *tbl; 1151 struct bucket_table *tbl, *next_tbl;
1147 unsigned int i; 1152 unsigned int i;
1148 1153
1149 cancel_work_sync(&ht->run_work); 1154 cancel_work_sync(&ht->run_work);
1150 1155
1151 mutex_lock(&ht->mutex); 1156 mutex_lock(&ht->mutex);
1152 tbl = rht_dereference(ht->tbl, ht); 1157 tbl = rht_dereference(ht->tbl, ht);
1158restart:
1153 if (free_fn) { 1159 if (free_fn) {
1154 for (i = 0; i < tbl->size; i++) { 1160 for (i = 0; i < tbl->size; i++) {
1155 struct rhash_head *pos, *next; 1161 struct rhash_head *pos, *next;
@@ -1166,7 +1172,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
1166 } 1172 }
1167 } 1173 }
1168 1174
1175 next_tbl = rht_dereference(tbl->future_tbl, ht);
1169 bucket_table_free(tbl); 1176 bucket_table_free(tbl);
1177 if (next_tbl) {
1178 tbl = next_tbl;
1179 goto restart;
1180 }
1170 mutex_unlock(&ht->mutex); 1181 mutex_unlock(&ht->mutex);
1171} 1182}
1172EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); 1183EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
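
With rounded_hashtable_size() now tolerating a zero nelem_hint, a caller can omit the hint entirely and still get a HASH_DEFAULT_SIZE initial table. A hedged sketch of such a minimal user (struct item and its key layout are invented):

#include <linux/rhashtable.h>

struct item {
	u32 key;
	struct rhash_head node;
};

static const struct rhashtable_params item_params = {
	.key_len     = sizeof(u32),
	.key_offset  = offsetof(struct item, key),
	.head_offset = offsetof(struct item, node),
	/* nelem_hint deliberately left at 0 */
};

static int items_init(struct rhashtable *ht)
{
	return rhashtable_init(ht, &item_params);
}
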
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 1642fd507a96..7c6096a71704 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -24,9 +24,6 @@
24 **/ 24 **/
25struct scatterlist *sg_next(struct scatterlist *sg) 25struct scatterlist *sg_next(struct scatterlist *sg)
26{ 26{
27#ifdef CONFIG_DEBUG_SG
28 BUG_ON(sg->sg_magic != SG_MAGIC);
29#endif
30 if (sg_is_last(sg)) 27 if (sg_is_last(sg))
31 return NULL; 28 return NULL;
32 29
@@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
111 for_each_sg(sgl, sg, nents, i) 108 for_each_sg(sgl, sg, nents, i)
112 ret = sg; 109 ret = sg;
113 110
114#ifdef CONFIG_DEBUG_SG
115 BUG_ON(sgl[0].sg_magic != SG_MAGIC);
116 BUG_ON(!sg_is_last(ret)); 111 BUG_ON(!sg_is_last(ret));
117#endif
118 return ret; 112 return ret;
119} 113}
120EXPORT_SYMBOL(sg_last); 114EXPORT_SYMBOL(sg_last);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
deleted file mode 100644
index 04b68d9dffac..000000000000
--- a/lib/swiotlb.c
+++ /dev/null
@@ -1,1087 +0,0 @@
1/*
2 * Dynamic DMA mapping support.
3 *
4 * This implementation is a fallback for platforms that do not support
5 * I/O TLBs (aka DMA address translation hardware).
6 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
7 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
8 * Copyright (C) 2000, 2003 Hewlett-Packard Co
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 *
11 * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
12 * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
13 * unnecessary i-cache flushing.
14 * 04/07/.. ak Better overflow handling. Assorted fixes.
15 * 05/09/10 linville Add support for syncing ranges, support syncing for
16 * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
17 * 08/12/11 beckyb Add highmem support
18 */
19
20#include <linux/cache.h>
21#include <linux/dma-direct.h>
22#include <linux/mm.h>
23#include <linux/export.h>
24#include <linux/spinlock.h>
25#include <linux/string.h>
26#include <linux/swiotlb.h>
27#include <linux/pfn.h>
28#include <linux/types.h>
29#include <linux/ctype.h>
30#include <linux/highmem.h>
31#include <linux/gfp.h>
32#include <linux/scatterlist.h>
33#include <linux/mem_encrypt.h>
34#include <linux/set_memory.h>
35
36#include <asm/io.h>
37#include <asm/dma.h>
38
39#include <linux/init.h>
40#include <linux/bootmem.h>
41#include <linux/iommu-helper.h>
42
43#define CREATE_TRACE_POINTS
44#include <trace/events/swiotlb.h>
45
46#define OFFSET(val,align) ((unsigned long) \
47 ( (val) & ( (align) - 1)))
48
49#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
50
51/*
52 * Minimum IO TLB size to bother booting with. Systems with mainly
53 * 64bit capable cards will only lightly use the swiotlb. If we can't
54 * allocate a contiguous 1MB, we're probably in trouble anyway.
55 */
56#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
57
58enum swiotlb_force swiotlb_force;
59
60/*
61 * Used to do a quick range check in swiotlb_tbl_unmap_single and
62 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
63 * API.
64 */
65static phys_addr_t io_tlb_start, io_tlb_end;
66
67/*
68 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
69 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
70 */
71static unsigned long io_tlb_nslabs;
72
73/*
74 * When the IOMMU overflows we return a fallback buffer. This sets the size.
75 */
76static unsigned long io_tlb_overflow = 32*1024;
77
78static phys_addr_t io_tlb_overflow_buffer;
79
80/*
81 * This is a free list describing the number of free entries available from
82 * each index
83 */
84static unsigned int *io_tlb_list;
85static unsigned int io_tlb_index;
86
87/*
88 * Max segment that we can provide which (if pages are contiguous) will
89 * not be bounced (unless SWIOTLB_FORCE is set).
90 */
91unsigned int max_segment;
92
93/*
94 * We need to save away the original address corresponding to a mapped entry
95 * for the sync operations.
96 */
97#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
98static phys_addr_t *io_tlb_orig_addr;
99
100/*
101 * Protect the above data structures in the map and unmap calls
102 */
103static DEFINE_SPINLOCK(io_tlb_lock);
104
105static int late_alloc;
106
107static int __init
108setup_io_tlb_npages(char *str)
109{
110 if (isdigit(*str)) {
111 io_tlb_nslabs = simple_strtoul(str, &str, 0);
112 /* avoid tail segment of size < IO_TLB_SEGSIZE */
113 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
114 }
115 if (*str == ',')
116 ++str;
117 if (!strcmp(str, "force")) {
118 swiotlb_force = SWIOTLB_FORCE;
119 } else if (!strcmp(str, "noforce")) {
120 swiotlb_force = SWIOTLB_NO_FORCE;
121 io_tlb_nslabs = 1;
122 }
123
124 return 0;
125}
126early_param("swiotlb", setup_io_tlb_npages);
127/* make io_tlb_overflow tunable too? */
128
129unsigned long swiotlb_nr_tbl(void)
130{
131 return io_tlb_nslabs;
132}
133EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
134
135unsigned int swiotlb_max_segment(void)
136{
137 return max_segment;
138}
139EXPORT_SYMBOL_GPL(swiotlb_max_segment);
140
141void swiotlb_set_max_segment(unsigned int val)
142{
143 if (swiotlb_force == SWIOTLB_FORCE)
144 max_segment = 1;
145 else
146 max_segment = rounddown(val, PAGE_SIZE);
147}
148
149/* default to 64MB */
150#define IO_TLB_DEFAULT_SIZE (64UL<<20)
151unsigned long swiotlb_size_or_default(void)
152{
153 unsigned long size;
154
155 size = io_tlb_nslabs << IO_TLB_SHIFT;
156
157 return size ? size : (IO_TLB_DEFAULT_SIZE);
158}
159
160static bool no_iotlb_memory;
161
162void swiotlb_print_info(void)
163{
164 unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
165 unsigned char *vstart, *vend;
166
167 if (no_iotlb_memory) {
168 pr_warn("software IO TLB: No low mem\n");
169 return;
170 }
171
172 vstart = phys_to_virt(io_tlb_start);
173 vend = phys_to_virt(io_tlb_end);
174
175 printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
176 (unsigned long long)io_tlb_start,
177 (unsigned long long)io_tlb_end,
178 bytes >> 20, vstart, vend - 1);
179}
180
181/*
182 * Early SWIOTLB allocation may be too early to allow an architecture to
183 * perform the desired operations. This function allows the architecture to
184 * call SWIOTLB when the operations are possible. It needs to be called
185 * before the SWIOTLB memory is used.
186 */
187void __init swiotlb_update_mem_attributes(void)
188{
189 void *vaddr;
190 unsigned long bytes;
191
192 if (no_iotlb_memory || late_alloc)
193 return;
194
195 vaddr = phys_to_virt(io_tlb_start);
196 bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
197 set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
198 memset(vaddr, 0, bytes);
199
200 vaddr = phys_to_virt(io_tlb_overflow_buffer);
201 bytes = PAGE_ALIGN(io_tlb_overflow);
202 set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
203 memset(vaddr, 0, bytes);
204}
205
206int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
207{
208 void *v_overflow_buffer;
209 unsigned long i, bytes;
210
211 bytes = nslabs << IO_TLB_SHIFT;
212
213 io_tlb_nslabs = nslabs;
214 io_tlb_start = __pa(tlb);
215 io_tlb_end = io_tlb_start + bytes;
216
217 /*
218 * Get the overflow emergency buffer
219 */
220 v_overflow_buffer = memblock_virt_alloc_low_nopanic(
221 PAGE_ALIGN(io_tlb_overflow),
222 PAGE_SIZE);
223 if (!v_overflow_buffer)
224 return -ENOMEM;
225
226 io_tlb_overflow_buffer = __pa(v_overflow_buffer);
227
228 /*
229 * Allocate and initialize the free list array. This array is used
230 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
231 * between io_tlb_start and io_tlb_end.
232 */
233 io_tlb_list = memblock_virt_alloc(
234 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
235 PAGE_SIZE);
236 io_tlb_orig_addr = memblock_virt_alloc(
237 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
238 PAGE_SIZE);
239 for (i = 0; i < io_tlb_nslabs; i++) {
240 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
241 io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
242 }
243 io_tlb_index = 0;
244
245 if (verbose)
246 swiotlb_print_info();
247
248 swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
249 return 0;
250}
251
252/*
253 * Statically reserve bounce buffer space and initialize bounce buffer data
254 * structures for the software IO TLB used to implement the DMA API.
255 */
256void __init
257swiotlb_init(int verbose)
258{
259 size_t default_size = IO_TLB_DEFAULT_SIZE;
260 unsigned char *vstart;
261 unsigned long bytes;
262
263 if (!io_tlb_nslabs) {
264 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
265 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
266 }
267
268 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
269
270 /* Get IO TLB memory from the low pages */
271 vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
272 if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
273 return;
274
275 if (io_tlb_start)
276 memblock_free_early(io_tlb_start,
277 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
278 pr_warn("Cannot allocate SWIOTLB buffer");
279 no_iotlb_memory = true;
280}
281
282/*
283 * Systems with larger DMA zones (those that don't support ISA) can
284 * initialize the swiotlb later using the slab allocator if needed.
285 * This should be just like above, but with some error catching.
286 */
287int
288swiotlb_late_init_with_default_size(size_t default_size)
289{
290 unsigned long bytes, req_nslabs = io_tlb_nslabs;
291 unsigned char *vstart = NULL;
292 unsigned int order;
293 int rc = 0;
294
295 if (!io_tlb_nslabs) {
296 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
297 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
298 }
299
300 /*
301 * Get IO TLB memory from the low pages
302 */
303 order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
304 io_tlb_nslabs = SLABS_PER_PAGE << order;
305 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
306
307 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
308 vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
309 order);
310 if (vstart)
311 break;
312 order--;
313 }
314
315 if (!vstart) {
316 io_tlb_nslabs = req_nslabs;
317 return -ENOMEM;
318 }
319 if (order != get_order(bytes)) {
320 printk(KERN_WARNING "Warning: only able to allocate %ld MB "
321 "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
322 io_tlb_nslabs = SLABS_PER_PAGE << order;
323 }
324 rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
325 if (rc)
326 free_pages((unsigned long)vstart, order);
327
328 return rc;
329}
330
331int
332swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
333{
334 unsigned long i, bytes;
335 unsigned char *v_overflow_buffer;
336
337 bytes = nslabs << IO_TLB_SHIFT;
338
339 io_tlb_nslabs = nslabs;
340 io_tlb_start = virt_to_phys(tlb);
341 io_tlb_end = io_tlb_start + bytes;
342
343 set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
344 memset(tlb, 0, bytes);
345
346 /*
347 * Get the overflow emergency buffer
348 */
349 v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
350 get_order(io_tlb_overflow));
351 if (!v_overflow_buffer)
352 goto cleanup2;
353
354 set_memory_decrypted((unsigned long)v_overflow_buffer,
355 io_tlb_overflow >> PAGE_SHIFT);
356 memset(v_overflow_buffer, 0, io_tlb_overflow);
357 io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
358
359 /*
360 * Allocate and initialize the free list array. This array is used
361 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
362 * between io_tlb_start and io_tlb_end.
363 */
364 io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
365 get_order(io_tlb_nslabs * sizeof(int)));
366 if (!io_tlb_list)
367 goto cleanup3;
368
369 io_tlb_orig_addr = (phys_addr_t *)
370 __get_free_pages(GFP_KERNEL,
371 get_order(io_tlb_nslabs *
372 sizeof(phys_addr_t)));
373 if (!io_tlb_orig_addr)
374 goto cleanup4;
375
376 for (i = 0; i < io_tlb_nslabs; i++) {
377 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
378 io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
379 }
380 io_tlb_index = 0;
381
382 swiotlb_print_info();
383
384 late_alloc = 1;
385
386 swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
387
388 return 0;
389
390cleanup4:
391 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
392 sizeof(int)));
393 io_tlb_list = NULL;
394cleanup3:
395 free_pages((unsigned long)v_overflow_buffer,
396 get_order(io_tlb_overflow));
397 io_tlb_overflow_buffer = 0;
398cleanup2:
399 io_tlb_end = 0;
400 io_tlb_start = 0;
401 io_tlb_nslabs = 0;
402 max_segment = 0;
403 return -ENOMEM;
404}
405
406void __init swiotlb_exit(void)
407{
408 if (!io_tlb_orig_addr)
409 return;
410
411 if (late_alloc) {
412 free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
413 get_order(io_tlb_overflow));
414 free_pages((unsigned long)io_tlb_orig_addr,
415 get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
416 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
417 sizeof(int)));
418 free_pages((unsigned long)phys_to_virt(io_tlb_start),
419 get_order(io_tlb_nslabs << IO_TLB_SHIFT));
420 } else {
421 memblock_free_late(io_tlb_overflow_buffer,
422 PAGE_ALIGN(io_tlb_overflow));
423 memblock_free_late(__pa(io_tlb_orig_addr),
424 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
425 memblock_free_late(__pa(io_tlb_list),
426 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
427 memblock_free_late(io_tlb_start,
428 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
429 }
430 io_tlb_nslabs = 0;
431 max_segment = 0;
432}
433
434int is_swiotlb_buffer(phys_addr_t paddr)
435{
436 return paddr >= io_tlb_start && paddr < io_tlb_end;
437}
438
439/*
440 * Bounce: copy the swiotlb buffer back to the original dma location
441 */
442static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
443 size_t size, enum dma_data_direction dir)
444{
445 unsigned long pfn = PFN_DOWN(orig_addr);
446 unsigned char *vaddr = phys_to_virt(tlb_addr);
447
448 if (PageHighMem(pfn_to_page(pfn))) {
449 /* The buffer does not have a mapping. Map it in and copy */
450 unsigned int offset = orig_addr & ~PAGE_MASK;
451 char *buffer;
452 unsigned int sz = 0;
453 unsigned long flags;
454
455 while (size) {
456 sz = min_t(size_t, PAGE_SIZE - offset, size);
457
458 local_irq_save(flags);
459 buffer = kmap_atomic(pfn_to_page(pfn));
460 if (dir == DMA_TO_DEVICE)
461 memcpy(vaddr, buffer + offset, sz);
462 else
463 memcpy(buffer + offset, vaddr, sz);
464 kunmap_atomic(buffer);
465 local_irq_restore(flags);
466
467 size -= sz;
468 pfn++;
469 vaddr += sz;
470 offset = 0;
471 }
472 } else if (dir == DMA_TO_DEVICE) {
473 memcpy(vaddr, phys_to_virt(orig_addr), size);
474 } else {
475 memcpy(phys_to_virt(orig_addr), vaddr, size);
476 }
477}
478
479phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
480 dma_addr_t tbl_dma_addr,
481 phys_addr_t orig_addr, size_t size,
482 enum dma_data_direction dir,
483 unsigned long attrs)
484{
485 unsigned long flags;
486 phys_addr_t tlb_addr;
487 unsigned int nslots, stride, index, wrap;
488 int i;
489 unsigned long mask;
490 unsigned long offset_slots;
491 unsigned long max_slots;
492
493 if (no_iotlb_memory)
494 panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
495
496 if (mem_encrypt_active())
497 pr_warn_once("%s is active and system is using DMA bounce buffers\n",
498 sme_active() ? "SME" : "SEV");
499
500 mask = dma_get_seg_boundary(hwdev);
501
502 tbl_dma_addr &= mask;
503
504 offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
505
506 /*
507 * Carefully handle integer overflow which can occur when mask == ~0UL.
508 */
509 max_slots = mask + 1
510 ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
511 : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
512
513 /*
514 * For mappings greater than or equal to a page, we limit the stride
515 * (and hence alignment) to a page size.
516 */
517 nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
518 if (size >= PAGE_SIZE)
519 stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
520 else
521 stride = 1;
522
523 BUG_ON(!nslots);
524
525 /*
526 * Find suitable number of IO TLB entries size that will fit this
527 * request and allocate a buffer from that IO TLB pool.
528 */
529 spin_lock_irqsave(&io_tlb_lock, flags);
530 index = ALIGN(io_tlb_index, stride);
531 if (index >= io_tlb_nslabs)
532 index = 0;
533 wrap = index;
534
535 do {
536 while (iommu_is_span_boundary(index, nslots, offset_slots,
537 max_slots)) {
538 index += stride;
539 if (index >= io_tlb_nslabs)
540 index = 0;
541 if (index == wrap)
542 goto not_found;
543 }
544
545 /*
546 * If we find a slot that indicates we have 'nslots' number of
547 * contiguous buffers, we allocate the buffers from that slot
548 * and mark the entries as '0' indicating unavailable.
549 */
550 if (io_tlb_list[index] >= nslots) {
551 int count = 0;
552
553 for (i = index; i < (int) (index + nslots); i++)
554 io_tlb_list[i] = 0;
555 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
556 io_tlb_list[i] = ++count;
557 tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
558
559 /*
560 * Update the indices to avoid searching in the next
561 * round.
562 */
563 io_tlb_index = ((index + nslots) < io_tlb_nslabs
564 ? (index + nslots) : 0);
565
566 goto found;
567 }
568 index += stride;
569 if (index >= io_tlb_nslabs)
570 index = 0;
571 } while (index != wrap);
572
573not_found:
574 spin_unlock_irqrestore(&io_tlb_lock, flags);
575 if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
576 dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
577 return SWIOTLB_MAP_ERROR;
578found:
579 spin_unlock_irqrestore(&io_tlb_lock, flags);
580
581 /*
582 * Save away the mapping from the original address to the DMA address.
583 * This is needed when we sync the memory. Then we sync the buffer if
584 * needed.
585 */
586 for (i = 0; i < nslots; i++)
587 io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
588 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
589 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
590 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
591
592 return tlb_addr;
593}
594
595/*
596 * Allocates bounce buffer and returns its physical address.
597 */
598static phys_addr_t
599map_single(struct device *hwdev, phys_addr_t phys, size_t size,
600 enum dma_data_direction dir, unsigned long attrs)
601{
602 dma_addr_t start_dma_addr;
603
604 if (swiotlb_force == SWIOTLB_NO_FORCE) {
605 dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
606 &phys);
607 return SWIOTLB_MAP_ERROR;
608 }
609
610 start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
611 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
612 dir, attrs);
613}
614
615/*
616 * tlb_addr is the physical address of the bounce buffer to unmap.
617 */
618void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
619 size_t size, enum dma_data_direction dir,
620 unsigned long attrs)
621{
622 unsigned long flags;
623 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
624 int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
625 phys_addr_t orig_addr = io_tlb_orig_addr[index];
626
627 /*
628 * First, sync the memory before unmapping the entry
629 */
630 if (orig_addr != INVALID_PHYS_ADDR &&
631 !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
632 ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
633 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
634
635 /*
636 * Return the buffer to the free list by setting the corresponding
637 * entries to indicate the number of contiguous entries available.
638 * While returning the entries to the free list, we merge the entries
639 * with slots below and above the pool being returned.
640 */
641 spin_lock_irqsave(&io_tlb_lock, flags);
642 {
643 count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
644 io_tlb_list[index + nslots] : 0);
645 /*
646 * Step 1: return the slots to the free list, merging the
647 * slots with succeeding slots
648 */
649 for (i = index + nslots - 1; i >= index; i--) {
650 io_tlb_list[i] = ++count;
651 io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
652 }
653 /*
654 * Step 2: merge the returned slots with the preceding slots,
655 * if available (non zero)
656 */
657 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
658 io_tlb_list[i] = ++count;
659 }
660 spin_unlock_irqrestore(&io_tlb_lock, flags);
661}
662
663void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
664 size_t size, enum dma_data_direction dir,
665 enum dma_sync_target target)
666{
667 int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
668 phys_addr_t orig_addr = io_tlb_orig_addr[index];
669
670 if (orig_addr == INVALID_PHYS_ADDR)
671 return;
672 orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
673
674 switch (target) {
675 case SYNC_FOR_CPU:
676 if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
677 swiotlb_bounce(orig_addr, tlb_addr,
678 size, DMA_FROM_DEVICE);
679 else
680 BUG_ON(dir != DMA_TO_DEVICE);
681 break;
682 case SYNC_FOR_DEVICE:
683 if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
684 swiotlb_bounce(orig_addr, tlb_addr,
685 size, DMA_TO_DEVICE);
686 else
687 BUG_ON(dir != DMA_FROM_DEVICE);
688 break;
689 default:
690 BUG();
691 }
692}
693
694static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
695 size_t size)
696{
697 u64 mask = DMA_BIT_MASK(32);
698
699 if (dev && dev->coherent_dma_mask)
700 mask = dev->coherent_dma_mask;
701 return addr + size - 1 <= mask;
702}
703
704static void *
705swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
706 unsigned long attrs)
707{
708 phys_addr_t phys_addr;
709
710 if (swiotlb_force == SWIOTLB_NO_FORCE)
711 goto out_warn;
712
713 phys_addr = swiotlb_tbl_map_single(dev,
714 __phys_to_dma(dev, io_tlb_start),
715 0, size, DMA_FROM_DEVICE, attrs);
716 if (phys_addr == SWIOTLB_MAP_ERROR)
717 goto out_warn;
718
719 *dma_handle = __phys_to_dma(dev, phys_addr);
720 if (!dma_coherent_ok(dev, *dma_handle, size))
721 goto out_unmap;
722
723 memset(phys_to_virt(phys_addr), 0, size);
724 return phys_to_virt(phys_addr);
725
726out_unmap:
727 dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
728 (unsigned long long)dev->coherent_dma_mask,
729 (unsigned long long)*dma_handle);
730
731 /*
732 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
733 * DMA_ATTR_SKIP_CPU_SYNC is optional.
734 */
735 swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
736 DMA_ATTR_SKIP_CPU_SYNC);
737out_warn:
738 if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
739 dev_warn(dev,
740 "swiotlb: coherent allocation failed, size=%zu\n",
741 size);
742 dump_stack();
743 }
744 return NULL;
745}
746
747static bool swiotlb_free_buffer(struct device *dev, size_t size,
748 dma_addr_t dma_addr)
749{
750 phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
751
752 WARN_ON_ONCE(irqs_disabled());
753
754 if (!is_swiotlb_buffer(phys_addr))
755 return false;
756
757 /*
758 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
759 * DMA_ATTR_SKIP_CPU_SYNC is optional.
760 */
761 swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
762 DMA_ATTR_SKIP_CPU_SYNC);
763 return true;
764}
765
766static void
767swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
768 int do_panic)
769{
770 if (swiotlb_force == SWIOTLB_NO_FORCE)
771 return;
772
773 /*
774 * Ran out of IOMMU space for this operation. This is very bad.
775 * Unfortunately the drivers cannot handle this operation properly
776 * unless they check for dma_mapping_error() (most don't).
777 * When the mapping is small enough, return a static buffer to limit
778 * the damage, or panic when the transfer is too big.
779 */
780 dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
781 size);
782
783 if (size <= io_tlb_overflow || !do_panic)
784 return;
785
786 if (dir == DMA_BIDIRECTIONAL)
787 panic("DMA: Random memory could be DMA accessed\n");
788 if (dir == DMA_FROM_DEVICE)
789 panic("DMA: Random memory could be DMA written\n");
790 if (dir == DMA_TO_DEVICE)
791 panic("DMA: Random memory could be DMA read\n");
792}
793
794/*
795 * Map a single buffer of the indicated size for DMA in streaming mode. The
796 * physical address to use is returned.
797 *
798 * Once the device is given the dma address, the device owns this memory until
799 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
800 */
801dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
802 unsigned long offset, size_t size,
803 enum dma_data_direction dir,
804 unsigned long attrs)
805{
806 phys_addr_t map, phys = page_to_phys(page) + offset;
807 dma_addr_t dev_addr = phys_to_dma(dev, phys);
808
809 BUG_ON(dir == DMA_NONE);
810 /*
811 * If the address happens to be in the device's DMA window,
812 * we can safely return the device addr and not worry about bounce
813 * buffering it.
814 */
815 if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
816 return dev_addr;
817
818 trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
819
820 /* Oh well, have to allocate and map a bounce buffer. */
821 map = map_single(dev, phys, size, dir, attrs);
822 if (map == SWIOTLB_MAP_ERROR) {
823 swiotlb_full(dev, size, dir, 1);
824 return __phys_to_dma(dev, io_tlb_overflow_buffer);
825 }
826
827 dev_addr = __phys_to_dma(dev, map);
828
829 /* Ensure that the address returned is DMA'ble */
830 if (dma_capable(dev, dev_addr, size))
831 return dev_addr;
832
833 attrs |= DMA_ATTR_SKIP_CPU_SYNC;
834 swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
835
836 return __phys_to_dma(dev, io_tlb_overflow_buffer);
837}
838
839/*
840 * Unmap a single streaming mode DMA translation. The dma_addr and size must
841 * match what was provided for in a previous swiotlb_map_page call. All
842 * other usages are undefined.
843 *
844 * After this call, reads by the cpu to the buffer are guaranteed to see
845 * whatever the device wrote there.
846 */
847static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
848 size_t size, enum dma_data_direction dir,
849 unsigned long attrs)
850{
851 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
852
853 BUG_ON(dir == DMA_NONE);
854
855 if (is_swiotlb_buffer(paddr)) {
856 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
857 return;
858 }
859
860 if (dir != DMA_FROM_DEVICE)
861 return;
862
863 /*
864 * phys_to_virt doesn't work with highmem pages, but we could be
865 * calling dma_mark_clean() with a highmem page here. However, we
866 * are fine since dma_mark_clean() is null on POWERPC. We can
867 * make dma_mark_clean() take a physical address if necessary.
868 */
869 dma_mark_clean(phys_to_virt(paddr), size);
870}
871
872void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
873 size_t size, enum dma_data_direction dir,
874 unsigned long attrs)
875{
876 unmap_single(hwdev, dev_addr, size, dir, attrs);
877}
878
879/*
880 * Make physical memory consistent for a single streaming mode DMA translation
881 * after a transfer.
882 *
883 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
884 * using the cpu, yet do not wish to tear down the dma mapping, you must
885 * call this function before doing so. At the next point you give the dma
886 * address back to the card, you must first perform a
887 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
888 */
889static void
890swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
891 size_t size, enum dma_data_direction dir,
892 enum dma_sync_target target)
893{
894 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
895
896 BUG_ON(dir == DMA_NONE);
897
898 if (is_swiotlb_buffer(paddr)) {
899 swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
900 return;
901 }
902
903 if (dir != DMA_FROM_DEVICE)
904 return;
905
906 dma_mark_clean(phys_to_virt(paddr), size);
907}
908
909void
910swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
911 size_t size, enum dma_data_direction dir)
912{
913 swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
914}
915
916void
917swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
918 size_t size, enum dma_data_direction dir)
919{
920 swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
921}
922
923/*
924 * Map a set of buffers described by scatterlist in streaming mode for DMA.
925 * This is the scatter-gather version of the above swiotlb_map_page
926 * interface. Here the scatter gather list elements are each tagged with the
927 * appropriate dma address and length. They are obtained via
928 * sg_dma_{address,length}(SG).
929 *
930 * NOTE: An implementation may be able to use a smaller number of
931 * DMA address/length pairs than there are SG table elements.
932 * (for example via virtual mapping capabilities)
933 * The routine returns the number of addr/length pairs actually
934 * used, at most nents.
935 *
936 * Device ownership issues as mentioned above for swiotlb_map_page are the
937 * same here.
938 */
939int
940swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
941 enum dma_data_direction dir, unsigned long attrs)
942{
943 struct scatterlist *sg;
944 int i;
945
946 BUG_ON(dir == DMA_NONE);
947
948 for_each_sg(sgl, sg, nelems, i) {
949 phys_addr_t paddr = sg_phys(sg);
950 dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
951
952 if (swiotlb_force == SWIOTLB_FORCE ||
953 !dma_capable(hwdev, dev_addr, sg->length)) {
954 phys_addr_t map = map_single(hwdev, sg_phys(sg),
955 sg->length, dir, attrs);
956 if (map == SWIOTLB_MAP_ERROR) {
957 /* Don't panic here, we expect map_sg users
958 to do proper error handling. */
959 swiotlb_full(hwdev, sg->length, dir, 0);
960 attrs |= DMA_ATTR_SKIP_CPU_SYNC;
961 swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
962 attrs);
963 sg_dma_len(sgl) = 0;
964 return 0;
965 }
966 sg->dma_address = __phys_to_dma(hwdev, map);
967 } else
968 sg->dma_address = dev_addr;
969 sg_dma_len(sg) = sg->length;
970 }
971 return nelems;
972}
973
974/*
975 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
976 * concerning calls here are the same as for swiotlb_unmap_page() above.
977 */
978void
979swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
980 int nelems, enum dma_data_direction dir,
981 unsigned long attrs)
982{
983 struct scatterlist *sg;
984 int i;
985
986 BUG_ON(dir == DMA_NONE);
987
988 for_each_sg(sgl, sg, nelems, i)
989 unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
990 attrs);
991}
992
993/*
994 * Make physical memory consistent for a set of streaming mode DMA translations
995 * after a transfer.
996 *
997 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
998 * and usage.
999 */
1000static void
1001swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
1002 int nelems, enum dma_data_direction dir,
1003 enum dma_sync_target target)
1004{
1005 struct scatterlist *sg;
1006 int i;
1007
1008 for_each_sg(sgl, sg, nelems, i)
1009 swiotlb_sync_single(hwdev, sg->dma_address,
1010 sg_dma_len(sg), dir, target);
1011}
1012
1013void
1014swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
1015 int nelems, enum dma_data_direction dir)
1016{
1017 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
1018}
1019
1020void
1021swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
1022 int nelems, enum dma_data_direction dir)
1023{
1024 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
1025}
1026
1027int
1028swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
1029{
1030 return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
1031}
1032
1033/*
1034 * Return whether the given device DMA address mask can be supported
1035 * properly. For example, if your device can only drive the low 24-bits
1036 * during bus mastering, then you would pass 0x00ffffff as the mask to
1037 * this function.
1038 */
1039int
1040swiotlb_dma_supported(struct device *hwdev, u64 mask)
1041{
1042 return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
1043}
1044
1045void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
1046 gfp_t gfp, unsigned long attrs)
1047{
1048 void *vaddr;
1049
1050 /* temporary workaround: */
1051 if (gfp & __GFP_NOWARN)
1052 attrs |= DMA_ATTR_NO_WARN;
1053
1054 /*
1055 * Don't print a warning when the first allocation attempt fails.
1056 * swiotlb_alloc_coherent() will print a warning when the DMA memory
1057 * allocation ultimately failed.
1058 */
1059 gfp |= __GFP_NOWARN;
1060
1061 vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
1062 if (!vaddr)
1063 vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
1064 return vaddr;
1065}
1066
1067void swiotlb_free(struct device *dev, size_t size, void *vaddr,
1068 dma_addr_t dma_addr, unsigned long attrs)
1069{
1070 if (!swiotlb_free_buffer(dev, size, dma_addr))
1071 dma_direct_free(dev, size, vaddr, dma_addr, attrs);
1072}
1073
1074const struct dma_map_ops swiotlb_dma_ops = {
1075 .mapping_error = swiotlb_dma_mapping_error,
1076 .alloc = swiotlb_alloc,
1077 .free = swiotlb_free,
1078 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
1079 .sync_single_for_device = swiotlb_sync_single_for_device,
1080 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
1081 .sync_sg_for_device = swiotlb_sync_sg_for_device,
1082 .map_sg = swiotlb_map_sg_attrs,
1083 .unmap_sg = swiotlb_unmap_sg_attrs,
1084 .map_page = swiotlb_map_page,
1085 .unmap_page = swiotlb_unmap_page,
1086 .dma_supported = dma_direct_supported,
1087};
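
For completeness, the setup_io_tlb_npages() parser near the top of the deleted file takes an optional slab count and force mode from the kernel command line; a hedged example (the count is arbitrary):

	swiotlb=65536,force

which sizes the bounce pool at 65536 slabs and, via SWIOTLB_FORCE, bounces every streaming mapping even for devices whose DMA mask could reach the buffer directly.
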
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 60aedc879361..08d3d59dca17 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5282,21 +5282,31 @@ static struct bpf_test tests[] = {
5282 { /* Mainly checking JIT here. */ 5282 { /* Mainly checking JIT here. */
5283 "BPF_MAXINSNS: Ctx heavy transformations", 5283 "BPF_MAXINSNS: Ctx heavy transformations",
5284 { }, 5284 { },
5285#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
5286 CLASSIC | FLAG_EXPECTED_FAIL,
5287#else
5285 CLASSIC, 5288 CLASSIC,
5289#endif
5286 { }, 5290 { },
5287 { 5291 {
5288 { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, 5292 { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
5289 { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } 5293 { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
5290 }, 5294 },
5291 .fill_helper = bpf_fill_maxinsns6, 5295 .fill_helper = bpf_fill_maxinsns6,
5296 .expected_errcode = -ENOTSUPP,
5292 }, 5297 },
5293 { /* Mainly checking JIT here. */ 5298 { /* Mainly checking JIT here. */
5294 "BPF_MAXINSNS: Call heavy transformations", 5299 "BPF_MAXINSNS: Call heavy transformations",
5295 { }, 5300 { },
5301#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
5302 CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
5303#else
5296 CLASSIC | FLAG_NO_DATA, 5304 CLASSIC | FLAG_NO_DATA,
5305#endif
5297 { }, 5306 { },
5298 { { 1, 0 }, { 10, 0 } }, 5307 { { 1, 0 }, { 10, 0 } },
5299 .fill_helper = bpf_fill_maxinsns7, 5308 .fill_helper = bpf_fill_maxinsns7,
5309 .expected_errcode = -ENOTSUPP,
5300 }, 5310 },
5301 { /* Mainly checking JIT here. */ 5311 { /* Mainly checking JIT here. */
5302 "BPF_MAXINSNS: Jump heavy test", 5312 "BPF_MAXINSNS: Jump heavy test",
@@ -5347,18 +5357,28 @@ static struct bpf_test tests[] = {
5347 { 5357 {
5348 "BPF_MAXINSNS: exec all MSH", 5358 "BPF_MAXINSNS: exec all MSH",
5349 { }, 5359 { },
5360#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
5361 CLASSIC | FLAG_EXPECTED_FAIL,
5362#else
5350 CLASSIC, 5363 CLASSIC,
5364#endif
5351 { 0xfa, 0xfb, 0xfc, 0xfd, }, 5365 { 0xfa, 0xfb, 0xfc, 0xfd, },
5352 { { 4, 0xababab83 } }, 5366 { { 4, 0xababab83 } },
5353 .fill_helper = bpf_fill_maxinsns13, 5367 .fill_helper = bpf_fill_maxinsns13,
5368 .expected_errcode = -ENOTSUPP,
5354 }, 5369 },
5355 { 5370 {
5356 "BPF_MAXINSNS: ld_abs+get_processor_id", 5371 "BPF_MAXINSNS: ld_abs+get_processor_id",
5357 { }, 5372 { },
5373#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
5374 CLASSIC | FLAG_EXPECTED_FAIL,
5375#else
5358 CLASSIC, 5376 CLASSIC,
5377#endif
5359 { }, 5378 { },
5360 { { 1, 0xbee } }, 5379 { { 1, 0xbee } },
5361 .fill_helper = bpf_fill_ld_abs_get_processor_id, 5380 .fill_helper = bpf_fill_ld_abs_get_processor_id,
5381 .expected_errcode = -ENOTSUPP,
5362 }, 5382 },
5363 /* 5383 /*
5364 * LD_IND / LD_ABS on fragmented SKBs 5384 * LD_IND / LD_ABS on fragmented SKBs
diff --git a/lib/test_printf.c b/lib/test_printf.c
index b2aa8f514844..cea592f402ed 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -260,13 +260,6 @@ plain(void)
260{ 260{
261 int err; 261 int err;
262 262
263 /*
264 * Make sure crng is ready. Otherwise we get "(ptrval)" instead
265 * of a hashed address when printing '%p' in plain_hash() and
266 * plain_format().
267 */
268 wait_for_random_bytes();
269
270 err = plain_hash(); 263 err = plain_hash();
271 if (err) { 264 if (err) {
272 pr_warn("plain 'p' does not appear to be hashed\n"); 265 pr_warn("plain 'p' does not appear to be hashed\n");