aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/crypto/ccp
diff options
context:
space:
mode:
authorTom Lendacky <thomas.lendacky@amd.com>2013-11-12 12:46:16 -0500
committerHerbert Xu <herbert@gondor.apana.org.au>2013-12-05 08:28:37 -0500
commit63b945091a070d8d4275dc0f7699ba22cd5f9435 (patch)
tree720bd381770f1519531262f3659eccdf3c79e9bd /drivers/crypto/ccp
parent8ec25c51291681bd68bdc290b35f2e61fa601c21 (diff)
crypto: ccp - CCP device driver and interface support
These routines provide the device driver support for the AMD Cryptographic Coprocessor (CCP). Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'drivers/crypto/ccp')
-rw-r--r--drivers/crypto/ccp/ccp-dev.c582
-rw-r--r--drivers/crypto/ccp/ccp-dev.h272
-rw-r--r--drivers/crypto/ccp/ccp-ops.c2020
-rw-r--r--drivers/crypto/ccp/ccp-pci.c360
4 files changed, 3234 insertions, 0 deletions
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
new file mode 100644
index 000000000000..de59df970176
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -0,0 +1,582 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kthread.h>
16#include <linux/sched.h>
17#include <linux/interrupt.h>
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/delay.h>
21#include <linux/hw_random.h>
22#include <linux/cpu.h>
23#include <asm/cpu_device_id.h>
24#include <linux/ccp.h>
25
26#include "ccp-dev.h"
27
28MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
29MODULE_LICENSE("GPL");
30MODULE_VERSION("1.0.0");
31MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
32
33
/* Single, global CCP device instance. This driver supports exactly one
 * CCP at a time; all access to it goes through these helpers.
 */
static struct ccp_device *ccp_dev;
/* Return the registered CCP device, or NULL if none is present */
static inline struct ccp_device *ccp_get_device(void)
{
	return ccp_dev;
}

/* Publish @ccp as the active device (called once init is complete) */
static inline void ccp_add_device(struct ccp_device *ccp)
{
	ccp_dev = ccp;
}

/* Withdraw the active device so new cmds see -ENODEV */
static inline void ccp_del_device(struct ccp_device *ccp)
{
	ccp_dev = NULL;
}
49
/**
 * ccp_enqueue_cmd - queue an operation for processing by the CCP
 *
 * @cmd: ccp_cmd struct to be processed
 *
 * Queue a cmd to be processed by the CCP. If queueing the cmd
 * would exceed the defined length of the cmd queue the cmd will
 * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will
 * result in a return code of -EBUSY.
 *
 * The callback routine specified in the ccp_cmd struct will be
 * called to notify the caller of completion (if the cmd was not
 * backlogged) or advancement out of the backlog. If the cmd has
 * advanced out of the backlog the "err" value of the callback
 * will be -EINPROGRESS. Any other "err" value during callback is
 * the result of the operation.
 *
 * The cmd has been successfully queued if:
 *   the return code is -EINPROGRESS or
 *   the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set
 */
int ccp_enqueue_cmd(struct ccp_cmd *cmd)
{
	struct ccp_device *ccp = ccp_get_device();
	unsigned long flags;
	unsigned int i;
	int ret;

	if (!ccp)
		return -ENODEV;

	/* Caller must supply a callback routine */
	if (!cmd->callback)
		return -EINVAL;

	cmd->ccp = ccp;

	spin_lock_irqsave(&ccp->cmd_lock, flags);

	/* Default to "no idle queue found" so no kthread is woken
	 * unless the search below locates one
	 */
	i = ccp->cmd_q_count;

	if (ccp->cmd_count >= MAX_CMD_QLEN) {
		ret = -EBUSY;
		if (cmd->flags & CCP_CMD_MAY_BACKLOG)
			list_add_tail(&cmd->entry, &ccp->backlog);
	} else {
		ret = -EINPROGRESS;
		ccp->cmd_count++;
		list_add_tail(&cmd->entry, &ccp->cmd);

		/* Find an idle queue (skip the search while suspending
		 * so no new work is started)
		 */
		if (!ccp->suspending) {
			for (i = 0; i < ccp->cmd_q_count; i++) {
				if (ccp->cmd_q[i].active)
					continue;

				break;
			}
		}
	}

	spin_unlock_irqrestore(&ccp->cmd_lock, flags);

	/* If we found an idle queue, wake it up */
	if (i < ccp->cmd_q_count)
		wake_up_process(ccp->cmd_q[i].kthread);

	return ret;
}
EXPORT_SYMBOL_GPL(ccp_enqueue_cmd);
120
/* Workqueue handler: advance a backlogged cmd onto the master cmd
 * list. The cmd's callback is first invoked with -EINPROGRESS to tell
 * the owner it has left the backlog (see ccp_enqueue_cmd()).
 */
static void ccp_do_cmd_backlog(struct work_struct *work)
{
	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
	struct ccp_device *ccp = cmd->ccp;
	unsigned long flags;
	unsigned int i;

	/* Callback runs outside the cmd_lock */
	cmd->callback(cmd->data, -EINPROGRESS);

	spin_lock_irqsave(&ccp->cmd_lock, flags);

	ccp->cmd_count++;
	list_add_tail(&cmd->entry, &ccp->cmd);

	/* Find an idle queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		if (ccp->cmd_q[i].active)
			continue;

		break;
	}

	spin_unlock_irqrestore(&ccp->cmd_lock, flags);

	/* If we found an idle queue, wake it up */
	if (i < ccp->cmd_q_count)
		wake_up_process(ccp->cmd_q[i].kthread);
}
149
/* Called from a queue kthread to fetch the next cmd to process.
 *
 * Returns NULL when the device is suspending (after marking this
 * queue suspended and waking any suspend waiter) or when no cmd is
 * pending. If a backlogged cmd exists it is scheduled (via work) to
 * be moved onto the master cmd list.
 */
static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q)
{
	struct ccp_device *ccp = cmd_q->ccp;
	struct ccp_cmd *cmd = NULL;
	struct ccp_cmd *backlog = NULL;
	unsigned long flags;

	spin_lock_irqsave(&ccp->cmd_lock, flags);

	/* Mark idle until a cmd is actually claimed below */
	cmd_q->active = 0;

	if (ccp->suspending) {
		cmd_q->suspended = 1;

		spin_unlock_irqrestore(&ccp->cmd_lock, flags);
		wake_up_interruptible(&ccp->suspend_queue);

		return NULL;
	}

	if (ccp->cmd_count) {
		cmd_q->active = 1;

		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
		list_del(&cmd->entry);

		ccp->cmd_count--;
	}

	if (!list_empty(&ccp->backlog)) {
		backlog = list_first_entry(&ccp->backlog, struct ccp_cmd,
					   entry);
		list_del(&backlog->entry);
	}

	spin_unlock_irqrestore(&ccp->cmd_lock, flags);

	/* The backlog callback must be invoked outside the lock */
	if (backlog) {
		INIT_WORK(&backlog->work, ccp_do_cmd_backlog);
		schedule_work(&backlog->work);
	}

	return cmd;
}
194
195static void ccp_do_cmd_complete(struct work_struct *work)
196{
197 struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
198
199 cmd->callback(cmd->data, cmd->ret);
200}
201
/* Per-queue worker thread.
 *
 * Sleeps (TASK_INTERRUPTIBLE) until woken, then pulls cmds via
 * ccp_dequeue_cmd(), runs each one on the hardware queue and
 * schedules its completion callback on the system workqueue.
 * Exits when kthread_stop() is called.
 */
static int ccp_cmd_queue_thread(void *data)
{
	struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data;
	struct ccp_cmd *cmd;

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();

		/* Re-arm before dequeueing so a wake between dequeue
		 * and the next schedule() is not lost
		 */
		set_current_state(TASK_INTERRUPTIBLE);

		cmd = ccp_dequeue_cmd(cmd_q);
		if (!cmd)
			continue;

		__set_current_state(TASK_RUNNING);

		/* Execute the command */
		cmd->ret = ccp_run_cmd(cmd_q, cmd);

		/* Schedule the completion callback */
		INIT_WORK(&cmd->work, ccp_do_cmd_complete);
		schedule_work(&cmd->work);
	}

	__set_current_state(TASK_RUNNING);

	return 0;
}
231
/* hwrng read callback: pull up to 4 bytes from the CCP TRNG output
 * register. Returns the number of bytes copied into @data, 0 when no
 * entropy is currently available, or -EIO after TRNG_RETRIES
 * consecutive empty reads.
 */
static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
	u32 trng_value;
	int len = min_t(int, sizeof(trng_value), max);

	/*
	 * Locking is provided by the caller so we can update device
	 * hwrng-related fields safely
	 */
	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
	if (!trng_value) {
		/* Zero is returned if not data is available or if a
		 * bad-entropy error is present. Assume an error if
		 * we exceed TRNG_RETRIES reads of zero.
		 */
		if (ccp->hwrng_retries++ > TRNG_RETRIES)
			return -EIO;

		return 0;
	}

	/* Reset the counter and save the rng value */
	ccp->hwrng_retries = 0;
	memcpy(data, &trng_value, len);

	return len;
}
260
261/**
262 * ccp_alloc_struct - allocate and initialize the ccp_device struct
263 *
264 * @dev: device struct of the CCP
265 */
266struct ccp_device *ccp_alloc_struct(struct device *dev)
267{
268 struct ccp_device *ccp;
269
270 ccp = kzalloc(sizeof(*ccp), GFP_KERNEL);
271 if (ccp == NULL) {
272 dev_err(dev, "unable to allocate device struct\n");
273 return NULL;
274 }
275 ccp->dev = dev;
276
277 INIT_LIST_HEAD(&ccp->cmd);
278 INIT_LIST_HEAD(&ccp->backlog);
279
280 spin_lock_init(&ccp->cmd_lock);
281 mutex_init(&ccp->req_mutex);
282 mutex_init(&ccp->ksb_mutex);
283 ccp->ksb_count = KSB_COUNT;
284 ccp->ksb_start = 0;
285
286 return ccp;
287}
288
/**
 * ccp_init - initialize the CCP device
 *
 * @ccp: ccp_device struct
 *
 * Discovers the available hardware queues, sets up a dma pool, KSB
 * reservation and kthread per queue, registers the TRNG, publishes
 * the device and enables interrupts. Returns 0 on success or a
 * negative errno (all partially-created resources are torn down on
 * failure).
 */
int ccp_init(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct dma_pool *dma_pool;
	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
	unsigned int qmr, qim, i;
	int ret;

	/* Find available queues */
	qim = 0;
	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {
		if (!(qmr & (1 << i)))
			continue;

		/* Allocate a dma pool for this queue */
		snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i);
		dma_pool = dma_pool_create(dma_pool_name, dev,
					   CCP_DMAPOOL_MAX_SIZE,
					   CCP_DMAPOOL_ALIGN, 0);
		if (!dma_pool) {
			dev_err(dev, "unable to allocate dma pool\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
		ccp->cmd_q_count++;

		cmd_q->ccp = ccp;
		cmd_q->id = i;
		cmd_q->dma_pool = dma_pool;

		/* Reserve 2 KSB regions for the queue */
		cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
		ccp->ksb_count -= 2;

		/* Preset some register values and masks that are queue
		 * number dependent
		 */
		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
				    (CMD_Q_STATUS_INCR * i);
		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
					(CMD_Q_STATUS_INCR * i);
		cmd_q->int_ok = 1 << (i * 2);
		cmd_q->int_err = 1 << ((i * 2) + 1);

		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

		init_waitqueue_head(&cmd_q->int_queue);

		/* Build queue interrupt mask (two interrupts per queue) */
		qim |= cmd_q->int_ok | cmd_q->int_err;

		dev_dbg(dev, "queue #%u available\n", i);
	}
	if (ccp->cmd_q_count == 0) {
		dev_notice(dev, "no command queues available\n");
		ret = -EIO;
		goto e_pool;
	}
	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

	/* Disable and clear interrupts until ready */
	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		/* Reading these registers clears latched status */
		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

	/* Request an irq */
	ret = ccp->get_irq(ccp);
	if (ret) {
		dev_err(dev, "unable to allocate an IRQ\n");
		goto e_pool;
	}

	/* Initialize the queues used to wait for KSB space and suspend */
	init_waitqueue_head(&ccp->ksb_queue);
	init_waitqueue_head(&ccp->suspend_queue);

	/* Create a kthread for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct task_struct *kthread;

		cmd_q = &ccp->cmd_q[i];

		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
					 "ccp-q%u", cmd_q->id);
		if (IS_ERR(kthread)) {
			dev_err(dev, "error creating queue thread (%ld)\n",
				PTR_ERR(kthread));
			ret = PTR_ERR(kthread);
			goto e_kthread;
		}

		cmd_q->kthread = kthread;
		wake_up_process(kthread);
	}

	/* Register the RNG */
	ccp->hwrng.name = "ccp-rng";
	ccp->hwrng.read = ccp_trng_read;
	ret = hwrng_register(&ccp->hwrng);
	if (ret) {
		dev_err(dev, "error registering hwrng (%d)\n", ret);
		goto e_kthread;
	}

	/* Make the device struct available before enabling interrupts */
	ccp_add_device(ccp);

	/* Enable interrupts */
	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);

	return 0;

e_kthread:
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	ccp->free_irq(ccp);

e_pool:
	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	return ret;
}
429
/**
 * ccp_destroy - tear down the CCP device
 *
 * @ccp: ccp_device struct
 *
 * Reverses ccp_init(): unpublishes the device, unregisters the RNG,
 * stops the queue kthreads, quiesces interrupts, releases the irq and
 * dma pools, then fails any still-queued cmds with -ENODEV.
 */
void ccp_destroy(struct ccp_device *ccp)
{
	struct ccp_cmd_queue *cmd_q;
	struct ccp_cmd *cmd;
	unsigned int qim, i;

	/* Remove general access to the device struct */
	ccp_del_device(ccp);

	/* Unregister the RNG */
	hwrng_unregister(&ccp->hwrng);

	/* Stop the queue kthreads */
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	/* Build queue interrupt mask (two interrupt masks per queue) */
	qim = 0;
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];
		qim |= cmd_q->int_ok | cmd_q->int_err;
	}

	/* Disable and clear interrupts */
	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		/* Reading these registers clears latched status */
		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

	ccp->free_irq(ccp);

	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	/* Flush the cmd and backlog queue (no lock needed - the
	 * kthreads are stopped and the device is unpublished)
	 */
	while (!list_empty(&ccp->cmd)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
	while (!list_empty(&ccp->backlog)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
}
488
/**
 * ccp_irq_handler - handle interrupts generated by the CCP device
 *
 * @irq: the irq associated with the interrupt
 * @data: the data value supplied when the irq was created
 *
 * For each queue whose ok/err bit is set in the IRQ status register,
 * snapshots the queue status registers, records the first error,
 * acknowledges the interrupt and wakes the kthread waiting in
 * ccp_do_cmd().
 */
irqreturn_t ccp_irq_handler(int irq, void *data)
{
	struct device *dev = data;
	struct ccp_device *ccp = dev_get_drvdata(dev);
	struct ccp_cmd_queue *cmd_q;
	u32 q_int, status;
	unsigned int i;

	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);

	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
		if (q_int) {
			cmd_q->int_status = status;
			cmd_q->q_status = ioread32(cmd_q->reg_status);
			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

			/* On error, only save the first error value */
			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

			cmd_q->int_rcvd = 1;

			/* Acknowledge the interrupt and wake the kthread */
			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
			wake_up_interruptible(&cmd_q->int_queue);
		}
	}

	return IRQ_HANDLED;
}
528
#ifdef CONFIG_PM
/* Return true once every command queue kthread has parked itself for
 * suspend (the suspended flag is set by ccp_dequeue_cmd()).
 */
bool ccp_queues_suspended(struct ccp_device *ccp)
{
	unsigned int queues_parked = 0;
	unsigned long flags;
	unsigned int q;

	spin_lock_irqsave(&ccp->cmd_lock, flags);

	for (q = 0; q < ccp->cmd_q_count; q++)
		if (ccp->cmd_q[q].suspended)
			queues_parked++;

	spin_unlock_irqrestore(&ccp->cmd_lock, flags);

	return queues_parked == ccp->cmd_q_count;
}
#endif
547
548static const struct x86_cpu_id ccp_support[] = {
549 { X86_VENDOR_AMD, 22, },
550};
551
552static int __init ccp_mod_init(void)
553{
554 struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
555
556 if (!x86_match_cpu(ccp_support))
557 return -ENODEV;
558
559 switch (cpuinfo->x86) {
560 case 22:
561 if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63))
562 return -ENODEV;
563 return ccp_pci_init();
564 break;
565 };
566
567 return -ENODEV;
568}
569
570static void __exit ccp_mod_exit(void)
571{
572 struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
573
574 switch (cpuinfo->x86) {
575 case 22:
576 ccp_pci_exit();
577 break;
578 };
579}
580
581module_init(ccp_mod_init);
582module_exit(ccp_mod_exit);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
new file mode 100644
index 000000000000..7ec536e702ec
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -0,0 +1,272 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#ifndef __CCP_DEV_H__
14#define __CCP_DEV_H__
15
16#include <linux/device.h>
17#include <linux/pci.h>
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/list.h>
21#include <linux/wait.h>
22#include <linux/dmapool.h>
23#include <linux/hw_random.h>
24
25
26#define IO_OFFSET 0x20000
27
28#define MAX_DMAPOOL_NAME_LEN 32
29
30#define MAX_HW_QUEUES 5
31#define MAX_CMD_QLEN 100
32
33#define TRNG_RETRIES 10
34
35
36/****** Register Mappings ******/
37#define Q_MASK_REG 0x000
38#define TRNG_OUT_REG 0x00c
39#define IRQ_MASK_REG 0x040
40#define IRQ_STATUS_REG 0x200
41
42#define DEL_CMD_Q_JOB 0x124
43#define DEL_Q_ACTIVE 0x00000200
44#define DEL_Q_ID_SHIFT 6
45
46#define CMD_REQ0 0x180
47#define CMD_REQ_INCR 0x04
48
49#define CMD_Q_STATUS_BASE 0x210
50#define CMD_Q_INT_STATUS_BASE 0x214
51#define CMD_Q_STATUS_INCR 0x20
52
53#define CMD_Q_CACHE 0x228
54#define CMD_Q_CACHE_INC 0x20
55
/* Extract the error code / free-slot depth from a queue status word.
 * No trailing semicolon in the expansion: these macros must remain
 * usable inside larger expressions.
 */
#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f)
58
59/****** REQ0 Related Values ******/
60#define REQ0_WAIT_FOR_WRITE 0x00000004
61#define REQ0_INT_ON_COMPLETE 0x00000002
62#define REQ0_STOP_ON_COMPLETE 0x00000001
63
64#define REQ0_CMD_Q_SHIFT 9
65#define REQ0_JOBID_SHIFT 3
66
67/****** REQ1 Related Values ******/
68#define REQ1_PROTECT_SHIFT 27
69#define REQ1_ENGINE_SHIFT 23
70#define REQ1_KEY_KSB_SHIFT 2
71
72#define REQ1_EOM 0x00000002
73#define REQ1_INIT 0x00000001
74
75/* AES Related Values */
76#define REQ1_AES_TYPE_SHIFT 21
77#define REQ1_AES_MODE_SHIFT 18
78#define REQ1_AES_ACTION_SHIFT 17
79#define REQ1_AES_CFB_SIZE_SHIFT 10
80
81/* XTS-AES Related Values */
82#define REQ1_XTS_AES_SIZE_SHIFT 10
83
84/* SHA Related Values */
85#define REQ1_SHA_TYPE_SHIFT 21
86
87/* RSA Related Values */
88#define REQ1_RSA_MOD_SIZE_SHIFT 10
89
90/* Pass-Through Related Values */
91#define REQ1_PT_BW_SHIFT 12
92#define REQ1_PT_BS_SHIFT 10
93
94/* ECC Related Values */
95#define REQ1_ECC_AFFINE_CONVERT 0x00200000
96#define REQ1_ECC_FUNCTION_SHIFT 18
97
98/****** REQ4 Related Values ******/
99#define REQ4_KSB_SHIFT 18
100#define REQ4_MEMTYPE_SHIFT 16
101
102/****** REQ6 Related Values ******/
103#define REQ6_MEMTYPE_SHIFT 16
104
105
106/****** Key Storage Block ******/
107#define KSB_START 77
108#define KSB_END 127
109#define KSB_COUNT (KSB_END - KSB_START + 1)
110#define CCP_KSB_BITS 256
111#define CCP_KSB_BYTES 32
112
113#define CCP_JOBID_MASK 0x0000003f
114
115#define CCP_DMAPOOL_MAX_SIZE 64
116#define CCP_DMAPOOL_ALIGN (1 << 5)
117
118#define CCP_REVERSE_BUF_SIZE 64
119
120#define CCP_AES_KEY_KSB_COUNT 1
121#define CCP_AES_CTX_KSB_COUNT 1
122
123#define CCP_XTS_AES_KEY_KSB_COUNT 1
124#define CCP_XTS_AES_CTX_KSB_COUNT 1
125
126#define CCP_SHA_KSB_COUNT 1
127
128#define CCP_RSA_MAX_WIDTH 4096
129
130#define CCP_PASSTHRU_BLOCKSIZE 256
131#define CCP_PASSTHRU_MASKSIZE 32
132#define CCP_PASSTHRU_KSB_COUNT 1
133
134#define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */
135#define CCP_ECC_MAX_OPERANDS 6
136#define CCP_ECC_MAX_OUTPUTS 3
137#define CCP_ECC_SRC_BUF_SIZE 448
138#define CCP_ECC_DST_BUF_SIZE 192
139#define CCP_ECC_OPERAND_SIZE 64
140#define CCP_ECC_OUTPUT_SIZE 64
141#define CCP_ECC_RESULT_OFFSET 60
142#define CCP_ECC_RESULT_SUCCESS 0x0001
143
144
145struct ccp_device;
146struct ccp_cmd;
147
/* Per-hardware-queue state. One instance per bit set in Q_MASK_REG,
 * serviced by its own kthread (see ccp_cmd_queue_thread()).
 */
struct ccp_cmd_queue {
	struct ccp_device *ccp;

	/* Queue identifier (hardware queue number) */
	u32 id;

	/* Queue dma pool */
	struct dma_pool *dma_pool;

	/* Queue reserved KSB regions (one for key, one for context) */
	u32 ksb_key;
	u32 ksb_ctx;

	/* Queue processing thread */
	struct task_struct *kthread;
	unsigned int active;
	unsigned int suspended;

	/* Number of free command slots available */
	unsigned int free_slots;

	/* Interrupt masks (ok/err bits in the IRQ status register) */
	u32 int_ok;
	u32 int_err;

	/* Register addresses for queue */
	void __iomem *reg_status;
	void __iomem *reg_int_status;

	/* Status values from job (snapshotted by the irq handler) */
	u32 int_status;
	u32 q_status;
	u32 q_int_status;
	u32 cmd_error;

	/* Interrupt wait queue */
	wait_queue_head_t int_queue;
	unsigned int int_rcvd;
} ____cacheline_aligned;
187
/* Top-level device state for one CCP (the driver supports a single
 * instance - see ccp_get_device()/ccp_add_device() in ccp-dev.c).
 */
struct ccp_device {
	struct device *dev;

	/*
	 * Bus specific device information
	 */
	void *dev_specific;
	int (*get_irq)(struct ccp_device *ccp);
	void (*free_irq)(struct ccp_device *ccp);

	/*
	 * I/O area used for device communication. The register mapping
	 * starts at an offset into the mapped bar.
	 * The CMD_REQx registers and the Delete_Cmd_Queue_Job register
	 * need to be protected while a command queue thread is accessing
	 * them.
	 */
	struct mutex req_mutex ____cacheline_aligned;
	void __iomem *io_map;
	void __iomem *io_regs;

	/*
	 * Master lists that all cmds are queued on. Because there can be
	 * more than one CCP command queue that can process a cmd a separate
	 * backlog list is needed so that the backlog completion call
	 * completes before the cmd is available for execution.
	 */
	spinlock_t cmd_lock ____cacheline_aligned;
	unsigned int cmd_count;
	struct list_head cmd;
	struct list_head backlog;

	/*
	 * The command queues. These represent the queues available on the
	 * CCP that are available for processing cmds
	 */
	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
	unsigned int cmd_q_count;

	/*
	 * Support for the CCP True RNG
	 */
	struct hwrng hwrng;
	unsigned int hwrng_retries;

	/*
	 * A counter used to generate job-ids for cmds submitted to the CCP
	 */
	atomic_t current_id ____cacheline_aligned;

	/*
	 * The CCP uses key storage blocks (KSB) to maintain context for certain
	 * operations. To prevent multiple cmds from using the same KSB range
	 * a command queue reserves a KSB range for the duration of the cmd.
	 * Each queue, will however, reserve 2 KSB blocks for operations that
	 * only require single KSB entries (eg. AES context/iv and key) in order
	 * to avoid allocation contention. This will reserve at most 10 KSB
	 * entries, leaving 40 KSB entries available for dynamic allocation.
	 */
	struct mutex ksb_mutex ____cacheline_aligned;
	DECLARE_BITMAP(ksb, KSB_COUNT);
	wait_queue_head_t ksb_queue;
	unsigned int ksb_avail;
	unsigned int ksb_count;
	u32 ksb_start;

	/* Suspend support */
	unsigned int suspending;
	wait_queue_head_t suspend_queue;
};
258
259
260int ccp_pci_init(void);
261void ccp_pci_exit(void);
262
263struct ccp_device *ccp_alloc_struct(struct device *dev);
264int ccp_init(struct ccp_device *ccp);
265void ccp_destroy(struct ccp_device *ccp);
266bool ccp_queues_suspended(struct ccp_device *ccp);
267
268irqreturn_t ccp_irq_handler(int irq, void *data);
269
270int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
271
272#endif
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
new file mode 100644
index 000000000000..4be091037549
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -0,0 +1,2020 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/kthread.h>
18#include <linux/sched.h>
19#include <linux/interrupt.h>
20#include <linux/spinlock.h>
21#include <linux/mutex.h>
22#include <linux/delay.h>
23#include <linux/ccp.h>
24#include <linux/scatterlist.h>
25#include <crypto/scatterwalk.h>
26
27#include "ccp-dev.h"
28
29
/* Memory spaces an operation's source/destination can live in */
enum ccp_memtype {
	CCP_MEMTYPE_SYSTEM = 0,
	CCP_MEMTYPE_KSB,
	CCP_MEMTYPE_LOCAL,
	CCP_MEMTYPE__LAST,
};

/* A single mapped DMA region (base + offset addressing) */
struct ccp_dma_info {
	dma_addr_t address;
	unsigned int offset;
	unsigned int length;
	enum dma_data_direction dir;
};

/* A driver-owned, DMA-mapped bounce buffer */
struct ccp_dm_workarea {
	struct device *dev;
	struct dma_pool *dma_pool;
	unsigned int length;

	u8 *address;
	struct ccp_dma_info dma;
};

/* Walk state over a caller-supplied scatterlist */
struct ccp_sg_workarea {
	struct scatterlist *sg;
	unsigned int nents;
	unsigned int length;

	struct scatterlist *dma_sg;
	struct device *dma_dev;
	unsigned int dma_count;
	enum dma_data_direction dma_dir;

	u32 sg_used;

	u32 bytes_left;
};

/* Pairs a scatterlist walk with its bounce buffer */
struct ccp_data {
	struct ccp_sg_workarea sg_wa;
	struct ccp_dm_workarea dm_wa;
};

/* Source or destination of an op: either a DMA region or a KSB slot */
struct ccp_mem {
	enum ccp_memtype type;
	union {
		struct ccp_dma_info dma;
		u32 ksb;
	} u;
};

/* Per-engine operation parameters */
struct ccp_aes_op {
	enum ccp_aes_type type;
	enum ccp_aes_mode mode;
	enum ccp_aes_action action;
};

struct ccp_xts_aes_op {
	enum ccp_aes_action action;
	enum ccp_xts_aes_unit_size unit_size;
};

struct ccp_sha_op {
	enum ccp_sha_type type;
	u64 msg_bits;
};

struct ccp_rsa_op {
	u32 mod_size;
	u32 input_len;
};

struct ccp_passthru_op {
	enum ccp_passthru_bitwise bit_mod;
	enum ccp_passthru_byteswap byte_swap;
};

struct ccp_ecc_op {
	enum ccp_ecc_function function;
};

/* One unit of work submitted to the hardware via ccp_do_cmd() */
struct ccp_op {
	struct ccp_cmd_queue *cmd_q;

	u32 jobid;
	u32 ioc;
	u32 soc;
	u32 ksb_key;
	u32 ksb_ctx;
	u32 init;
	u32 eom;

	struct ccp_mem src;
	struct ccp_mem dst;

	union {
		struct ccp_aes_op aes;
		struct ccp_xts_aes_op xts;
		struct ccp_sha_op sha;
		struct ccp_rsa_op rsa;
		struct ccp_passthru_op passthru;
		struct ccp_ecc_op ecc;
	} u;
};
134
/* The CCP cannot perform zero-length sha operations so the caller
 * is required to buffer data for the final operation. However, a
 * sha operation for a message with a total length of zero is valid
 * so known values are required to supply the result.
 */
/* SHA-1 digest of the empty message (zero-padded to CCP_SHA_CTXSIZE) */
static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

/* SHA-224 digest of the empty message (zero-padded) */
static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
};

/* SHA-256 digest of the empty message */
static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
};
160
161static u32 ccp_addr_lo(struct ccp_dma_info *info)
162{
163 return lower_32_bits(info->address + info->offset);
164}
165
166static u32 ccp_addr_hi(struct ccp_dma_info *info)
167{
168 return upper_32_bits(info->address + info->offset) & 0x0000ffff;
169}
170
/* Submit a prepared set of CMD_REQ register values to the hardware.
 *
 * @op: the operation being submitted (supplies queue, jobid, flags)
 * @cr: values for CMD_REQ1..CMD_REQx
 * @cr_count: number of entries in @cr
 *
 * Writes CMD_REQ1..x then CMD_REQ0 (which starts the job). When an
 * interrupt-on-complete was requested, waits for the irq handler to
 * signal completion. On error all related jobs are deleted from the
 * hardware queue; on stop-on-complete only the head job is deleted.
 * Returns 0 on success or a negative errno.
 */
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
	struct ccp_cmd_queue *cmd_q = op->cmd_q;
	struct ccp_device *ccp = cmd_q->ccp;
	void __iomem *cr_addr;
	u32 cr0, cmd;
	unsigned int i;
	int ret = 0;

	/* We could read a status register to see how many free slots
	 * are actually available, but reading that register resets it
	 * and you could lose some error information.
	 */
	cmd_q->free_slots--;

	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
	      | (op->jobid << REQ0_JOBID_SHIFT)
	      | REQ0_WAIT_FOR_WRITE;

	if (op->soc)
		cr0 |= REQ0_STOP_ON_COMPLETE
		       | REQ0_INT_ON_COMPLETE;

	/* Force an interrupt when the queue is about to fill so the
	 * free-slot count can be refreshed from the status register
	 */
	if (op->ioc || !cmd_q->free_slots)
		cr0 |= REQ0_INT_ON_COMPLETE;

	/* Start at CMD_REQ1 */
	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

	mutex_lock(&ccp->req_mutex);

	/* Write CMD_REQ1 through CMD_REQx first */
	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
		iowrite32(*(cr + i), cr_addr);

	/* Tell the CCP to start (the barrier orders the REQx writes
	 * before the REQ0 kick)
	 */
	wmb();
	iowrite32(cr0, ccp->io_regs + CMD_REQ0);

	mutex_unlock(&ccp->req_mutex);

	if (cr0 & REQ0_INT_ON_COMPLETE) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* On error delete all related jobs from the queue */
			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

			if (!ret)
				ret = -EIO;
		} else if (op->soc) {
			/* Delete just head job from the queue on SoC */
			cmd = DEL_Q_ACTIVE
			      | (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
		}

		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

		cmd_q->int_rcvd = 0;
	}

	return ret;
}
241
/* Program the REQ1-REQ6 register values for an AES operation and
 * submit it via ccp_do_cmd(). Returns ccp_do_cmd()'s result.
 */
static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	/* CFB mode uses a fixed shift size field value of 0x7f */
	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
272
/* Program the REQ1-REQ6 register values for an XTS-AES (128-bit)
 * operation and submit it via ccp_do_cmd().
 */
static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
299
/* Program the REQ1-REQ6 register values for a SHA operation and
 * submit it via ccp_do_cmd(). The message bit length is only written
 * on the final (end-of-message) pass.
 */
static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
		| REQ1_INIT;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);

	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	} else {
		cr[4] = 0;
		cr[5] = 0;
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
325
326static int ccp_perform_rsa(struct ccp_op *op)
327{
328 u32 cr[6];
329
330 /* Fill out the register contents for REQ1 through REQ6 */
331 cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
332 | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
333 | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
334 | REQ1_EOM;
335 cr[1] = op->u.rsa.input_len - 1;
336 cr[2] = ccp_addr_lo(&op->src.u.dma);
337 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
338 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
339 | ccp_addr_hi(&op->src.u.dma);
340 cr[4] = ccp_addr_lo(&op->dst.u.dma);
341 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
342 | ccp_addr_hi(&op->dst.u.dma);
343
344 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
345}
346
347static int ccp_perform_passthru(struct ccp_op *op)
348{
349 u32 cr[6];
350
351 /* Fill out the register contents for REQ1 through REQ6 */
352 cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
353 | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
354 | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
355
356 if (op->src.type == CCP_MEMTYPE_SYSTEM)
357 cr[1] = op->src.u.dma.length - 1;
358 else
359 cr[1] = op->dst.u.dma.length - 1;
360
361 if (op->src.type == CCP_MEMTYPE_SYSTEM) {
362 cr[2] = ccp_addr_lo(&op->src.u.dma);
363 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
364 | ccp_addr_hi(&op->src.u.dma);
365
366 if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
367 cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
368 } else {
369 cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
370 cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
371 }
372
373 if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
374 cr[4] = ccp_addr_lo(&op->dst.u.dma);
375 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
376 | ccp_addr_hi(&op->dst.u.dma);
377 } else {
378 cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
379 cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
380 }
381
382 if (op->eom)
383 cr[0] |= REQ1_EOM;
384
385 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
386}
387
388static int ccp_perform_ecc(struct ccp_op *op)
389{
390 u32 cr[6];
391
392 /* Fill out the register contents for REQ1 through REQ6 */
393 cr[0] = REQ1_ECC_AFFINE_CONVERT
394 | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
395 | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
396 | REQ1_EOM;
397 cr[1] = op->src.u.dma.length - 1;
398 cr[2] = ccp_addr_lo(&op->src.u.dma);
399 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
400 | ccp_addr_hi(&op->src.u.dma);
401 cr[4] = ccp_addr_lo(&op->dst.u.dma);
402 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
403 | ccp_addr_hi(&op->dst.u.dma);
404
405 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
406}
407
/* Reserve a contiguous run of @count entries from the device's shared
 * KSB region, blocking (interruptibly) until space is available.
 * Returns the allocated entry id biased by KSB_START, or 0 if the wait
 * was interrupted - so 0 doubles as the "no allocation" sentinel that
 * ccp_free_ksb() ignores.
 */
static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
{
	int start;

	for (;;) {
		mutex_lock(&ccp->ksb_mutex);

		/* Search the shared area (from ksb_start onward) for a
		 * free run; on failure bitmap_find_next_zero_area()
		 * returns a value greater than ksb_count.
		 */
		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
							ccp->ksb_count,
							ccp->ksb_start,
							count, 0);
		if (start <= ccp->ksb_count) {
			/* Claim the run while still holding the mutex */
			bitmap_set(ccp->ksb, start, count);

			mutex_unlock(&ccp->ksb_mutex);
			break;
		}

		/* No room: clear the availability flag while holding the
		 * mutex so a ccp_free_ksb() that runs between the unlock
		 * and the wait below cannot have its wake-up lost.
		 */
		ccp->ksb_avail = 0;

		mutex_unlock(&ccp->ksb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
			return 0;
	}

	return KSB_START + start;
}
437
/* Release @count KSB entries previously obtained from ccp_alloc_ksb()
 * (@start is the biased id it returned; 0 means "nothing allocated"
 * and is a no-op).  Sets the availability flag under the mutex before
 * waking waiters so the handshake in ccp_alloc_ksb() is race-free.
 */
static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
			 unsigned int count)
{
	if (!start)
		return;

	mutex_lock(&ccp->ksb_mutex);

	/* Remove the KSB_START bias applied by ccp_alloc_ksb() */
	bitmap_clear(ccp->ksb, start - KSB_START, count);

	ccp->ksb_avail = 1;

	mutex_unlock(&ccp->ksb_mutex);

	wake_up_interruptible_all(&ccp->ksb_queue);
}
454
455static u32 ccp_gen_jobid(struct ccp_device *ccp)
456{
457 return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
458}
459
460static void ccp_sg_free(struct ccp_sg_workarea *wa)
461{
462 if (wa->dma_count)
463 dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
464
465 wa->dma_count = 0;
466}
467
468static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
469 struct scatterlist *sg, unsigned int len,
470 enum dma_data_direction dma_dir)
471{
472 memset(wa, 0, sizeof(*wa));
473
474 wa->sg = sg;
475 if (!sg)
476 return 0;
477
478 wa->nents = sg_nents(sg);
479 wa->length = sg->length;
480 wa->bytes_left = len;
481 wa->sg_used = 0;
482
483 if (len == 0)
484 return 0;
485
486 if (dma_dir == DMA_NONE)
487 return 0;
488
489 wa->dma_sg = sg;
490 wa->dma_dev = dev;
491 wa->dma_dir = dma_dir;
492 wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
493 if (!wa->dma_count)
494 return -ENOMEM;
495
496
497 return 0;
498}
499
500static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
501{
502 unsigned int nbytes = min(len, wa->bytes_left);
503
504 if (!wa->sg)
505 return;
506
507 wa->sg_used += nbytes;
508 wa->bytes_left -= nbytes;
509 if (wa->sg_used == wa->sg->length) {
510 wa->sg = sg_next(wa->sg);
511 wa->sg_used = 0;
512 }
513}
514
515static void ccp_dm_free(struct ccp_dm_workarea *wa)
516{
517 if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
518 if (wa->address)
519 dma_pool_free(wa->dma_pool, wa->address,
520 wa->dma.address);
521 } else {
522 if (wa->dma.address)
523 dma_unmap_single(wa->dev, wa->dma.address, wa->length,
524 wa->dma.dir);
525 kfree(wa->address);
526 }
527
528 wa->address = NULL;
529 wa->dma.address = 0;
530}
531
532static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
533 struct ccp_cmd_queue *cmd_q,
534 unsigned int len,
535 enum dma_data_direction dir)
536{
537 memset(wa, 0, sizeof(*wa));
538
539 if (!len)
540 return 0;
541
542 wa->dev = cmd_q->ccp->dev;
543 wa->length = len;
544
545 if (len <= CCP_DMAPOOL_MAX_SIZE) {
546 wa->dma_pool = cmd_q->dma_pool;
547
548 wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
549 &wa->dma.address);
550 if (!wa->address)
551 return -ENOMEM;
552
553 wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
554
555 memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
556 } else {
557 wa->address = kzalloc(len, GFP_KERNEL);
558 if (!wa->address)
559 return -ENOMEM;
560
561 wa->dma.address = dma_map_single(wa->dev, wa->address, len,
562 dir);
563 if (!wa->dma.address)
564 return -ENOMEM;
565
566 wa->dma.length = len;
567 }
568 wa->dma.dir = dir;
569
570 return 0;
571}
572
573static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
574 struct scatterlist *sg, unsigned int sg_offset,
575 unsigned int len)
576{
577 WARN_ON(!wa->address);
578
579 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
580 0);
581}
582
583static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
584 struct scatterlist *sg, unsigned int sg_offset,
585 unsigned int len)
586{
587 WARN_ON(!wa->address);
588
589 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
590 1);
591}
592
/* Reverse-copy @len bytes from the scatterlist into the workarea:
 * the source is consumed from its end in @se_len-sized chunks and each
 * chunk is byte-reversed on store, converting a big-endian operand
 * into the little-endian layout the CCP expects.  A short (final)
 * chunk of a negative value (top bit set) is sign-extended with 0xff
 * up to the @se_len boundary when @sign_extend is set; positive values
 * rely on the workarea having been zero-initialized.
 */
static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
				    struct scatterlist *sg,
				    unsigned int len, unsigned int se_len,
				    bool sign_extend)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	BUG_ON(se_len > sizeof(buffer));

	/* Walk the source backwards and the destination forwards */
	sg_offset = len;
	dm_offset = 0;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, se_len);
		sg_offset -= ksb_len;

		/* Pull the chunk out of the sg, then store it reversed */
		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
		for (i = 0; i < ksb_len; i++)
			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];

		dm_offset += ksb_len;
		nbytes -= ksb_len;

		/* A short chunk can only occur on the last iteration */
		if ((ksb_len != se_len) && sign_extend) {
			/* Must sign-extend to nearest sign-extend length */
			if (wa->address[dm_offset - 1] & 0x80)
				memset(wa->address + dm_offset, 0xff,
				       se_len - ksb_len);
		}
	}
}
625
626static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
627 struct scatterlist *sg,
628 unsigned int len)
629{
630 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
631 u8 buffer[CCP_REVERSE_BUF_SIZE];
632
633 sg_offset = 0;
634 dm_offset = len;
635 nbytes = len;
636 while (nbytes) {
637 ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
638 dm_offset -= ksb_len;
639
640 for (i = 0; i < ksb_len; i++)
641 buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
642 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
643
644 sg_offset += ksb_len;
645 nbytes -= ksb_len;
646 }
647}
648
649static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
650{
651 ccp_dm_free(&data->dm_wa);
652 ccp_sg_free(&data->sg_wa);
653}
654
655static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
656 struct scatterlist *sg, unsigned int sg_len,
657 unsigned int dm_len,
658 enum dma_data_direction dir)
659{
660 int ret;
661
662 memset(data, 0, sizeof(*data));
663
664 ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
665 dir);
666 if (ret)
667 goto e_err;
668
669 ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
670 if (ret)
671 goto e_err;
672
673 return 0;
674
675e_err:
676 ccp_free_data(data, cmd_q);
677
678 return ret;
679}
680
/* Stage up to one bounce-buffer's worth of data between the sg list
 * and the queue's DMA buffer.  @from selects the direction: 0 copies
 * sg data into the (pre-zeroed) buffer, 1 drains the buffer out to the
 * sg.  Returns the number of bytes staged; the sg workarea position is
 * advanced by the same amount.
 */
static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
{
	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
	unsigned int buf_count, nbytes;

	/* Clear the buffer if setting it */
	if (!from)
		memset(dm_wa->address, 0, dm_wa->length);

	if (!sg_wa->sg)
		return 0;

	/* Perform the copy operation */
	nbytes = min(sg_wa->bytes_left, dm_wa->length);
	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
				 nbytes, from);

	/* Update the structures and generate the count.  The walk is
	 * done entry by entry because the single copy above may have
	 * spanned several sg entries.
	 */
	buf_count = 0;
	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
		nbytes = min3(sg_wa->sg->length - sg_wa->sg_used,
			      dm_wa->length - buf_count,
			      sg_wa->bytes_left);

		buf_count += nbytes;
		ccp_update_sg_workarea(sg_wa, nbytes);
	}

	return buf_count;
}
712
/* Copy source-sg data INTO the queue bounce buffer; returns bytes staged. */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 0);
}
717
/* Drain the queue bounce buffer OUT to the destination sg; returns bytes staged. */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 1);
}
722
/* Set up the source (and optionally destination) DMA descriptors in
 * @op for the next hardware pass.  When the current sg element cannot
 * supply a whole @block_size, data is staged through the bounce buffer
 * and op->soc is set so the operation stalls until completion (the
 * buffer is reused).  @blocksize_op forces a buffered source length of
 * exactly @block_size rather than the bytes actually copied.
 */
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op, unsigned int block_size,
			     bool blocksize_op)
{
	unsigned int sg_src_len, sg_dst_len, op_len;

	/* The CCP can only DMA from/to one address each per operation. This
	 * requires that we find the smallest DMA area between the source
	 * and destination.
	 */
	sg_src_len = min(sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used,
			 src->sg_wa.bytes_left);

	if (dst) {
		sg_dst_len = min(sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used,
				 src->sg_wa.bytes_left);
		op_len = min(sg_src_len, sg_dst_len);
	} else
		op_len = sg_src_len;

	/* The data operation length will be at least block_size in length
	 * or the smaller of available sg room remaining for the source or
	 * the destination
	 */
	op_len = max(op_len, block_size);

	/* Unless we have to buffer data, there's no reason to wait */
	op->soc = 0;

	if (sg_src_len < block_size) {
		/* Not enough data in the sg element, so it
		 * needs to be buffered into a blocksize chunk
		 */
		int cp_len = ccp_fill_queue_buf(src);

		op->soc = 1;
		op->src.u.dma.address = src->dm_wa.dma.address;
		op->src.u.dma.offset = 0;
		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
	} else {
		/* Enough data in the sg element, but we need to
		 * adjust for any previously copied data
		 */
		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
		op->src.u.dma.offset = src->sg_wa.sg_used;
		/* Round down to a whole number of blocks */
		op->src.u.dma.length = op_len & ~(block_size - 1);

		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
	}

	if (dst) {
		if (sg_dst_len < block_size) {
			/* Not enough room in the sg element or we're on the
			 * last piece of data (when using padding), so the
			 * output needs to be buffered into a blocksize chunk
			 */
			op->soc = 1;
			op->dst.u.dma.address = dst->dm_wa.dma.address;
			op->dst.u.dma.offset = 0;
			op->dst.u.dma.length = op->src.u.dma.length;
		} else {
			/* Enough room in the sg element, but we need to
			 * adjust for any previously used area
			 */
			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
			op->dst.u.dma.offset = dst->sg_wa.sg_used;
			op->dst.u.dma.length = op->src.u.dma.length;
		}
	}
}
793
794static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
795 struct ccp_op *op)
796{
797 op->init = 0;
798
799 if (dst) {
800 if (op->dst.u.dma.address == dst->dm_wa.dma.address)
801 ccp_empty_queue_buf(dst);
802 else
803 ccp_update_sg_workarea(&dst->sg_wa,
804 op->dst.u.dma.length);
805 }
806}
807
808static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
809 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
810 u32 byte_swap, bool from)
811{
812 struct ccp_op op;
813
814 memset(&op, 0, sizeof(op));
815
816 op.cmd_q = cmd_q;
817 op.jobid = jobid;
818 op.eom = 1;
819
820 if (from) {
821 op.soc = 1;
822 op.src.type = CCP_MEMTYPE_KSB;
823 op.src.u.ksb = ksb;
824 op.dst.type = CCP_MEMTYPE_SYSTEM;
825 op.dst.u.dma.address = wa->dma.address;
826 op.dst.u.dma.length = wa->length;
827 } else {
828 op.src.type = CCP_MEMTYPE_SYSTEM;
829 op.src.u.dma.address = wa->dma.address;
830 op.src.u.dma.length = wa->length;
831 op.dst.type = CCP_MEMTYPE_KSB;
832 op.dst.u.ksb = ksb;
833 }
834
835 op.u.passthru.byte_swap = byte_swap;
836
837 return ccp_perform_passthru(&op);
838}
839
840static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
841 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
842 u32 byte_swap)
843{
844 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
845}
846
847static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
848 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
849 u32 byte_swap)
850{
851 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
852}
853
/* Run an AES-CMAC command: stage the key and IV into the queue's KSB
 * entries, stream the (block-aligned) message through the AES engine,
 * push the caller-supplied K1/K2 subkey into the context entry before
 * the final (EOM) block when cmac_final is set, and read the resulting
 * MAC back into aes->iv.  Resources are released via the goto-cleanup
 * chain at the bottom; returns 0 or a negative errno, with hardware
 * errors also reported through cmd->engine_error.
 */
static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	/* Validate the request before touching any resources */
	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	/* The code below assumes key and context each occupy one entry */
	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Right-align the key within the zero-filled KSB entry */
	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
						op.ksb_ctx,
						CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			/* Overlay the subkey on the retrieved context and
			 * write it back for the final pass
			 */
			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					aes->cmac_key_len);
			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					      CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
1003
/* Run an AES command (ECB/CBC/CFB/...; CMAC is dispatched to
 * ccp_run_aes_cmac_cmd).  The key and, except in ECB mode, the IV are
 * staged into the queue's KSB entries; data is then streamed through
 * the AES engine, supporting in-place operation when src and dst are
 * the same buffer.  For non-ECB modes the final context (IV) is read
 * back into aes->iv.  Returns 0 or a negative errno; hardware errors
 * are also reported through cmd->engine_error.
 */
static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	if (aes->mode == CCP_AES_MODE_CMAC)
		return ccp_run_aes_cmac_cmd(cmd_q, cmd);

	/* Validate the request before touching any resources */
	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* Block-based modes require block-aligned input */
	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC) ||
	     (aes->mode == CCP_AES_MODE_CFB)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	/* Every mode except ECB needs an IV */
	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	/* The code below assumes key and context each occupy one entry */
	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	/* ECB uses no context, so skip engine context initialization */
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Right-align the key within the zero-filled KSB entry */
	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				      CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place)
		dst = src;
	else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
1171
/* Run an XTS-AES (128-bit key) command: stage the dual-half key and
 * the tweak (IV) into the queue's KSB entries, stream the data through
 * the XTS-AES engine (in-place supported), then read the final tweak
 * back into xts->iv.  Returns 0 or a negative errno; hardware errors
 * are also reported through cmd->engine_error.
 */
static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
			       struct ccp_cmd *cmd)
{
	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int unit_size, dm_offset;
	bool in_place = false;
	int ret;

	/* Translate the unit-size enum into a byte count */
	switch (xts->unit_size) {
	case CCP_XTS_AES_UNIT_SIZE_16:
		unit_size = 16;
		break;
	case CCP_XTS_AES_UNIT_SIZE_512:
		unit_size = 512;
		break;
	case CCP_XTS_AES_UNIT_SIZE_1024:
		unit_size = 1024;
		break;
	case CCP_XTS_AES_UNIT_SIZE_2048:
		unit_size = 2048;
		break;
	case CCP_XTS_AES_UNIT_SIZE_4096:
		unit_size = 4096;
		break;

	default:
		return -EINVAL;
	}

	/* Only 128-bit XTS keys are supported by this engine */
	if (xts->key_len != AES_KEYSIZE_128)
		return -EINVAL;

	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (xts->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
		return -EINVAL;

	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.xts.action = xts->action;
	op.u.xts.unit_size = xts->unit_size;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Stage the two key halves swapped within the workarea so the
	 * 256-bit byteswap on the KSB copy lands each half where the
	 * engine expects it.  NOTE(review): both copies read
	 * xts->key_len bytes - assumes the key sg holds the data key
	 * followed by the tweak key; confirm against callers.
	 */
	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * for XTS is already in little endian format so no byte swapping
	 * is needed.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(xts->src) == sg_virt(xts->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
			    unit_size,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place)
		dst = src;
	else {
		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
				    unit_size, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, unit_size, true);
		if (!src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_xts_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
1334
/* Run a SHA command: load the caller's intermediate context into the
 * queue's KSB context entry, stream the (block-aligned unless final)
 * data through the SHA engine, and read the updated context back into
 * sha->ctx.  A zero-length final request is answered directly with the
 * well-known empty-message digest since the hardware cannot perform a
 * zero-length operation.  Returns 0 or a negative errno; hardware
 * errors are also reported through cmd->engine_error.
 */
static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_sha_engine *sha = &cmd->u.sha;
	struct ccp_dm_workarea ctx;
	struct ccp_data src;
	struct ccp_op op;
	int ret;

	if (sha->ctx_len != CCP_SHA_CTXSIZE)
		return -EINVAL;

	if (!sha->ctx)
		return -EINVAL;

	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!sha->src_len) {
		const u8 *sha_zero;

		/* Not final, just return */
		if (!sha->final)
			return 0;

		/* CCP can't do a zero length sha operation so the caller
		 * must buffer the data.
		 */
		if (sha->msg_bits)
			return -EINVAL;

		/* A sha operation for a message with a total length of zero,
		 * return known result.
		 */
		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			sha_zero = ccp_sha1_zero;
			break;
		case CCP_SHA_TYPE_224:
			sha_zero = ccp_sha224_zero;
			break;
		case CCP_SHA_TYPE_256:
			sha_zero = ccp_sha256_zero;
			break;
		default:
			return -EINVAL;
		}

		/* Copy the precomputed digest straight into the ctx sg */
		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
					 sha->ctx_len, 1);

		return 0;
	}

	if (!sha->src)
		return -EINVAL;

	/* The code below assumes the context occupies one KSB entry */
	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.u.sha.type = sha->type;
	op.u.sha.msg_bits = sha->msg_bits;

	/* The SHA context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		return ret;

	ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP SHA engine */
	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
		if (sha->final && !src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_sha(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_data;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the SHA context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping to BE
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_data;
	}

	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

e_data:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

	return ret;
}
1458
/* Run an RSA command: allocate enough shared KSB entries for the
 * exponent, load the reversed (little-endian) exponent into them,
 * build a single input buffer holding the reversed modulus followed by
 * the reversed message, perform one RSA operation, and reverse-copy
 * the result out to rsa->dst.  Returns 0 or a negative errno; hardware
 * errors are also reported through cmd->engine_error.
 */
static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
	struct ccp_dm_workarea exp, src;
	struct ccp_data dst;
	struct ccp_op op;
	unsigned int ksb_count, i_len, o_len;
	int ret;

	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
		return -EINVAL;

	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
		return -EINVAL;

	/* The RSA modulus must precede the message being acted upon, so
	 * it must be copied to a DMA area where the message and the
	 * modulus can be concatenated. Therefore the input buffer
	 * length required is twice the output buffer length (which
	 * must be a multiple of 256-bits).
	 */
	o_len = ((rsa->key_size + 255) / 256) * 32;
	i_len = o_len * 2;

	ksb_count = o_len / CCP_KSB_BYTES;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	/* May block until shared KSB space frees up; 0 means interrupted */
	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
	if (!op.ksb_key)
		return -EIO;

	/* The RSA exponent may span multiple (32-byte) KSB entries and must
	 * be in little endian format. Reverse copy each 32-byte chunk
	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
	 * and each byte within that chunk and do not perform any byte swap
	 * operations on the passthru operation.
	 */
	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
	if (ret)
		goto e_ksb;

	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
				true);
	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_exp;
	}

	/* Concatenate the modulus and the message. Both the modulus and
	 * the operands must be in little endian format. Since the input
	 * is in big endian format it must be converted.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
	if (ret)
		goto e_exp;

	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
				true);
	src.address += o_len;	/* Adjust the address for the copy operation */
	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
				true);
	src.address -= o_len;	/* Reset the address to original value */

	/* Prepare the output area for the operation */
	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
			    o_len, DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	/* Single-shot operation: stall on completion so the result is
	 * present before it is read back below
	 */
	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = i_len;
	op.dst.u.dma.address = dst.dm_wa.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = o_len;

	op.u.rsa.mod_size = rsa->key_size;
	op.u.rsa.input_len = i_len;

	ret = ccp_perform_rsa(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* Reverse the little-endian result back out to the caller */
	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);

e_dst:
	ccp_free_data(&dst, cmd_q);

e_src:
	ccp_dm_free(&src);

e_exp:
	ccp_dm_free(&exp);

e_ksb:
	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);

	return ret;
}
1565
1566static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1567 struct ccp_cmd *cmd)
1568{
1569 struct ccp_passthru_engine *pt = &cmd->u.passthru;
1570 struct ccp_dm_workarea mask;
1571 struct ccp_data src, dst;
1572 struct ccp_op op;
1573 bool in_place = false;
1574 unsigned int i;
1575 int ret;
1576
1577 if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1578 return -EINVAL;
1579
1580 if (!pt->src || !pt->dst)
1581 return -EINVAL;
1582
1583 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1584 if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1585 return -EINVAL;
1586 if (!pt->mask)
1587 return -EINVAL;
1588 }
1589
1590 BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1591
1592 memset(&op, 0, sizeof(op));
1593 op.cmd_q = cmd_q;
1594 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1595
1596 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1597 /* Load the mask */
1598 op.ksb_key = cmd_q->ksb_key;
1599
1600 ret = ccp_init_dm_workarea(&mask, cmd_q,
1601 CCP_PASSTHRU_KSB_COUNT *
1602 CCP_KSB_BYTES,
1603 DMA_TO_DEVICE);
1604 if (ret)
1605 return ret;
1606
1607 ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1608 ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1609 CCP_PASSTHRU_BYTESWAP_NOOP);
1610 if (ret) {
1611 cmd->engine_error = cmd_q->cmd_error;
1612 goto e_mask;
1613 }
1614 }
1615
1616 /* Prepare the input and output data workareas. For in-place
1617 * operations we need to set the dma direction to BIDIRECTIONAL
1618 * and copy the src workarea to the dst workarea.
1619 */
1620 if (sg_virt(pt->src) == sg_virt(pt->dst))
1621 in_place = true;
1622
1623 ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1624 CCP_PASSTHRU_MASKSIZE,
1625 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1626 if (ret)
1627 goto e_mask;
1628
1629 if (in_place)
1630 dst = src;
1631 else {
1632 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1633 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1634 if (ret)
1635 goto e_src;
1636 }
1637
1638 /* Send data to the CCP Passthru engine
1639 * Because the CCP engine works on a single source and destination
1640 * dma address at a time, each entry in the source scatterlist
1641 * (after the dma_map_sg call) must be less than or equal to the
1642 * (remaining) length in the destination scatterlist entry and the
1643 * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1644 */
1645 dst.sg_wa.sg_used = 0;
1646 for (i = 1; i <= src.sg_wa.dma_count; i++) {
1647 if (!dst.sg_wa.sg ||
1648 (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1649 ret = -EINVAL;
1650 goto e_dst;
1651 }
1652
1653 if (i == src.sg_wa.dma_count) {
1654 op.eom = 1;
1655 op.soc = 1;
1656 }
1657
1658 op.src.type = CCP_MEMTYPE_SYSTEM;
1659 op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1660 op.src.u.dma.offset = 0;
1661 op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1662
1663 op.dst.type = CCP_MEMTYPE_SYSTEM;
1664 op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1665 op.src.u.dma.offset = dst.sg_wa.sg_used;
1666 op.src.u.dma.length = op.src.u.dma.length;
1667
1668 ret = ccp_perform_passthru(&op);
1669 if (ret) {
1670 cmd->engine_error = cmd_q->cmd_error;
1671 goto e_dst;
1672 }
1673
1674 dst.sg_wa.sg_used += src.sg_wa.sg->length;
1675 if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1676 dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1677 dst.sg_wa.sg_used = 0;
1678 }
1679 src.sg_wa.sg = sg_next(src.sg_wa.sg);
1680 }
1681
1682e_dst:
1683 if (!in_place)
1684 ccp_free_data(&dst, cmd_q);
1685
1686e_src:
1687 ccp_free_data(&src, cmd_q);
1688
1689e_mask:
1690 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1691 ccp_dm_free(&mask);
1692
1693 return ret;
1694}
1695
/*
 * ccp_run_ecc_mm_cmd - perform an ECC modular math operation via the CCP
 * @cmd_q: the command queue on which to run the operation
 * @cmd: the command descriptor; the request is in cmd->u.ecc
 *
 * Handles the modular multiply, add and invert functions.  The modulus
 * and operand(s) are reverse-copied (the API presents big endian data,
 * the engine consumes little endian) into a fixed-layout source buffer,
 * the ECC engine is run, and the result is reverse-copied back out.
 *
 * Return: 0 on success, negative errno on failure; the engine's own
 * completion status is also stored in ecc->ecc_result.
 */
static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.mm.operand_1 ||
	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	/* Modular inversion takes only a single operand */
	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
		if (!ecc->u.mm.operand_2 ||
		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.mm.result ||
	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, true);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first operand */
	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
				ecc->u.mm.operand_1_len,
				CCP_ECC_OPERAND_SIZE, true);
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
		/* Copy the second operand */
		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
					ecc->u.mm.operand_2_len,
					CCP_ECC_OPERAND_SIZE, true);
		src.address += CCP_ECC_OPERAND_SIZE;
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* The engine reports its own success/failure status within the
	 * output buffer; treat a cleared SUCCESS bit as an I/O error.
	 */
	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the ECC result */
	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
1798
/*
 * ccp_run_ecc_pm_cmd - perform an ECC point math operation via the CCP
 * @cmd_q: the command queue on which to run the operation
 * @cmd: the command descriptor; the request is in cmd->u.ecc
 *
 * Handles point add, point multiply and point double.  The modulus,
 * point coordinates and (depending on the function) the second point,
 * domain "a" parameter and scalar are reverse-copied into a
 * fixed-layout source buffer, the ECC engine is run, and the result
 * point is reverse-copied back out.
 *
 * Return: 0 on success, negative errno on failure; the engine's own
 * completion status is also stored in ecc->ecc_result.
 */
static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.pm.point_1.x ||
	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.point_1.y ||
	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Point add requires a second point */
		if (!ecc->u.pm.point_2.x ||
		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
		    !ecc->u.pm.point_2.y ||
		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;
	} else {
		/* Multiply/double require the domain "a" parameter */
		if (!ecc->u.pm.domain_a ||
		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
			if (!ecc->u.pm.scalar ||
			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
				return -EINVAL;
	}

	if (!ecc->u.pm.result.x ||
	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.result.y ||
	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, true);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first point X and Y coordinate */
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
				ecc->u.pm.point_1.x_len,
				CCP_ECC_OPERAND_SIZE, true);
	src.address += CCP_ECC_OPERAND_SIZE;
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
				ecc->u.pm.point_1.y_len,
				CCP_ECC_OPERAND_SIZE, true);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Set the first point Z coordinate to 1 */
	*(src.address) = 0x01;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Copy the second point X and Y coordinate */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
					ecc->u.pm.point_2.x_len,
					CCP_ECC_OPERAND_SIZE, true);
		src.address += CCP_ECC_OPERAND_SIZE;
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
					ecc->u.pm.point_2.y_len,
					CCP_ECC_OPERAND_SIZE, true);
		src.address += CCP_ECC_OPERAND_SIZE;

		/* Set the second point Z coordinate to 1 */
		*(src.address) = 0x01;
		src.address += CCP_ECC_OPERAND_SIZE;
	} else {
		/* Copy the Domain "a" parameter */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
					ecc->u.pm.domain_a_len,
					CCP_ECC_OPERAND_SIZE, true);
		src.address += CCP_ECC_OPERAND_SIZE;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
			/* Copy the scalar value */
			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
						ecc->u.pm.scalar_len,
						CCP_ECC_OPERAND_SIZE, true);
			src.address += CCP_ECC_OPERAND_SIZE;
		}
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* The engine reports its own success/failure status within the
	 * output buffer; treat a cleared SUCCESS bit as an I/O error.
	 */
	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the workarea address since it is updated as we walk through
	 * to copy the point math result
	 */
	save = dst.address;

	/* Save the ECC result X and Y coordinates */
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;

	/* Restore the workarea address */
	dst.address = save;

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
1960
1961static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1962{
1963 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1964
1965 ecc->ecc_result = 0;
1966
1967 if (!ecc->mod ||
1968 (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1969 return -EINVAL;
1970
1971 switch (ecc->function) {
1972 case CCP_ECC_FUNCTION_MMUL_384BIT:
1973 case CCP_ECC_FUNCTION_MADD_384BIT:
1974 case CCP_ECC_FUNCTION_MINV_384BIT:
1975 return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1976
1977 case CCP_ECC_FUNCTION_PADD_384BIT:
1978 case CCP_ECC_FUNCTION_PMUL_384BIT:
1979 case CCP_ECC_FUNCTION_PDBL_384BIT:
1980 return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1981
1982 default:
1983 return -EINVAL;
1984 }
1985}
1986
1987int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1988{
1989 int ret;
1990
1991 cmd->engine_error = 0;
1992 cmd_q->cmd_error = 0;
1993 cmd_q->int_rcvd = 0;
1994 cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
1995
1996 switch (cmd->engine) {
1997 case CCP_ENGINE_AES:
1998 ret = ccp_run_aes_cmd(cmd_q, cmd);
1999 break;
2000 case CCP_ENGINE_XTS_AES_128:
2001 ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2002 break;
2003 case CCP_ENGINE_SHA:
2004 ret = ccp_run_sha_cmd(cmd_q, cmd);
2005 break;
2006 case CCP_ENGINE_RSA:
2007 ret = ccp_run_rsa_cmd(cmd_q, cmd);
2008 break;
2009 case CCP_ENGINE_PASSTHRU:
2010 ret = ccp_run_passthru_cmd(cmd_q, cmd);
2011 break;
2012 case CCP_ENGINE_ECC:
2013 ret = ccp_run_ecc_cmd(cmd_q, cmd);
2014 break;
2015 default:
2016 ret = -EINVAL;
2017 }
2018
2019 return ret;
2020}
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
new file mode 100644
index 000000000000..1fbeaf1856a8
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -0,0 +1,360 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/kthread.h>
18#include <linux/sched.h>
19#include <linux/interrupt.h>
20#include <linux/spinlock.h>
21#include <linux/delay.h>
22#include <linux/ccp.h>
23
24#include "ccp-dev.h"
25
#define IO_BAR			2	/* BAR expected to hold the CCP registers */
#define MSIX_VECTORS		2	/* maximum MSI-X vectors requested */

/* Bookkeeping for one MSI-X vector: its irq number and registered name */
struct ccp_msix {
	u32 vector;
	char name[16];
};

/* PCI-specific device data, hung off ccp_device->dev_specific */
struct ccp_pci {
	int msix_count;			/* vectors in use; 0 means MSI mode */
	struct ccp_msix msix[MSIX_VECTORS];
};
38
39static int ccp_get_msix_irqs(struct ccp_device *ccp)
40{
41 struct ccp_pci *ccp_pci = ccp->dev_specific;
42 struct device *dev = ccp->dev;
43 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
44 struct msix_entry msix_entry[MSIX_VECTORS];
45 unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
46 int v, ret;
47
48 for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
49 msix_entry[v].entry = v;
50
51 while ((ret = pci_enable_msix(pdev, msix_entry, v)) > 0)
52 v = ret;
53 if (ret)
54 return ret;
55
56 ccp_pci->msix_count = v;
57 for (v = 0; v < ccp_pci->msix_count; v++) {
58 /* Set the interrupt names and request the irqs */
59 snprintf(ccp_pci->msix[v].name, name_len, "ccp-%u", v);
60 ccp_pci->msix[v].vector = msix_entry[v].vector;
61 ret = request_irq(ccp_pci->msix[v].vector, ccp_irq_handler,
62 0, ccp_pci->msix[v].name, dev);
63 if (ret) {
64 dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
65 ret);
66 goto e_irq;
67 }
68 }
69
70 return 0;
71
72e_irq:
73 while (v--)
74 free_irq(ccp_pci->msix[v].vector, dev);
75
76 pci_disable_msix(pdev);
77
78 ccp_pci->msix_count = 0;
79
80 return ret;
81}
82
83static int ccp_get_msi_irq(struct ccp_device *ccp)
84{
85 struct device *dev = ccp->dev;
86 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
87 int ret;
88
89 ret = pci_enable_msi(pdev);
90 if (ret)
91 return ret;
92
93 ret = request_irq(pdev->irq, ccp_irq_handler, 0, "ccp", dev);
94 if (ret) {
95 dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
96 goto e_msi;
97 }
98
99 return 0;
100
101e_msi:
102 pci_disable_msi(pdev);
103
104 return ret;
105}
106
107static int ccp_get_irqs(struct ccp_device *ccp)
108{
109 struct device *dev = ccp->dev;
110 int ret;
111
112 ret = ccp_get_msix_irqs(ccp);
113 if (!ret)
114 return 0;
115
116 /* Couldn't get MSI-X vectors, try MSI */
117 dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
118 ret = ccp_get_msi_irq(ccp);
119 if (!ret)
120 return 0;
121
122 /* Couldn't get MSI interrupt */
123 dev_notice(dev, "could not enable MSI (%d)\n", ret);
124
125 return ret;
126}
127
128static void ccp_free_irqs(struct ccp_device *ccp)
129{
130 struct ccp_pci *ccp_pci = ccp->dev_specific;
131 struct device *dev = ccp->dev;
132 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
133
134 if (ccp_pci->msix_count) {
135 while (ccp_pci->msix_count--)
136 free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
137 dev);
138 pci_disable_msix(pdev);
139 } else {
140 free_irq(pdev->irq, dev);
141 pci_disable_msi(pdev);
142 }
143}
144
145static int ccp_find_mmio_area(struct ccp_device *ccp)
146{
147 struct device *dev = ccp->dev;
148 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
149 resource_size_t io_len;
150 unsigned long io_flags;
151 int bar;
152
153 io_flags = pci_resource_flags(pdev, IO_BAR);
154 io_len = pci_resource_len(pdev, IO_BAR);
155 if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800)))
156 return IO_BAR;
157
158 for (bar = 0; bar < PCI_STD_RESOURCE_END; bar++) {
159 io_flags = pci_resource_flags(pdev, bar);
160 io_len = pci_resource_len(pdev, bar);
161 if ((io_flags & IORESOURCE_MEM) &&
162 (io_len >= (IO_OFFSET + 0x800)))
163 return bar;
164 }
165
166 return -EIO;
167}
168
/*
 * ccp_pci_probe - PCI probe callback for the CCP device
 * @pdev: the PCI device being probed
 * @id: the matching entry from ccp_pci_table
 *
 * Allocates the device structures, claims the PCI regions, maps the
 * MMIO register area, configures the DMA masks (48-bit preferred,
 * 32-bit fallback) and hands off to ccp_init() to bring the device up.
 * Errors unwind in reverse acquisition order via the goto chain.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct ccp_device *ccp;
	struct ccp_pci *ccp_pci;
	struct device *dev = &pdev->dev;
	unsigned int bar;
	int ret;

	ret = -ENOMEM;
	ccp = ccp_alloc_struct(dev);
	if (!ccp)
		goto e_err;

	ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL);
	if (!ccp_pci) {
		ret = -ENOMEM;
		goto e_free1;
	}
	/* Attach the PCI-specific data and irq acquire/release callbacks */
	ccp->dev_specific = ccp_pci;
	ccp->get_irq = ccp_get_irqs;
	ccp->free_irq = ccp_free_irqs;

	ret = pci_request_regions(pdev, "ccp");
	if (ret) {
		dev_err(dev, "pci_request_regions failed (%d)\n", ret);
		goto e_free2;
	}

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(dev, "pci_enable_device failed (%d)\n", ret);
		goto e_regions;
	}

	pci_set_master(pdev);

	ret = ccp_find_mmio_area(ccp);
	if (ret < 0)
		goto e_device;
	bar = ret;

	ret = -EIO;
	ccp->io_map = pci_iomap(pdev, bar, 0);
	if (ccp->io_map == NULL) {
		dev_err(dev, "pci_iomap failed\n");
		goto e_device;
	}
	/* The CCP register block starts IO_OFFSET bytes into the BAR */
	ccp->io_regs = ccp->io_map + IO_OFFSET;

	/* Prefer a 48-bit DMA mask; fall back to 32-bit if unsupported */
	ret = dma_set_mask(dev, DMA_BIT_MASK(48));
	if (ret == 0) {
		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(48));
		if (ret) {
			dev_err(dev,
				"pci_set_consistent_dma_mask failed (%d)\n",
				ret);
			goto e_bar0;
		}
	} else {
		ret = dma_set_mask(dev, DMA_BIT_MASK(32));
		if (ret) {
			dev_err(dev, "pci_set_dma_mask failed (%d)\n", ret);
			goto e_bar0;
		}
	}

	dev_set_drvdata(dev, ccp);

	ret = ccp_init(ccp);
	if (ret)
		goto e_bar0;

	dev_notice(dev, "enabled\n");

	return 0;

e_bar0:
	pci_iounmap(pdev, ccp->io_map);

e_device:
	pci_disable_device(pdev);
	dev_set_drvdata(dev, NULL);

e_regions:
	pci_release_regions(pdev);

e_free2:
	kfree(ccp_pci);

e_free1:
	kfree(ccp);

e_err:
	dev_notice(dev, "initialization failed\n");
	return ret;
}
265
266static void ccp_pci_remove(struct pci_dev *pdev)
267{
268 struct device *dev = &pdev->dev;
269 struct ccp_device *ccp = dev_get_drvdata(dev);
270
271 ccp_destroy(ccp);
272
273 pci_iounmap(pdev, ccp->io_map);
274
275 pci_disable_device(pdev);
276 dev_set_drvdata(dev, NULL);
277
278 pci_release_regions(pdev);
279
280 kfree(ccp);
281
282 dev_notice(dev, "disabled\n");
283}
284
285#ifdef CONFIG_PM
/*
 * ccp_pci_suspend - PCI suspend callback
 * @pdev: the PCI device being suspended
 * @state: target power state (unused here)
 *
 * Sets the device-wide suspending flag under the command lock, wakes
 * every queue kthread so it can observe the flag, then blocks until
 * all queues report themselves suspended.
 *
 * Return: always 0.
 */
static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct device *dev = &pdev->dev;
	struct ccp_device *ccp = dev_get_drvdata(dev);
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&ccp->cmd_lock, flags);

	ccp->suspending = 1;

	/* Wake all the queue kthreads to prepare for suspend */
	for (i = 0; i < ccp->cmd_q_count; i++)
		wake_up_process(ccp->cmd_q[i].kthread);

	spin_unlock_irqrestore(&ccp->cmd_lock, flags);

	/* Wait for all queue kthreads to say they're done */
	while (!ccp_queues_suspended(ccp))
		wait_event_interruptible(ccp->suspend_queue,
					 ccp_queues_suspended(ccp));

	return 0;
}
310
311static int ccp_pci_resume(struct pci_dev *pdev)
312{
313 struct device *dev = &pdev->dev;
314 struct ccp_device *ccp = dev_get_drvdata(dev);
315 unsigned long flags;
316 unsigned int i;
317
318 spin_lock_irqsave(&ccp->cmd_lock, flags);
319
320 ccp->suspending = 0;
321
322 /* Wake up all the kthreads */
323 for (i = 0; i < ccp->cmd_q_count; i++) {
324 ccp->cmd_q[i].suspended = 0;
325 wake_up_process(ccp->cmd_q[i].kthread);
326 }
327
328 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
329
330 return 0;
331}
332#endif
333
/* PCI device IDs handled by this driver */
static DEFINE_PCI_DEVICE_TABLE(ccp_pci_table) = {
	{ PCI_VDEVICE(AMD, 0x1537), },
	/* Last entry must be zero */
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, ccp_pci_table);

/* PCI driver registration; suspend/resume only when power management
 * support is configured.
 */
static struct pci_driver ccp_pci_driver = {
	.name = "AMD Cryptographic Coprocessor",
	.id_table = ccp_pci_table,
	.probe = ccp_pci_probe,
	.remove = ccp_pci_remove,
#ifdef CONFIG_PM
	.suspend = ccp_pci_suspend,
	.resume = ccp_pci_resume,
#endif
};
351
/* Register the CCP PCI driver; called from the module init path */
int ccp_pci_init(void)
{
	return pci_register_driver(&ccp_pci_driver);
}
356
/* Unregister the CCP PCI driver; called from the module exit path */
void ccp_pci_exit(void)
{
	pci_unregister_driver(&ccp_pci_driver);
}