author      Dan Streetman <ddstreet@ieee.org>          2015-05-07 13:49:20 -0400
committer   Herbert Xu <herbert@gondor.apana.org.au>   2015-05-11 03:06:47 -0400
commit      b8e04187c90107c58d1ccbeb68a0ba4c5bfd4167 (patch)
tree        4e774d0ef34dad2626c01f0c417cffaedf5b8dc7 /drivers/crypto
parent      99182a42b7ef3d5e4180992ce01befd9e87526d2 (diff)
crypto: nx - simplify pSeries nx842 driver
Simplify the pSeries NX-842 driver: do not expect incoming buffers to be exactly page-sized; do not break up input buffers to compress smaller blocks; do not use any internal headers in the compressed data blocks; remove the software decompression implementation; implement the pSeries nx842_constraints.

This changes the pSeries NX-842 driver to perform constraints-based compression so that it only needs to compress one entire input block at a time. This removes the need for it to split input data blocks into multiple compressed data sections in the output buffer, and removes the need for any extra header info in the compressed data; all that is moved (in a later patch) into the main crypto 842 driver. Additionally, the 842 software decompression implementation is no longer needed here, as the crypto 842 driver will use the generic software 842 decompression function as a fallback if any hardware 842 driver fails.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
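For context, the constraints-based model described above works roughly as follows: the driver advertises an nx842_constraints structure (alignment, multiple, minimum, maximum), and callers are expected to present buffers that already satisfy it, so the driver no longer splits pages into blocks or wraps the output in a header. A minimal sketch, assuming the struct fields used in the diff below; the fit_to_constraints() helper is hypothetical and not part of the patch:

    /* Hypothetical sketch only (not part of the patch): clamp a request to
     * the driver's advertised constraints, mirroring what the new
     * check_constraints() helper below enforces for output buffers.
     */
    #include <linux/errno.h>
    #include <linux/kernel.h>

    struct nx842_constraints {
            int alignment;  /* required buffer alignment */
            int multiple;   /* length must be a multiple of this */
            int minimum;    /* smallest length the hardware accepts */
            int maximum;    /* largest length per operation */
    };

    static int fit_to_constraints(const struct nx842_constraints *c,
                                  unsigned long buf, unsigned int *len)
    {
            if (!IS_ALIGNED(buf, c->alignment))
                    return -EINVAL;         /* caller must realign */
            *len = min_t(unsigned int, *len, c->maximum);
            *len = round_down(*len, c->multiple);
            if (*len < c->minimum)
                    return -EINVAL;         /* not enough data left */
            return 0;
    }

With buffers prepared this way, the driver itself only needs the lightweight check_constraints() validation added in the diff, rather than bouncing, splitting, or re-padding data internally.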
Diffstat (limited to 'drivers/crypto')
-rw-r--r--   drivers/crypto/nx/nx-842-pseries.c   779
1 file changed, 153 insertions, 626 deletions
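A pattern that recurs in both the rewritten compress and decompress paths below is the test for whether a buffer crosses a 4K hardware page boundary, which decides between a direct DDE and an indirect DDE (scatterlist). A standalone sketch of that test, assuming the NX842_HW_PAGE_* definitions from the patch; the fits_in_one_hw_page() helper name is hypothetical:

    /* Hypothetical sketch of the direct-vs-indirect DDE decision used in the
     * patch: a direct DDE is possible only when the buffer does not cross a
     * 4K hardware page boundary; otherwise a scatterlist is built.
     */
    #include <linux/types.h>

    #define NX842_HW_PAGE_SIZE      (4096)
    #define NX842_HW_PAGE_MASK      (~(NX842_HW_PAGE_SIZE - 1))

    static bool fits_in_one_hw_page(unsigned long buf, unsigned int len)
    {
            /* first and last byte of the buffer land on the same 4K page */
            return (buf & NX842_HW_PAGE_MASK) ==
                   ((buf + len - 1) & NX842_HW_PAGE_MASK);
    }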
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 6db99924652c..85837e96e9a3 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -21,7 +21,6 @@
21 * Seth Jennings <sjenning@linux.vnet.ibm.com> 21 * Seth Jennings <sjenning@linux.vnet.ibm.com>
22 */ 22 */
23 23
24#include <asm/page.h>
25#include <asm/vio.h> 24#include <asm/vio.h>
26 25
27#include "nx-842.h" 26#include "nx-842.h"
@@ -32,11 +31,6 @@ MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); 31MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
33MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); 32MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
34 33
35#define SHIFT_4K 12
36#define SHIFT_64K 16
37#define SIZE_4K (1UL << SHIFT_4K)
38#define SIZE_64K (1UL << SHIFT_64K)
39
40/* IO buffer must be 128 byte aligned */ 34/* IO buffer must be 128 byte aligned */
41#define IO_BUFFER_ALIGN 128 35#define IO_BUFFER_ALIGN 128
42 36
@@ -47,18 +41,52 @@ static struct nx842_constraints nx842_pseries_constraints = {
47 .maximum = PAGE_SIZE, /* dynamic, max_sync_size */ 41 .maximum = PAGE_SIZE, /* dynamic, max_sync_size */
48}; 42};
49 43
50struct nx842_header { 44static int check_constraints(unsigned long buf, unsigned int *len, bool in)
51 int blocks_nr; /* number of compressed blocks */
52 int offset; /* offset of the first block (from beginning of header) */
53 int sizes[0]; /* size of compressed blocks */
54};
55
56static inline int nx842_header_size(const struct nx842_header *hdr)
57{ 45{
58 return sizeof(struct nx842_header) + 46 if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) {
59 hdr->blocks_nr * sizeof(hdr->sizes[0]); 47 pr_debug("%s buffer 0x%lx not aligned to 0x%x\n",
48 in ? "input" : "output", buf,
49 nx842_pseries_constraints.alignment);
50 return -EINVAL;
51 }
52 if (*len % nx842_pseries_constraints.multiple) {
53 pr_debug("%s buffer len 0x%x not multiple of 0x%x\n",
54 in ? "input" : "output", *len,
55 nx842_pseries_constraints.multiple);
56 if (in)
57 return -EINVAL;
58 *len = round_down(*len, nx842_pseries_constraints.multiple);
59 }
60 if (*len < nx842_pseries_constraints.minimum) {
61 pr_debug("%s buffer len 0x%x under minimum 0x%x\n",
62 in ? "input" : "output", *len,
63 nx842_pseries_constraints.minimum);
64 return -EINVAL;
65 }
66 if (*len > nx842_pseries_constraints.maximum) {
67 pr_debug("%s buffer len 0x%x over maximum 0x%x\n",
68 in ? "input" : "output", *len,
69 nx842_pseries_constraints.maximum);
70 if (in)
71 return -EINVAL;
72 *len = nx842_pseries_constraints.maximum;
73 }
74 return 0;
60} 75}
61 76
77/* I assume we need to align the CSB? */
78#define WORKMEM_ALIGN (256)
79
80struct nx842_workmem {
81 /* scatterlist */
82 char slin[4096];
83 char slout[4096];
84 /* coprocessor status/parameter block */
85 struct nx_csbcpb csbcpb;
86
87 char padding[WORKMEM_ALIGN];
88} __aligned(WORKMEM_ALIGN);
89
62/* Macros for fields within nx_csbcpb */ 90/* Macros for fields within nx_csbcpb */
63/* Check the valid bit within the csbcpb valid field */ 91/* Check the valid bit within the csbcpb valid field */
64#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) 92#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
@@ -72,8 +100,7 @@ static inline int nx842_header_size(const struct nx842_header *hdr)
72#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) 100#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5))
73 101
74/* The NX unit accepts data only on 4K page boundaries */ 102/* The NX unit accepts data only on 4K page boundaries */
75#define NX842_HW_PAGE_SHIFT SHIFT_4K 103#define NX842_HW_PAGE_SIZE (4096)
76#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
77#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) 104#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
78 105
79enum nx842_status { 106enum nx842_status {
@@ -194,41 +221,6 @@ static int nx842_build_scatterlist(unsigned long buf, int len,
194 return 0; 221 return 0;
195} 222}
196 223
197/*
198 * Working memory for software decompression
199 */
200struct sw842_fifo {
201 union {
202 char f8[256][8];
203 char f4[512][4];
204 };
205 char f2[256][2];
206 unsigned char f84_full;
207 unsigned char f2_full;
208 unsigned char f8_count;
209 unsigned char f2_count;
210 unsigned int f4_count;
211};
212
213/*
214 * Working memory for crypto API
215 */
216struct nx842_workmem {
217 char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
218 union {
219 /* hardware working memory */
220 struct {
221 /* scatterlist */
222 char slin[SIZE_4K];
223 char slout[SIZE_4K];
224 /* coprocessor status/parameter block */
225 struct nx_csbcpb csbcpb;
226 };
227 /* software working memory */
228 struct sw842_fifo swfifo; /* software decompression fifo */
229 };
230};
231
232static int nx842_validate_result(struct device *dev, 224static int nx842_validate_result(struct device *dev,
233 struct cop_status_block *csb) 225 struct cop_status_block *csb)
234{ 226{
@@ -291,8 +283,8 @@ static int nx842_validate_result(struct device *dev,
291 * compressed data. If there is an error then @outlen will be 0 and an 283 * compressed data. If there is an error then @outlen will be 0 and an
292 * error will be specified by the return code from this function. 284 * error will be specified by the return code from this function.
293 * 285 *
294 * @in: Pointer to input buffer, must be page aligned 286 * @in: Pointer to input buffer
295 * @inlen: Length of input buffer, must be PAGE_SIZE 287 * @inlen: Length of input buffer
296 * @out: Pointer to output buffer 288 * @out: Pointer to output buffer
297 * @outlen: Length of output buffer 289 * @outlen: Length of output buffer
298 * @wrkmem: ptr to buffer for working memory, size determined by 290 * @wrkmem: ptr to buffer for working memory, size determined by
@@ -302,7 +294,6 @@ static int nx842_validate_result(struct device *dev,
302 * 0 Success, output of length @outlen stored in the buffer at @out 294 * 0 Success, output of length @outlen stored in the buffer at @out
303 * -ENOMEM Unable to allocate internal buffers 295 * -ENOMEM Unable to allocate internal buffers
304 * -ENOSPC Output buffer is to small 296 * -ENOSPC Output buffer is to small
305 * -EMSGSIZE XXX Difficult to describe this limitation
306 * -EIO Internal error 297 * -EIO Internal error
307 * -ENODEV Hardware unavailable 298 * -ENODEV Hardware unavailable
308 */ 299 */
@@ -310,29 +301,26 @@ static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
310 unsigned char *out, unsigned int *outlen, 301 unsigned char *out, unsigned int *outlen,
311 void *wmem) 302 void *wmem)
312{ 303{
313 struct nx842_header *hdr;
314 struct nx842_devdata *local_devdata; 304 struct nx842_devdata *local_devdata;
315 struct device *dev = NULL; 305 struct device *dev = NULL;
316 struct nx842_workmem *workmem; 306 struct nx842_workmem *workmem;
317 struct nx842_scatterlist slin, slout; 307 struct nx842_scatterlist slin, slout;
318 struct nx_csbcpb *csbcpb; 308 struct nx_csbcpb *csbcpb;
319 int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; 309 int ret = 0, max_sync_size;
320 unsigned long inbuf, outbuf, padding; 310 unsigned long inbuf, outbuf;
321 struct vio_pfo_op op = { 311 struct vio_pfo_op op = {
322 .done = NULL, 312 .done = NULL,
323 .handle = 0, 313 .handle = 0,
324 .timeout = 0, 314 .timeout = 0,
325 }; 315 };
326 unsigned long start_time = get_tb(); 316 unsigned long start = get_tb();
327 317
328 /*
329 * Make sure input buffer is 64k page aligned. This is assumed since
330 * this driver is designed for page compression only (for now). This
331 * is very nice since we can now use direct DDE(s) for the input and
332 * the alignment is guaranteed.
333 */
334 inbuf = (unsigned long)in; 318 inbuf = (unsigned long)in;
335 if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) 319 if (check_constraints(inbuf, &inlen, true))
320 return -EINVAL;
321
322 outbuf = (unsigned long)out;
323 if (check_constraints(outbuf, outlen, false))
336 return -EINVAL; 324 return -EINVAL;
337 325
338 rcu_read_lock(); 326 rcu_read_lock();
@@ -344,16 +332,8 @@ static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
344 max_sync_size = local_devdata->max_sync_size; 332 max_sync_size = local_devdata->max_sync_size;
345 dev = local_devdata->dev; 333 dev = local_devdata->dev;
346 334
347 /* Create the header */
348 hdr = (struct nx842_header *)out;
349 hdr->blocks_nr = PAGE_SIZE / max_sync_size;
350 hdrsize = nx842_header_size(hdr);
351 outbuf = (unsigned long)out + hdrsize;
352 bytesleft = *outlen - hdrsize;
353
354 /* Init scatterlist */ 335 /* Init scatterlist */
355 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 336 workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
356 NX842_HW_PAGE_SIZE);
357 slin.entries = (struct nx842_slentry *)workmem->slin; 337 slin.entries = (struct nx842_slentry *)workmem->slin;
358 slout.entries = (struct nx842_slentry *)workmem->slout; 338 slout.entries = (struct nx842_slentry *)workmem->slout;
359 339
@@ -364,105 +344,48 @@ static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
364 op.csbcpb = nx842_get_pa(csbcpb); 344 op.csbcpb = nx842_get_pa(csbcpb);
365 op.out = nx842_get_pa(slout.entries); 345 op.out = nx842_get_pa(slout.entries);
366 346
367 for (i = 0; i < hdr->blocks_nr; i++) { 347 if ((inbuf & NX842_HW_PAGE_MASK) ==
368 /* 348 ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
369 * Aligning the output blocks to 128 bytes does waste space, 349 /* Create direct DDE */
370 * but it prevents the need for bounce buffers and memory 350 op.in = nx842_get_pa((void *)inbuf);
371 * copies. It also simplifies the code a lot. In the worst 351 op.inlen = inlen;
372 * case (64k page, 4k max_sync_size), you lose up to 352 } else {
373 * (128*16)/64k = ~3% the compression factor. For 64k 353 /* Create indirect DDE (scatterlist) */
374 * max_sync_size, the loss would be at most 128/64k = ~0.2%. 354 nx842_build_scatterlist(inbuf, inlen, &slin);
375 */ 355 op.in = nx842_get_pa(slin.entries);
376 padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; 356 op.inlen = -nx842_get_scatterlist_size(&slin);
377 outbuf += padding; 357 }
378 bytesleft -= padding;
379 if (i == 0)
380 /* save offset into first block in header */
381 hdr->offset = padding + hdrsize;
382
383 if (bytesleft <= 0) {
384 ret = -ENOSPC;
385 goto unlock;
386 }
387
388 /*
389 * NOTE: If the default max_sync_size is changed from 4k
390 * to 64k, remove the "likely" case below, since a
391 * scatterlist will always be needed.
392 */
393 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
394 /* Create direct DDE */
395 op.in = nx842_get_pa((void *)inbuf);
396 op.inlen = max_sync_size;
397
398 } else {
399 /* Create indirect DDE (scatterlist) */
400 nx842_build_scatterlist(inbuf, max_sync_size, &slin);
401 op.in = nx842_get_pa(slin.entries);
402 op.inlen = -nx842_get_scatterlist_size(&slin);
403 }
404 358
405 /* 359 if ((outbuf & NX842_HW_PAGE_MASK) ==
406 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect 360 ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
407 * DDE is required for the outbuf. 361 /* Create direct DDE */
408 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must 362 op.out = nx842_get_pa((void *)outbuf);
409 * also be page aligned (1 in 128/4k=32 chance) in order 363 op.outlen = *outlen;
410 * to use a direct DDE. 364 } else {
411 * This is unlikely, just use an indirect DDE always. 365 /* Create indirect DDE (scatterlist) */
412 */ 366 nx842_build_scatterlist(outbuf, *outlen, &slout);
413 nx842_build_scatterlist(outbuf, 367 op.out = nx842_get_pa(slout.entries);
414 min(bytesleft, max_sync_size), &slout);
415 /* op.out set before loop */
416 op.outlen = -nx842_get_scatterlist_size(&slout); 368 op.outlen = -nx842_get_scatterlist_size(&slout);
369 }
417 370
418 /* Send request to pHyp */ 371 /* Send request to pHyp */
419 ret = vio_h_cop_sync(local_devdata->vdev, &op); 372 ret = vio_h_cop_sync(local_devdata->vdev, &op);
420
421 /* Check for pHyp error */
422 if (ret) {
423 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
424 __func__, ret, op.hcall_err);
425 ret = -EIO;
426 goto unlock;
427 }
428
429 /* Check for hardware error */
430 ret = nx842_validate_result(dev, &csbcpb->csb);
431 if (ret && ret != -ENOSPC)
432 goto unlock;
433
434 /* Handle incompressible data */
435 if (unlikely(ret == -ENOSPC)) {
436 if (bytesleft < max_sync_size) {
437 /*
438 * Not enough space left in the output buffer
439 * to store uncompressed block
440 */
441 goto unlock;
442 } else {
443 /* Store incompressible block */
444 memcpy((void *)outbuf, (void *)inbuf,
445 max_sync_size);
446 hdr->sizes[i] = -max_sync_size;
447 outbuf += max_sync_size;
448 bytesleft -= max_sync_size;
449 /* Reset ret, incompressible data handled */
450 ret = 0;
451 }
452 } else {
453 /* Normal case, compression was successful */
454 size = csbcpb->csb.processed_byte_count;
455 dev_dbg(dev, "%s: processed_bytes=%d\n",
456 __func__, size);
457 hdr->sizes[i] = size;
458 outbuf += size;
459 bytesleft -= size;
460 }
461 373
462 inbuf += max_sync_size; 374 /* Check for pHyp error */
375 if (ret) {
376 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
377 __func__, ret, op.hcall_err);
378 ret = -EIO;
379 goto unlock;
463 } 380 }
464 381
465 *outlen = (unsigned int)(outbuf - (unsigned long)out); 382 /* Check for hardware error */
383 ret = nx842_validate_result(dev, &csbcpb->csb);
384 if (ret)
385 goto unlock;
386
387 *outlen = csbcpb->csb.processed_byte_count;
388 dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen);
466 389
467unlock: 390unlock:
468 if (ret) 391 if (ret)
@@ -470,15 +393,12 @@ unlock:
470 else { 393 else {
471 nx842_inc_comp_complete(local_devdata); 394 nx842_inc_comp_complete(local_devdata);
472 ibm_nx842_incr_hist(local_devdata->counters->comp_times, 395 ibm_nx842_incr_hist(local_devdata->counters->comp_times,
473 (get_tb() - start_time) / tb_ticks_per_usec); 396 (get_tb() - start) / tb_ticks_per_usec);
474 } 397 }
475 rcu_read_unlock(); 398 rcu_read_unlock();
476 return ret; 399 return ret;
477} 400}
478 401
479static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
480 const void *);
481
482/** 402/**
483 * nx842_pseries_decompress - Decompress data using the 842 algorithm 403 * nx842_pseries_decompress - Decompress data using the 842 algorithm
484 * 404 *
@@ -490,11 +410,10 @@ static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
490 * If there is an error then @outlen will be 0 and an error will be 410 * If there is an error then @outlen will be 0 and an error will be
491 * specified by the return code from this function. 411 * specified by the return code from this function.
492 * 412 *
493 * @in: Pointer to input buffer, will use bounce buffer if not 128 byte 413 * @in: Pointer to input buffer
494 * aligned
495 * @inlen: Length of input buffer 414 * @inlen: Length of input buffer
496 * @out: Pointer to output buffer, must be page aligned 415 * @out: Pointer to output buffer
497 * @outlen: Length of output buffer, must be PAGE_SIZE 416 * @outlen: Length of output buffer
498 * @wrkmem: ptr to buffer for working memory, size determined by 417 * @wrkmem: ptr to buffer for working memory, size determined by
499 * NX842_MEM_COMPRESS 418 * NX842_MEM_COMPRESS
500 * 419 *
@@ -510,43 +429,39 @@ static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen,
510 unsigned char *out, unsigned int *outlen, 429 unsigned char *out, unsigned int *outlen,
511 void *wmem) 430 void *wmem)
512{ 431{
513 struct nx842_header *hdr;
514 struct nx842_devdata *local_devdata; 432 struct nx842_devdata *local_devdata;
515 struct device *dev = NULL; 433 struct device *dev = NULL;
516 struct nx842_workmem *workmem; 434 struct nx842_workmem *workmem;
517 struct nx842_scatterlist slin, slout; 435 struct nx842_scatterlist slin, slout;
518 struct nx_csbcpb *csbcpb; 436 struct nx_csbcpb *csbcpb;
519 int ret = 0, i, size, max_sync_size; 437 int ret = 0, max_sync_size;
520 unsigned long inbuf, outbuf; 438 unsigned long inbuf, outbuf;
521 struct vio_pfo_op op = { 439 struct vio_pfo_op op = {
522 .done = NULL, 440 .done = NULL,
523 .handle = 0, 441 .handle = 0,
524 .timeout = 0, 442 .timeout = 0,
525 }; 443 };
526 unsigned long start_time = get_tb(); 444 unsigned long start = get_tb();
527 445
528 /* Ensure page alignment and size */ 446 /* Ensure page alignment and size */
447 inbuf = (unsigned long)in;
448 if (check_constraints(inbuf, &inlen, true))
449 return -EINVAL;
450
529 outbuf = (unsigned long)out; 451 outbuf = (unsigned long)out;
530 if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) 452 if (check_constraints(outbuf, outlen, false))
531 return -EINVAL; 453 return -EINVAL;
532 454
533 rcu_read_lock(); 455 rcu_read_lock();
534 local_devdata = rcu_dereference(devdata); 456 local_devdata = rcu_dereference(devdata);
535 if (local_devdata) 457 if (!local_devdata || !local_devdata->dev) {
536 dev = local_devdata->dev; 458 rcu_read_unlock();
537 459 return -ENODEV;
538 /* Get header */
539 hdr = (struct nx842_header *)in;
540
541 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
542 NX842_HW_PAGE_SIZE);
543
544 inbuf = (unsigned long)in + hdr->offset;
545 if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
546 /* Copy block(s) into bounce buffer for alignment */
547 memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
548 inbuf = (unsigned long)workmem->bounce;
549 } 460 }
461 max_sync_size = local_devdata->max_sync_size;
462 dev = local_devdata->dev;
463
464 workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
550 465
551 /* Init scatterlist */ 466 /* Init scatterlist */
552 slin.entries = (struct nx842_slentry *)workmem->slin; 467 slin.entries = (struct nx842_slentry *)workmem->slin;
@@ -558,119 +473,55 @@ static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen,
558 memset(csbcpb, 0, sizeof(*csbcpb)); 473 memset(csbcpb, 0, sizeof(*csbcpb));
559 op.csbcpb = nx842_get_pa(csbcpb); 474 op.csbcpb = nx842_get_pa(csbcpb);
560 475
561 /* 476 if ((inbuf & NX842_HW_PAGE_MASK) ==
562 * max_sync_size may have changed since compression, 477 ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
563 * so we can't read it from the device info. We need 478 /* Create direct DDE */
564 * to derive it from hdr->blocks_nr. 479 op.in = nx842_get_pa((void *)inbuf);
565 */ 480 op.inlen = inlen;
566 max_sync_size = PAGE_SIZE / hdr->blocks_nr; 481 } else {
567 482 /* Create indirect DDE (scatterlist) */
568 for (i = 0; i < hdr->blocks_nr; i++) { 483 nx842_build_scatterlist(inbuf, inlen, &slin);
569 /* Skip padding */ 484 op.in = nx842_get_pa(slin.entries);
570 inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); 485 op.inlen = -nx842_get_scatterlist_size(&slin);
571 486 }
572 if (hdr->sizes[i] < 0) {
573 /* Negative sizes indicate uncompressed data blocks */
574 size = abs(hdr->sizes[i]);
575 memcpy((void *)outbuf, (void *)inbuf, size);
576 outbuf += size;
577 inbuf += size;
578 continue;
579 }
580
581 if (!dev)
582 goto sw;
583
584 /*
585 * The better the compression, the more likely the "likely"
586 * case becomes.
587 */
588 if (likely((inbuf & NX842_HW_PAGE_MASK) ==
589 ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
590 /* Create direct DDE */
591 op.in = nx842_get_pa((void *)inbuf);
592 op.inlen = hdr->sizes[i];
593 } else {
594 /* Create indirect DDE (scatterlist) */
595 nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
596 op.in = nx842_get_pa(slin.entries);
597 op.inlen = -nx842_get_scatterlist_size(&slin);
598 }
599
600 /*
601 * NOTE: If the default max_sync_size is changed from 4k
602 * to 64k, remove the "likely" case below, since a
603 * scatterlist will always be needed.
604 */
605 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
606 /* Create direct DDE */
607 op.out = nx842_get_pa((void *)outbuf);
608 op.outlen = max_sync_size;
609 } else {
610 /* Create indirect DDE (scatterlist) */
611 nx842_build_scatterlist(outbuf, max_sync_size, &slout);
612 op.out = nx842_get_pa(slout.entries);
613 op.outlen = -nx842_get_scatterlist_size(&slout);
614 }
615
616 /* Send request to pHyp */
617 ret = vio_h_cop_sync(local_devdata->vdev, &op);
618
619 /* Check for pHyp error */
620 if (ret) {
621 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
622 __func__, ret, op.hcall_err);
623 dev = NULL;
624 goto sw;
625 }
626 487
627 /* Check for hardware error */ 488 if ((outbuf & NX842_HW_PAGE_MASK) ==
628 ret = nx842_validate_result(dev, &csbcpb->csb); 489 ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
629 if (ret) { 490 /* Create direct DDE */
630 dev = NULL; 491 op.out = nx842_get_pa((void *)outbuf);
631 goto sw; 492 op.outlen = *outlen;
632 } 493 } else {
494 /* Create indirect DDE (scatterlist) */
495 nx842_build_scatterlist(outbuf, *outlen, &slout);
496 op.out = nx842_get_pa(slout.entries);
497 op.outlen = -nx842_get_scatterlist_size(&slout);
498 }
633 499
634 /* HW decompression success */ 500 /* Send request to pHyp */
635 inbuf += hdr->sizes[i]; 501 ret = vio_h_cop_sync(local_devdata->vdev, &op);
636 outbuf += csbcpb->csb.processed_byte_count;
637 continue;
638
639sw:
640 /* software decompression */
641 size = max_sync_size;
642 ret = sw842_decompress(
643 (unsigned char *)inbuf, hdr->sizes[i],
644 (unsigned char *)outbuf, &size, wmem);
645 if (ret)
646 pr_debug("%s: sw842_decompress failed with %d\n",
647 __func__, ret);
648
649 if (ret) {
650 if (ret != -ENOSPC && ret != -EINVAL &&
651 ret != -EMSGSIZE)
652 ret = -EIO;
653 goto unlock;
654 }
655 502
656 /* SW decompression success */ 503 /* Check for pHyp error */
657 inbuf += hdr->sizes[i]; 504 if (ret) {
658 outbuf += size; 505 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
506 __func__, ret, op.hcall_err);
507 goto unlock;
659 } 508 }
660 509
661 *outlen = (unsigned int)(outbuf - (unsigned long)out); 510 /* Check for hardware error */
511 ret = nx842_validate_result(dev, &csbcpb->csb);
512 if (ret)
513 goto unlock;
514
515 *outlen = csbcpb->csb.processed_byte_count;
662 516
663unlock: 517unlock:
664 if (ret) 518 if (ret)
665 /* decompress fail */ 519 /* decompress fail */
666 nx842_inc_decomp_failed(local_devdata); 520 nx842_inc_decomp_failed(local_devdata);
667 else { 521 else {
668 if (!dev)
669 /* software decompress */
670 nx842_inc_swdecomp(local_devdata);
671 nx842_inc_decomp_complete(local_devdata); 522 nx842_inc_decomp_complete(local_devdata);
672 ibm_nx842_incr_hist(local_devdata->counters->decomp_times, 523 ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
673 (get_tb() - start_time) / tb_ticks_per_usec); 524 (get_tb() - start) / tb_ticks_per_usec);
674 } 525 }
675 526
676 rcu_read_unlock(); 527 rcu_read_unlock();
@@ -829,9 +680,9 @@ static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
829 maxsynccop->decomp_data_limit); 680 maxsynccop->decomp_data_limit);
830 681
831 devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, 682 devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
832 SIZE_64K); 683 65536);
833 684
834 if (devdata->max_sync_size < SIZE_4K) { 685 if (devdata->max_sync_size < 4096) {
835 dev_err(devdata->dev, "%s: hardware max data size (%u) is " 686 dev_err(devdata->dev, "%s: hardware max data size (%u) is "
836 "less than the driver minimum, unable to use " 687 "less than the driver minimum, unable to use "
837 "the hardware device\n", 688 "the hardware device\n",
@@ -1220,17 +1071,17 @@ static int __exit nx842_remove(struct vio_dev *viodev)
1220 return 0; 1071 return 0;
1221} 1072}
1222 1073
1223static struct vio_device_id nx842_driver_ids[] = { 1074static struct vio_device_id nx842_vio_driver_ids[] = {
1224 {NX842_PSERIES_COMPAT_NAME "-v1", NX842_PSERIES_COMPAT_NAME}, 1075 {NX842_PSERIES_COMPAT_NAME "-v1", NX842_PSERIES_COMPAT_NAME},
1225 {"", ""}, 1076 {"", ""},
1226}; 1077};
1227 1078
1228static struct vio_driver nx842_driver = { 1079static struct vio_driver nx842_vio_driver = {
1229 .name = MODULE_NAME, 1080 .name = MODULE_NAME,
1230 .probe = nx842_probe, 1081 .probe = nx842_probe,
1231 .remove = __exit_p(nx842_remove), 1082 .remove = __exit_p(nx842_remove),
1232 .get_desired_dma = nx842_get_desired_dma, 1083 .get_desired_dma = nx842_get_desired_dma,
1233 .id_table = nx842_driver_ids, 1084 .id_table = nx842_vio_driver_ids,
1234}; 1085};
1235 1086
1236static int __init nx842_init(void) 1087static int __init nx842_init(void)
@@ -1249,7 +1100,7 @@ static int __init nx842_init(void)
1249 new_devdata->status = UNAVAILABLE; 1100 new_devdata->status = UNAVAILABLE;
1250 RCU_INIT_POINTER(devdata, new_devdata); 1101 RCU_INIT_POINTER(devdata, new_devdata);
1251 1102
1252 return vio_register_driver(&nx842_driver); 1103 return vio_register_driver(&nx842_vio_driver);
1253} 1104}
1254 1105
1255module_init(nx842_init); 1106module_init(nx842_init);
@@ -1266,336 +1117,12 @@ static void __exit nx842_exit(void)
1266 RCU_INIT_POINTER(devdata, NULL); 1117 RCU_INIT_POINTER(devdata, NULL);
1267 spin_unlock_irqrestore(&devdata_mutex, flags); 1118 spin_unlock_irqrestore(&devdata_mutex, flags);
1268 synchronize_rcu(); 1119 synchronize_rcu();
1269 if (old_devdata) 1120 if (old_devdata && old_devdata->dev)
1270 dev_set_drvdata(old_devdata->dev, NULL); 1121 dev_set_drvdata(old_devdata->dev, NULL);
1271 kfree(old_devdata); 1122 kfree(old_devdata);
1272 nx842_unregister_driver(&nx842_pseries_driver); 1123 nx842_unregister_driver(&nx842_pseries_driver);
1273 vio_unregister_driver(&nx842_driver); 1124 vio_unregister_driver(&nx842_vio_driver);
1274} 1125}
1275 1126
1276module_exit(nx842_exit); 1127module_exit(nx842_exit);
1277 1128
1278/*********************************
1279 * 842 software decompressor
1280*********************************/
1281typedef int (*sw842_template_op)(const char **, int *, unsigned char **,
1282 struct sw842_fifo *);
1283
1284static int sw842_data8(const char **, int *, unsigned char **,
1285 struct sw842_fifo *);
1286static int sw842_data4(const char **, int *, unsigned char **,
1287 struct sw842_fifo *);
1288static int sw842_data2(const char **, int *, unsigned char **,
1289 struct sw842_fifo *);
1290static int sw842_ptr8(const char **, int *, unsigned char **,
1291 struct sw842_fifo *);
1292static int sw842_ptr4(const char **, int *, unsigned char **,
1293 struct sw842_fifo *);
1294static int sw842_ptr2(const char **, int *, unsigned char **,
1295 struct sw842_fifo *);
1296
1297/* special templates */
1298#define SW842_TMPL_REPEAT 0x1B
1299#define SW842_TMPL_ZEROS 0x1C
1300#define SW842_TMPL_EOF 0x1E
1301
1302static sw842_template_op sw842_tmpl_ops[26][4] = {
1303 { sw842_data8, NULL}, /* 0 (00000) */
1304 { sw842_data4, sw842_data2, sw842_ptr2, NULL},
1305 { sw842_data4, sw842_ptr2, sw842_data2, NULL},
1306 { sw842_data4, sw842_ptr2, sw842_ptr2, NULL},
1307 { sw842_data4, sw842_ptr4, NULL},
1308 { sw842_data2, sw842_ptr2, sw842_data4, NULL},
1309 { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2},
1310 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2},
1311 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,},
1312 { sw842_data2, sw842_ptr2, sw842_ptr4, NULL},
1313 { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
1314 { sw842_ptr2, sw842_data4, sw842_ptr2, NULL},
1315 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2},
1316 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2},
1317 { sw842_ptr2, sw842_data2, sw842_ptr4, NULL},
1318 { sw842_ptr2, sw842_ptr2, sw842_data4, NULL},
1319 { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2},
1320 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2},
1321 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2},
1322 { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL},
1323 { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */
1324 { sw842_ptr4, sw842_data2, sw842_ptr2, NULL},
1325 { sw842_ptr4, sw842_ptr2, sw842_data2, NULL},
1326 { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL},
1327 { sw842_ptr4, sw842_ptr4, NULL},
1328 { sw842_ptr8, NULL}
1329};
1330
1331/* Software decompress helpers */
1332
1333static uint8_t sw842_get_byte(const char *buf, int bit)
1334{
1335 uint8_t tmpl;
1336 uint16_t tmp;
1337 tmp = htons(*(uint16_t *)(buf));
1338 tmp = (uint16_t)(tmp << bit);
1339 tmp = ntohs(tmp);
1340 memcpy(&tmpl, &tmp, 1);
1341 return tmpl;
1342}
1343
1344static uint8_t sw842_get_template(const char **buf, int *bit)
1345{
1346 uint8_t byte;
1347 byte = sw842_get_byte(*buf, *bit);
1348 byte = byte >> 3;
1349 byte &= 0x1F;
1350 *buf += (*bit + 5) / 8;
1351 *bit = (*bit + 5) % 8;
1352 return byte;
1353}
1354
1355/* repeat_count happens to be 5-bit too (like the template) */
1356static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
1357{
1358 uint8_t byte;
1359 byte = sw842_get_byte(*buf, *bit);
1360 byte = byte >> 2;
1361 byte &= 0x3F;
1362 *buf += (*bit + 6) / 8;
1363 *bit = (*bit + 6) % 8;
1364 return byte;
1365}
1366
1367static uint8_t sw842_get_ptr2(const char **buf, int *bit)
1368{
1369 uint8_t ptr;
1370 ptr = sw842_get_byte(*buf, *bit);
1371 (*buf)++;
1372 return ptr;
1373}
1374
1375static uint16_t sw842_get_ptr4(const char **buf, int *bit,
1376 struct sw842_fifo *fifo)
1377{
1378 uint16_t ptr;
1379 ptr = htons(*(uint16_t *)(*buf));
1380 ptr = (uint16_t)(ptr << *bit);
1381 ptr = ptr >> 7;
1382 ptr &= 0x01FF;
1383 *buf += (*bit + 9) / 8;
1384 *bit = (*bit + 9) % 8;
1385 return ptr;
1386}
1387
1388static uint8_t sw842_get_ptr8(const char **buf, int *bit,
1389 struct sw842_fifo *fifo)
1390{
1391 return sw842_get_ptr2(buf, bit);
1392}
1393
1394/* Software decompress template ops */
1395
1396static int sw842_data8(const char **inbuf, int *inbit,
1397 unsigned char **outbuf, struct sw842_fifo *fifo)
1398{
1399 int ret;
1400
1401 ret = sw842_data4(inbuf, inbit, outbuf, fifo);
1402 if (ret)
1403 return ret;
1404 ret = sw842_data4(inbuf, inbit, outbuf, fifo);
1405 return ret;
1406}
1407
1408static int sw842_data4(const char **inbuf, int *inbit,
1409 unsigned char **outbuf, struct sw842_fifo *fifo)
1410{
1411 int ret;
1412
1413 ret = sw842_data2(inbuf, inbit, outbuf, fifo);
1414 if (ret)
1415 return ret;
1416 ret = sw842_data2(inbuf, inbit, outbuf, fifo);
1417 return ret;
1418}
1419
1420static int sw842_data2(const char **inbuf, int *inbit,
1421 unsigned char **outbuf, struct sw842_fifo *fifo)
1422{
1423 **outbuf = sw842_get_byte(*inbuf, *inbit);
1424 (*inbuf)++;
1425 (*outbuf)++;
1426 **outbuf = sw842_get_byte(*inbuf, *inbit);
1427 (*inbuf)++;
1428 (*outbuf)++;
1429 return 0;
1430}
1431
1432static int sw842_ptr8(const char **inbuf, int *inbit,
1433 unsigned char **outbuf, struct sw842_fifo *fifo)
1434{
1435 uint8_t ptr;
1436 ptr = sw842_get_ptr8(inbuf, inbit, fifo);
1437 if (!fifo->f84_full && (ptr >= fifo->f8_count))
1438 return 1;
1439 memcpy(*outbuf, fifo->f8[ptr], 8);
1440 *outbuf += 8;
1441 return 0;
1442}
1443
1444static int sw842_ptr4(const char **inbuf, int *inbit,
1445 unsigned char **outbuf, struct sw842_fifo *fifo)
1446{
1447 uint16_t ptr;
1448 ptr = sw842_get_ptr4(inbuf, inbit, fifo);
1449 if (!fifo->f84_full && (ptr >= fifo->f4_count))
1450 return 1;
1451 memcpy(*outbuf, fifo->f4[ptr], 4);
1452 *outbuf += 4;
1453 return 0;
1454}
1455
1456static int sw842_ptr2(const char **inbuf, int *inbit,
1457 unsigned char **outbuf, struct sw842_fifo *fifo)
1458{
1459 uint8_t ptr;
1460 ptr = sw842_get_ptr2(inbuf, inbit);
1461 if (!fifo->f2_full && (ptr >= fifo->f2_count))
1462 return 1;
1463 memcpy(*outbuf, fifo->f2[ptr], 2);
1464 *outbuf += 2;
1465 return 0;
1466}
1467
1468static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
1469{
1470 unsigned char initial_f2count = fifo->f2_count;
1471
1472 memcpy(fifo->f8[fifo->f8_count], buf, 8);
1473 fifo->f4_count += 2;
1474 fifo->f8_count += 1;
1475
1476 if (!fifo->f84_full && fifo->f4_count >= 512) {
1477 fifo->f84_full = 1;
1478 fifo->f4_count /= 512;
1479 }
1480
1481 memcpy(fifo->f2[fifo->f2_count++], buf, 2);
1482 memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
1483 memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
1484 memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
1485 if (fifo->f2_count < initial_f2count)
1486 fifo->f2_full = 1;
1487}
1488
1489static int sw842_decompress(const unsigned char *src, int srclen,
1490 unsigned char *dst, int *destlen,
1491 const void *wrkmem)
1492{
1493 uint8_t tmpl;
1494 const char *inbuf;
1495 int inbit = 0;
1496 unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
1497 const char *inbuf_end;
1498 sw842_template_op op;
1499 int opindex;
1500 int i, repeat_count;
1501 struct sw842_fifo *fifo;
1502 int ret = 0;
1503
1504 fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
1505 memset(fifo, 0, sizeof(*fifo));
1506
1507 origbuf = NULL;
1508 inbuf = src;
1509 inbuf_end = src + srclen;
1510 outbuf = dst;
1511 outbuf_end = dst + *destlen;
1512
1513 while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
1514 if (inbuf >= inbuf_end) {
1515 ret = -EINVAL;
1516 goto out;
1517 }
1518
1519 opindex = 0;
1520 prevbuf = origbuf;
1521 origbuf = outbuf;
1522 switch (tmpl) {
1523 case SW842_TMPL_REPEAT:
1524 if (prevbuf == NULL) {
1525 ret = -EINVAL;
1526 goto out;
1527 }
1528
1529 repeat_count = sw842_get_repeat_count(&inbuf,
1530 &inbit) + 1;
1531
1532 /* Did the repeat count advance past the end of input */
1533 if (inbuf > inbuf_end) {
1534 ret = -EINVAL;
1535 goto out;
1536 }
1537
1538 for (i = 0; i < repeat_count; i++) {
1539 /* Would this overflow the output buffer */
1540 if ((outbuf + 8) > outbuf_end) {
1541 ret = -ENOSPC;
1542 goto out;
1543 }
1544
1545 memcpy(outbuf, prevbuf, 8);
1546 sw842_copy_to_fifo(outbuf, fifo);
1547 outbuf += 8;
1548 }
1549 break;
1550
1551 case SW842_TMPL_ZEROS:
1552 /* Would this overflow the output buffer */
1553 if ((outbuf + 8) > outbuf_end) {
1554 ret = -ENOSPC;
1555 goto out;
1556 }
1557
1558 memset(outbuf, 0, 8);
1559 sw842_copy_to_fifo(outbuf, fifo);
1560 outbuf += 8;
1561 break;
1562
1563 default:
1564 if (tmpl > 25) {
1565 ret = -EINVAL;
1566 goto out;
1567 }
1568
1569 /* Does this go past the end of the input buffer */
1570 if ((inbuf + 2) > inbuf_end) {
1571 ret = -EINVAL;
1572 goto out;
1573 }
1574
1575 /* Would this overflow the output buffer */
1576 if ((outbuf + 8) > outbuf_end) {
1577 ret = -ENOSPC;
1578 goto out;
1579 }
1580
1581 while (opindex < 4 &&
1582 (op = sw842_tmpl_ops[tmpl][opindex++])
1583 != NULL) {
1584 ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
1585 if (ret) {
1586 ret = -EINVAL;
1587 goto out;
1588 }
1589 sw842_copy_to_fifo(origbuf, fifo);
1590 }
1591 }
1592 }
1593
1594out:
1595 if (!ret)
1596 *destlen = (unsigned int)(outbuf - dst);
1597 else
1598 *destlen = 0;
1599
1600 return ret;
1601}