aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/crypto
diff options
context:
space:
mode:
authorSeth Jennings <sjenning@linux.vnet.ibm.com>2012-07-19 10:42:40 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2012-08-01 05:47:56 -0400
commit0e16aafb12046e12effbdaab179fbe1a38427ba9 (patch)
tree743ce60ba95bf9a5ee3cff7f664dd99a7498473e /drivers/crypto
parentda29aa8f2ab178903a1ac23ce19442f92be4f09c (diff)
powerpc/crypto: add 842 hardware compression driver
This patch adds the driver for interacting with the 842 compression accelerator on IBM Power7+ systems. The device is a child of the Platform Facilities Option (PFO) and shows up as a child of the IBM VIO bus. The compression/decompression API takes the same arguments as existing compression methods like lzo and deflate. The 842 hardware operates on 4K hardware pages and the driver breaks up input on 4K boundaries to submit it to the hardware accelerator. Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'drivers/crypto')
-rw-r--r--drivers/crypto/nx/Kconfig9
-rw-r--r--drivers/crypto/nx/Makefile3
-rw-r--r--drivers/crypto/nx/nx-842.c1615
3 files changed, 1627 insertions, 0 deletions
diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index dedde535024e..f82616621ae1 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -15,3 +15,12 @@ config CRYPTO_DEV_NX_ENCRYPT
15 Support for Power7+ in-Nest encryption acceleration. This 15 Support for Power7+ in-Nest encryption acceleration. This
16 module supports acceleration for AES and SHA2 algorithms. If you 16 module supports acceleration for AES and SHA2 algorithms. If you
17 choose 'M' here, this module will be called nx_crypto. 17 choose 'M' here, this module will be called nx_crypto.
18
19config CRYPTO_DEV_NX_COMPRESS
20 tristate "Compression acceleration support"
21 depends on PPC64 && IBMVIO
22 default y
23 help
24 Support for Power7+ in-Nest compression acceleration. This
25 module supports acceleration for the 842 compression algorithm. If you
26 choose 'M' here, this module will be called nx_compress.
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index 7f110e460da3..bb770ea45ce9 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -9,3 +9,6 @@ nx-crypto-objs := nx.o \
9 nx-aes-xcbc.o \ 9 nx-aes-xcbc.o \
10 nx-sha256.o \ 10 nx-sha256.o \
11 nx-sha512.o 11 nx-sha512.o
12
13obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
14nx-compress-objs := nx-842.o
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
new file mode 100644
index 000000000000..9da0fb2d3f64
--- /dev/null
+++ b/drivers/crypto/nx/nx-842.c
@@ -0,0 +1,1615 @@
1/*
2 * Driver for IBM Power 842 compression accelerator
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * Copyright (C) IBM Corporation, 2012
19 *
20 * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
21 * Seth Jennings <sjenning@linux.vnet.ibm.com>
22 */
23
24#include <linux/module.h>
25#include <asm/vio.h>
26#include <asm/pSeries_reconfig.h>
27#include <linux/slab.h>
28#include <asm/abs_addr.h>
29#include <linux/nx842.h>
30#include <linux/kernel.h>
31
32#include "nx_csbcpb.h" /* struct nx_csbcpb */
33
34#define MODULE_NAME "nx-compress"
35MODULE_LICENSE("GPL");
36MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
37MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
38
/* Page-size shifts and the byte counts derived from them. */
#define SHIFT_4K	12
#define SHIFT_64K	16
#define SIZE_4K		(1UL << SHIFT_4K)
#define SIZE_64K	(1UL << SHIFT_64K)

/* Every I/O buffer must start on a 128-byte boundary. */
#define IO_BUFFER_ALIGN	128
46
/*
 * Header stored at the start of every compressed buffer.
 *
 * @sizes has one entry per block.  nx842_compress() stores a negative value
 * for a block that was kept uncompressed; nx842_decompress() copies such
 * blocks verbatim.
 */
struct nx842_header {
	int blocks_nr; /* number of compressed blocks */
	int offset; /* offset of the first block (from beginning of header) */
	int sizes[]; /* size of compressed blocks (flexible array member) */
};

/*
 * Total size in bytes of @hdr, i.e. the fixed fields plus one sizes[] entry
 * for each of the hdr->blocks_nr blocks.
 */
static inline int nx842_header_size(const struct nx842_header *hdr)
{
	return sizeof(struct nx842_header) +
			hdr->blocks_nr * sizeof(hdr->sizes[0]);
}
58
/* Macros for fields within nx_csbcpb */
/*
 * Check the valid bit within the csbcpb valid field.
 * The argument is parenthesized so that compound expressions such as
 * (a | b) are masked as a whole.
 */
#define NX842_CSBCBP_VALID_CHK(x) ((x) & BIT_MASK(7))

/* CE macros operate on the completion_extension field bits in the csbcpb.
 * CE0 0=full completion, 1=partial completion
 * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
 * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
#define NX842_CSBCPB_CE0(x)	((x) & BIT_MASK(7))
#define NX842_CSBCPB_CE1(x)	((x) & BIT_MASK(6))
#define NX842_CSBCPB_CE2(x)	((x) & BIT_MASK(5))
70
/*
 * The NX unit accepts data only on 4K page boundaries.
 * NX842_HW_PAGE_MASK clears the offset-within-page bits of an address.
 */
#define NX842_HW_PAGE_SHIFT	SHIFT_4K
#define NX842_HW_PAGE_SIZE	(ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
#define NX842_HW_PAGE_MASK	(~(NX842_HW_PAGE_SIZE - 1))
75
/*
 * Device availability as derived from the OF "status" property.
 * UNAVAILABLE is zero, so zero-filled device data starts out unavailable.
 */
enum nx842_status {
	UNAVAILABLE = 0,
	AVAILABLE = 1
};
80
81struct ibm_nx842_counters {
82 atomic64_t comp_complete;
83 atomic64_t comp_failed;
84 atomic64_t decomp_complete;
85 atomic64_t decomp_failed;
86 atomic64_t swdecomp;
87 atomic64_t comp_times[32];
88 atomic64_t decomp_times[32];
89};
90
91static struct nx842_devdata {
92 struct vio_dev *vdev;
93 struct device *dev;
94 struct ibm_nx842_counters *counters;
95 unsigned int max_sg_len;
96 unsigned int max_sync_size;
97 unsigned int max_sync_sg;
98 enum nx842_status status;
99} __rcu *devdata;
100static DEFINE_SPINLOCK(devdata_mutex);
101
102#define NX842_COUNTER_INC(_x) \
103static inline void nx842_inc_##_x( \
104 const struct nx842_devdata *dev) { \
105 if (dev) \
106 atomic64_inc(&dev->counters->_x); \
107}
108NX842_COUNTER_INC(comp_complete);
109NX842_COUNTER_INC(comp_failed);
110NX842_COUNTER_INC(decomp_complete);
111NX842_COUNTER_INC(decomp_failed);
112NX842_COUNTER_INC(swdecomp);
113
114#define NX842_HIST_SLOTS 16
115
116static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
117{
118 int bucket = fls(time);
119
120 if (bucket)
121 bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
122
123 atomic64_inc(&times[bucket]);
124}
125
/* NX unit operation flags */
#define NX842_OP_COMPRESS	0x0
#define NX842_OP_CRC		0x1
#define NX842_OP_DECOMPRESS	0x2
#define NX842_OP_COMPRESS_CRC	(NX842_OP_COMPRESS | NX842_OP_CRC)
#define NX842_OP_DECOMPRESS_CRC	(NX842_OP_DECOMPRESS | NX842_OP_CRC)
#define NX842_OP_ASYNC		(1<<23)
#define NX842_OP_NOTIFY		(1<<22)
/*
 * Place the low 8 bits of @x in bits 8..15.  The argument is parenthesized
 * so that a compound expression is masked as a whole before shifting.
 */
#define NX842_OP_NOTIFY_INT(x)	(((x) & 0xff)<<8)
135
/*
 * Report the amount of DMA mapping space wanted for @viodev.
 * The driver performs no DMA mappings, so the answer is always zero.
 */
static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
{
	const unsigned long no_dma = 0;

	return no_dma;
}
141
/* One scatterlist element: a buffer fragment given to the hypervisor. */
struct nx842_slentry {
	unsigned long ptr; /* Absolute address (use virt_to_abs()) */
	unsigned long len; /* length of the fragment in bytes */
};

/* pHyp scatterlist entry */
struct nx842_scatterlist {
	int entry_nr; /* number of slentries */
	struct nx842_slentry *entries; /* ptr to array of slentries */
};

/*
 * Size in bytes of the entry array described by @sl.
 * Does not include sizeof(entry_nr) in the size.
 */
static inline unsigned long nx842_get_scatterlist_size(
				struct nx842_scatterlist *sl)
{
	unsigned long count = sl->entry_nr;

	return count * sizeof(struct nx842_slentry);
}
159
160static int nx842_build_scatterlist(unsigned long buf, int len,
161 struct nx842_scatterlist *sl)
162{
163 unsigned long nextpage;
164 struct nx842_slentry *entry;
165
166 sl->entry_nr = 0;
167
168 entry = sl->entries;
169 while (len) {
170 entry->ptr = virt_to_abs(buf);
171 nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
172 if (nextpage < buf + len) {
173 /* we aren't at the end yet */
174 if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
175 /* we are in the middle (or beginning) */
176 entry->len = NX842_HW_PAGE_SIZE;
177 else
178 /* we are at the beginning */
179 entry->len = nextpage - buf;
180 } else {
181 /* at the end */
182 entry->len = len;
183 }
184
185 len -= entry->len;
186 buf += entry->len;
187 sl->entry_nr++;
188 entry++;
189 }
190
191 return 0;
192}
193
/*
 * Working memory for software decompression.
 *
 * f8 and f4 are two views of the same storage (256 eight-byte entries or
 * 512 four-byte entries); f2 is a separate array of two-byte entries.
 */
struct sw842_fifo {
	union {
		char f8[256][8];
		char f4[512][4];
	};
	char f2[256][2];
	unsigned char f84_full;
	unsigned char f2_full;
	unsigned char f8_count;
	unsigned char f2_count;
	unsigned int f4_count;
};
209
210/*
211 * Working memory for crypto API
212 */
213struct nx842_workmem {
214 char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
215 union {
216 /* hardware working memory */
217 struct {
218 /* scatterlist */
219 char slin[SIZE_4K];
220 char slout[SIZE_4K];
221 /* coprocessor status/parameter block */
222 struct nx_csbcpb csbcpb;
223 };
224 /* software working memory */
225 struct sw842_fifo swfifo; /* software decompression fifo */
226 };
227};
228
229int nx842_get_workmem_size(void)
230{
231 return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE;
232}
233EXPORT_SYMBOL_GPL(nx842_get_workmem_size);
234
235int nx842_get_workmem_size_aligned(void)
236{
237 return sizeof(struct nx842_workmem);
238}
239EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned);
240
241static int nx842_validate_result(struct device *dev,
242 struct cop_status_block *csb)
243{
244 /* The csb must be valid after returning from vio_h_cop_sync */
245 if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
246 dev_err(dev, "%s: cspcbp not valid upon completion.\n",
247 __func__);
248 dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
249 csb->valid,
250 csb->crb_seq_number,
251 csb->completion_code,
252 csb->completion_extension);
253 dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
254 csb->processed_byte_count,
255 (unsigned long)csb->address);
256 return -EIO;
257 }
258
259 /* Check return values from the hardware in the CSB */
260 switch (csb->completion_code) {
261 case 0: /* Completed without error */
262 break;
263 case 64: /* Target bytes > Source bytes during compression */
264 case 13: /* Output buffer too small */
265 dev_dbg(dev, "%s: Compression output larger than input\n",
266 __func__);
267 return -ENOSPC;
268 case 66: /* Input data contains an illegal template field */
269 case 67: /* Template indicates data past the end of the input stream */
270 dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
271 __func__, csb->completion_code);
272 return -EINVAL;
273 default:
274 dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
275 __func__, csb->completion_code);
276 return -EIO;
277 }
278
279 /* Hardware sanity check */
280 if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
281 dev_err(dev, "%s: No error returned by hardware, but "
282 "data returned is unusable, contact support.\n"
283 "(Additional info: csbcbp->processed bytes "
284 "does not specify processed bytes for the "
285 "target buffer.)\n", __func__);
286 return -EIO;
287 }
288
289 return 0;
290}
291
292/**
293 * nx842_compress - Compress data using the 842 algorithm
294 *
295 * Compression provide by the NX842 coprocessor on IBM Power systems.
296 * The input buffer is compressed and the result is stored in the
297 * provided output buffer.
298 *
299 * Upon return from this function @outlen contains the length of the
300 * compressed data. If there is an error then @outlen will be 0 and an
301 * error will be specified by the return code from this function.
302 *
303 * @in: Pointer to input buffer, must be page aligned
304 * @inlen: Length of input buffer, must be PAGE_SIZE
305 * @out: Pointer to output buffer
306 * @outlen: Length of output buffer
307 * @wrkmem: ptr to buffer for working memory, size determined by
308 * nx842_get_workmem_size()
309 *
310 * Returns:
311 * 0 Success, output of length @outlen stored in the buffer at @out
312 * -ENOMEM Unable to allocate internal buffers
313 * -ENOSPC Output buffer is to small
314 * -EMSGSIZE XXX Difficult to describe this limitation
315 * -EIO Internal error
316 * -ENODEV Hardware unavailable
317 */
318int nx842_compress(const unsigned char *in, unsigned int inlen,
319 unsigned char *out, unsigned int *outlen, void *wmem)
320{
321 struct nx842_header *hdr;
322 struct nx842_devdata *local_devdata;
323 struct device *dev = NULL;
324 struct nx842_workmem *workmem;
325 struct nx842_scatterlist slin, slout;
326 struct nx_csbcpb *csbcpb;
327 int ret = 0, max_sync_size, i, bytesleft, size, hdrsize;
328 unsigned long inbuf, outbuf, padding;
329 struct vio_pfo_op op = {
330 .done = NULL,
331 .handle = 0,
332 .timeout = 0,
333 };
334 unsigned long start_time = get_tb();
335
336 /*
337 * Make sure input buffer is 64k page aligned. This is assumed since
338 * this driver is designed for page compression only (for now). This
339 * is very nice since we can now use direct DDE(s) for the input and
340 * the alignment is guaranteed.
341 */
342 inbuf = (unsigned long)in;
343 if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE)
344 return -EINVAL;
345
346 rcu_read_lock();
347 local_devdata = rcu_dereference(devdata);
348 if (!local_devdata || !local_devdata->dev) {
349 rcu_read_unlock();
350 return -ENODEV;
351 }
352 max_sync_size = local_devdata->max_sync_size;
353 dev = local_devdata->dev;
354
355 /* Create the header */
356 hdr = (struct nx842_header *)out;
357 hdr->blocks_nr = PAGE_SIZE / max_sync_size;
358 hdrsize = nx842_header_size(hdr);
359 outbuf = (unsigned long)out + hdrsize;
360 bytesleft = *outlen - hdrsize;
361
362 /* Init scatterlist */
363 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
364 NX842_HW_PAGE_SIZE);
365 slin.entries = (struct nx842_slentry *)workmem->slin;
366 slout.entries = (struct nx842_slentry *)workmem->slout;
367
368 /* Init operation */
369 op.flags = NX842_OP_COMPRESS;
370 csbcpb = &workmem->csbcpb;
371 memset(csbcpb, 0, sizeof(*csbcpb));
372 op.csbcpb = virt_to_abs(csbcpb);
373 op.out = virt_to_abs(slout.entries);
374
375 for (i = 0; i < hdr->blocks_nr; i++) {
376 /*
377 * Aligning the output blocks to 128 bytes does waste space,
378 * but it prevents the need for bounce buffers and memory
379 * copies. It also simplifies the code a lot. In the worst
380 * case (64k page, 4k max_sync_size), you lose up to
381 * (128*16)/64k = ~3% the compression factor. For 64k
382 * max_sync_size, the loss would be at most 128/64k = ~0.2%.
383 */
384 padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf;
385 outbuf += padding;
386 bytesleft -= padding;
387 if (i == 0)
388 /* save offset into first block in header */
389 hdr->offset = padding + hdrsize;
390
391 if (bytesleft <= 0) {
392 ret = -ENOSPC;
393 goto unlock;
394 }
395
396 /*
397 * NOTE: If the default max_sync_size is changed from 4k
398 * to 64k, remove the "likely" case below, since a
399 * scatterlist will always be needed.
400 */
401 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
402 /* Create direct DDE */
403 op.in = virt_to_abs(inbuf);
404 op.inlen = max_sync_size;
405
406 } else {
407 /* Create indirect DDE (scatterlist) */
408 nx842_build_scatterlist(inbuf, max_sync_size, &slin);
409 op.in = virt_to_abs(slin.entries);
410 op.inlen = -nx842_get_scatterlist_size(&slin);
411 }
412
413 /*
414 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect
415 * DDE is required for the outbuf.
416 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must
417 * also be page aligned (1 in 128/4k=32 chance) in order
418 * to use a direct DDE.
419 * This is unlikely, just use an indirect DDE always.
420 */
421 nx842_build_scatterlist(outbuf,
422 min(bytesleft, max_sync_size), &slout);
423 /* op.out set before loop */
424 op.outlen = -nx842_get_scatterlist_size(&slout);
425
426 /* Send request to pHyp */
427 ret = vio_h_cop_sync(local_devdata->vdev, &op);
428
429 /* Check for pHyp error */
430 if (ret) {
431 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
432 __func__, ret, op.hcall_err);
433 ret = -EIO;
434 goto unlock;
435 }
436
437 /* Check for hardware error */
438 ret = nx842_validate_result(dev, &csbcpb->csb);
439 if (ret && ret != -ENOSPC)
440 goto unlock;
441
442 /* Handle incompressible data */
443 if (unlikely(ret == -ENOSPC)) {
444 if (bytesleft < max_sync_size) {
445 /*
446 * Not enough space left in the output buffer
447 * to store uncompressed block
448 */
449 goto unlock;
450 } else {
451 /* Store incompressible block */
452 memcpy((void *)outbuf, (void *)inbuf,
453 max_sync_size);
454 hdr->sizes[i] = -max_sync_size;
455 outbuf += max_sync_size;
456 bytesleft -= max_sync_size;
457 /* Reset ret, incompressible data handled */
458 ret = 0;
459 }
460 } else {
461 /* Normal case, compression was successful */
462 size = csbcpb->csb.processed_byte_count;
463 dev_dbg(dev, "%s: processed_bytes=%d\n",
464 __func__, size);
465 hdr->sizes[i] = size;
466 outbuf += size;
467 bytesleft -= size;
468 }
469
470 inbuf += max_sync_size;
471 }
472
473 *outlen = (unsigned int)(outbuf - (unsigned long)out);
474
475unlock:
476 if (ret)
477 nx842_inc_comp_failed(local_devdata);
478 else {
479 nx842_inc_comp_complete(local_devdata);
480 ibm_nx842_incr_hist(local_devdata->counters->comp_times,
481 (get_tb() - start_time) / tb_ticks_per_usec);
482 }
483 rcu_read_unlock();
484 return ret;
485}
486EXPORT_SYMBOL_GPL(nx842_compress);
487
488static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
489 const void *);
490
491/**
492 * nx842_decompress - Decompress data using the 842 algorithm
493 *
494 * Decompression provide by the NX842 coprocessor on IBM Power systems.
495 * The input buffer is decompressed and the result is stored in the
496 * provided output buffer. The size allocated to the output buffer is
497 * provided by the caller of this function in @outlen. Upon return from
498 * this function @outlen contains the length of the decompressed data.
499 * If there is an error then @outlen will be 0 and an error will be
500 * specified by the return code from this function.
501 *
502 * @in: Pointer to input buffer, will use bounce buffer if not 128 byte
503 * aligned
504 * @inlen: Length of input buffer
505 * @out: Pointer to output buffer, must be page aligned
506 * @outlen: Length of output buffer, must be PAGE_SIZE
507 * @wrkmem: ptr to buffer for working memory, size determined by
508 * nx842_get_workmem_size()
509 *
510 * Returns:
511 * 0 Success, output of length @outlen stored in the buffer at @out
512 * -ENODEV Hardware decompression device is unavailable
513 * -ENOMEM Unable to allocate internal buffers
514 * -ENOSPC Output buffer is to small
515 * -EINVAL Bad input data encountered when attempting decompress
516 * -EIO Internal error
517 */
518int nx842_decompress(const unsigned char *in, unsigned int inlen,
519 unsigned char *out, unsigned int *outlen, void *wmem)
520{
521 struct nx842_header *hdr;
522 struct nx842_devdata *local_devdata;
523 struct device *dev = NULL;
524 struct nx842_workmem *workmem;
525 struct nx842_scatterlist slin, slout;
526 struct nx_csbcpb *csbcpb;
527 int ret = 0, i, size, max_sync_size;
528 unsigned long inbuf, outbuf;
529 struct vio_pfo_op op = {
530 .done = NULL,
531 .handle = 0,
532 .timeout = 0,
533 };
534 unsigned long start_time = get_tb();
535
536 /* Ensure page alignment and size */
537 outbuf = (unsigned long)out;
538 if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE)
539 return -EINVAL;
540
541 rcu_read_lock();
542 local_devdata = rcu_dereference(devdata);
543 if (local_devdata)
544 dev = local_devdata->dev;
545
546 /* Get header */
547 hdr = (struct nx842_header *)in;
548
549 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
550 NX842_HW_PAGE_SIZE);
551
552 inbuf = (unsigned long)in + hdr->offset;
553 if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
554 /* Copy block(s) into bounce buffer for alignment */
555 memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
556 inbuf = (unsigned long)workmem->bounce;
557 }
558
559 /* Init scatterlist */
560 slin.entries = (struct nx842_slentry *)workmem->slin;
561 slout.entries = (struct nx842_slentry *)workmem->slout;
562
563 /* Init operation */
564 op.flags = NX842_OP_DECOMPRESS;
565 csbcpb = &workmem->csbcpb;
566 memset(csbcpb, 0, sizeof(*csbcpb));
567 op.csbcpb = virt_to_abs(csbcpb);
568
569 /*
570 * max_sync_size may have changed since compression,
571 * so we can't read it from the device info. We need
572 * to derive it from hdr->blocks_nr.
573 */
574 max_sync_size = PAGE_SIZE / hdr->blocks_nr;
575
576 for (i = 0; i < hdr->blocks_nr; i++) {
577 /* Skip padding */
578 inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN);
579
580 if (hdr->sizes[i] < 0) {
581 /* Negative sizes indicate uncompressed data blocks */
582 size = abs(hdr->sizes[i]);
583 memcpy((void *)outbuf, (void *)inbuf, size);
584 outbuf += size;
585 inbuf += size;
586 continue;
587 }
588
589 if (!dev)
590 goto sw;
591
592 /*
593 * The better the compression, the more likely the "likely"
594 * case becomes.
595 */
596 if (likely((inbuf & NX842_HW_PAGE_MASK) ==
597 ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
598 /* Create direct DDE */
599 op.in = virt_to_abs(inbuf);
600 op.inlen = hdr->sizes[i];
601 } else {
602 /* Create indirect DDE (scatterlist) */
603 nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
604 op.in = virt_to_abs(slin.entries);
605 op.inlen = -nx842_get_scatterlist_size(&slin);
606 }
607
608 /*
609 * NOTE: If the default max_sync_size is changed from 4k
610 * to 64k, remove the "likely" case below, since a
611 * scatterlist will always be needed.
612 */
613 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
614 /* Create direct DDE */
615 op.out = virt_to_abs(outbuf);
616 op.outlen = max_sync_size;
617 } else {
618 /* Create indirect DDE (scatterlist) */
619 nx842_build_scatterlist(outbuf, max_sync_size, &slout);
620 op.out = virt_to_abs(slout.entries);
621 op.outlen = -nx842_get_scatterlist_size(&slout);
622 }
623
624 /* Send request to pHyp */
625 ret = vio_h_cop_sync(local_devdata->vdev, &op);
626
627 /* Check for pHyp error */
628 if (ret) {
629 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
630 __func__, ret, op.hcall_err);
631 dev = NULL;
632 goto sw;
633 }
634
635 /* Check for hardware error */
636 ret = nx842_validate_result(dev, &csbcpb->csb);
637 if (ret) {
638 dev = NULL;
639 goto sw;
640 }
641
642 /* HW decompression success */
643 inbuf += hdr->sizes[i];
644 outbuf += csbcpb->csb.processed_byte_count;
645 continue;
646
647sw:
648 /* software decompression */
649 size = max_sync_size;
650 ret = sw842_decompress(
651 (unsigned char *)inbuf, hdr->sizes[i],
652 (unsigned char *)outbuf, &size, wmem);
653 if (ret)
654 pr_debug("%s: sw842_decompress failed with %d\n",
655 __func__, ret);
656
657 if (ret) {
658 if (ret != -ENOSPC && ret != -EINVAL &&
659 ret != -EMSGSIZE)
660 ret = -EIO;
661 goto unlock;
662 }
663
664 /* SW decompression success */
665 inbuf += hdr->sizes[i];
666 outbuf += size;
667 }
668
669 *outlen = (unsigned int)(outbuf - (unsigned long)out);
670
671unlock:
672 if (ret)
673 /* decompress fail */
674 nx842_inc_decomp_failed(local_devdata);
675 else {
676 if (!dev)
677 /* software decompress */
678 nx842_inc_swdecomp(local_devdata);
679 nx842_inc_decomp_complete(local_devdata);
680 ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
681 (get_tb() - start_time) / tb_ticks_per_usec);
682 }
683
684 rcu_read_unlock();
685 return ret;
686}
687EXPORT_SYMBOL_GPL(nx842_decompress);
688
689/**
690 * nx842_OF_set_defaults -- Set default (disabled) values for devdata
691 *
692 * @devdata - struct nx842_devdata to update
693 *
694 * Returns:
695 * 0 on success
696 * -ENOENT if @devdata ptr is NULL
697 */
698static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
699{
700 if (devdata) {
701 devdata->max_sync_size = 0;
702 devdata->max_sync_sg = 0;
703 devdata->max_sg_len = 0;
704 devdata->status = UNAVAILABLE;
705 return 0;
706 } else
707 return -ENOENT;
708}
709
710/**
711 * nx842_OF_upd_status -- Update the device info from OF status prop
712 *
713 * The status property indicates if the accelerator is enabled. If the
714 * device is in the OF tree it indicates that the hardware is present.
715 * The status field indicates if the device is enabled when the status
716 * is 'okay'. Otherwise the device driver will be disabled.
717 *
718 * @devdata - struct nx842_devdata to update
719 * @prop - struct property point containing the maxsyncop for the update
720 *
721 * Returns:
722 * 0 - Device is available
723 * -EINVAL - Device is not available
724 */
725static int nx842_OF_upd_status(struct nx842_devdata *devdata,
726 struct property *prop) {
727 int ret = 0;
728 const char *status = (const char *)prop->value;
729
730 if (!strncmp(status, "okay", (size_t)prop->length)) {
731 devdata->status = AVAILABLE;
732 } else {
733 dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
734 __func__, status);
735 devdata->status = UNAVAILABLE;
736 }
737
738 return ret;
739}
740
741/**
742 * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
743 *
744 * Definition of the 'ibm,max-sg-len' OF property:
745 * This field indicates the maximum byte length of a scatter list
746 * for the platform facility. It is a single cell encoded as with encode-int.
747 *
748 * Example:
749 * # od -x ibm,max-sg-len
750 * 0000000 0000 0ff0
751 *
752 * In this example, the maximum byte length of a scatter list is
753 * 0x0ff0 (4,080).
754 *
755 * @devdata - struct nx842_devdata to update
756 * @prop - struct property point containing the maxsyncop for the update
757 *
758 * Returns:
759 * 0 on success
760 * -EINVAL on failure
761 */
762static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
763 struct property *prop) {
764 int ret = 0;
765 const int *maxsglen = prop->value;
766
767 if (prop->length != sizeof(*maxsglen)) {
768 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
769 dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
770 prop->length, sizeof(*maxsglen));
771 ret = -EINVAL;
772 } else {
773 devdata->max_sg_len = (unsigned int)min(*maxsglen,
774 (int)NX842_HW_PAGE_SIZE);
775 }
776
777 return ret;
778}
779
780/**
781 * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
782 *
783 * Definition of the 'ibm,max-sync-cop' OF property:
784 * Two series of cells. The first series of cells represents the maximums
785 * that can be synchronously compressed. The second series of cells
786 * represents the maximums that can be synchronously decompressed.
787 * 1. The first cell in each series contains the count of the number of
788 * data length, scatter list elements pairs that follow – each being
789 * of the form
790 * a. One cell data byte length
791 * b. One cell total number of scatter list elements
792 *
793 * Example:
794 * # od -x ibm,max-sync-cop
795 * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001
796 * 0000020 0000 1000 0000 01fe
797 *
798 * In this example, compression supports 0x1000 (4,096) data byte length
799 * and 0x1fe (510) total scatter list elements. Decompression supports
800 * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
801 * elements.
802 *
803 * @devdata - struct nx842_devdata to update
804 * @prop - struct property point containing the maxsyncop for the update
805 *
806 * Returns:
807 * 0 on success
808 * -EINVAL on failure
809 */
810static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
811 struct property *prop) {
812 int ret = 0;
813 const struct maxsynccop_t {
814 int comp_elements;
815 int comp_data_limit;
816 int comp_sg_limit;
817 int decomp_elements;
818 int decomp_data_limit;
819 int decomp_sg_limit;
820 } *maxsynccop;
821
822 if (prop->length != sizeof(*maxsynccop)) {
823 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
824 dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
825 sizeof(*maxsynccop));
826 ret = -EINVAL;
827 goto out;
828 }
829
830 maxsynccop = (const struct maxsynccop_t *)prop->value;
831
832 /* Use one limit rather than separate limits for compression and
833 * decompression. Set a maximum for this so as not to exceed the
834 * size that the header can support and round the value down to
835 * the hardware page size (4K) */
836 devdata->max_sync_size =
837 (unsigned int)min(maxsynccop->comp_data_limit,
838 maxsynccop->decomp_data_limit);
839
840 devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
841 SIZE_64K);
842
843 if (devdata->max_sync_size < SIZE_4K) {
844 dev_err(devdata->dev, "%s: hardware max data size (%u) is "
845 "less than the driver minimum, unable to use "
846 "the hardware device\n",
847 __func__, devdata->max_sync_size);
848 ret = -EINVAL;
849 goto out;
850 }
851
852 devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
853 maxsynccop->decomp_sg_limit);
854 if (devdata->max_sync_sg < 1) {
855 dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
856 "less than the driver minimum, unable to use "
857 "the hardware device\n",
858 __func__, devdata->max_sync_sg);
859 ret = -EINVAL;
860 goto out;
861 }
862
863out:
864 return ret;
865}
866
867/**
868 *
869 * nx842_OF_upd -- Handle OF properties updates for the device.
870 *
871 * Set all properties from the OF tree. Optionally, a new property
872 * can be provided by the @new_prop pointer to overwrite an existing value.
873 * The device will remain disabled until all values are valid, this function
874 * will return an error for updates unless all values are valid.
875 *
876 * @new_prop: If not NULL, this property is being updated. If NULL, update
877 * all properties from the current values in the OF tree.
878 *
879 * Returns:
880 * 0 - Success
881 * -ENOMEM - Could not allocate memory for new devdata structure
882 * -EINVAL - property value not found, new_prop is not a recognized
883 * property for the device or property value is not valid.
884 * -ENODEV - Device is not available
885 */
886static int nx842_OF_upd(struct property *new_prop)
887{
888 struct nx842_devdata *old_devdata = NULL;
889 struct nx842_devdata *new_devdata = NULL;
890 struct device_node *of_node = NULL;
891 struct property *status = NULL;
892 struct property *maxsglen = NULL;
893 struct property *maxsyncop = NULL;
894 int ret = 0;
895 unsigned long flags;
896
897 spin_lock_irqsave(&devdata_mutex, flags);
898 old_devdata = rcu_dereference_check(devdata,
899 lockdep_is_held(&devdata_mutex));
900 if (old_devdata)
901 of_node = old_devdata->dev->of_node;
902
903 if (!old_devdata || !of_node) {
904 pr_err("%s: device is not available\n", __func__);
905 spin_unlock_irqrestore(&devdata_mutex, flags);
906 return -ENODEV;
907 }
908
909 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
910 if (!new_devdata) {
911 dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
912 ret = -ENOMEM;
913 goto error_out;
914 }
915
916 memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
917 new_devdata->counters = old_devdata->counters;
918
919 /* Set ptrs for existing properties */
920 status = of_find_property(of_node, "status", NULL);
921 maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
922 maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
923 if (!status || !maxsglen || !maxsyncop) {
924 dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
925 ret = -EINVAL;
926 goto error_out;
927 }
928
929 /* Set ptr to new property if provided */
930 if (new_prop) {
931 /* Single property */
932 if (!strncmp(new_prop->name, "status", new_prop->length)) {
933 status = new_prop;
934
935 } else if (!strncmp(new_prop->name, "ibm,max-sg-len",
936 new_prop->length)) {
937 maxsglen = new_prop;
938
939 } else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
940 new_prop->length)) {
941 maxsyncop = new_prop;
942
943 } else {
944 /*
945 * Skip the update, the property being updated
946 * has no impact.
947 */
948 goto out;
949 }
950 }
951
952 /* Perform property updates */
953 ret = nx842_OF_upd_status(new_devdata, status);
954 if (ret)
955 goto error_out;
956
957 ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
958 if (ret)
959 goto error_out;
960
961 ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
962 if (ret)
963 goto error_out;
964
965out:
966 dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
967 __func__, new_devdata->max_sync_size,
968 old_devdata->max_sync_size);
969 dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
970 __func__, new_devdata->max_sync_sg,
971 old_devdata->max_sync_sg);
972 dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
973 __func__, new_devdata->max_sg_len,
974 old_devdata->max_sg_len);
975
976 rcu_assign_pointer(devdata, new_devdata);
977 spin_unlock_irqrestore(&devdata_mutex, flags);
978 synchronize_rcu();
979 dev_set_drvdata(new_devdata->dev, new_devdata);
980 kfree(old_devdata);
981 return 0;
982
983error_out:
984 if (new_devdata) {
985 dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
986 nx842_OF_set_defaults(new_devdata);
987 rcu_assign_pointer(devdata, new_devdata);
988 spin_unlock_irqrestore(&devdata_mutex, flags);
989 synchronize_rcu();
990 dev_set_drvdata(new_devdata->dev, new_devdata);
991 kfree(old_devdata);
992 } else {
993 dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
994 spin_unlock_irqrestore(&devdata_mutex, flags);
995 }
996
997 if (!ret)
998 ret = -EINVAL;
999 return ret;
1000}
1001
1002/**
1003 * nx842_OF_notifier - Process updates to OF properties for the device
1004 *
1005 * @np: notifier block
1006 * @action: notifier action
1007 * @update: struct pSeries_reconfig_prop_update pointer if action is
1008 * PSERIES_UPDATE_PROPERTY
1009 *
1010 * Returns:
1011 * NOTIFY_OK on success
1012 * NOTIFY_BAD encoded with error number on failure, use
1013 * notifier_to_errno() to decode this value
1014 */
1015static int nx842_OF_notifier(struct notifier_block *np,
1016 unsigned long action,
1017 void *update)
1018{
1019 struct pSeries_reconfig_prop_update *upd;
1020 struct nx842_devdata *local_devdata;
1021 struct device_node *node = NULL;
1022
1023 upd = (struct pSeries_reconfig_prop_update *)update;
1024
1025 rcu_read_lock();
1026 local_devdata = rcu_dereference(devdata);
1027 if (local_devdata)
1028 node = local_devdata->dev->of_node;
1029
1030 if (local_devdata &&
1031 action == PSERIES_UPDATE_PROPERTY &&
1032 !strcmp(upd->node->name, node->name)) {
1033 rcu_read_unlock();
1034 nx842_OF_upd(upd->property);
1035 } else
1036 rcu_read_unlock();
1037
1038 return NOTIFY_OK;
1039}
1040
/* Notifier block that routes reconfig notifications to nx842_OF_notifier(). */
static struct notifier_block nx842_of_nb = {
	.notifier_call = nx842_OF_notifier,
};
1044
/*
 * nx842_counter_read - generate the sysfs show routine for one counter
 *
 * Expands to nx842_<_name>_show(), which prints the atomic64 counter
 * counters-><_name> of the current device data, followed by a newline.
 * The device data is read under rcu_read_lock(); when none is registered
 * nothing is written and 0 is returned.
 */
#define nx842_counter_read(_name)					\
static ssize_t nx842_##_name##_show(struct device *dev,		\
		struct device_attribute *attr,				\
		char *buf) {						\
	struct nx842_devdata *local_devdata;			\
	int p = 0;							\
	rcu_read_lock();						\
	local_devdata = rcu_dereference(devdata);			\
	if (local_devdata)						\
		p = snprintf(buf, PAGE_SIZE, "%ld\n",			\
		       atomic64_read(&local_devdata->counters->_name));	\
	rcu_read_unlock();						\
	return p;							\
}

/*
 * NX842DEV_COUNTER_ATTR_RO - define the show routine for counter _name and
 * a read-only (0444) device attribute dev_attr_<_name> that uses it.
 */
#define NX842DEV_COUNTER_ATTR_RO(_name)					\
	nx842_counter_read(_name);					\
	static struct device_attribute dev_attr_##_name = __ATTR(_name,	\
						0444,			\
						nx842_##_name##_show,\
						NULL);

/* One read-only attribute per counter. */
NX842DEV_COUNTER_ATTR_RO(comp_complete);
NX842DEV_COUNTER_ATTR_RO(comp_failed);
NX842DEV_COUNTER_ATTR_RO(decomp_complete);
NX842DEV_COUNTER_ATTR_RO(decomp_failed);
NX842DEV_COUNTER_ATTR_RO(swdecomp);
1072
/*
 * Forward declaration: both histogram attributes below share this show
 * routine, which tells them apart by comparing the attribute pointer.
 */
static ssize_t nx842_timehist_show(struct device *,
		struct device_attribute *, char *);

/* Read-only (0444) histogram attributes. */
static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
		nx842_timehist_show, NULL);
static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
		0444, nx842_timehist_show, NULL);
1080
1081static ssize_t nx842_timehist_show(struct device *dev,
1082 struct device_attribute *attr, char *buf) {
1083 char *p = buf;
1084 struct nx842_devdata *local_devdata;
1085 atomic64_t *times;
1086 int bytes_remain = PAGE_SIZE;
1087 int bytes;
1088 int i;
1089
1090 rcu_read_lock();
1091 local_devdata = rcu_dereference(devdata);
1092 if (!local_devdata) {
1093 rcu_read_unlock();
1094 return 0;
1095 }
1096
1097 if (attr == &dev_attr_comp_times)
1098 times = local_devdata->counters->comp_times;
1099 else if (attr == &dev_attr_decomp_times)
1100 times = local_devdata->counters->decomp_times;
1101 else {
1102 rcu_read_unlock();
1103 return 0;
1104 }
1105
1106 for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
1107 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
1108 i ? (2<<(i-1)) : 0, (2<<i)-1,
1109 atomic64_read(&times[i]));
1110 bytes_remain -= bytes;
1111 p += bytes;
1112 }
1113 /* The last bucket holds everything over
1114 * 2<<(NX842_HIST_SLOTS - 2) us */
1115 bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
1116 2<<(NX842_HIST_SLOTS - 2),
1117 atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
1118 p += bytes;
1119
1120 rcu_read_unlock();
1121 return p - buf;
1122}
1123
/* NULL-terminated list of the counter and histogram attributes. */
static struct attribute *nx842_sysfs_entries[] = {
	&dev_attr_comp_complete.attr,
	&dev_attr_comp_failed.attr,
	&dev_attr_decomp_complete.attr,
	&dev_attr_decomp_failed.attr,
	&dev_attr_swdecomp.attr,
	&dev_attr_comp_times.attr,
	&dev_attr_decomp_times.attr,
	NULL,
};

/* Attribute group created in nx842_probe() and removed in nx842_remove(). */
static struct attribute_group nx842_attribute_group = {
	.name = NULL,		/* put in device directory */
	.attrs = nx842_sysfs_entries,
};
1139
1140static int __init nx842_probe(struct vio_dev *viodev,
1141 const struct vio_device_id *id)
1142{
1143 struct nx842_devdata *old_devdata, *new_devdata = NULL;
1144 unsigned long flags;
1145 int ret = 0;
1146
1147 spin_lock_irqsave(&devdata_mutex, flags);
1148 old_devdata = rcu_dereference_check(devdata,
1149 lockdep_is_held(&devdata_mutex));
1150
1151 if (old_devdata && old_devdata->vdev != NULL) {
1152 dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
1153 ret = -1;
1154 goto error_unlock;
1155 }
1156
1157 dev_set_drvdata(&viodev->dev, NULL);
1158
1159 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
1160 if (!new_devdata) {
1161 dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
1162 ret = -ENOMEM;
1163 goto error_unlock;
1164 }
1165
1166 new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
1167 GFP_NOFS);
1168 if (!new_devdata->counters) {
1169 dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
1170 ret = -ENOMEM;
1171 goto error_unlock;
1172 }
1173
1174 new_devdata->vdev = viodev;
1175 new_devdata->dev = &viodev->dev;
1176 nx842_OF_set_defaults(new_devdata);
1177
1178 rcu_assign_pointer(devdata, new_devdata);
1179 spin_unlock_irqrestore(&devdata_mutex, flags);
1180 synchronize_rcu();
1181 kfree(old_devdata);
1182
1183 pSeries_reconfig_notifier_register(&nx842_of_nb);
1184
1185 ret = nx842_OF_upd(NULL);
1186 if (ret && ret != -ENODEV) {
1187 dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
1188 ret = -1;
1189 goto error;
1190 }
1191
1192 rcu_read_lock();
1193 if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) {
1194 rcu_read_unlock();
1195 dev_err(&viodev->dev, "failed to set driver data for device\n");
1196 ret = -1;
1197 goto error;
1198 }
1199 rcu_read_unlock();
1200
1201 if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
1202 dev_err(&viodev->dev, "could not create sysfs device attributes\n");
1203 ret = -1;
1204 goto error;
1205 }
1206
1207 return 0;
1208
1209error_unlock:
1210 spin_unlock_irqrestore(&devdata_mutex, flags);
1211 if (new_devdata)
1212 kfree(new_devdata->counters);
1213 kfree(new_devdata);
1214error:
1215 return ret;
1216}
1217
1218static int __exit nx842_remove(struct vio_dev *viodev)
1219{
1220 struct nx842_devdata *old_devdata;
1221 unsigned long flags;
1222
1223 pr_info("Removing IBM Power 842 compression device\n");
1224 sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
1225
1226 spin_lock_irqsave(&devdata_mutex, flags);
1227 old_devdata = rcu_dereference_check(devdata,
1228 lockdep_is_held(&devdata_mutex));
1229 pSeries_reconfig_notifier_unregister(&nx842_of_nb);
1230 rcu_assign_pointer(devdata, NULL);
1231 spin_unlock_irqrestore(&devdata_mutex, flags);
1232 synchronize_rcu();
1233 dev_set_drvdata(&viodev->dev, NULL);
1234 if (old_devdata)
1235 kfree(old_devdata->counters);
1236 kfree(old_devdata);
1237 return 0;
1238}
1239
/* Device ids handled by this driver; the empty entry ends the table. */
static struct vio_device_id nx842_driver_ids[] = {
	{"ibm,compression-v1", "ibm,compression"},
	{"", ""},
};

/* VIO driver definition handed to vio_register_driver() in nx842_init(). */
static struct vio_driver nx842_driver = {
	.name = MODULE_NAME,
	.probe = nx842_probe,
	.remove = nx842_remove,
	.get_desired_dma = nx842_get_desired_dma,
	.id_table = nx842_driver_ids,
};
1252
1253static int __init nx842_init(void)
1254{
1255 struct nx842_devdata *new_devdata;
1256 pr_info("Registering IBM Power 842 compression driver\n");
1257
1258 RCU_INIT_POINTER(devdata, NULL);
1259 new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
1260 if (!new_devdata) {
1261 pr_err("Could not allocate memory for device data\n");
1262 return -ENOMEM;
1263 }
1264 new_devdata->status = UNAVAILABLE;
1265 RCU_INIT_POINTER(devdata, new_devdata);
1266
1267 return vio_register_driver(&nx842_driver);
1268}
1269
1270module_init(nx842_init);
1271
1272static void __exit nx842_exit(void)
1273{
1274 struct nx842_devdata *old_devdata;
1275 unsigned long flags;
1276
1277 pr_info("Exiting IBM Power 842 compression driver\n");
1278 spin_lock_irqsave(&devdata_mutex, flags);
1279 old_devdata = rcu_dereference_check(devdata,
1280 lockdep_is_held(&devdata_mutex));
1281 rcu_assign_pointer(devdata, NULL);
1282 spin_unlock_irqrestore(&devdata_mutex, flags);
1283 synchronize_rcu();
1284 if (old_devdata)
1285 dev_set_drvdata(old_devdata->dev, NULL);
1286 kfree(old_devdata);
1287 vio_unregister_driver(&nx842_driver);
1288}
1289
1290module_exit(nx842_exit);
1291
/*********************************
 * 842 software decompressor
*********************************/

/*
 * Signature shared by all template operations.  The arguments are, in
 * order: the input cursor, the bit offset within the current input byte,
 * the output cursor and the history fifo.  An operation returns 0 on
 * success and non-zero on failure.
 */
typedef int (*sw842_template_op)(const char **, int *, unsigned char **,
		struct sw842_fifo *);

/* dataN operations copy N literal bytes from the input to the output. */
static int sw842_data8(const char **, int *, unsigned char **,
						struct sw842_fifo *);
static int sw842_data4(const char **, int *, unsigned char **,
						struct sw842_fifo *);
static int sw842_data2(const char **, int *, unsigned char **,
						struct sw842_fifo *);
/* ptrN operations copy N bytes from the history fifo to the output. */
static int sw842_ptr8(const char **, int *, unsigned char **,
						struct sw842_fifo *);
static int sw842_ptr4(const char **, int *, unsigned char **,
						struct sw842_fifo *);
static int sw842_ptr2(const char **, int *, unsigned char **,
						struct sw842_fifo *);

/*
 * special templates: handled directly by sw842_decompress() instead of
 * through the sw842_tmpl_ops table
 */
#define SW842_TMPL_REPEAT 0x1B
#define SW842_TMPL_ZEROS 0x1C
#define SW842_TMPL_EOF 0x1E
1315
/*
 * Operation table indexed by template number (0-25).  Each row lists, in
 * execution order, the operations that together produce the 8 output bytes
 * of one template; rows with fewer than four operations end with NULL.
 */
static sw842_template_op sw842_tmpl_ops[26][4] = {
	{ sw842_data8, NULL}, /* 0 (00000) */
	{ sw842_data4, sw842_data2, sw842_ptr2,  NULL},
	{ sw842_data4, sw842_ptr2,  sw842_data2, NULL},
	{ sw842_data4, sw842_ptr2,  sw842_ptr2,  NULL},
	{ sw842_data4, sw842_ptr4,  NULL},
	{ sw842_data2, sw842_ptr2,  sw842_data4, NULL},
	{ sw842_data2, sw842_ptr2,  sw842_data2, sw842_ptr2},
	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_data2},
	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_ptr2,},
	{ sw842_data2, sw842_ptr2,  sw842_ptr4,  NULL},
	{ sw842_ptr2,  sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
	{ sw842_ptr2,  sw842_data4, sw842_ptr2,  NULL},
	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_data2},
	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_ptr2},
	{ sw842_ptr2,  sw842_data2, sw842_ptr4,  NULL},
	{ sw842_ptr2,  sw842_ptr2,  sw842_data4, NULL},
	{ sw842_ptr2,  sw842_ptr2,  sw842_data2, sw842_ptr2},
	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_data2},
	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_ptr2},
	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr4,  NULL},
	{ sw842_ptr4,  sw842_data4, NULL}, /* 20 (10100) */
	{ sw842_ptr4,  sw842_data2, sw842_ptr2,  NULL},
	{ sw842_ptr4,  sw842_ptr2,  sw842_data2, NULL},
	{ sw842_ptr4,  sw842_ptr2,  sw842_ptr2,  NULL},
	{ sw842_ptr4,  sw842_ptr4,  NULL},
	{ sw842_ptr8,  NULL}
};
1344
1345/* Software decompress helpers */
1346
/*
 * sw842_get_byte - read 8 bits starting at a bit offset
 *
 * @buf: byte holding the first bit to read
 * @bit: offset (0-7) of the first bit within @buf[0], counted from the
 *	most significant bit
 *
 * The input is read one byte at a time, so @buf needs no particular
 * alignment, and @buf[1] is only touched when the value actually spans
 * two bytes (@bit != 0).
 *
 * Returns the 8 bits that start @bit bits into @buf.
 */
static uint8_t sw842_get_byte(const char *buf, int bit)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int window = (unsigned int)p[0] << 8;

	if (bit)
		window |= p[1];

	/* The wanted bits now sit in bits 15..8 of the shifted window. */
	return (uint8_t)((window << bit) >> 8);
}
1357
/*
 * sw842_get_template - read the next 5-bit template number
 *
 * @buf: input cursor, advanced past the bits consumed
 * @bit: bit offset within the current input byte, advanced by 5
 *
 * Returns the template number (0-31).
 */
static uint8_t sw842_get_template(const char **buf, int *bit)
{
	const int next = *bit + 5;
	uint8_t tmpl = sw842_get_byte(*buf, *bit);

	/* The template is the top 5 of the 8 bits just read. */
	tmpl = (tmpl >> 3) & 0x1F;

	*buf += next / 8;
	*bit = next % 8;
	return tmpl;
}
1368
/*
 * sw842_get_repeat_count - read the 6-bit repeat count
 *
 * @buf: input cursor, advanced past the bits consumed
 * @bit: bit offset within the current input byte, advanced by 6
 *
 * Returns the raw count field (0-63).
 */
static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
{
	const int next = *bit + 6;
	uint8_t count = sw842_get_byte(*buf, *bit);

	/* The count is the top 6 of the 8 bits just read. */
	count = (count >> 2) & 0x3F;

	*buf += next / 8;
	*bit = next % 8;
	return count;
}
1380
/*
 * sw842_get_ptr2 - read an 8-bit fifo index
 *
 * @buf: input cursor, advanced by one byte
 * @bit: bit offset within the current input byte; left unchanged because
 *	exactly 8 bits are consumed
 *
 * Returns the index read.
 */
static uint8_t sw842_get_ptr2(const char **buf, int *bit)
{
	const char *cur = *buf;

	*buf = cur + 1;
	return sw842_get_byte(cur, *bit);
}
1388
/*
 * sw842_get_ptr4 - read a 9-bit fifo index
 *
 * @buf: input cursor, advanced past the bits consumed
 * @bit: bit offset (0-7) within the current input byte, advanced by 9
 * @fifo: unused
 *
 * The two input bytes are read individually, so @buf needs no particular
 * alignment and the result does not depend on host byte order.
 *
 * Returns the index read (0-511).
 */
static uint16_t sw842_get_ptr4(const char **buf, int *bit,
			       struct sw842_fifo *fifo)
{
	const unsigned char *p = (const unsigned char *)*buf;
	unsigned int window = ((unsigned int)p[0] << 8) | p[1];
	uint16_t ptr;

	/* Drop the consumed bits, then keep the top 9 of the 16-bit window. */
	ptr = (uint16_t)(((window << *bit) & 0xFFFF) >> 7);
	ptr &= 0x01FF;
	*buf += (*bit + 9) / 8;
	*bit = (*bit + 9) % 8;
	return ptr;
}
1401
/*
 * sw842_get_ptr8 - read an 8-bit fifo index for an 8-byte entry
 *
 * Same encoding as the 2-byte index, so this forwards to sw842_get_ptr2();
 * @fifo is unused.
 */
static uint8_t sw842_get_ptr8(const char **buf, int *bit,
		struct sw842_fifo *fifo)
{
	return sw842_get_ptr2(buf, bit);
}
1407
1408/* Software decompress template ops */
1409
/*
 * sw842_data8 - copy 8 literal bytes from the input to the output
 *
 * Implemented as two 4-byte copies; stops at the first failing half.
 * Returns 0 on success, otherwise the value returned by sw842_data4().
 */
static int sw842_data8(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int half;

	for (half = 0; half < 2; half++) {
		int err = sw842_data4(inbuf, inbit, outbuf, fifo);

		if (err)
			return err;
	}
	return 0;
}
1421
/*
 * sw842_data4 - copy 4 literal bytes from the input to the output
 *
 * Implemented as two 2-byte copies; stops at the first failing half.
 * Returns 0 on success, otherwise the value returned by sw842_data2().
 */
static int sw842_data4(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int half;

	for (half = 0; half < 2; half++) {
		int err = sw842_data2(inbuf, inbit, outbuf, fifo);

		if (err)
			return err;
	}
	return 0;
}
1433
/*
 * sw842_data2 - copy 2 literal bytes from the input to the output
 *
 * Each byte is read at the current bit offset, which stays unchanged
 * because whole bytes are consumed.  Both cursors advance by two.
 * Always returns 0.
 */
static int sw842_data2(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int n;

	for (n = 0; n < 2; n++) {
		**outbuf = sw842_get_byte(*inbuf, *inbit);
		*inbuf += 1;
		*outbuf += 1;
	}
	return 0;
}
1445
1446static int sw842_ptr8(const char **inbuf, int *inbit,
1447 unsigned char **outbuf, struct sw842_fifo *fifo)
1448{
1449 uint8_t ptr;
1450 ptr = sw842_get_ptr8(inbuf, inbit, fifo);
1451 if (!fifo->f84_full && (ptr >= fifo->f8_count))
1452 return 1;
1453 memcpy(*outbuf, fifo->f8[ptr], 8);
1454 *outbuf += 8;
1455 return 0;
1456}
1457
1458static int sw842_ptr4(const char **inbuf, int *inbit,
1459 unsigned char **outbuf, struct sw842_fifo *fifo)
1460{
1461 uint16_t ptr;
1462 ptr = sw842_get_ptr4(inbuf, inbit, fifo);
1463 if (!fifo->f84_full && (ptr >= fifo->f4_count))
1464 return 1;
1465 memcpy(*outbuf, fifo->f4[ptr], 4);
1466 *outbuf += 4;
1467 return 0;
1468}
1469
1470static int sw842_ptr2(const char **inbuf, int *inbit,
1471 unsigned char **outbuf, struct sw842_fifo *fifo)
1472{
1473 uint8_t ptr;
1474 ptr = sw842_get_ptr2(inbuf, inbit);
1475 if (!fifo->f2_full && (ptr >= fifo->f2_count))
1476 return 1;
1477 memcpy(*outbuf, fifo->f2[ptr], 2);
1478 *outbuf += 2;
1479 return 0;
1480}
1481
/*
 * sw842_copy_to_fifo - record 8 bytes of output in the history fifo
 *
 * @buf: the 8 bytes to record
 * @fifo: history fifo to update
 *
 * Stores @buf as one 8-byte entry and as four consecutive 2-byte entries,
 * and advances the entry counters.
 */
static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
{
	unsigned char initial_f2count = fifo->f2_count;

	/*
	 * NOTE(review): no separate copy is made into f4 although f4_count
	 * advances by two - presumably f4 shares storage with f8; verify
	 * against the definition of struct sw842_fifo.
	 */
	memcpy(fifo->f8[fifo->f8_count], buf, 8);
	fifo->f4_count += 2;
	fifo->f8_count += 1;

	/*
	 * Mark the 8/4-byte fifo full once 512 4-byte entries were recorded.
	 * NOTE(review): "/= 512" leaves 1 where a wrap-around would give 0;
	 * f4_count is not consulted by the ptr ops once f84_full is set -
	 * confirm the division is intended.
	 */
	if (!fifo->f84_full && fifo->f4_count >= 512) {
		fifo->f84_full = 1;
		fifo->f4_count /= 512;
	}

	memcpy(fifo->f2[fifo->f2_count++], buf, 2);
	memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
	memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
	memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
	/*
	 * A counter below its value on entry means it wrapped around.
	 * NOTE(review): relies on f2_count being narrow enough to wrap -
	 * confirm its type.
	 */
	if (fifo->f2_count < initial_f2count)
		fifo->f2_full = 1;
}
1502
/*
 * sw842_decompress - decompress an 842 stream in software
 *
 * @src: compressed input
 * @srclen: number of bytes at @src
 * @dst: output buffer
 * @destlen: on entry the capacity of @dst; on return the number of bytes
 *	produced, or 0 on error
 * @wrkmem: struct nx842_workmem; its swfifo member is cleared and used as
 *	the history fifo
 *
 * Templates are processed until SW842_TMPL_EOF is read.  Every template
 * produces 8 output bytes; a repeat template produces 8 bytes per
 * repetition.
 *
 * Returns:
 *	0 - Success
 *	-EINVAL - input exhausted or malformed (repeat without a previous
 *		  block, unknown template, rejected fifo index)
 *	-ENOSPC - fewer than 8 bytes left in @dst
 */
static int sw842_decompress(const unsigned char *src, int srclen,
			unsigned char *dst, int *destlen,
			const void *wrkmem)
{
	uint8_t tmpl;
	const char *inbuf;
	int inbit = 0;
	unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
	const char *inbuf_end;
	sw842_template_op op;
	int opindex;
	int i, repeat_count;
	struct sw842_fifo *fifo;
	int ret = 0;

	fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
	memset(fifo, 0, sizeof(*fifo));

	origbuf = NULL;
	inbuf = src;
	inbuf_end = src + srclen;
	outbuf = dst;
	outbuf_end = dst + *destlen;

	/*
	 * NOTE(review): the template is fetched before the bounds check in
	 * the loop body, so the read itself is not covered by that check.
	 */
	while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
		if (inbuf >= inbuf_end) {
			ret = -EINVAL;
			goto out;
		}

		opindex = 0;
		/* prevbuf: start of the previous block; origbuf: this one */
		prevbuf = origbuf;
		origbuf = outbuf;
		switch (tmpl) {
		case SW842_TMPL_REPEAT:
			/* Nothing to repeat at the very start of the stream */
			if (prevbuf == NULL) {
				ret = -EINVAL;
				goto out;
			}

			/* The stored field is the repetition count minus one */
			repeat_count = sw842_get_repeat_count(&inbuf,
								&inbit) + 1;

			/* Did the repeat count advance past the end of input */
			if (inbuf > inbuf_end) {
				ret = -EINVAL;
				goto out;
			}

			for (i = 0; i < repeat_count; i++) {
				/* Would this overflow the output buffer */
				if ((outbuf + 8) > outbuf_end) {
					ret = -ENOSPC;
					goto out;
				}

				memcpy(outbuf, prevbuf, 8);
				sw842_copy_to_fifo(outbuf, fifo);
				outbuf += 8;
			}
			break;

		case SW842_TMPL_ZEROS:
			/* Would this overflow the output buffer */
			if ((outbuf + 8) > outbuf_end) {
				ret = -ENOSPC;
				goto out;
			}

			memset(outbuf, 0, 8);
			sw842_copy_to_fifo(outbuf, fifo);
			outbuf += 8;
			break;

		default:
			/* sw842_tmpl_ops only has rows 0-25 */
			if (tmpl > 25) {
				ret = -EINVAL;
				goto out;
			}

			/*
			 * Does this go past the end of the input buffer
			 * NOTE(review): only 2 bytes are verified although a
			 * row such as sw842_data8 consumes 8 - confirm that
			 * callers guarantee enough readable input.
			 */
			if ((inbuf + 2) > inbuf_end) {
				ret = -EINVAL;
				goto out;
			}

			/* Would this overflow the output buffer */
			if ((outbuf + 8) > outbuf_end) {
				ret = -ENOSPC;
				goto out;
			}

			/*
			 * Run the row's operations in order.
			 * NOTE(review): the fifo is updated from origbuf
			 * after every operation, i.e. possibly before all 8
			 * bytes of the block are written - confirm intended.
			 */
			while (opindex < 4 &&
				(op = sw842_tmpl_ops[tmpl][opindex++])
					!= NULL) {
				ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
				if (ret) {
					ret = -EINVAL;
					goto out;
				}
				sw842_copy_to_fifo(origbuf, fifo);
			}
		}
	}

out:
	if (!ret)
		*destlen = (unsigned int)(outbuf - dst);
	else
		*destlen = 0;

	return ret;
}