summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c249
1 files changed, 245 insertions, 4 deletions
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 2b2f3c3335ce..ec7d10da94f0 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -28,6 +28,11 @@ static inline bool is_simm5(unsigned int value)
28 return value + 0x10 < 0x20; 28 return value + 0x10 < 0x20;
29} 29}
30 30
31static inline bool is_sethi(unsigned int value)
32{
33 return (value & ~0x3fffff) == 0;
34}
35
31static void bpf_flush_icache(void *start_, void *end_) 36static void bpf_flush_icache(void *start_, void *end_)
32{ 37{
33 /* Cheetah's I-cache is fully coherent. */ 38 /* Cheetah's I-cache is fully coherent. */
@@ -367,16 +372,252 @@ static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
367 } 372 }
368} 373}
369 374
375static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
376 int *hbsp, int *lbsp, int *abbasp)
377{
378 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
379 int i;
380
381 lowest_bit_set = highest_bit_set = -1;
382 i = 0;
383 do {
384 if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
385 lowest_bit_set = i;
386 if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
387 highest_bit_set = (64 - i - 1);
388 } while (++i < 32 && (highest_bit_set == -1 ||
389 lowest_bit_set == -1));
390 if (i == 32) {
391 i = 0;
392 do {
393 if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
394 lowest_bit_set = i + 32;
395 if (highest_bit_set == -1 &&
396 ((low_bits >> (32 - i - 1)) & 1))
397 highest_bit_set = 32 - i - 1;
398 } while (++i < 32 && (highest_bit_set == -1 ||
399 lowest_bit_set == -1));
400 }
401
402 all_bits_between_are_set = 1;
403 for (i = lowest_bit_set; i <= highest_bit_set; i++) {
404 if (i < 32) {
405 if ((low_bits & (1 << i)) != 0)
406 continue;
407 } else {
408 if ((high_bits & (1 << (i - 32))) != 0)
409 continue;
410 }
411 all_bits_between_are_set = 0;
412 break;
413 }
414 *hbsp = highest_bit_set;
415 *lbsp = lowest_bit_set;
416 *abbasp = all_bits_between_are_set;
417}
418
419static unsigned long create_simple_focus_bits(unsigned long high_bits,
420 unsigned long low_bits,
421 int lowest_bit_set, int shift)
422{
423 long hi, lo;
424
425 if (lowest_bit_set < 32) {
426 lo = (low_bits >> lowest_bit_set) << shift;
427 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
428 } else {
429 lo = 0;
430 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
431 }
432 return hi | lo;
433}
434
435static bool const64_is_2insns(unsigned long high_bits,
436 unsigned long low_bits)
437{
438 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
439
440 if (high_bits == 0 || high_bits == 0xffffffff)
441 return true;
442
443 analyze_64bit_constant(high_bits, low_bits,
444 &highest_bit_set, &lowest_bit_set,
445 &all_bits_between_are_set);
446
447 if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
448 all_bits_between_are_set != 0)
449 return true;
450
451 if (highest_bit_set - lowest_bit_set < 21)
452 return true;
453
454 return false;
455}
456
457static void sparc_emit_set_const64_quick2(unsigned long high_bits,
458 unsigned long low_imm,
459 unsigned int dest,
460 int shift_count, struct jit_ctx *ctx)
461{
462 emit_loadimm32(high_bits, dest, ctx);
463
464 /* Now shift it up into place. */
465 emit_alu_K(SLLX, dest, shift_count, ctx);
466
467 /* If there is a low immediate part piece, finish up by
468 * putting that in as well.
469 */
470 if (low_imm != 0)
471 emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
472}
473
370static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx) 474static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
371{ 475{
476 int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
372 unsigned int tmp = bpf2sparc[TMP_REG_1]; 477 unsigned int tmp = bpf2sparc[TMP_REG_1];
373 u32 high_part = (K >> 32); 478 u32 low_bits = (K & 0xffffffff);
374 u32 low_part = (K & 0xffffffff); 479 u32 high_bits = (K >> 32);
480
481 /* These two tests also take care of all of the one
482 * instruction cases.
483 */
484 if (high_bits == 0xffffffff && (low_bits & 0x80000000))
485 return emit_loadimm_sext(K, dest, ctx);
486 if (high_bits == 0x00000000)
487 return emit_loadimm32(K, dest, ctx);
488
489 analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
490 &lowest_bit_set, &all_bits_between_are_set);
491
492 /* 1) mov -1, %reg
493 * sllx %reg, shift, %reg
494 * 2) mov -1, %reg
495 * srlx %reg, shift, %reg
496 * 3) mov some_small_const, %reg
497 * sllx %reg, shift, %reg
498 */
499 if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
500 all_bits_between_are_set != 0) ||
501 ((highest_bit_set - lowest_bit_set) < 12)) {
502 int shift = lowest_bit_set;
503 long the_const = -1;
504
505 if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
506 all_bits_between_are_set == 0) {
507 the_const =
508 create_simple_focus_bits(high_bits, low_bits,
509 lowest_bit_set, 0);
510 } else if (lowest_bit_set == 0)
511 shift = -(63 - highest_bit_set);
512
513 emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
514 if (shift > 0)
515 emit_alu_K(SLLX, dest, shift, ctx);
516 else if (shift < 0)
517 emit_alu_K(SRLX, dest, -shift, ctx);
518
519 return;
520 }
521
522 /* Now a range of 22 or less bits set somewhere.
523 * 1) sethi %hi(focus_bits), %reg
524 * sllx %reg, shift, %reg
525 * 2) sethi %hi(focus_bits), %reg
526 * srlx %reg, shift, %reg
527 */
528 if ((highest_bit_set - lowest_bit_set) < 21) {
529 unsigned long focus_bits =
530 create_simple_focus_bits(high_bits, low_bits,
531 lowest_bit_set, 10);
532
533 emit(SETHI(focus_bits, dest), ctx);
534
535 /* If lowest_bit_set == 10 then a sethi alone could
536 * have done it.
537 */
538 if (lowest_bit_set < 10)
539 emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
540 else if (lowest_bit_set > 10)
541 emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
542 return;
543 }
544
545 /* Ok, now 3 instruction sequences. */
546 if (low_bits == 0) {
547 emit_loadimm32(high_bits, dest, ctx);
548 emit_alu_K(SLLX, dest, 32, ctx);
549 return;
550 }
551
552 /* We may be able to do something quick
553 * when the constant is negated, so try that.
554 */
555 if (const64_is_2insns((~high_bits) & 0xffffffff,
556 (~low_bits) & 0xfffffc00)) {
557 /* NOTE: The trailing bits get XOR'd so we need the
558 * non-negated bits, not the negated ones.
559 */
560 unsigned long trailing_bits = low_bits & 0x3ff;
561
562 if ((((~high_bits) & 0xffffffff) == 0 &&
563 ((~low_bits) & 0x80000000) == 0) ||
564 (((~high_bits) & 0xffffffff) == 0xffffffff &&
565 ((~low_bits) & 0x80000000) != 0)) {
566 unsigned long fast_int = (~low_bits & 0xffffffff);
567
568 if ((is_sethi(fast_int) &&
569 (~high_bits & 0xffffffff) == 0)) {
570 emit(SETHI(fast_int, dest), ctx);
571 } else if (is_simm13(fast_int)) {
572 emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
573 } else {
574 emit_loadimm64(fast_int, dest, ctx);
575 }
576 } else {
577 u64 n = ((~low_bits) & 0xfffffc00) |
578 (((unsigned long)((~high_bits) & 0xffffffff))<<32);
579 emit_loadimm64(n, dest, ctx);
580 }
581
582 low_bits = -0x400 | trailing_bits;
583
584 emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
585 return;
586 }
587
588 /* 1) sethi %hi(xxx), %reg
589 * or %reg, %lo(xxx), %reg
590 * sllx %reg, yyy, %reg
591 */
592 if ((highest_bit_set - lowest_bit_set) < 32) {
593 unsigned long focus_bits =
594 create_simple_focus_bits(high_bits, low_bits,
595 lowest_bit_set, 0);
596
597 /* So what we know is that the set bits straddle the
598 * middle of the 64-bit word.
599 */
600 sparc_emit_set_const64_quick2(focus_bits, 0, dest,
601 lowest_bit_set, ctx);
602 return;
603 }
604
605 /* 1) sethi %hi(high_bits), %reg
606 * or %reg, %lo(high_bits), %reg
607 * sllx %reg, 32, %reg
608 * or %reg, low_bits, %reg
609 */
610 if (is_simm13(low_bits) && ((int)low_bits > 0)) {
611 sparc_emit_set_const64_quick2(high_bits, low_bits,
612 dest, 32, ctx);
613 return;
614 }
375 615
616 /* Oh well, we tried... Do a full 64-bit decomposition. */
376 ctx->tmp_1_used = true; 617 ctx->tmp_1_used = true;
377 618
378 emit_set_const(high_part, tmp, ctx); 619 emit_loadimm32(high_bits, tmp, ctx);
379 emit_set_const(low_part, dest, ctx); 620 emit_loadimm32(low_bits, dest, ctx);
380 emit_alu_K(SLLX, tmp, 32, ctx); 621 emit_alu_K(SLLX, tmp, 32, ctx);
381 emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx); 622 emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
382} 623}