author		Tvrtko Ursulin <tvrtko.ursulin@intel.com>	2017-02-16 07:23:23 -0500
committer	Tvrtko Ursulin <tvrtko.ursulin@intel.com>	2017-02-17 06:39:59 -0500
commit		133b4bd74d89220c612d4adfabb2f41f6f432184 (patch)
tree		a86fe934340d4ba7056e7747c25bceacee83ce4d /drivers/gpu/drm/i915/intel_ringbuffer.c
parent		8ee7c6e23bb1b3c37ef27e81395db056bd7eac53 (diff)
drm/i915: Move common workaround code to intel_engine_cs
It is used by all submission backends.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c | 550 ----------
1 file changed, 0 insertions(+), 550 deletions(-)
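
For orientation, a minimal sketch of how a submission backend would use the moved helpers. The function names (init_workarounds_ring, intel_ring_workarounds_emit) are taken from the code removed below; the call sites themselves are hypothetical illustrations under the assumption that the helpers keep these names after the move to intel_engine_cs, not part of this patch:

/* Hypothetical call sites: with the helpers shared from intel_engine_cs,
 * any submission backend (legacy ringbuffer or execlists) can reuse them.
 */
static int backend_init_render_engine(struct intel_engine_cs *engine)
{
	/* Gathers the per-platform workaround list into
	 * engine->i915->workarounds (render engine only).
	 */
	return init_workarounds_ring(engine);
}

static int backend_emit_request_preamble(struct drm_i915_gem_request *req)
{
	/* Replays the gathered list via MI_LOAD_REGISTER_IMM,
	 * bracketed by EMIT_BARRIER flushes.
	 */
	return intel_ring_workarounds_emit(req);
}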
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8c17db72489f..629fe6584e61 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -644,41 +644,6 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 	}
 }
 
-int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
-{
-	struct i915_workarounds *w = &req->i915->workarounds;
-	u32 *cs;
-	int ret, i;
-
-	if (w->count == 0)
-		return 0;
-
-	ret = req->engine->emit_flush(req, EMIT_BARRIER);
-	if (ret)
-		return ret;
-
-	cs = intel_ring_begin(req, (w->count * 2 + 2));
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
-	for (i = 0; i < w->count; i++) {
-		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
-		*cs++ = w->reg[i].value;
-	}
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(req, cs);
-
-	ret = req->engine->emit_flush(req, EMIT_BARRIER);
-	if (ret)
-		return ret;
-
-	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
-
-	return 0;
-}
-
 static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
 {
 	int ret;
@@ -694,521 +659,6 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static int wa_add(struct drm_i915_private *dev_priv,
-		  i915_reg_t addr,
-		  const u32 mask, const u32 val)
-{
-	const u32 idx = dev_priv->workarounds.count;
-
-	if (WARN_ON(idx >= I915_MAX_WA_REGS))
-		return -ENOSPC;
-
-	dev_priv->workarounds.reg[idx].addr = addr;
-	dev_priv->workarounds.reg[idx].value = val;
-	dev_priv->workarounds.reg[idx].mask = mask;
-
-	dev_priv->workarounds.count++;
-
-	return 0;
-}
-
-#define WA_REG(addr, mask, val) do { \
-		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
-		if (r) \
-			return r; \
-	} while (0)
-
-#define WA_SET_BIT_MASKED(addr, mask) \
-	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
-	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
-	WA_REG(addr, mask, _MASKED_FIELD(mask, value))
-
-#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
-#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
-
-#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
-
-static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
-				 i915_reg_t reg)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct i915_workarounds *wa = &dev_priv->workarounds;
-	const uint32_t index = wa->hw_whitelist_count[engine->id];
-
-	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
-		return -EINVAL;
-
-	WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
-		 i915_mmio_reg_offset(reg));
-	wa->hw_whitelist_count[engine->id]++;
-
-	return 0;
-}
-
-static int gen8_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
-
-	/* WaDisableAsyncFlipPerfMode:bdw,chv */
-	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
-
-	/* WaDisablePartialInstShootdown:bdw,chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-	 * workaround for a possible hang in the unlikely event a TLB
-	 * invalidation occurs during a PSD flush.
-	 */
-	/* WaForceEnableNonCoherent:bdw,chv */
-	/* WaHdcDisableFetchWhenMasked:bdw,chv */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
-			  HDC_FORCE_NON_COHERENT);
-
-	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
-	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
-	 *  polygons in the same 8x4 pixel/sample area to be processed without
-	 *  stalling waiting for the earlier ones to write to Hierarchical Z
-	 *  buffer."
-	 *
-	 * This optimization is off by default for BDW and CHV; turn it on.
-	 */
-	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
-
-	/* Wa4x4STCOptimizationDisable:bdw,chv */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN6_WIZ_HASHING_MASK,
-			    GEN6_WIZ_HASHING_16x4);
-
-	return 0;
-}
-
-static int bdw_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen8_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-	/* WaDisableDopClockGating:bdw
-	 *
-	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
-	 * to disable EUTC clock gating.
-	 */
-	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
-			  DOP_CLOCK_GATING_DISABLE);
-
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-			  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  /* WaForceContextSaveRestoreNonCoherent:bdw */
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
-			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
-	return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen8_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-	/* Improve HiZ throughput on CHV. */
-	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
-	return 0;
-}
-
-static int gen9_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */
-	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
-
-	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */
-	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
-	/* WaDisableKillLogic:bxt,skl,kbl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   ECOCHK_DIS_TLB);
-
-	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */
-	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  FLOW_CONTROL_ENABLE |
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
-	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
-		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-				  GEN9_DG_MIRROR_FIX_ENABLE);
-
-	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
-		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
-				  GEN9_RHWO_OPTIMIZATION_DISABLE);
-		/*
-		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
-		 * but we do that in per ctx batchbuffer as there is an issue
-		 * with this register not getting restored on ctx restore
-		 */
-	}
-
-	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
-	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-			  GEN9_ENABLE_GPGPU_PREEMPTION);
-
-	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */
-	/* WaDisablePartialResolveInVc:skl,bxt,kbl */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
-					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
-
-	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */
-	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-			  GEN9_CCS_TLB_PREFETCH_ENABLE);
-
-	/* WaDisableMaskBasedCammingInRCC:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
-		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
-				  PIXEL_MASK_CAMMING_DISABLE);
-
-	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
-
-	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
-	 * both tied to WaForceContextSaveRestoreNonCoherent
-	 * in some hsds for skl. We keep the tie for all gen9. The
-	 * documentation is a bit hazy and so we want to get common behaviour,
-	 * even though there is no clear evidence we would need both on kbl/bxt.
-	 * This area has been source of system hangs so we play it safe
-	 * and mimic the skl regardless of what bspec says.
-	 *
-	 * Use Force Non-Coherent whenever executing a 3D context. This
-	 * is a workaround for a possible hang in the unlikely event
-	 * a TLB invalidation occurs during a PSD flush.
-	 */
-
-	/* WaForceEnableNonCoherent:skl,bxt,kbl */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_NON_COHERENT);
-
-	/* WaDisableHDCInvalidation:skl,bxt,kbl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   BDW_DISABLE_HDC_INVALIDATION);
-
-	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
-	if (IS_SKYLAKE(dev_priv) ||
-	    IS_KABYLAKE(dev_priv) ||
-	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
-		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-				  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
-	/* WaOCLCoherentLineFlush:skl,bxt,kbl */
-	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
-				    GEN8_LQSC_FLUSH_COHERENT_LINES));
-
-	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */
-	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
-	if (ret)
-		return ret;
-
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */
-	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	u8 vals[3] = { 0, 0, 0 };
-	unsigned int i;
-
-	for (i = 0; i < 3; i++) {
-		u8 ss;
-
-		/*
-		 * Only consider slices where one, and only one, subslice has 7
-		 * EUs
-		 */
-		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
-			continue;
-
-		/*
-		 * subslice_7eu[i] != 0 (because of the check above) and
-		 * ss_max == 4 (maximum number of subslices possible per slice)
-		 *
-		 * ->    0 <= ss <= 3;
-		 */
-		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
-		vals[i] = 3 - ss;
-	}
-
-	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
-		return 0;
-
-	/* Tune IZ hashing. See intel_device_info_runtime_init() */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN9_IZ_HASHING_MASK(2) |
-			    GEN9_IZ_HASHING_MASK(1) |
-			    GEN9_IZ_HASHING_MASK(0),
-			    GEN9_IZ_HASHING(2, vals[2]) |
-			    GEN9_IZ_HASHING(1, vals[1]) |
-			    GEN9_IZ_HASHING(0, vals[0]));
-
-	return 0;
-}
-
-static int skl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/*
-	 * Actual WA is to disable percontext preemption granularity control
-	 * until D0 which is the default case so this is equivalent to
-	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
-	 */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-
-	/* WaEnableGapsTsvCreditFix:skl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaDisableGafsUnitClkGating:skl */
-	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
-
-	/* WaInPlaceDecompressionHang:skl */
-	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
-	/* WaDisableLSQCROPERFforOCL:skl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-	if (ret)
-		return ret;
-
-	return skl_tune_iz_hashing(engine);
-}
-
-static int bxt_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaStoreMultiplePTEenable:bxt */
-	/* This is a requirement according to Hardware specification */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
-		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
-
-	/* WaSetClckGatingDisableMedia:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
-		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
-					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
-	}
-
-	/* WaDisableThreadStallDopClockGating:bxt */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  STALL_DOP_GATING_DISABLE);
-
-	/* WaDisablePooledEuLoadBalancingFix:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
-		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
-				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
-	}
-
-	/* WaDisableSbeCacheDispatchPortSharing:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
-		WA_SET_BIT_MASKED(
-			GEN7_HALF_SLICE_CHICKEN1,
-			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-	}
-
-	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
-	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
-	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
-	/* WaDisableLSQCROPERFforOCL:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
-		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
-		if (ret)
-			return ret;
-
-		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-		if (ret)
-			return ret;
-	}
-
-	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
-		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
-					   L3_HIGH_PRIO_CREDITS(2));
-
-	/* WaToEnableHwFixForPushConstHWBug:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaInPlaceDecompressionHang:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
-	return 0;
-}
-
-static int kbl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:kbl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaDisableDynamicCreditSharing:kbl */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		WA_SET_BIT(GAMT_CHKN_BIT_REG,
-			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
-
-	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FENCE_DEST_SLM_DISABLE);
-
-	/* WaToEnableHwFixForPushConstHWBug:kbl */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableGafsUnitClkGating:kbl */
-	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
-
-	/* WaDisableSbeCacheDispatchPortSharing:kbl */
-	WA_SET_BIT_MASKED(
-		GEN7_HALF_SLICE_CHICKEN1,
-		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-	/* WaInPlaceDecompressionHang:kbl */
-	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
-	/* WaDisableLSQCROPERFforOCL:kbl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int glk_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaToEnableHwFixForPushConstHWBug:glk */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	return 0;
-}
-
-int init_workarounds_ring(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WARN_ON(engine->id != RCS);
-
-	dev_priv->workarounds.count = 0;
-	dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
-
-	if (IS_BROADWELL(dev_priv))
-		return bdw_init_workarounds(engine);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		return chv_init_workarounds(engine);
-
-	if (IS_SKYLAKE(dev_priv))
-		return skl_init_workarounds(engine);
-
-	if (IS_BROXTON(dev_priv))
-		return bxt_init_workarounds(engine);
-
-	if (IS_KABYLAKE(dev_priv))
-		return kbl_init_workarounds(engine);
-
-	if (IS_GEMINILAKE(dev_priv))
-		return glk_init_workarounds(engine);
-
-	return 0;
-}
-
 static int init_render_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;