aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Cornwall <Jay.Cornwall@amd.com>2019-07-01 16:46:56 -0400
committerAlex Deucher <alexander.deucher@amd.com>2019-07-18 15:18:06 -0400
commit37f86a9b3617d55ad8189e1b7e6468b85dba4b88 (patch)
treea2525b2957285fbb0ecf091c41455148ec44c953
parent5ddd4a9a7c25a6a23a79f973e7a87b1403503719 (diff)
drm/amdkfd: Merge gfx9/arcturus trap handlers, add ACC VGPR save
ACC VGPRs are a secondary VGPR set of same size as the primary VGPRs. Save them as a block immediately following VGPRs. Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h627
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_arcturus.asm746
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm83
3 files changed, 538 insertions, 918 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index ee700a69c68e..c45ba0013ca5 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -861,200 +861,487 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
861 0xbf9f0000, 0x00000000, 861 0xbf9f0000, 0x00000000,
862}; 862};
863static const uint32_t cwsr_trap_arcturus_hex[] = { 863static const uint32_t cwsr_trap_arcturus_hex[] = {
864 0xbf820001, 0xbf8200ca, 864 0xbf820001, 0xbf8202bd,
865 0xb8f0f802, 0x89708670, 865 0xb8f8f802, 0x89788678,
866 0xb8f1f803, 0x8671ff71, 866 0xb8fbf803, 0x866eff7b,
867 0x00000400, 0xbf850008, 867 0x00000400, 0xbf85003b,
868 0xb8f1f803, 0x8671ff71, 868 0x866eff7b, 0x00000800,
869 0x000001ff, 0xbf850001, 869 0xbf850003, 0x866eff7b,
870 0x806c846c, 0x866dff6d, 870 0x00000100, 0xbf84000c,
871 0x0000ffff, 0xbe801f6c, 871 0x866eff78, 0x00002000,
872 0xb8f1f803, 0x8671ff71, 872 0xbf840005, 0xbf8e0010,
873 0x00000100, 0xbf840006, 873 0xb8eef803, 0x866eff6e,
874 0xbef60080, 0xb9760203, 874 0x00000400, 0xbf84fffb,
875 0x866dff6d, 0x0000ffff, 875 0x8778ff78, 0x00002000,
876 0x80ec886c, 0x82ed806d, 876 0x80ec886c, 0x82ed806d,
877 0xbef60080, 0xb9760283, 877 0xb8eef807, 0x866fff6e,
878 0xbef20068, 0xbef30069, 878 0x001f8000, 0x8e6f8b6f,
879 0xb8f62407, 0x8e769b76, 879 0x8977ff77, 0xfc000000,
880 0x876d766d, 0xb8f603c7, 880 0x87776f77, 0x896eff6e,
881 0x8e769a76, 0x876d766d, 881 0x001f8000, 0xb96ef807,
882 0xb8f6f807, 0x8676ff76, 882 0xb8faf812, 0xb8fbf813,
883 0x00007fff, 0xb976f807, 883 0x8efa887a, 0xc0071bbd,
884 0xbeee007e, 0xbeef007f, 884 0x00000000, 0xbf8cc07f,
885 0xbefe0180, 0xbf900004, 885 0xc0071ebd, 0x00000008,
886 0xbf8e0002, 0xbf88fffe, 886 0xbf8cc07f, 0x86ee6e6e,
887 0xbf840001, 0xbe801d6e,
888 0xb8fbf803, 0x867bff7b,
889 0x000001ff, 0xbf850002,
890 0x806c846c, 0x826d806d,
891 0x866dff6d, 0x0000ffff,
892 0x8f6e8b77, 0x866eff6e,
893 0x001f8000, 0xb96ef807,
894 0x86fe7e7e, 0x86ea6a6a,
895 0x8f6e8378, 0xb96ee0c2,
896 0xbf800002, 0xb9780002,
897 0xbe801f6c, 0x866dff6d,
898 0x0000ffff, 0xbefa0080,
899 0xb97a0283, 0xb8fa2407,
900 0x8e7a9b7a, 0x876d7a6d,
901 0xb8fa03c7, 0x8e7a9a7a,
902 0x876d7a6d, 0xb8faf807,
903 0x867aff7a, 0x00007fff,
904 0xb97af807, 0xbeee007e,
905 0xbeef007f, 0xbefe0180,
906 0xbf900004, 0x877a8478,
907 0xb97af802, 0xbf8e0002,
908 0xbf88fffe, 0xb8fa2a05,
909 0x807a817a, 0x8e7a8a7a,
910 0x8e7a817a, 0xb8fb1605,
911 0x807b817b, 0x8e7b867b,
912 0x807a7b7a, 0x807a7e7a,
913 0x827b807f, 0x867bff7b,
914 0x0000ffff, 0xc04b1c3d,
915 0x00000050, 0xbf8cc07f,
916 0xc04b1d3d, 0x00000060,
917 0xbf8cc07f, 0xc0431e7d,
918 0x00000074, 0xbf8cc07f,
887 0xbef4007e, 0x8675ff7f, 919 0xbef4007e, 0x8675ff7f,
888 0x0000ffff, 0x8775ff75, 920 0x0000ffff, 0x8775ff75,
889 0x00040000, 0xbef60080, 921 0x00040000, 0xbef60080,
890 0xbef700ff, 0x00807fac, 922 0xbef700ff, 0x00807fac,
891 0x8676ff7f, 0x08000000, 923 0x867aff7f, 0x08000000,
892 0x8f768376, 0x87777677, 924 0x8f7a837a, 0x87777a77,
893 0x8676ff7f, 0x70000000, 925 0x867aff7f, 0x70000000,
894 0x8f768176, 0x87777677, 926 0x8f7a817a, 0x87777a77,
895 0xbefb007c, 0xbefa0080, 927 0xbef1007c, 0xbef00080,
896 0xbf8a0000, 0x8676ff7f, 928 0xb8f02a05, 0x80708170,
897 0x04000000, 0xbf840012, 929 0x8e708a70, 0x8e708170,
898 0xbefe00c1, 0xbeff00c1, 930 0xb8fa1605, 0x807a817a,
899 0xb8f14306, 0x8671c171, 931 0x8e7a867a, 0x80707a70,
900 0xbf84000d, 0x8e718671, 932 0xbef60084, 0xbef600ff,
901 0x8e718271, 0xbef60071, 933 0x01000000, 0xbefe007c,
902 0xbef600ff, 0x01000000, 934 0xbefc0070, 0xc0611c7a,
935 0x0000007c, 0xbf8cc07f,
936 0x80708470, 0xbefc007e,
937 0xbefe007c, 0xbefc0070,
938 0xc0611b3a, 0x0000007c,
939 0xbf8cc07f, 0x80708470,
940 0xbefc007e, 0xbefe007c,
941 0xbefc0070, 0xc0611b7a,
942 0x0000007c, 0xbf8cc07f,
943 0x80708470, 0xbefc007e,
944 0xbefe007c, 0xbefc0070,
945 0xc0611bba, 0x0000007c,
946 0xbf8cc07f, 0x80708470,
947 0xbefc007e, 0xbefe007c,
948 0xbefc0070, 0xc0611bfa,
949 0x0000007c, 0xbf8cc07f,
950 0x80708470, 0xbefc007e,
951 0xbefe007c, 0xbefc0070,
952 0xc0611e3a, 0x0000007c,
953 0xbf8cc07f, 0x80708470,
954 0xbefc007e, 0xb8fbf803,
955 0xbefe007c, 0xbefc0070,
956 0xc0611efa, 0x0000007c,
957 0xbf8cc07f, 0x80708470,
958 0xbefc007e, 0xbefe007c,
959 0xbefc0070, 0xc0611a3a,
960 0x0000007c, 0xbf8cc07f,
961 0x80708470, 0xbefc007e,
962 0xbefe007c, 0xbefc0070,
963 0xc0611a7a, 0x0000007c,
964 0xbf8cc07f, 0x80708470,
965 0xbefc007e, 0xb8f1f801,
966 0xbefe007c, 0xbefc0070,
967 0xc0611c7a, 0x0000007c,
968 0xbf8cc07f, 0x80708470,
969 0xbefc007e, 0x867aff7f,
970 0x04000000, 0xbeef0080,
971 0x876f6f7a, 0xb8f02a05,
972 0x80708170, 0x8e708a70,
973 0x8e708170, 0xb8fb1605,
974 0x807b817b, 0x8e7b847b,
975 0x8e76827b, 0xbef600ff,
976 0x01000000, 0xbef20174,
977 0x80747074, 0x82758075,
903 0xbefc0080, 0xbf800000, 978 0xbefc0080, 0xbf800000,
904 0x807cff7c, 0x00000100, 979 0xbe802b00, 0xbe822b02,
905 0x807aff7a, 0x00000100, 980 0xbe842b04, 0xbe862b06,
906 0xbf0a717c, 0xbf85fffa, 981 0xbe882b08, 0xbe8a2b0a,
982 0xbe8c2b0c, 0xbe8e2b0e,
983 0xc06b003a, 0x00000000,
984 0xbf8cc07f, 0xc06b013a,
985 0x00000010, 0xbf8cc07f,
986 0xc06b023a, 0x00000020,
987 0xbf8cc07f, 0xc06b033a,
988 0x00000030, 0xbf8cc07f,
989 0x8074c074, 0x82758075,
990 0x807c907c, 0xbf0a7b7c,
991 0xbf85ffe7, 0xbef40172,
992 0xbef00080, 0xbefe00c1,
993 0xbeff00c1, 0xbee80080,
994 0xbee90080, 0xbef600ff,
995 0x01000000, 0x867aff78,
996 0x00400000, 0xbf850003,
997 0xb8faf803, 0x897a7aff,
998 0x10000000, 0xbf85004d,
999 0xbe840080, 0xd2890000,
1000 0x00000900, 0x80048104,
1001 0xd2890001, 0x00000900,
1002 0x80048104, 0xd2890002,
1003 0x00000900, 0x80048104,
1004 0xd2890003, 0x00000900,
1005 0x80048104, 0xc069003a,
1006 0x00000070, 0xbf8cc07f,
1007 0x80709070, 0xbf06c004,
1008 0xbf84ffee, 0xbe840080,
1009 0xd2890000, 0x00000901,
1010 0x80048104, 0xd2890001,
1011 0x00000901, 0x80048104,
1012 0xd2890002, 0x00000901,
1013 0x80048104, 0xd2890003,
1014 0x00000901, 0x80048104,
1015 0xc069003a, 0x00000070,
1016 0xbf8cc07f, 0x80709070,
1017 0xbf06c004, 0xbf84ffee,
1018 0xbe840080, 0xd2890000,
1019 0x00000902, 0x80048104,
1020 0xd2890001, 0x00000902,
1021 0x80048104, 0xd2890002,
1022 0x00000902, 0x80048104,
1023 0xd2890003, 0x00000902,
1024 0x80048104, 0xc069003a,
1025 0x00000070, 0xbf8cc07f,
1026 0x80709070, 0xbf06c004,
1027 0xbf84ffee, 0xbe840080,
1028 0xd2890000, 0x00000903,
1029 0x80048104, 0xd2890001,
1030 0x00000903, 0x80048104,
1031 0xd2890002, 0x00000903,
1032 0x80048104, 0xd2890003,
1033 0x00000903, 0x80048104,
1034 0xc069003a, 0x00000070,
1035 0xbf8cc07f, 0x80709070,
1036 0xbf06c004, 0xbf84ffee,
1037 0xbf820008, 0xe0724000,
1038 0x701d0000, 0xe0724100,
1039 0x701d0100, 0xe0724200,
1040 0x701d0200, 0xe0724300,
1041 0x701d0300, 0xbefe00c1,
1042 0xbeff00c1, 0xb8fb4306,
1043 0x867bc17b, 0xbf840064,
1044 0xbf8a0000, 0x867aff6f,
1045 0x04000000, 0xbf840060,
1046 0x8e7b867b, 0x8e7b827b,
1047 0xbef6007b, 0xb8f02a05,
1048 0x80708170, 0x8e708a70,
1049 0x8e708170, 0xb8fa1605,
1050 0x807a817a, 0x8e7a867a,
1051 0x80707a70, 0x8070ff70,
1052 0x00000080, 0xbef600ff,
1053 0x01000000, 0xbefc0080,
1054 0xd28c0002, 0x000100c1,
1055 0xd28d0003, 0x000204c1,
1056 0x867aff78, 0x00400000,
1057 0xbf850003, 0xb8faf803,
1058 0x897a7aff, 0x10000000,
1059 0xbf850030, 0x24040682,
1060 0xd86e4000, 0x00000002,
1061 0xbf8cc07f, 0xbe840080,
1062 0xd2890000, 0x00000900,
1063 0x80048104, 0xd2890001,
1064 0x00000900, 0x80048104,
1065 0xd2890002, 0x00000900,
1066 0x80048104, 0xd2890003,
1067 0x00000900, 0x80048104,
1068 0xc069003a, 0x00000070,
1069 0xbf8cc07f, 0x80709070,
1070 0xbf06c004, 0xbf84ffee,
1071 0xbe840080, 0xd2890000,
1072 0x00000901, 0x80048104,
1073 0xd2890001, 0x00000901,
1074 0x80048104, 0xd2890002,
1075 0x00000901, 0x80048104,
1076 0xd2890003, 0x00000901,
1077 0x80048104, 0xc069003a,
1078 0x00000070, 0xbf8cc07f,
1079 0x80709070, 0xbf06c004,
1080 0xbf84ffee, 0x680404ff,
1081 0x00000200, 0xd0c9006a,
1082 0x0000f702, 0xbf87ffd2,
1083 0xbf820015, 0xd1060002,
1084 0x00011103, 0x7e0602ff,
1085 0x00000200, 0xbefc00ff,
1086 0x00010000, 0xbe800077,
1087 0x8677ff77, 0xff7fffff,
1088 0x8777ff77, 0x00058000,
1089 0xd8ec0000, 0x00000002,
1090 0xbf8cc07f, 0xe0765000,
1091 0x701d0002, 0x68040702,
1092 0xd0c9006a, 0x0000f702,
1093 0xbf87fff7, 0xbef70000,
1094 0xbef000ff, 0x00000400,
907 0xbefe00c1, 0xbeff00c1, 1095 0xbefe00c1, 0xbeff00c1,
908 0xb8f12a05, 0x80718171, 1096 0xb8fb2a05, 0x807b817b,
909 0x8e718271, 0x8e768871, 1097 0x8e7b827b, 0x8e76887b,
910 0xbef600ff, 0x01000000, 1098 0xbef600ff, 0x01000000,
1099 0xbefc0084, 0xbf0a7b7c,
1100 0xbf84006d, 0xbf11017c,
1101 0x807bff7b, 0x00001000,
1102 0x867aff78, 0x00400000,
1103 0xbf850003, 0xb8faf803,
1104 0x897a7aff, 0x10000000,
1105 0xbf850051, 0xbe840080,
1106 0xd2890000, 0x00000900,
1107 0x80048104, 0xd2890001,
1108 0x00000900, 0x80048104,
1109 0xd2890002, 0x00000900,
1110 0x80048104, 0xd2890003,
1111 0x00000900, 0x80048104,
1112 0xc069003a, 0x00000070,
1113 0xbf8cc07f, 0x80709070,
1114 0xbf06c004, 0xbf84ffee,
1115 0xbe840080, 0xd2890000,
1116 0x00000901, 0x80048104,
1117 0xd2890001, 0x00000901,
1118 0x80048104, 0xd2890002,
1119 0x00000901, 0x80048104,
1120 0xd2890003, 0x00000901,
1121 0x80048104, 0xc069003a,
1122 0x00000070, 0xbf8cc07f,
1123 0x80709070, 0xbf06c004,
1124 0xbf84ffee, 0xbe840080,
1125 0xd2890000, 0x00000902,
1126 0x80048104, 0xd2890001,
1127 0x00000902, 0x80048104,
1128 0xd2890002, 0x00000902,
1129 0x80048104, 0xd2890003,
1130 0x00000902, 0x80048104,
1131 0xc069003a, 0x00000070,
1132 0xbf8cc07f, 0x80709070,
1133 0xbf06c004, 0xbf84ffee,
1134 0xbe840080, 0xd2890000,
1135 0x00000903, 0x80048104,
1136 0xd2890001, 0x00000903,
1137 0x80048104, 0xd2890002,
1138 0x00000903, 0x80048104,
1139 0xd2890003, 0x00000903,
1140 0x80048104, 0xc069003a,
1141 0x00000070, 0xbf8cc07f,
1142 0x80709070, 0xbf06c004,
1143 0xbf84ffee, 0x807c847c,
1144 0xbf0a7b7c, 0xbf85ffb1,
1145 0xbf9c0000, 0xbf820012,
1146 0x7e000300, 0x7e020301,
1147 0x7e040302, 0x7e060303,
1148 0xe0724000, 0x701d0000,
1149 0xe0724100, 0x701d0100,
1150 0xe0724200, 0x701d0200,
1151 0xe0724300, 0x701d0300,
1152 0x807c847c, 0x8070ff70,
1153 0x00000400, 0xbf0a7b7c,
1154 0xbf85ffef, 0xbf9c0000,
911 0xbefc0080, 0xbf11017c, 1155 0xbefc0080, 0xbf11017c,
912 0x8071ff71, 0x00001000, 1156 0x867aff78, 0x00400000,
913 0x7e000300, 0xe0724000, 1157 0xbf850003, 0xb8faf803,
914 0x7a1d0000, 0x807c817c, 1158 0x897a7aff, 0x10000000,
915 0x807aff7a, 0x00000100, 1159 0xbf850059, 0xd3d84000,
916 0xbf0a717c, 0xbf85fff8, 1160 0x18000100, 0xd3d84001,
917 0xbf9c0000, 0xbefe00c1, 1161 0x18000101, 0xd3d84002,
918 0xbeff00c1, 0xb8f12a05, 1162 0x18000102, 0xd3d84003,
919 0x80718171, 0x8e718271, 1163 0x18000103, 0xbe840080,
920 0x8e768871, 0xbef600ff, 1164 0xd2890000, 0x00000900,
921 0x01000000, 0xbefc0080, 1165 0x80048104, 0xd2890001,
922 0xbf11017c, 0x8071ff71, 1166 0x00000900, 0x80048104,
923 0x00001000, 0xd3d84000, 1167 0xd2890002, 0x00000900,
924 0x18000100, 0x7e000000, 1168 0x80048104, 0xd2890003,
925 0x7e000000, 0xe0724000, 1169 0x00000900, 0x80048104,
926 0x7a1d0000, 0x807c817c, 1170 0xc069003a, 0x00000070,
927 0x807aff7a, 0x00000100, 1171 0xbf8cc07f, 0x80709070,
928 0xbf0a717c, 0xbf85fff5, 1172 0xbf06c004, 0xbf84ffee,
929 0xbf9c0000, 0xb8f11605, 1173 0xbe840080, 0xd2890000,
930 0x80718171, 0x8e718471, 1174 0x00000901, 0x80048104,
931 0x8e768871, 0xbef600ff, 1175 0xd2890001, 0x00000901,
932 0x01000000, 0xbefc0080, 1176 0x80048104, 0xd2890002,
933 0xbf800000, 0xbe802a00, 1177 0x00000901, 0x80048104,
934 0x7e000200, 0xe0724000, 1178 0xd2890003, 0x00000901,
935 0x7a1d0000, 0x807aff7a, 1179 0x80048104, 0xc069003a,
936 0x00000100, 0x807c817c, 1180 0x00000070, 0xbf8cc07f,
937 0xbf0a717c, 0xbf85fff7, 1181 0x80709070, 0xbf06c004,
938 0xbef60084, 0xbef600ff, 1182 0xbf84ffee, 0xbe840080,
939 0x01000000, 0x7e00027b, 1183 0xd2890000, 0x00000902,
940 0xe0724000, 0x7a1d0000, 1184 0x80048104, 0xd2890001,
941 0x807aff7a, 0x00000100, 1185 0x00000902, 0x80048104,
942 0x7e00026c, 0xe0724000, 1186 0xd2890002, 0x00000902,
943 0x7a1d0000, 0x807aff7a, 1187 0x80048104, 0xd2890003,
944 0x00000100, 0x7e00026d, 1188 0x00000902, 0x80048104,
945 0xe0724000, 0x7a1d0000, 1189 0xc069003a, 0x00000070,
946 0x807aff7a, 0x00000100, 1190 0xbf8cc07f, 0x80709070,
947 0x7e00026e, 0xe0724000, 1191 0xbf06c004, 0xbf84ffee,
948 0x7a1d0000, 0x807aff7a, 1192 0xbe840080, 0xd2890000,
949 0x00000100, 0x7e00026f, 1193 0x00000903, 0x80048104,
950 0xe0724000, 0x7a1d0000, 1194 0xd2890001, 0x00000903,
951 0x807aff7a, 0x00000100, 1195 0x80048104, 0xd2890002,
952 0x7e000270, 0xe0724000, 1196 0x00000903, 0x80048104,
953 0x7a1d0000, 0x807aff7a, 1197 0xd2890003, 0x00000903,
954 0x00000100, 0xb8f1f803, 1198 0x80048104, 0xc069003a,
955 0x7e000271, 0xe0724000, 1199 0x00000070, 0xbf8cc07f,
956 0x7a1d0000, 0x807aff7a, 1200 0x80709070, 0xbf06c004,
957 0x00000100, 0x7e000272, 1201 0xbf84ffee, 0x807c847c,
958 0xe0724000, 0x7a1d0000, 1202 0xbf0a7b7c, 0xbf85ffa9,
959 0x807aff7a, 0x00000100, 1203 0xbf9c0000, 0xbf820016,
960 0x7e000273, 0xe0724000, 1204 0xd3d84000, 0x18000100,
961 0x7a1d0000, 0x807aff7a, 1205 0xd3d84001, 0x18000101,
962 0x00000100, 0xb8fbf801, 1206 0xd3d84002, 0x18000102,
963 0x7e00027b, 0xe0724000, 1207 0xd3d84003, 0x18000103,
964 0x7a1d0000, 0x807aff7a, 1208 0xe0724000, 0x701d0000,
965 0x00000100, 0xbf8200bb, 1209 0xe0724100, 0x701d0100,
966 0xbef4007e, 0x8675ff7f, 1210 0xe0724200, 0x701d0200,
967 0x0000ffff, 0x8775ff75, 1211 0xe0724300, 0x701d0300,
968 0x00040000, 0xbef60080, 1212 0x807c847c, 0x8070ff70,
969 0xbef700ff, 0x00807fac, 1213 0x00000400, 0xbf0a7b7c,
970 0x8672ff7f, 0x08000000, 1214 0xbf85ffeb, 0xbf9c0000,
971 0x8f728372, 0x87777277, 1215 0xbf820106, 0xbef4007e,
972 0x8672ff7f, 0x70000000, 1216 0x8675ff7f, 0x0000ffff,
973 0x8f728172, 0x87777277, 1217 0x8775ff75, 0x00040000,
974 0xbef80080, 0x8672ff7f, 1218 0xbef60080, 0xbef700ff,
975 0x04000000, 0xbf840011, 1219 0x00807fac, 0x866eff7f,
1220 0x08000000, 0x8f6e836e,
1221 0x87776e77, 0x866eff7f,
1222 0x70000000, 0x8f6e816e,
1223 0x87776e77, 0x866eff7f,
1224 0x04000000, 0xbf84001f,
976 0xbefe00c1, 0xbeff00c1, 1225 0xbefe00c1, 0xbeff00c1,
977 0xb8ef4306, 0x866fc16f, 1226 0xb8ef4306, 0x866fc16f,
978 0xbf84000c, 0x8e6f866f, 1227 0xbf84001a, 0x8e6f866f,
979 0x8e6f826f, 0xbef6006f, 1228 0x8e6f826f, 0xbef6006f,
1229 0xb8f82a05, 0x80788178,
1230 0x8e788a78, 0x8e788178,
1231 0xb8ee1605, 0x806e816e,
1232 0x8e6e866e, 0x80786e78,
1233 0x8078ff78, 0x00000080,
980 0xbef600ff, 0x01000000, 1234 0xbef600ff, 0x01000000,
981 0xbefc0080, 0x807cff7c, 1235 0xbefc0080, 0xe0510000,
982 0x00000100, 0x8078ff78, 1236 0x781d0000, 0xe0510100,
983 0x00000100, 0xbf0a6f7c, 1237 0x781d0000, 0x807cff7c,
984 0xbf85fffa, 0xbefe00c1, 1238 0x00000200, 0x8078ff78,
985 0xbeff00c1, 0xb8ef2a05, 1239 0x00000200, 0xbf0a6f7c,
986 0x806f816f, 0x8e6f826f, 1240 0xbf85fff6, 0xbef80080,
987 0x8e76886f, 0xbef600ff,
988 0x01000000, 0xbef20078,
989 0x8078ff78, 0x00000100,
990 0xbefc0081, 0xbf11087c,
991 0x806fff6f, 0x00008000,
992 0xe0524000, 0x781d0000,
993 0xbf8c0f70, 0x7e000300,
994 0x807c817c, 0x8078ff78,
995 0x00000100, 0xbf0a6f7c,
996 0xbf85fff7, 0xbf9c0000,
997 0xbefe00c1, 0xbeff00c1, 1241 0xbefe00c1, 0xbeff00c1,
998 0xb8ef2a05, 0x806f816f, 1242 0xb8ef2a05, 0x806f816f,
999 0x8e6f826f, 0x8e76886f, 1243 0x8e6f826f, 0x8e76886f,
1000 0xbef600ff, 0x01000000, 1244 0xbef90076, 0xbef600ff,
1001 0xbefc0080, 0xbf11087c, 1245 0x01000000, 0xbeee0078,
1002 0x806fff6f, 0x00008000, 1246 0x8078ff78, 0x00000400,
1247 0xbef30079, 0x8079ff79,
1248 0x00000400, 0xbefc0084,
1249 0xbf11087c, 0x806fff6f,
1250 0x00008000, 0xe0524000,
1251 0x791d0000, 0xe0524100,
1252 0x791d0100, 0xe0524200,
1253 0x791d0200, 0xe0524300,
1254 0x791d0300, 0x8079ff79,
1255 0x00000400, 0xbf8c0f70,
1256 0xd3d94000, 0x18000100,
1257 0xd3d94001, 0x18000101,
1258 0xd3d94002, 0x18000102,
1259 0xd3d94003, 0x18000103,
1003 0xe0524000, 0x781d0000, 1260 0xe0524000, 0x781d0000,
1004 0xbf8c0f70, 0xd3d94000, 1261 0xe0524100, 0x781d0100,
1005 0x18000100, 0x807c817c, 1262 0xe0524200, 0x781d0200,
1006 0x8078ff78, 0x00000100, 1263 0xe0524300, 0x781d0300,
1007 0xbf0a6f7c, 0xbf85fff6, 1264 0xbf8c0f70, 0x7e000300,
1265 0x7e020301, 0x7e040302,
1266 0x7e060303, 0x807c847c,
1267 0x8078ff78, 0x00000400,
1268 0xbf0a6f7c, 0xbf85ffdb,
1008 0xbf9c0000, 0xe0524000, 1269 0xbf9c0000, 0xe0524000,
1009 0x721d0000, 0xb8ef1605, 1270 0x731d0000, 0xe0524100,
1271 0x731d0100, 0xe0524200,
1272 0x731d0200, 0xe0524300,
1273 0x731d0300, 0xbf8c0f70,
1274 0xd3d94000, 0x18000100,
1275 0xd3d94001, 0x18000101,
1276 0xd3d94002, 0x18000102,
1277 0xd3d94003, 0x18000103,
1278 0xe0524000, 0x6e1d0000,
1279 0xe0524100, 0x6e1d0100,
1280 0xe0524200, 0x6e1d0200,
1281 0xe0524300, 0x6e1d0300,
1282 0xb8f82a05, 0x80788178,
1283 0x8e788a78, 0x8e788178,
1284 0xb8ee1605, 0x806e816e,
1285 0x8e6e866e, 0x80786e78,
1286 0x80f8c078, 0xb8ef1605,
1010 0x806f816f, 0x8e6f846f, 1287 0x806f816f, 0x8e6f846f,
1011 0x8e76886f, 0xbef600ff, 1288 0x8e76826f, 0xbef600ff,
1012 0x01000000, 0xc0211cba, 1289 0x01000000, 0xbefc006f,
1013 0x00000078, 0x8078ff78, 1290 0xc031003a, 0x00000078,
1014 0x00000100, 0xbefc0081, 1291 0x80f8c078, 0xbf8cc07f,
1015 0xc021003a, 0x00000078, 1292 0x80fc907c, 0xbf800000,
1016 0x8078ff78, 0x00000100, 1293 0xbe802d00, 0xbe822d02,
1017 0xbf8cc07f, 0xbe802c00, 1294 0xbe842d04, 0xbe862d06,
1018 0xbf800000, 0x807c817c, 1295 0xbe882d08, 0xbe8a2d0a,
1019 0xbf0a6f7c, 0xbf85fff6, 1296 0xbe8c2d0c, 0xbe8e2d0e,
1020 0xbe800072, 0xbef60084, 1297 0xbf06807c, 0xbf84fff0,
1021 0xbef600ff, 0x01000000, 1298 0xb8f82a05, 0x80788178,
1022 0xc0211bfa, 0x00000078, 1299 0x8e788a78, 0x8e788178,
1023 0x8078ff78, 0x00000100, 1300 0xb8ee1605, 0x806e816e,
1301 0x8e6e866e, 0x80786e78,
1302 0xbef60084, 0xbef600ff,
1303 0x01000000, 0xc0211bfa,
1304 0x00000078, 0x80788478,
1024 0xc0211b3a, 0x00000078, 1305 0xc0211b3a, 0x00000078,
1025 0x8078ff78, 0x00000100, 1306 0x80788478, 0xc0211b7a,
1026 0xc0211b7a, 0x00000078, 1307 0x00000078, 0x80788478,
1027 0x8078ff78, 0x00000100,
1028 0xc0211eba, 0x00000078,
1029 0x8078ff78, 0x00000100,
1030 0xc0211efa, 0x00000078,
1031 0x8078ff78, 0x00000100,
1032 0xc0211c3a, 0x00000078, 1308 0xc0211c3a, 0x00000078,
1033 0x8078ff78, 0x00000100, 1309 0x80788478, 0xc0211c7a,
1034 0xc0211c7a, 0x00000078, 1310 0x00000078, 0x80788478,
1035 0x8078ff78, 0x00000100, 1311 0xc0211eba, 0x00000078,
1312 0x80788478, 0xc0211efa,
1313 0x00000078, 0x80788478,
1036 0xc0211a3a, 0x00000078, 1314 0xc0211a3a, 0x00000078,
1037 0x8078ff78, 0x00000100, 1315 0x80788478, 0xc0211a7a,
1038 0xc0211a7a, 0x00000078, 1316 0x00000078, 0x80788478,
1039 0x8078ff78, 0x00000100,
1040 0xc0211cfa, 0x00000078, 1317 0xc0211cfa, 0x00000078,
1041 0x8078ff78, 0x00000100, 1318 0x80788478, 0xbf8cc07f,
1042 0xbf8cc07f, 0xbef2006d, 1319 0xbefc006f, 0xbefe0070,
1043 0x866dff72, 0x0000ffff, 1320 0xbeff0071, 0x866f7bff,
1044 0xbefc006f, 0xbefe007a,
1045 0xbeff007b, 0x866f71ff,
1046 0x000003ff, 0xb96f4803, 1321 0x000003ff, 0xb96f4803,
1047 0x866f71ff, 0xfffff800, 1322 0x866f7bff, 0xfffff800,
1048 0x8f6f8b6f, 0xb96fa2c3, 1323 0x8f6f8b6f, 0xb96fa2c3,
1049 0xb973f801, 0x866fff72, 1324 0xb973f801, 0xb8ee2a05,
1050 0xf8000000, 0x8f6f9b6f, 1325 0x806e816e, 0x8e6e8a6e,
1051 0x8e6f906f, 0xbef30080, 1326 0x8e6e816e, 0xb8ef1605,
1052 0x87736f73, 0x866fff72, 1327 0x806f816f, 0x8e6f866f,
1053 0x04000000, 0x8f6f9a6f, 1328 0x806e6f6e, 0x806e746e,
1054 0x8e6f8f6f, 0x87736f73, 1329 0x826f8075, 0x866fff6f,
1055 0x866fff70, 0x00800000, 1330 0x0000ffff, 0xc00b1c37,
1056 0x8f6f976f, 0xb973f807, 1331 0x00000050, 0xc00b1d37,
1057 0x86fe7e7e, 0x86ea6a6a, 1332 0x00000060, 0xc0031e77,
1058 0xb970f802, 0xbf8a0000, 1333 0x00000074, 0xbf8cc07f,
1334 0x866fff6d, 0xf8000000,
1335 0x8f6f9b6f, 0x8e6f906f,
1336 0xbeee0080, 0x876e6f6e,
1337 0x866fff6d, 0x04000000,
1338 0x8f6f9a6f, 0x8e6f8f6f,
1339 0x876e6f6e, 0x866fff7a,
1340 0x00800000, 0x8f6f976f,
1341 0xb96ef807, 0x866dff6d,
1342 0x0000ffff, 0x86fe7e7e,
1343 0x86ea6a6a, 0x8f6e837a,
1344 0xb96ee0c2, 0xbf800002,
1345 0xb97a0002, 0xbf8a0000,
1059 0x95806f6c, 0xbf810000, 1346 0x95806f6c, 0xbf810000,
1060}; 1347};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_arcturus.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_arcturus.asm
deleted file mode 100644
index b83e2a643ddb..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_arcturus.asm
+++ /dev/null
@@ -1,746 +0,0 @@
1shader main
2
3asic(DEFAULT)
4
5type(CS)
6
7/*************************************************************************/
8/* control on how to run the shader */
9/*************************************************************************/
10//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
11var EMU_RUN_HACK = 0
12var EMU_RUN_HACK_RESTORE_NORMAL = 0
13var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
14var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
15var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
16var SAVE_LDS = 0
17var WG_BASE_ADDR_LO = 0x9000a000
18var WG_BASE_ADDR_HI = 0x0
19var WAVE_SPACE = 0x6000 //memory size that each wave occupies in workgroup state mem
20var CTX_SAVE_CONTROL = 0x0
21var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
22var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
23var SGPR_SAVE_USE_SQC = 0 //use SQC D$ to do the write
24var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //need to change BUF_DATA_FORMAT in S_SAVE_BUF_RSRC_WORD3_MISC from 0 to BUF_DATA_FORMAT_32 if set to 1 (i.e. 0x00827FAC)
25var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
26
27/**************************************************************************/
28/* variables */
29/**************************************************************************/
30var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
31var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
32var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
33
34var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
35var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
36var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
37var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
38var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
39var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
40
41var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
42var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
43var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
44var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
45var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
46var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
47var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
48var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
49var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
50var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
51var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
52
53var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
54var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
55var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
56
57var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
58var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
59
60
61/* Save */
62var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
63var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
64
65var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
66var S_SAVE_SPI_INIT_ATC_SHIFT = 27
67var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
68var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
69var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
70var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
71
72var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
73var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
74var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
75var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
76
77var s_save_spi_init_lo = exec_lo
78var s_save_spi_init_hi = exec_hi
79
80var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
81var s_save_pc_hi = ttmp1
82var s_save_exec_lo = ttmp2
83var s_save_exec_hi = ttmp3
84var s_save_status = ttmp4
85var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
86var s_save_xnack_mask_lo = ttmp6
87var s_save_xnack_mask_hi = ttmp7
88var s_save_buf_rsrc0 = ttmp8
89var s_save_buf_rsrc1 = ttmp9
90var s_save_buf_rsrc2 = ttmp10
91var s_save_buf_rsrc3 = ttmp11
92
93var s_save_mem_offset = ttmp14
94var s_save_alloc_size = s_save_trapsts //conflict
95var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time)
96var s_save_m0 = ttmp15
97
98/* Restore */
99var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
100var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
101
102var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
103var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
104var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
105var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
106var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
107var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
108
109var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
110var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
111var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
112var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
113
114var s_restore_spi_init_lo = exec_lo
115var s_restore_spi_init_hi = exec_hi
116
117var s_restore_mem_offset = ttmp12
118var s_restore_alloc_size = ttmp3
119var s_restore_tmp = ttmp6
120var s_restore_mem_offset_save = s_restore_tmp //no conflict
121
122var s_restore_m0 = s_restore_alloc_size //no conflict
123
124var s_restore_mode = ttmp7
125
126var s_restore_pc_lo = ttmp0
127var s_restore_pc_hi = ttmp1
128var s_restore_exec_lo = ttmp14
129var s_restore_exec_hi = ttmp15
130var s_restore_status = ttmp4
131var s_restore_trapsts = ttmp5
132var s_restore_xnack_mask_lo = xnack_mask_lo
133var s_restore_xnack_mask_hi = xnack_mask_hi
134var s_restore_buf_rsrc0 = ttmp8
135var s_restore_buf_rsrc1 = ttmp9
136var s_restore_buf_rsrc2 = ttmp10
137var s_restore_buf_rsrc3 = ttmp11
138
139/**************************************************************************/
140/* trap handler entry points */
141/**************************************************************************/
142 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
143 //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
144 s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
145 s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
146 s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
147 //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
148 s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
149 else
150 s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
151 end
152
153L_JUMP_TO_RESTORE:
154 s_branch L_RESTORE //restore
155
156L_SKIP_RESTORE:
157
158 s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
159 s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
160 s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
161 s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
162 s_cbranch_scc1 L_SAVE //this is the operation for save
163
164 // ********* Handle non-CWSR traps *******************
165 if (!EMU_RUN_HACK)
166 s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
167 s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
168 s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
169 s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
170
171 L_EXCP_CASE:
172 s_and_b32 ttmp1, ttmp1, 0xFFFF
173 s_rfe_b64 [ttmp0, ttmp1]
174 end
175 // ********* End handling of non-CWSR traps *******************
176
177/**************************************************************************/
178/* save routine */
179/**************************************************************************/
180
181L_SAVE:
182
183 //check whether there is mem_viol
184 s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
185 s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
186 s_cbranch_scc0 L_NO_PC_REWIND
187
188 //if so, need rewind PC assuming GDS operation gets NACKed
189 s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
190 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
191 s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
192 s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
193 s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
194
195L_NO_PC_REWIND:
196 s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
197 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
198
199 s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK
200 s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi
201 s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
202 s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
203 s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
204 s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
205 s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
206 s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
207 s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
208 s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
209
210 s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
211
212 /* inform SPI the readiness and wait for SPI's go signal */
213 s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
214 s_mov_b32 s_save_exec_hi, exec_hi
215 s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
216 if (EMU_RUN_HACK)
217
218 else
219 s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
220 end
221
222 L_SLEEP:
223 s_sleep 0x2
224
225 if (EMU_RUN_HACK)
226
227 else
228 s_cbranch_execz L_SLEEP
229 end
230
231
232 /* setup Resource Constants */
233 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
234 //calculate wd_addr using absolute thread id
235 v_readlane_b32 s_save_tmp, v9, 0
236 s_lshr_b32 s_save_tmp, s_save_tmp, 6
237 s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
238 s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
239 s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
240 s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
241 else
242 end
243 if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
244 s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
245 s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
246 s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
247 else
248 end
249
250
251 s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
252 s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
253 s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
254 s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
255 s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
256 s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
257 s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
258 s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC TODO: ATC deprecated, no need anymore.
259 s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
260 s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
261 s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
262
263 s_mov_b32 s_save_m0, m0 //save M0
264
265 /* global mem offset */
266 s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0
267
268
269 /* the first wave in the threadgroup */
270 s_barrier //FIXME not performance-optimal "LDS is used? wait for other waves in the same TG"
271 s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
272 s_cbranch_scc0 L_SAVE_VGPR
273
274 /* save LDS */
275 //////////////////////////////
276 L_SAVE_LDS:
277
278 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
279 s_mov_b32 exec_hi, 0xFFFFFFFF
280
281 s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
282 s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
283 s_cbranch_scc0 L_SAVE_VGPR //no lds used? jump to L_SAVE_VGPR
284 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
285 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
286 s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
287 if (SWIZZLE_EN)
288 s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
289 else
290 s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
291 end
292 s_mov_b32 m0, 0x0 //lds_offset initial value = 0
293 s_nop 0x0 //Manually inserted wait states
294
295 L_SAVE_LDS_LOOP:
296 if (SAVE_LDS)
297 buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1
298 end
299 s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes
300 s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //mem offset increased by 256 bytes
301 s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
302 s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
303
304
305 /* save VGPRs */
306 //////////////////////////////
307 L_SAVE_VGPR:
308
309 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
310 s_mov_b32 exec_hi, 0xFFFFFFFF
311
312 s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
313 s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
314 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
315 s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
316 if (SWIZZLE_EN)
317 s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
318 else
319 s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
320 end
321
322 s_mov_b32 m0, 0x0 //VGPR initial index value =0
323 s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
324 s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
325
326 L_SAVE_VGPR_LOOP:
327 v_mov_b32 v0, v0 //v0 = v[0+m0]
328
329 if(USE_MTBUF_INSTEAD_OF_MUBUF)
330 tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
331 else
332 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
333 end
334
335 s_add_u32 m0, m0, 1 //next vgpr index
336 s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //every buffer_store_dword does 256 bytes
337 s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
338 s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
339 s_set_gpr_idx_off
340
341
342 /* save ACC_VGPRs */
343 //////////////////////////////
344 L_SAVE_ACC_VGPR:
345
346 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
347 s_mov_b32 exec_hi, 0xFFFFFFFF
348
349 s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
350 s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
351 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
352 s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
353 if (SWIZZLE_EN)
354 s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
355 else
356 s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
357 end
358
359 s_mov_b32 m0, 0x0 //VGPR initial index value =0
360 s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
361 s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
362
363 L_SAVE_ACC_VGPR_LOOP:
364 v_accvgpr_read v0, v0
365 v_nop
366 v_nop
367 if(USE_MTBUF_INSTEAD_OF_MUBUF)
368 tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
369 else
370 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
371 end
372
373 s_add_u32 m0, m0, 1 //next vgpr index
374 s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //every buffer_store_dword does 256 bytes
375 s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
376 s_cbranch_scc1 L_SAVE_ACC_VGPR_LOOP //VGPR save is complete?
377 s_set_gpr_idx_off
378
379
380 /* save SGPRs */
381 //////////////////////////////
382 s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size
383 s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
384 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
385
386 if (SGPR_SAVE_USE_SQC)
387 s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
388 else
389 s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
390 end
391
392 if (SWIZZLE_EN)
393 s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
394 else
395 s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
396 end
397
398 s_mov_b32 m0, 0x0 //SGPR initial index value =0
399 s_nop 0x0 //Manually inserted wait states
400
401 L_SAVE_SGPR_LOOP:
402 s_movrels_b32 s0, s0 //s0 = s[0+m0]
403 write_sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4
404 s_add_u32 m0, m0, 1 //next sgpr index
405 s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
406 s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete?
407
408 /* save HW registers */
409 //////////////////////////////
410 L_SAVE_HWREG:
411 s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
412 if (SWIZZLE_EN)
413 s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
414 else
415 s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
416 end
417
418
419 write_sgpr_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0
420
421 if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
422 s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
423 s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
424 end
425
426 write_sgpr_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC
427 write_sgpr_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
428 write_sgpr_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC
429 write_sgpr_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
430 write_sgpr_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS
431
432 //s_save_trapsts conflicts with s_save_alloc_size
433 s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
434 write_sgpr_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS
435
436 write_sgpr_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO
437 write_sgpr_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI
438
439 //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
440 s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
441 write_sgpr_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
442
443 /* S_PGM_END_SAVED */ //FIXME graphics ONLY
444 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
445 s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
446 s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
447 s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
448 s_rfe_b64 s_save_pc_lo //Return to the main shader program
449 else
450 end
451
452
453 s_branch L_END_PGM
454
455
456
457/**************************************************************************/
458/* restore routine */
459/**************************************************************************/
460
461L_RESTORE:
462 /* Setup Resource Constants */
463 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
464 //calculate wd_addr using absolute thread id
465 v_readlane_b32 s_restore_tmp, v9, 0
466 s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
467 s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
468 s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
469 s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
470 s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
471 else
472 end
473
474 s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
475 s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
476 s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
477 s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
478 s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
479 s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
480 s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
481 s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC TODO: ATC deprecated, no need anymore.
482 s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
483 s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
484 s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
485
486 /* global mem offset */
487 s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
488
489 /* the first wave in the threadgroup */
490 s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
491 s_cbranch_scc0 L_RESTORE_VGPR
492
493 /* restore LDS */
494 //////////////////////////////
495 L_RESTORE_LDS:
496
497 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
498 s_mov_b32 exec_hi, 0xFFFFFFFF
499
500 s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
501 s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
502 s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
503 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
504 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
505 s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
506 if (SWIZZLE_EN)
507 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
508 else
509 s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
510 end
511 s_mov_b32 m0, 0x0 //lds_offset initial value = 0
512
513 L_RESTORE_LDS_LOOP:
514 if (SAVE_LDS)
515 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1
516 end
517 s_add_u32 m0, m0, 256 //every buffer_load_dword does 256 bytes
518 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes
519 s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
520 s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
521
522
523 /* restore VGPRs */
524 //////////////////////////////
525 L_RESTORE_VGPR:
526
527 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
528 s_mov_b32 exec_hi, 0xFFFFFFFF
529
530 s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
531 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
532 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
533 s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
534 if (SWIZZLE_EN)
535 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
536 else
537 s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
538 end
539 s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
540 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256
541 s_mov_b32 m0, 1 //VGPR initial index value = 1
542 s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
543 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
544
545 L_RESTORE_VGPR_LOOP:
546 if(USE_MTBUF_INSTEAD_OF_MUBUF)
547 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
548 else
549 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
550 end
551 s_waitcnt vmcnt(0) //ensure data ready
552 v_mov_b32 v0, v0 //v[0+m0] = v0
553 s_add_u32 m0, m0, 1 //next vgpr index
554 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //every buffer_load_dword does 256 bytes
555 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
556 s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
557 s_set_gpr_idx_off
558
559
560 /* restore ACC_VGPRs */
561 //////////////////////////////
562 L_RESTORE_ACC_VGPR:
563
564 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
565 s_mov_b32 exec_hi, 0xFFFFFFFF
566
567 s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
568 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
569 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
570 s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
571 if (SWIZZLE_EN)
572 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
573 else
574 s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
575 end
576 s_mov_b32 m0, 0 //VGPR initial index value = 0
577 s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
578 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
579
580 L_RESTORE_ACC_VGPR_LOOP:
581 if(USE_MTBUF_INSTEAD_OF_MUBUF)
582 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
583 else
584 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
585 end
586 s_waitcnt vmcnt(0) //ensure data ready
587 v_accvgpr_write v0, v0 //v[0+m0] = v0
588 s_add_u32 m0, m0, 1 //next vgpr index
589 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //every buffer_load_dword does 256 bytes
590 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
591 s_cbranch_scc1 L_RESTORE_ACC_VGPR_LOOP //VGPR restore (except v0) is complete?
592 s_set_gpr_idx_off
593 /* VGPR restore on v0 */
594 if(USE_MTBUF_INSTEAD_OF_MUBUF)
595 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
596 else
597 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
598 end
599
600 /* restore SGPRs */
601 //////////////////////////////
602 s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size
603 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
604 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
605
606 if (SGPR_SAVE_USE_SQC)
607 s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
608 else
609 s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
610 end
611 if (SWIZZLE_EN)
612 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
613 else
614 s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
615 end
616 read_sgpr_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp
617 s_mov_b32 m0, 0x1 //SGPR initial index value =1 //go on with with s1
618
619 L_RESTORE_SGPR_LOOP:
620 read_sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made
621 s_waitcnt lgkmcnt(0) //ensure data ready
622 s_movreld_b32 s0, s0 //s[0+m0] = s0
623 s_nop 0 // hazard SALU M0=> S_MOVREL
624 s_add_u32 m0, m0, 1 //next sgpr index
625 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
626 s_cbranch_scc1 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete?
627 s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */
628
629 /* restore HW registers */
630 //////////////////////////////
631 L_RESTORE_HWREG:
632 s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
633 if (SWIZZLE_EN)
634 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
635 else
636 s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
637 end
638
639 read_sgpr_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0
640 read_sgpr_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC
641 read_sgpr_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)
642 read_sgpr_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC
643 read_sgpr_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)
644 read_sgpr_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS
645 read_sgpr_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TRAPSTS
646 read_sgpr_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_LO
647 read_sgpr_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_HI
648 read_sgpr_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //MODE
649
650 s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
651
652 s_mov_b32 s_restore_tmp, s_restore_pc_hi
653 s_and_b32 s_restore_pc_hi, s_restore_tmp, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
654
655 //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
656 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
657 s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
658 s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
659 end
660 if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
661 s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
662 s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
663 end
664
665 s_mov_b32 m0, s_restore_m0
666 s_mov_b32 exec_lo, s_restore_exec_lo
667 s_mov_b32 exec_hi, s_restore_exec_hi
668
669 s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
670 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
671 s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
672 s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
673 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
674 //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
675 s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
676 //reuse s_restore_m0 as a temp register
677 s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_RCNT_MASK
678 s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
679 s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
680 s_mov_b32 s_restore_mode, 0x0 //IB_STS is zero
681 s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
682 s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_FIRST_REPLAY_MASK
683 s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
684 s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
685 s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
686 s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
687 s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
688 s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_mode
689
690 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
691 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
692 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status
693
694 s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG //FIXME not performance-optimal at this time
695
696
697// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
698 s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
699
700
701/**************************************************************************/
702/* the END */
703/**************************************************************************/
704L_END_PGM:
705 s_endpgm
706
707end
708
709
710/**************************************************************************/
711/* the helper functions */
712/**************************************************************************/
713
714function write_sgpr_to_mem(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf) // Emits code that stores register s through buffer resource s_rsrc at s_mem_offset, then advances s_mem_offset: by 4 on the scalar (use_sqc) path, by 256 on either vector path
715 if (use_sqc) // scalar store path: uses m0 for the offset and exec_lo as scratch
716 s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on; exec_lo holds the caller's m0 while m0 is borrowed
717 s_mov_b32 m0, s_mem_offset // the store below takes its offset operand from m0
718 s_buffer_store_dword s, s_rsrc, m0 glc:1
719 s_add_u32 s_mem_offset, s_mem_offset, 4 // next slot is 4 bytes further on
720 s_mov_b32 m0, exec_lo // put the caller's m0 back (exec_lo keeps the stale copy)
721 elsif (use_mtbuf) // typed-buffer vector store path; overwrites v0
722 v_mov_b32 v0, s // stage the scalar value in v0 so a vector store can write it
723 tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
724 s_add_u32 s_mem_offset, s_mem_offset, 256 // vector paths advance the offset by 256 bytes per stored register
725 else // untyped-buffer vector store path; overwrites v0
726 v_mov_b32 v0, s // stage the scalar value in v0 so a vector store can write it
727 buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
728 s_add_u32 s_mem_offset, s_mem_offset, 256 // same 256-byte step as the typed-buffer path
729 end
730end
731
732
733
734function read_sgpr_from_mem(s, s_rsrc, s_mem_offset, use_sqc) // Emits code that loads one dword into s through buffer resource s_rsrc at s_mem_offset, then advances s_mem_offset by 4 (use_sqc) or 256 (otherwise)
735 s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 // a scalar load is issued on both paths; no s_waitcnt is emitted here, so the caller must wait before reading s
736 if (use_sqc)
737 s_add_u32 s_mem_offset, s_mem_offset, 4 // matches the 4-byte step of write_sgpr_to_mem's scalar path
738 else
739 s_add_u32 s_mem_offset, s_mem_offset, 256 // matches the 256-byte step of write_sgpr_to_mem's vector paths
740 end
741end
742
743
744
745
746
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 6bae2e022c6e..871f2d431a44 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -197,13 +197,15 @@ var s_restore_spi_init_lo = exec_lo
197var s_restore_spi_init_hi = exec_hi 197var s_restore_spi_init_hi = exec_hi
198 198
199var s_restore_mem_offset = ttmp12 199var s_restore_mem_offset = ttmp12
200var s_restore_accvgpr_offset = ttmp13
200var s_restore_alloc_size = ttmp3 201var s_restore_alloc_size = ttmp3
201var s_restore_tmp = ttmp2 202var s_restore_tmp = ttmp2
202var s_restore_mem_offset_save = s_restore_tmp //no conflict 203var s_restore_mem_offset_save = s_restore_tmp //no conflict
204var s_restore_accvgpr_offset_save = ttmp7
203 205
204var s_restore_m0 = s_restore_alloc_size //no conflict 206var s_restore_m0 = s_restore_alloc_size //no conflict
205 207
206var s_restore_mode = ttmp7 208var s_restore_mode = s_restore_accvgpr_offset_save
207 209
208var s_restore_pc_lo = ttmp0 210var s_restore_pc_lo = ttmp0
209var s_restore_pc_hi = ttmp1 211var s_restore_pc_hi = ttmp1
@@ -226,7 +228,7 @@ var s_restore_ttmps_hi = s_restore_alloc_size //no conflict
226/* Shader Main*/ 228/* Shader Main*/
227 229
228shader main 230shader main
229 asic(GFX9) 231 asic(DEFAULT)
230 type(CS) 232 type(CS)
231 233
232 234
@@ -791,10 +793,48 @@ end
791 793
792L_SAVE_VGPR_END: 794L_SAVE_VGPR_END:
793 795
796if ASIC_TARGET_ARCTURUS
797 // Save ACC VGPRs
798 s_mov_b32 m0, 0x0 //VGPR initial index value =0
799 s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
794 800
801if SAVE_AFTER_XNACK_ERROR
802 check_if_tcp_store_ok()
803 s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP
795 804
805L_SAVE_ACCVGPR_LOOP_SQC:
806 for var vgpr = 0; vgpr < 4; ++ vgpr
807 v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0]
808 end
809
810 write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
811
812 s_add_u32 m0, m0, 4
813 s_cmp_lt_u32 m0, s_save_alloc_size
814 s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP_SQC
796 815
816 s_set_gpr_idx_off
817 s_branch L_SAVE_ACCVGPR_END
818end
797 819
820L_SAVE_ACCVGPR_LOOP:
821 for var vgpr = 0; vgpr < 4; ++ vgpr
822 v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0]
823 end
824
825 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
826 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
827 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
828 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
829
830 s_add_u32 m0, m0, 4
831 s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
832 s_cmp_lt_u32 m0, s_save_alloc_size
833 s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP
834 s_set_gpr_idx_off
835
836L_SAVE_ACCVGPR_END:
837end
798 838
799 /* S_PGM_END_SAVED */ //FIXME graphics ONLY 839 /* S_PGM_END_SAVED */ //FIXME graphics ONLY
800 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) 840 if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
@@ -921,6 +961,11 @@ end
921 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 961 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
922 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) 962 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
923 s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) 963 s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
964
965if ASIC_TARGET_ARCTURUS
966 s_mov_b32 s_restore_accvgpr_offset, s_restore_buf_rsrc2 //ACC VGPRs at end of VGPRs
967end
968
924 if (SWIZZLE_EN) 969 if (SWIZZLE_EN)
925 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 970 s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
926 else 971 else
@@ -958,6 +1003,10 @@ else
958 // VGPR load using dw burst 1003 // VGPR load using dw burst
959 s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last 1004 s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
960 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 1005 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
1006if ASIC_TARGET_ARCTURUS
1007 s_mov_b32 s_restore_accvgpr_offset_save, s_restore_accvgpr_offset
1008 s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4
1009end
961 s_mov_b32 m0, 4 //VGPR initial index value = 1 1010 s_mov_b32 m0, 4 //VGPR initial index value = 1
962 s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 1011 s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
963 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later 1012 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
@@ -966,6 +1015,20 @@ else
966 if(USE_MTBUF_INSTEAD_OF_MUBUF) 1015 if(USE_MTBUF_INSTEAD_OF_MUBUF)
967 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 1016 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
968 else 1017 else
1018
1019if ASIC_TARGET_ARCTURUS
1020 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1
1021 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256
1022 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*2
1023 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*3
1024 s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4
1025 s_waitcnt vmcnt(0)
1026
1027 for var vgpr = 0; vgpr < 4; ++ vgpr
1028 v_accvgpr_write acc[vgpr], v[vgpr]
1029 end
1030end
1031
969 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 1032 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
970 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 1033 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
971 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 1034 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
@@ -982,6 +1045,18 @@ else
982 s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? 1045 s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
983 s_set_gpr_idx_off 1046 s_set_gpr_idx_off
984 /* VGPR restore on v0 */ 1047 /* VGPR restore on v0 */
1048if ASIC_TARGET_ARCTURUS
1049 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1
1050 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256
1051 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*2
1052 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*3
1053 s_waitcnt vmcnt(0)
1054
1055 for var vgpr = 0; vgpr < 4; ++ vgpr
1056 v_accvgpr_write acc[vgpr], v[vgpr]
1057 end
1058end
1059
985 if(USE_MTBUF_INSTEAD_OF_MUBUF) 1060 if(USE_MTBUF_INSTEAD_OF_MUBUF)
986 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 1061 tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
987 else 1062 else
@@ -1202,6 +1277,10 @@ function get_vgpr_size_bytes(s_vgpr_size_byte)
1202 s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vgpr_size 1277 s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vgpr_size
1203 s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 1278 s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
1204 s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR size in bytes = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible 1279 s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR size in bytes = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible
1280
1281if ASIC_TARGET_ARCTURUS
1282 s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, 1 // Double size for ACC VGPRs
1283end
1205end 1284end
1206 1285
1207function get_sgpr_size_bytes(s_sgpr_size_byte) 1286function get_sgpr_size_bytes(s_sgpr_size_byte)