aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched_rt.c
blob: db308cb08b75051ab459c61efea28f52c736ea7b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#ifdef CONFIG_RT_GROUP_SCHED

/* An RT entity with no group runqueue of its own (my_q unset) is a task. */
#define rt_entity_is_task(rt_se) ((rt_se)->my_q == NULL)

/*
 * Map an RT scheduling entity back to the task_struct that embeds it.
 * Debug kernels warn once if the entity is not a task entity.
 */
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	struct task_struct *owner;

#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	owner = container_of(rt_se, struct task_struct, rt);

	return owner;
}

/* Return the runqueue stored in the rt_rq's rq back-pointer. */
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *owner = rt_rq->rq;

	return owner;
}

/* Return the rt_rq recorded in the entity's rt_rq pointer. */
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *queued_on = rt_se->rt_rq;

	return queued_on;
}

#else /* CONFIG_RT_GROUP_SCHED */

/* Without CONFIG_RT_GROUP_SCHED this test is constant-true. */
#define rt_entity_is_task(rt_se) (1)

/* Recover the task_struct that embeds the given RT scheduling entity. */
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	struct task_struct *owner = container_of(rt_se, struct task_struct, rt);

	return owner;
}

/* The rt_rq is the 'rt' member of struct rq; step back out to the runqueue. */
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *owner = container_of(rt_rq, struct rq, rt);

	return owner;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);
	struct rq *rq = task_rq(p);

	return &rq->rt;
}

#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

/*
 * Mark this runqueue's CPU as RT-overloaded in its root domain: set the
 * CPU's bit in rto_mask, then bump rto_count. Does nothing while the
 * runqueue is offline.
 */
static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 */
	wmb();
	atomic_inc(&rq->rd->rto_count);
}

/*
 * Undo rt_set_overload(): drop the root domain's overload count and clear
 * this CPU's bit in rto_mask. Does nothing while the runqueue is offline.
 */
static inline void rt_clear_overload(struct rq *rq)
{
	if (rq->online) {
		/* the order here really doesn't matter */
		atomic_dec(&rq->rd->rto_count);
		cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
	}
}

/*
 * Bring rt_rq->overloaded in line with the counters: the rt_rq counts as
 * overloaded when it has at least one migratory task and more than one
 * task in total. The root-domain state is only touched on a transition.
 */
static void update_rt_migration(struct rt_rq *rt_rq)
{
	int want_overload = rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1;

	if (want_overload && !rt_rq->overloaded) {
		rt_set_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 1;
		return;
	}

	if (!want_overload && rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

/*
 * Account a newly queued task entity in the migration counters of the
 * runqueue's own rt_rq (rq->rt), then re-evaluate the overload state.
 * Non-task entities are ignored.
 */
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct rt_rq *top;

	if (!rt_entity_is_task(rt_se))
		return;

	/* Counters are kept on the rq-embedded rt_rq, not on @rt_rq itself. */
	top = &rq_of_rt_rq(rt_rq)->rt;

	top->rt_nr_total += 1;
	if (rt_se->nr_cpus_allowed > 1)
		top->rt_nr_migratory += 1;

	update_rt_migration(top);
}

/*
 * Remove a task entity from the migration counters of the runqueue's own
 * rt_rq (rq->rt), then re-evaluate the overload state. Non-task entities
 * are ignored.
 */
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct rt_rq *top;

	if (!rt_entity_is_task(rt_se))
		return;

	/* Counters are kept on the rq-embedded rt_rq, not on @rt_rq itself. */
	top = &rq_of_rt_rq(rt_rq)->rt;

	top->rt_nr_total -= 1;
	if (rt_se->nr_cpus_allowed > 1)
		top->rt_nr_migratory -= 1;

	update_rt_migration(top);
}

/*
 * (Re)insert @p into the runqueue's pushable-task plist, keyed by its
 * current prio. The node is removed first so the key can be refreshed.
 */
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	struct plist_node *node = &p->pushable_tasks;
	struct plist_head *head = &rq->rt.pushable_tasks;

	plist_del(node, head);
	plist_node_init(node, p->prio);
	plist_add(node, head);
}

/* Remove @p from the runqueue's pushable-task plist. */
static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	struct plist_head *head = &rq->rt.pushable_tasks;

	plist_del(&p->pushable_tasks, head);
}

/* Return 1 if the runqueue's pushable-task plist is non-empty, else 0. */
static inline int has_pushable_tasks(struct rq *rq)
{
	return plist_head_empty(&rq->rt.pushable_tasks) ? 0 : 1;
}

#else

/* !CONFIG_SMP stub: intentionally empty. */
static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

/* !CONFIG_SMP stub: intentionally empty. */
static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

/* !CONFIG_SMP stub: no migration accounting is done; intentionally empty. */
static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

/* !CONFIG_SMP stub: no migration accounting is done; intentionally empty. */
static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

#endif /* CONFIG_SMP */

/* Return 1 if the entity's run_list node is linked into a list, else 0. */
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return list_empty(&rt_se->run_list) ? 0 : 1;
}

#ifdef CONFIG_RT_GROUP_SCHED

/*
 * Runtime budget of @rt_rq. An rt_rq without a task group is treated as
 * having unlimited runtime (RUNTIME_INF).
 */
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->tg ? rt_rq->rt_runtime : RUNTIME_INF;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
	list_add_rcu(&rt_rq->leaf_rt_rq_list,
			&rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
}

/* Unlink @rt_rq from the leaf_rt_rq_list it is on (RCU list removal). */
static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
	list_del_rcu(&rt_rq->leaf_rt_rq_list);
}

/*
 * Iterate @rt_rq over every rt_rq linked on @rq's leaf_rt_rq_list.
 * @rq is parenthesized so that any pointer expression can be passed.
 */
#define for_each_leaf_rt_rq(rt_rq, rq) \
	list_for_each_entry_rcu(rt_rq, &(rq)->leaf_rt_rq_list, leaf_rt_rq_list)

/*
 * Walk from @rt_se up through its ->parent chain until NULL.
 * @rt_se is both the starting point and the cursor; it is NULL when the
 * loop finishes without a break.
 */
#define for_each_sched_rt_entity(rt_se) \
	for (; (rt_se); (rt_se) = (rt_se)->parent)

/* Return the rt_rq owned by this entity (my_q); NULL when it has none. */
static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	struct rt_rq *owned = rt_se->my_q;

	return owned;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (rt_se && !on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, false);
		if (rt_rq->highest_prio.curr < curr->prio)
			resched_task(curr);
	}
}

/* Dequeue @rt_rq's group entity for this CPU if it is currently queued. */
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
	struct sched_rt_entity *rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se || !on_rt_rq(rt_se))
		return;

	dequeue_rt_entity(rt_se);
}

/*
 * An rt_rq is effectively throttled only when its throttled flag is set
 * and it holds no boosted entities.
 */
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	if (!rt_rq->rt_throttled)
		return 0;

	return !rt_rq->rt_nr_boosted;
}

/*
 * Return non-zero if the entity is boosted: a group entity is boosted when
 * its rt_rq counts any boosted entities; a task entity is boosted when its
 * prio differs from its normal_prio.
 */
static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct task_struct *task;

	if (!group_rq) {
		task = rt_task_of(rt_se);
		return task->prio != task->normal_prio;
	}

	return group_rq->rt_nr_boosted != 0;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_rq(smp_processor_id())->rd->span;
}
#else
/* !CONFIG_SMP variant: the period timer covers cpu_online_mask. */
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

/* Runtime budget currently assigned to @rt_rq. */
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

/* !CONFIG_RT_GROUP_SCHED stub: no leaf list is maintained. */
static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
}

/* !CONFIG_RT_GROUP_SCHED stub: no leaf list is maintained. */
static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
}

/*
 * Without group scheduling the only rt_rq of a runqueue is rq->rt, so the
 * loop body runs exactly once with @rt_rq pointing at it. Arguments are
 * parenthesized so that any pointer expression can be passed as @rq.
 */
#define for_each_leaf_rt_rq(rt_rq, rq) \
	for ((rt_rq) = &(rq)->rt; (rt_rq); (rt_rq) = NULL)

/*
 * Without group scheduling there is no entity hierarchy: the body runs
 * once for a non-NULL @rt_se, which is then set to NULL.
 */
#define for_each_sched_rt_entity(rt_se) \
	for (; (rt_se); (rt_se) = NULL)

/* !CONFIG_RT_GROUP_SCHED: entities never own a group rt_rq. */
static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

/* Reschedule the runqueue's current task if @rt_rq has runnable entities. */
static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	if (!rt_rq->rt_nr_running)
		return;

	resched_task(rq_of_rt_rq(rt_rq)->curr);
}

/* !CONFIG_RT_GROUP_SCHED stub: there is no group entity to dequeue. */
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}

/* Return the rt_rq's throttled flag. */
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

/* !CONFIG_RT_GROUP_SCHED: the period timer covers cpu_online_mask. */
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

/* !CONFIG_RT_GROUP_SCHED: every rt_rq uses the default bandwidth. */
static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 *
 * Walks the CPUs in the root domain that @rt_rq's own runqueue belongs to
 * and moves part of each neighbour's unused runtime over to @rt_rq, never
 * raising @rt_rq's runtime above the bandwidth period.
 *
 * The root domain is taken from @rt_rq rather than from the executing CPU:
 * the two can differ when the caller is iterating over rt_rqs of other
 * CPUs, and runtime must only be exchanged inside @rt_rq's own domain.
 *
 * Returns 1 if any runtime was borrowed, 0 otherwise.
 */
static int do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
	int i, weight, more = 0;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			more = 1;
			/* Reached the cap: stop looking for more. */
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return more;
}

/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 *
 * For every leaf rt_rq of @rq, settle the difference between the
 * bandwidth's nominal runtime and the rt_rq's current runtime against the
 * other rt_rqs in the root domain, then mark the rt_rq as RUNTIME_INF so
 * that it no longer takes part in borrowing. Does nothing before the
 * scheduler is running.
 */
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_leaf_rt_rq(rt_rq, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
				rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				/* Take at most what this neighbour holds. */
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				/*
				 * want <= 0: subtracting it adds the surplus
				 * to this neighbour; want then becomes zero.
				 */
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		BUG_ON(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

/* Run __disable_runtime() with rq->lock held and interrupts disabled. */
static void disable_runtime(struct rq *rq)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);
	__disable_runtime(rq);
	raw_spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * Restore every leaf rt_rq of @rq to the nominal runtime of its bandwidth
 * and clear its consumed time and throttled flag. Does nothing before the
 * scheduler is running.
 */
static void __enable_runtime(struct rq *rq)
{
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings
	 */
	for_each_leaf_rt_rq(rt_rq, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		/* Same lock order as elsewhere: bandwidth lock, then rt_rq lock. */
		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

/* Run __enable_runtime() with rq->lock held and interrupts disabled. */
static void enable_runtime(struct rq *rq)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);
	__enable_runtime(rq);
	raw_spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * If @rt_rq has consumed more than its runtime, try to borrow from its
 * neighbours. rt_rq->rt_runtime_lock is dropped around the borrowing and
 * re-taken before returning.
 *
 * Returns the result of do_balance_runtime(), or 0 when no borrowing was
 * attempted.
 */
static int balance_runtime(struct rt_rq *rt_rq)
{
	int more;

	if (rt_rq->rt_time <= rt_rq->rt_runtime)
		return 0;

	raw_spin_unlock(&rt_rq->rt_runtime_lock);
	more = do_balance_runtime(rt_rq);
	raw_spin_lock(&rt_rq->rt_runtime_lock);

	return more;
}
#else /* !CONFIG_SMP */
/* !CONFIG_SMP stub: nothing is ever borrowed, so always report 0. */
static inline int balance_runtime(struct rt_rq *rt_rq)
{
	return 0;
}
#endif /* CONFIG_SMP */

/*
 * Period-timer work for the bandwidth @rt_b: for each CPU in the period
 * mask, refund consumed runtime for the @overrun elapsed periods, lift
 * throttling when the rt_rq is back under budget, and re-enqueue rt_rqs
 * that became runnable.
 *
 * Returns 1 when bandwidth control is disabled, the bandwidth is
 * unlimited, or no visited rt_rq had pending time or runnable entities;
 * 0 otherwise.
 */
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1;
	const struct cpumask *span;

	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return 1;

	span = sched_rt_period_mask();
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);

		raw_spin_lock(&rq->lock);
		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			/* A throttled rt_rq first tries to borrow runtime. */
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			/* Refund up to overrun periods' worth, never below zero. */
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running) {
			/* No consumed time, but there are runnable entities. */
			idle = 0;
			if (!rt_rq_throttled(rt_rq))
				enqueue = 1;
		}

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		raw_spin_unlock(&rq->lock);
	}

	return idle;
}

/*
 * Priority of an RT entity: for an entity that owns a group rt_rq, the
 * highest priority currently queued on that rt_rq; otherwise the prio of
 * the task it represents.
 */
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *group_rq = group_rt_rq(rt_se);

	if (group_rq != NULL)
		return group_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

/*
 * Decide whether @rt_rq has run over its runtime budget and must be
 * throttled.
 *
 * An already-throttled rt_rq reports its effective throttled state. An
 * rt_rq whose runtime is at least its period is never throttled.
 * Otherwise an attempt is made to borrow runtime first; if consumed time
 * still exceeds the (possibly increased) runtime, the rt_rq is marked
 * throttled and, when the throttle is effective, dequeued.
 *
 * Returns 1 if the rt_rq is (now) effectively throttled, 0 otherwise.
 */
static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (runtime >= sched_rt_period(rt_rq))
		return 0;

	/* Borrowing may change the runtime, so re-read it afterwards. */
	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		rt_rq->rt_throttled = 1;
		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 *
 * Charges the time since exec_start to the task, then, when RT bandwidth
 * control is enabled, adds it to the rt_time of every rt_rq up the
 * entity hierarchy and requests a reschedule if one of them exceeded its
 * budget.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	struct rt_rq *rt_rq;
	u64 delta_exec;

	if (curr->sched_class != &rt_sched_class)
		return;

	delta_exec = rq->clock_task - curr->se.exec_start;
	/* Guard against the clock appearing to go backwards. */
	if (unlikely((s64)delta_exec < 0))
		delta_exec = 0;

	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

	curr->se.exec_start = rq->clock_task;
	cpuacct_charge(curr, delta_exec);

	sched_rt_avg_update(rq, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);

		/* rt_rqs with unlimited runtime need no accounting. */
		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_task(curr);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

#if defined CONFIG_SMP

static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);

/*
 * Priority of the task returned by pick_next_highest_task_rt() for this
 * runqueue, or MAX_RT_PRIO when there is none or it is not an RT priority.
 */
static inline int next_prio(struct rq *rq)
{
	struct task_struct *candidate = pick_next_highest_task_rt(rq, rq->cpu);

	if (!candidate || !rt_prio(candidate->prio))
		return MAX_RT_PRIO;

	return candidate->prio;
}

/*
 * SMP part of priority bookkeeping on enqueue: keep highest_prio.next up
 * to date and publish a new highest priority to cpupri.
 *
 * @prio:      priority of the entity being added
 * @prev_prio: highest_prio.curr as it was before the entity was added
 */
static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (prio < prev_prio) {
		/*
		 * The new entity outranks everything on the run-queue, so
		 * the previous high becomes our next-highest.
		 */
		rt_rq->highest_prio.next = prev_prio;

		if (rq->online)
			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
		return;
	}

	if (prio == rt_rq->highest_prio.curr) {
		/*
		 * Equal to the current highest: the next highest cannot be
		 * any lower than current.
		 */
		rt_rq->highest_prio.next = prio;
		return;
	}

	/* Otherwise, we may need to recompute next-highest. */
	if (prio < rt_rq->highest_prio.next)
		rt_rq->highest_prio.next = next_prio(rq);
}

/*
 * SMP part of priority bookkeeping on dequeue: recompute
 * highest_prio.next when the removed entity could have affected it, and
 * tell cpupri when the highest priority changed.
 *
 * @prio:      priority of the entity being removed
 * @prev_prio: highest_prio.curr as it was before the removal
 */
static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);
	int now_highest;

	if (rt_rq->rt_nr_running && prio <= rt_rq->highest_prio.next)
		rt_rq->highest_prio.next = next_prio(rq);

	if (!rq->online)
		return;

	now_highest = rt_rq->highest_prio.curr;
	if (now_highest != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, now_highest);
}

#else /* CONFIG_SMP */

/* !CONFIG_SMP stub: intentionally empty. */
static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
/* !CONFIG_SMP stub: intentionally empty. */
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
/*
 * Account an added entity of priority @prio: raise highest_prio.curr if
 * the newcomer outranks it, then run the SMP-specific bookkeeping.
 */
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int old_highest = rt_rq->highest_prio.curr;

	if (old_highest > prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, old_highest);
}

/*
 * Account a removed entity of priority @prio: recompute highest_prio.curr
 * when the removed entity was at the highest priority, or reset it to
 * MAX_RT_PRIO when the rt_rq is now empty, then run the SMP-specific
 * bookkeeping.
 */
static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int old_highest = rt_rq->highest_prio.curr;

	if (!rt_rq->rt_nr_running) {
		rt_rq->highest_prio.curr = MAX_RT_PRIO;
	} else {
		WARN_ON(prio < old_highest);

		/*
		 * This may have been our highest task, and therefore
		 * we may have some recomputation to do
		 */
		if (prio == old_highest) {
			struct rt_prio_array *array = &rt_rq->active;

			rt_rq->highest_prio.curr =
				sched_find_first_bit(array->bitmap);
		}
	}

	dec_rt_prio_smp(rt_rq, prio, old_highest);
}

#else

/* Stub when neither CONFIG_SMP nor CONFIG_RT_GROUP_SCHED is set. */
static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
/* Stub when neither CONFIG_SMP nor CONFIG_RT_GROUP_SCHED is set. */
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

/*
 * Group bookkeeping on dequeue: uncount the entity if it is boosted and
 * warn if boosted entities remain on an rt_rq with nothing running.
 */
static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted -= 1;

	WARN_ON(rt_rq->rt_nr_boosted && !rt_rq->rt_nr_running);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

/* !CONFIG_RT_GROUP_SCHED stub: intentionally empty. */
static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

/*
 * Account one more runnable entity on @rt_rq and update the priority,
 * migration and group bookkeeping. Warns if the entity's priority is not
 * an RT priority.
 */
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int se_prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(se_prio));
	rt_rq->rt_nr_running += 1;

	inc_rt_prio(rt_rq, se_prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

/*
 * Account one fewer runnable entity on @rt_rq and update the priority,
 * migration and group bookkeeping. Warns if the entity's priority is not
 * an RT priority or if the rt_rq was already empty.
 */
static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int se_prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(se_prio));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running -= 1;

	dec_rt_prio(rt_rq, se_prio);
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}

/*
 * Put a single entity on the priority array of the rt_rq it belongs to,
 * at the head or the tail of its priority list, and update the rt_rq's
 * accounting.
 */
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	int se_prio = rt_se_prio(rt_se);
	struct list_head *queue = &array->queue[se_prio];

	/*
	 * Don't enqueue the group if it's throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq) {
		if (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)
			return;
	}

	/* First entity on this rt_rq: put the rt_rq on the leaf list. */
	if (!rt_rq->rt_nr_running)
		list_add_leaf_rt_rq(rt_rq);

	if (head)
		list_add(&rt_se->run_list, queue);
	else
		list_add_tail(&rt_se->run_list, queue);
	__set_bit(se_prio, array->bitmap);

	inc_rt_tasks(rt_se, rt_rq);
}

/*
 * Take a single entity off the priority array of its rt_rq, clearing the
 * priority bit when its list becomes empty, and update the rt_rq's
 * accounting.
 */
static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	int se_prio;

	list_del_init(&rt_se->run_list);

	se_prio = rt_se_prio(rt_se);
	if (list_empty(&array->queue[se_prio]))
		__clear_bit(se_prio, array->bitmap);

	dec_rt_tasks(rt_se, rt_rq);

	/* Last entity gone: take the rt_rq off the leaf list. */
	if (!rt_rq->rt_nr_running)
		list_del_leaf_rt_rq(rt_rq);
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 *
 * The first pass walks up from @rt_se, recording in each entity's ->back
 * pointer the entity below it; the second pass follows those ->back links
 * from the topmost entity downwards and dequeues every entity that is
 * currently queued.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
{
	struct sched_rt_entity *back = NULL;

	/* Pass 1: bottom-up, building the reverse (->back) chain. */
	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	/* Pass 2: top-down, starting from the last entity visited above. */
	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se);
	}
}

/*
 * Enqueue @rt_se and every entity above it: the whole stack is dequeued
 * first, then each level is enqueued again walking upwards. @head selects
 * head or tail insertion at every level.
 */
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	dequeue_rt_stack(rt_se);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, head);
}

/*
 * Dequeue @rt_se: the whole stack above it is removed, then every entity
 * on the way up whose own rt_rq still has runnable entities is put back
 * at the tail of its list.
 */
static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	dequeue_rt_stack(rt_se);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *group_rq = group_rt_rq(rt_se);

		if (!group_rq || !group_rq->rt_nr_running)
			continue;

		__enqueue_rt_entity(rt_se, false);
	}
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);

	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

/*
 * Remove task @p from @rq: bring the current task's runtime accounting up
 * to date, dequeue the entity and drop the task from the pushable list.
 * @flags is not used here.
 */
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se);

	dequeue_pushable_task(rq, p);
}

/*
 * Move a queued entity to the head (@head non-zero) or the tail of its
 * priority list without the overhead of a dequeue followed by an enqueue.
 * Entities that are not queued are left alone.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	struct rt_prio_array *array;
	struct list_head *queue;

	if (!on_rt_rq(rt_se))
		return;

	array = &rt_rq->active;
	queue = &array->queue[rt_se_prio(rt_se)];

	if (head)
		list_move(&rt_se->run_list, queue);
	else
		list_move_tail(&rt_se->run_list, queue);
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

/* Yield: move the currently running task to the tail of its priority list. */
static void yield_task_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;

	requeue_task_rt(rq, curr, 0);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

/*
 * Choose the CPU on which RT task @p should run.
 *
 * For anything other than a wakeup balance (@sd_flag != SD_BALANCE_WAKE)
 * the executing CPU is returned. On wakeup, find_lowest_rq() is consulted
 * only under the conditions described below; in every other case, and
 * when find_lowest_rq() reports -1, the task's current CPU is returned.
 * @flags is not used here.
 */
static int
select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
{
	if (sd_flag != SD_BALANCE_WAKE)
		return smp_processor_id();

	/*
	 * If the current task is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher task
	 * around just because it gave up its CPU, perhaps for a
	 * lock?
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
	 */
	if (unlikely(rt_task(rq->curr)) &&
	    (rq->curr->rt.nr_cpus_allowed < 2 ||
	     rq->curr->prio < p->prio) &&
	    (p->rt.nr_cpus_allowed > 1)) {
		int cpu = find_lowest_rq(p);

		return (cpu == -1) ? task_cpu(p) : cpu;
	}

	/*
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away
	 */
	return task_cpu(p);
}

/*
 * Equal-priority wakeup handling: if the current task can move to another
 * CPU while the woken task @p cannot, put @p at the head of its queue and
 * reschedule so that current can be pushed away.
 */
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
	/* Current is pinned to one CPU: it cannot be pushed anywhere. */
	if (rq->curr->rt.nr_cpus_allowed == 1)
		return;

	/* @p may migrate and cpupri found a CPU for it: leave things be. */
	if (p->rt.nr_cpus_allowed != 1
	    && cpupri_find(&rq->rd->cpupri, p, NULL))
		return;

	/* cpupri found no CPU for current: rescheduling would not help. */
	if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
		return;

	/*
	 * There appears to be other cpus that can accept
	 * current and none to run 'p', so lets reschedule
	 * to try and push current away:
	 */
	requeue_task_rt(rq, p, 1);
	resched_task(rq->curr);
}

#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 * a strictly higher-priority @p (lower prio value) always triggers a
 * reschedule; on SMP an equal-priority @p is handed to
 * check_preempt_equal_prio() unless a reschedule is already pending.
 * @flags is not used here.
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
	if (p->prio < rq->curr->prio) {
		resched_task(rq->curr);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu.  If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
	if (p->prio == rq->curr->prio && !need_resched())
		check_preempt_equal_prio(rq, p);
#endif
}

/*
 * Return the first entity queued on the list selected by the first set
 * bit of @rt_rq's active bitmap.  BUGs if that bit index is not below
 * MAX_RT_PRIO.
 */
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *active = &rt_rq->active;
	int prio = sched_find_first_bit(active->bitmap);

	BUG_ON(prio >= MAX_RT_PRIO);

	return list_entry(active->queue[prio].next,
			  struct sched_rt_entity, run_list);
}

/*
 * Select the next RT task to run on @rq, or NULL when the top-level
 * rt_rq has nothing running or is throttled.  The selected task gets its
 * se.exec_start stamped with rq->clock_task.
 */
static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct rt_rq *rt_rq = &rq->rt;
	struct sched_rt_entity *rt_se;
	struct task_struct *next;

	if (unlikely(!rt_rq->rt_nr_running) || rt_rq_throttled(rt_rq))
		return NULL;

	/*
	 * Walk down: as long as the picked entity has a group rt_rq of its
	 * own, pick again from that rt_rq.
	 */
	for (;;) {
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);

		rt_rq = group_rt_rq(rt_se);
		if (!rt_rq)
			break;
	}

	next = rt_task_of(rt_se);
	next->se.exec_start = rq->clock_task;

	return next;
}

/*
 * ->pick_next_task hook of the RT class.  Returns the task chosen by
 * _pick_next_task_rt() (possibly NULL) after removing it from the
 * pushable list.
 */
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct task_struct *next;

	next = _pick_next_task_rt(rq);
	if (next) {
		/* A task that is about to run must not be a push candidate. */
		dequeue_pushable_task(rq, next);
	}

#ifdef CONFIG_SMP
	/*
	 * Record now whether anything is left to push, so that the RQ
	 * lock need not be taken again later when there is no need to push.
	 */
	rq->post_schedule = has_pushable_tasks(rq);
#endif

	return next;
}

/*
 * ->put_prev_task hook of the RT class: account the runtime of the
 * outgoing task @p, clear its exec_start stamp and, if appropriate, make
 * it a push candidate again.
 */
static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);
	p->se.exec_start = 0;

	/* No longer queued: it is not a candidate for pushing. */
	if (!p->se.on_rq)
		return;

	/* Allowed on a single CPU only: it cannot be pushed anywhere. */
	if (p->rt.nr_cpus_allowed <= 1)
		return;

	enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/*
 * Only try algorithms three times: this is the upper bound of the retry
 * loop in find_lock_lowest_rq().
 */
#define RT_MAX_TRIES 3

/*
 * Forward declaration; deactivate_task() is used by the push/pull code
 * below but is not defined in this part of the file.
 */
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

/*
 * Return 1 if @p may be taken off @rq and run on @cpu, 0 otherwise.
 * A negative @cpu skips the affinity test.
 */
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	/* A task that is executing right now cannot be moved. */
	if (task_running(rq, p))
		return 0;

	/* The destination, when given, must be in the task's allowed mask. */
	if (cpu >= 0 && !cpumask_test_cpu(cpu, &p->cpus_allowed))
		return 0;

	/* Finally, the task must be allowed on more than one CPU. */
	return p->rt.nr_cpus_allowed > 1;
}

/*
 * Return the second highest RT task, NULL otherwise.
 *
 * More precisely: walk every leaf rt_rq of @rq and, per rt_rq, scan the
 * queues of its active array in bitmap order for the first task entity
 * that pick_rt_task(rq, p, cpu) accepts (not running, allowed on @cpu
 * unless @cpu is negative, and allowed on more than one CPU).
 *
 * NOTE(review): 'next' persists across leaf rt_rqs.  Once it is set, a
 * queue that yields no acceptable task does not advance to the following
 * bit of that rt_rq (the '!next' test below is false) - confirm that this
 * is the intended behavior.
 */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
{
	struct task_struct *next = NULL;
	struct sched_rt_entity *rt_se;
	struct rt_prio_array *array;
	struct rt_rq *rt_rq;
	int idx;

	for_each_leaf_rt_rq(rt_rq, rq) {
		array = &rt_rq->active;
		/* Start with the first populated queue of this rt_rq. */
		idx = sched_find_first_bit(array->bitmap);
next_idx:
		/* No (more) populated queues in this rt_rq. */
		if (idx >= MAX_RT_PRIO)
			continue;
		/* The candidate we already hold beats this queue's index. */
		if (next && next->prio < idx)
			continue;
		list_for_each_entry(rt_se, array->queue + idx, run_list) {
			struct task_struct *p;

			/* Only task entities can be picked. */
			if (!rt_entity_is_task(rt_se))
				continue;

			p = rt_task_of(rt_se);
			if (pick_rt_task(rq, p, cpu)) {
				next = p;
				break;
			}
		}
		/* Nothing found so far: try the next populated queue. */
		if (!next) {
			idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
			goto next_idx;
		}
	}

	return next;
}

/*
 * Per-cpu scratch cpumask: find_lowest_rq() hands it to cpupri_find() to
 * receive the candidate CPUs.  Allocated by init_sched_rt_class().
 */
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

/*
 * Find a CPU on which @task could run, preferring locations close to
 * where it last ran.
 *
 * Returns the chosen CPU number, or -1 when @task is restricted to a
 * single CPU or cpupri_find() reports no target.
 */
static int find_lowest_rq(struct task_struct *task)
{
	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);
	struct sched_domain *sd;

	/* No other targets possible */
	if (task->rt.nr_cpus_allowed == 1)
		return -1;

	/* No targets found */
	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1;

	/*
	 * lowest_mask now holds the cpus representing the lowest priority
	 * tasks in the system; elect the best one based on affinity and
	 * topology.
	 *
	 * First choice is the last cpu the task executed on, since it is
	 * most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise consult the sched_domains span maps to figure out
	 * which cpu is logically closest to our hot cache data.  The
	 * this_cpu shortcut is only usable if this_cpu is a candidate.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1;

	for_each_domain(cpu, sd) {
		int best_cpu;

		if (!(sd->flags & SD_WAKE_AFFINE))
			continue;

		/* "this_cpu" is cheaper to preempt than a remote processor. */
		if (this_cpu != -1 &&
		    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
			return this_cpu;

		best_cpu = cpumask_first_and(lowest_mask,
					     sched_domain_span(sd));
		if (best_cpu < nr_cpu_ids)
			return best_cpu;
	}

	/*
	 * No match within the domains: give the caller *something* to
	 * work with from the compatible locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);

	return (cpu < nr_cpu_ids) ? cpu : -1;
}

/*
 * Find a runqueue that @task (currently queued on @rq) can be pushed to,
 * and return it with its lock held in addition to rq->lock.
 *
 * Will lock the rq it finds.  Returns NULL, with only rq->lock held, when
 * no suitable runqueue was found within RT_MAX_TRIES attempts, when the
 * best candidate is @rq itself, or when @task changed state while
 * rq->lock was dropped.  rq->lock may have been released and re-acquired
 * by the time this returns, whatever the result.
 */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu,
						       &task->cpus_allowed) ||
				     task_running(rq, task) ||
				     !task->se.on_rq)) {

				/*
				 * Drop lowest_rq->lock through
				 * double_unlock_balance(), the counterpart of
				 * the double_lock_balance() above and the same
				 * helper the retry path below uses, instead of
				 * a bare raw_spin_unlock(): this keeps any
				 * bookkeeping the pair maintains for rq->lock
				 * (lockdep annotations in particular) balanced.
				 */
				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

/*
 * Return the first entry of @rq's pushable_tasks plist, or NULL when the
 * list is empty.  The entry is sanity-checked before it is returned.
 */
static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *candidate;

	if (!has_pushable_tasks(rq))
		return NULL;

	candidate = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);

	/* It must live on this runqueue, not be running, and be movable. */
	BUG_ON(rq->cpu != task_cpu(candidate));
	BUG_ON(task_current(rq, candidate));
	BUG_ON(candidate->rt.nr_cpus_allowed <= 1);

	/* It must still be queued and must be an RT task. */
	BUG_ON(!candidate->se.on_rq);
	BUG_ON(!rt_task(candidate));

	return candidate;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 *
 * Returns 0 when nothing was attempted: @rq is not overloaded, has no
 * pushable task, the candidate turned out to be rq->curr, or the
 * candidate outranks rq->curr (in which case rq->curr is rescheduled
 * instead).  Returns 1 in every other case, i.e. whenever the 'out'
 * label is reached - this includes the paths on which the candidate
 * could not be moved.
 */
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	/* The running task is never supposed to be on the pushable list. */
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that the next_task slipped in of
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_task(rq->curr);
		return 0;
	}

	/*
	 * We might release rq lock, so hold a reference on next_task
	 * until we are done with it (dropped at 'out' or before 'retry').
	 */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq may release rq->lock,
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task_cpu(next_task) == rq->cpu && task == next_task) {
			/*
			 * If we get here, the task hasn't moved at all, but
			 * it has failed to push.  We will not try again,
			 * since the other cpus will pull from us when they
			 * are ready.
			 */
			dequeue_pushable_task(rq, next_task);
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again with the new candidate.
		 * Drop the reference on the old one first; 'retry' takes
		 * a reference on the new one.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	/* Both rq->lock and lowest_rq->lock are held: move the task over. */
	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);

	/* Ask the destination to reschedule so it notices the new task. */
	resched_task(lowest_rq->curr);

	double_unlock_balance(rq, lowest_rq);

out:
	put_task_struct(next_task);

	return 1;
}

/*
 * Call push_rt_task() on @rq repeatedly until it returns 0.
 */
static void push_rt_tasks(struct rq *rq)
{
	int again;

	do {
		again = push_rt_task(rq);
	} while (again);
}

/*
 * Try to pull RT tasks from the other runqueues listed in the root
 * domain's rto_mask over to @this_rq.
 *
 * Returns 1 if at least one task was moved to @this_rq, 0 otherwise.
 * this_rq->lock may be dropped and re-acquired along the way (see
 * double_lock_balance() below).
 */
static int pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, ret = 0, cpu;
	struct task_struct *p;
	struct rq *src_rq;

	/* Nothing to pull unless rt_overloaded() reports overload. */
	if (likely(!rt_overloaded(this_rq)))
		return 0;

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if it's going logically lower, we do not care.
		 */
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq
		 */
		double_lock_balance(this_rq, src_rq);

		/*
		 * Are there still pullable RT tasks?
		 */
		if (src_rq->rt.rt_nr_running <= 1)
			goto skip;

		p = pick_next_highest_task_rt(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!p->se.on_rq);

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its cpu.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			ret = 1;

			/* Both runqueue locks are held: move p over here. */
			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);
	}

	return ret;
}

/*
 * ->pre_schedule hook of the RT class: try to pull RT tasks here if the
 * outgoing task @prev was an RT task and @rq's highest_prio.curr value is
 * now greater than prev->prio, i.e. we are lowering this rq's prio.
 */
static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
	if (!unlikely(rt_task(prev)))
		return;

	if (rq->rt.highest_prio.curr > prev->prio)
		pull_rt_task(rq);
}

/*
 * ->post_schedule hook of the RT class: push away whatever pushable RT
 * tasks @rq has.  pick_next_task_rt() sets rq->post_schedule when there
 * is something to push.
 */
static void post_schedule_rt(struct rq *rq)
{
	push_rt_tasks(rq);
}

/*
 * ->task_woken hook of the RT class.
 *
 * If the woken task @p is not running and we are not going to reschedule
 * soon, we should try to push tasks away now.
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	struct task_struct *curr = rq->curr;

	/* @p already got the CPU. */
	if (task_running(rq, p))
		return;

	/* A reschedule of current is pending anyway. */
	if (test_tsk_need_resched(curr))
		return;

	/* Nothing is queued for pushing. */
	if (!has_pushable_tasks(rq))
		return;

	/* @p cannot run anywhere else. */
	if (p->rt.nr_cpus_allowed <= 1)
		return;

	/* Current is not an RT task. */
	if (!rt_task(curr))
		return;

	/*
	 * Only push when current cannot migrate itself or has a
	 * numerically lower prio value than @p.
	 */
	if (curr->rt.nr_cpus_allowed >= 2 && curr->prio >= p->prio)
		return;

	push_rt_tasks(rq);
}

/*
 * ->set_cpus_allowed hook of the RT class: install @new_mask as the
 * affinity mask of RT task @p and keep the runqueue's pushable list and
 * rt_nr_migratory counter consistent with the new number of allowed CPUs.
 *
 * BUGs if @p is not an RT task.
 */
static void set_cpus_allowed_rt(struct task_struct *p,
				const struct cpumask *new_mask)
{
	/* Number of CPUs in the new mask; p->rt.nr_cpus_allowed is the old one. */
	int weight = cpumask_weight(new_mask);

	BUG_ON(!rt_task(p));

	/*
	 * Update the migration status of the RQ if we have an RT task
	 * which is queued (p->se.on_rq) AND changing its weight value.
	 */
	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
		struct rq *rq = task_rq(p);

		if (!task_current(rq, p)) {
			/*
			 * Make sure we dequeue this task from the pushable list
			 * before going further.  It will either remain off of
			 * the list because we are no longer pushable, or it
			 * will be requeued.
			 */
			if (p->rt.nr_cpus_allowed > 1)
				dequeue_pushable_task(rq, p);

			/*
			 * Requeue if our weight is changing and still > 1
			 */
			if (weight > 1)
				enqueue_pushable_task(rq, p);

		}

		/* Task becomes migratable, or stops being migratable. */
		if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
			rq->rt.rt_nr_migratory++;
		} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
			BUG_ON(!rq->rt.rt_nr_migratory);
			rq->rt.rt_nr_migratory--;
		}

		update_rt_migration(&rq->rt);
	}

	/* Only now overwrite the old mask and weight used above. */
	cpumask_copy(&p->cpus_allowed, new_mask);
	p->rt.nr_cpus_allowed = weight;
}

/*
 * ->rq_online hook of the RT class.
 *
 * Assumes rq->lock is held
 */
static void rq_online_rt(struct rq *rq)
{
	/* Re-announce an overloaded runqueue. */
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	/* Publish this CPU's current highest RT prio in the root domain's cpupri. */
	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/*
 * ->rq_offline hook of the RT class; undoes what rq_online_rt() set up.
 *
 * Assumes rq->lock is held
 */
static void rq_offline_rt(struct rq *rq)
{
	/* Withdraw the overload announcement, if there was one. */
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	/* Mark this CPU's entry in the root domain's cpupri as invalid. */
	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}

/*
 * ->switched_from hook of the RT class.
 *
 * When a task switches away from the rt queue, we bring ourselves to a
 * position where we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/* A task that is not queued changes nothing for this runqueue. */
	if (!p->se.on_rq)
		return;

	/*
	 * If there are other RT tasks then we will reschedule and the
	 * scheduling of the other RT tasks will handle the balancing.
	 */
	if (rq->rt.rt_nr_running)
		return;

	/* We were the last RT task: handle the pulling of RT tasks now. */
	pull_rt_task(rq);
}

static inline void init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_SMP */

/*
 * ->switched_to hook of the RT class.
 *
 * When switching a task to RT, we may overload the runqueue with RT
 * tasks.  In this case we try to push them off to other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If @p is not queued, or is already the running task, there is
	 * nothing that needs to be done.
	 */
	if (!p->se.on_rq || rq->curr == p)
		return;

#ifdef CONFIG_SMP
	/*
	 * On an overloaded runqueue try a push first.  Don't resched if
	 * that made @p change runqueues.
	 */
	if (rq->rt.overloaded && push_rt_task(rq) && rq != task_rq(p))
		return;
#endif /* CONFIG_SMP */

	/* @p is waiting here: preempt current if @p outranks it. */
	if (p->prio < rq->curr->prio)
		resched_task(rq->curr);
}

/*
 * ->prio_changed hook of the RT class.
 *
 * The priority of @p has changed from @oldprio to p->prio.  This may
 * cause us to initiate a pull or a reschedule.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!p->se.on_rq)
		return;

	if (rq->curr != p) {
		/*
		 * This task is not running, but if it now outranks the
		 * currently running task then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_task(rq->curr);
		return;
	}

	/* From here on @p is the task currently running on @rq. */
#ifdef CONFIG_SMP
	/*
	 * If our priority decreases while running, we may need to pull
	 * tasks to this runqueue.
	 */
	if (oldprio < p->prio)
		pull_rt_task(rq);

	/*
	 * If there's a higher priority task waiting to run then
	 * reschedule.  Note, the above pull_rt_task can release the rq
	 * lock and p could migrate.  Only reschedule if p is still on the
	 * same runqueue.
	 */
	if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
		resched_task(p);
#else
	/* For UP simply resched on drop of prio */
	if (oldprio < p->prio)
		resched_task(p);
#endif /* CONFIG_SMP */
}

/*
 * Per-tick RLIMIT_RTTIME check for @p, called from task_tick_rt().
 *
 * Unless the soft limit is RLIM_INFINITY, bump p->rt.timeout and compare
 * it against the smaller of the soft and hard limit, divided by
 * USEC_PER_SEC/HZ and rounded up.  Once the counter exceeds that value,
 * cputime_expires.sched_exp is set to the task's sum_exec_runtime.
 */
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard, limit;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft == RLIM_INFINITY)
		return;

	p->rt.timeout++;
	limit = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
	if (p->rt.timeout > limit)
		p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
}

static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	update_curr_rt(rq);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = DEF_TIMESLICE;

	/*
	 * Requeue to the end of queue if we are not the only element
	 * on the queue:
	 */
	if (p->rt.run_list.prev != p->rt.run_list.next) {
		requeue_task_rt(rq, p, 0);
		set_tsk_need_resched(p);
	}
}

/*
 * ->set_curr_task hook of the RT class: stamp the exec_start of the task
 * currently running on @rq and take it off the pushable list.
 */
static void set_curr_task_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;

	curr->se.exec_start = rq->clock_task;

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, curr);
}

/*
 * ->get_rr_interval hook of the RT class: DEF_TIMESLICE for SCHED_RR
 * tasks, 0 for every other policy (i.e. SCHED_FIFO).
 */
static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	if (task->policy != SCHED_RR)
		return 0;

	return DEF_TIMESLICE;
}

/*
 * Method table of the RT scheduling class.  Each hook points at the
 * corresponding *_rt function of this file; .next links to
 * fair_sched_class.
 */
static const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

	/* CPU selection and push/pull balancing hooks exist on SMP only. */
#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,

	.set_cpus_allowed       = set_cpus_allowed_rt,
	.rq_online              = rq_online_rt,
	.rq_offline             = rq_offline_rt,
	.pre_schedule		= pre_schedule_rt,
	.post_schedule		= post_schedule_rt,
	.task_woken		= task_woken_rt,
	.switched_from		= switched_from_rt,
#endif

	.set_curr_task          = set_curr_task_rt,
	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,
};

#ifdef CONFIG_SCHED_DEBUG
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);

static void print_rt_stats(struct seq_file *m, int cpu)
{
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */

ss="hl kwb">struct list_head *head) { struct ScsiReqBlk *i; list_for_each_entry(i, head, list) if (i->cmd == cmd) return i; return NULL; } static struct ScsiReqBlk *srb_get_free(struct AdapterCtlBlk *acb) { struct list_head *head = &acb->srb_free_list; struct ScsiReqBlk *srb = NULL; if (!list_empty(head)) { srb = list_entry(head->next, struct ScsiReqBlk, list); list_del(head->next); dprintkdbg(DBG_0, "srb_get_free: srb=%p\n", srb); } return srb; } static void srb_free_insert(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { dprintkdbg(DBG_0, "srb_free_insert: srb=%p\n", srb); list_add_tail(&srb->list, &acb->srb_free_list); } static void srb_waiting_insert(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { dprintkdbg(DBG_0, "srb_waiting_insert: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); list_add(&srb->list, &dcb->srb_waiting_list); } static void srb_waiting_append(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { dprintkdbg(DBG_0, "srb_waiting_append: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); list_add_tail(&srb->list, &dcb->srb_waiting_list); } static void srb_going_append(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { dprintkdbg(DBG_0, "srb_going_append: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); list_add_tail(&srb->list, &dcb->srb_going_list); } static void srb_going_remove(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { struct ScsiReqBlk *i; struct ScsiReqBlk *tmp; dprintkdbg(DBG_0, "srb_going_remove: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); list_for_each_entry_safe(i, tmp, &dcb->srb_going_list, list) if (i == srb) { list_del(&srb->list); break; } } static void srb_waiting_remove(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { struct ScsiReqBlk *i; struct ScsiReqBlk *tmp; dprintkdbg(DBG_0, "srb_waiting_remove: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); 
list_for_each_entry_safe(i, tmp, &dcb->srb_waiting_list, list) if (i == srb) { list_del(&srb->list); break; } } static void srb_going_to_waiting_move(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { dprintkdbg(DBG_0, "srb_going_to_waiting_move: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); list_move(&srb->list, &dcb->srb_waiting_list); } static void srb_waiting_to_going_move(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { dprintkdbg(DBG_0, "srb_waiting_to_going_move: (0x%p) <%02i-%i> srb=%p\n", srb->cmd, dcb->target_id, dcb->target_lun, srb); list_move(&srb->list, &dcb->srb_going_list); } /* Sets the timer to wake us up */ static void waiting_set_timer(struct AdapterCtlBlk *acb, unsigned long to) { if (timer_pending(&acb->waiting_timer)) return; init_timer(&acb->waiting_timer); acb->waiting_timer.function = waiting_timeout; acb->waiting_timer.data = (unsigned long) acb; if (time_before(jiffies + to, acb->scsi_host->last_reset - HZ / 2)) acb->waiting_timer.expires = acb->scsi_host->last_reset - HZ / 2 + 1; else acb->waiting_timer.expires = jiffies + to + 1; add_timer(&acb->waiting_timer); } /* Send the next command from the waiting list to the bus */ static void waiting_process_next(struct AdapterCtlBlk *acb) { struct DeviceCtlBlk *start = NULL; struct DeviceCtlBlk *pos; struct DeviceCtlBlk *dcb; struct ScsiReqBlk *srb; struct list_head *dcb_list_head = &acb->dcb_list; if (acb->active_dcb || (acb->acb_flag & (RESET_DETECT + RESET_DONE + RESET_DEV))) return; if (timer_pending(&acb->waiting_timer)) del_timer(&acb->waiting_timer); if (list_empty(dcb_list_head)) return; /* * Find the starting dcb. Need to find it again in the list * since the list may have changed since we set the ptr to it */ list_for_each_entry(dcb, dcb_list_head, list) if (dcb == acb->dcb_run_robin) { start = dcb; break; } if (!start) { /* This can happen! 
*/ start = list_entry(dcb_list_head->next, typeof(*start), list); acb->dcb_run_robin = start; } /* * Loop over the dcb, but we start somewhere (potentially) in * the middle of the loop so we need to manully do this. */ pos = start; do { struct list_head *waiting_list_head = &pos->srb_waiting_list; /* Make sure, the next another device gets scheduled ... */ acb->dcb_run_robin = dcb_get_next(dcb_list_head, acb->dcb_run_robin); if (list_empty(waiting_list_head) || pos->max_command <= list_size(&pos->srb_going_list)) { /* move to next dcb */ pos = dcb_get_next(dcb_list_head, pos); } else { srb = list_entry(waiting_list_head->next, struct ScsiReqBlk, list); /* Try to send to the bus */ if (!start_scsi(acb, pos, srb)) srb_waiting_to_going_move(pos, srb); else waiting_set_timer(acb, HZ/50); break; } } while (pos != start); } /* Wake up waiting queue */ static void waiting_timeout(unsigned long ptr) { unsigned long flags; struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)ptr; dprintkdbg(DBG_1, "waiting_timeout: Queue woken up by timer. 
acb=%p\n", acb); DC395x_LOCK_IO(acb->scsi_host, flags); waiting_process_next(acb); DC395x_UNLOCK_IO(acb->scsi_host, flags); } /* Get the DCB for a given ID/LUN combination */ static struct DeviceCtlBlk *find_dcb(struct AdapterCtlBlk *acb, u8 id, u8 lun) { return acb->children[id][lun]; } /* Send SCSI Request Block (srb) to adapter (acb) */ static void send_srb(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { struct DeviceCtlBlk *dcb = srb->dcb; if (dcb->max_command <= list_size(&dcb->srb_going_list) || acb->active_dcb || (acb->acb_flag & (RESET_DETECT + RESET_DONE + RESET_DEV))) { srb_waiting_append(dcb, srb); waiting_process_next(acb); return; } if (!start_scsi(acb, dcb, srb)) srb_going_append(dcb, srb); else { srb_waiting_insert(dcb, srb); waiting_set_timer(acb, HZ / 50); } } /* Prepare SRB for being sent to Device DCB w/ command *cmd */ static void build_srb(struct scsi_cmnd *cmd, struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { int nseg; enum dma_data_direction dir = cmd->sc_data_direction; dprintkdbg(DBG_0, "build_srb: (0x%p) <%02i-%i>\n", cmd, dcb->target_id, dcb->target_lun); srb->dcb = dcb; srb->cmd = cmd; srb->sg_count = 0; srb->total_xfer_length = 0; srb->sg_bus_addr = 0; srb->sg_index = 0; srb->adapter_status = 0; srb->target_status = 0; srb->msg_count = 0; srb->status = 0; srb->flag = 0; srb->state = 0; srb->retry_count = 0; srb->tag_number = TAG_NONE; srb->scsi_phase = PH_BUS_FREE; /* initial phase */ srb->end_message = 0; nseg = scsi_dma_map(cmd); BUG_ON(nseg < 0); if (dir == PCI_DMA_NONE || !nseg) { dprintkdbg(DBG_0, "build_srb: [0] len=%d buf=%p use_sg=%d !MAP=%08x\n", cmd->bufflen, scsi_sglist(cmd), scsi_sg_count(cmd), srb->segment_x[0].address); } else { int i; u32 reqlen = scsi_bufflen(cmd); struct scatterlist *sg; struct SGentry *sgp = srb->segment_x; srb->sg_count = nseg; dprintkdbg(DBG_0, "build_srb: [n] len=%d buf=%p use_sg=%d segs=%d\n", reqlen, scsi_sglist(cmd), scsi_sg_count(cmd), srb->sg_count); scsi_for_each_sg(cmd, sg, 
srb->sg_count, i) { u32 busaddr = (u32)sg_dma_address(sg); u32 seglen = (u32)sg->length; sgp[i].address = busaddr; sgp[i].length = seglen; srb->total_xfer_length += seglen; } sgp += srb->sg_count - 1; /* * adjust last page if too big as it is allocated * on even page boundaries */ if (srb->total_xfer_length > reqlen) { sgp->length -= (srb->total_xfer_length - reqlen); srb->total_xfer_length = reqlen; } /* Fixup for WIDE padding - make sure length is even */ if (dcb->sync_period & WIDE_SYNC && srb->total_xfer_length % 2) { srb->total_xfer_length++; sgp->length++; } srb->sg_bus_addr = pci_map_single(dcb->acb->dev, srb->segment_x, SEGMENTX_LEN, PCI_DMA_TODEVICE); dprintkdbg(DBG_SG, "build_srb: [n] map sg %p->%08x(%05x)\n", srb->segment_x, srb->sg_bus_addr, SEGMENTX_LEN); } srb->request_length = srb->total_xfer_length; } /** * dc395x_queue_command - queue scsi command passed from the mid * layer, invoke 'done' on completion * * @cmd: pointer to scsi command object * @done: function pointer to be invoked on completion * * Returns 1 if the adapter (host) is busy, else returns 0. One * reason for an adapter to be busy is that the number * of outstanding queued commands is already equal to * struct Scsi_Host::can_queue . * * Required: if struct Scsi_Host::can_queue is ever non-zero * then this function is required. * * Locks: struct Scsi_Host::host_lock held on entry (with "irqsave") * and is expected to be held on return. 
* **/ static int dc395x_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { struct DeviceCtlBlk *dcb; struct ScsiReqBlk *srb; struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)cmd->device->host->hostdata; dprintkdbg(DBG_0, "queue_command: (0x%p) <%02i-%i> cmnd=0x%02x\n", cmd, cmd->device->id, cmd->device->lun, cmd->cmnd[0]); /* Assume BAD_TARGET; will be cleared later */ cmd->result = DID_BAD_TARGET << 16; /* ignore invalid targets */ if (cmd->device->id >= acb->scsi_host->max_id || cmd->device->lun >= acb->scsi_host->max_lun || cmd->device->lun >31) { goto complete; } /* does the specified lun on the specified device exist */ if (!(acb->dcb_map[cmd->device->id] & (1 << cmd->device->lun))) { dprintkl(KERN_INFO, "queue_command: Ignore target <%02i-%i>\n", cmd->device->id, cmd->device->lun); goto complete; } /* do we have a DCB for the device */ dcb = find_dcb(acb, cmd->device->id, cmd->device->lun); if (!dcb) { /* should never happen */ dprintkl(KERN_ERR, "queue_command: No such device <%02i-%i>", cmd->device->id, cmd->device->lun); goto complete; } /* set callback and clear result in the command */ cmd->scsi_done = done; cmd->result = 0; srb = srb_get_free(acb); if (!srb) { /* * Return 1 since we are unable to queue this command at this * point in time. */ dprintkdbg(DBG_0, "queue_command: No free srb's\n"); return 1; } build_srb(cmd, dcb, srb); if (!list_empty(&dcb->srb_waiting_list)) { /* append to waiting queue */ srb_waiting_append(dcb, srb); waiting_process_next(acb); } else { /* process immediately */ send_srb(acb, srb); } dprintkdbg(DBG_1, "queue_command: (0x%p) done\n", cmd); return 0; complete: /* * Complete the command immediatey, and then return 0 to * indicate that we have handled the command. This is usually * done when the commad is for things like non existent * devices. */ done(cmd); return 0; } static DEF_SCSI_QCMD(dc395x_queue_command) /* * Return the disk geometry for the given SCSI device. 
*/ static int dc395x_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int *info) { #ifdef CONFIG_SCSI_DC395x_TRMS1040_TRADMAP int heads, sectors, cylinders; struct AdapterCtlBlk *acb; int size = capacity; dprintkdbg(DBG_0, "dc395x_bios_param..............\n"); acb = (struct AdapterCtlBlk *)sdev->host->hostdata; heads = 64; sectors = 32; cylinders = size / (heads * sectors); if ((acb->gmode2 & NAC_GREATER_1G) && (cylinders > 1024)) { heads = 255; sectors = 63; cylinders = size / (heads * sectors); } geom[0] = heads; geom[1] = sectors; geom[2] = cylinders; return 0; #else return scsicam_bios_param(bdev, capacity, info); #endif } static void dump_register_info(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { u16 pstat; struct pci_dev *dev = acb->dev; pci_read_config_word(dev, PCI_STATUS, &pstat); if (!dcb) dcb = acb->active_dcb; if (!srb && dcb) srb = dcb->active_srb; if (srb) { if (!srb->cmd) dprintkl(KERN_INFO, "dump: srb=%p cmd=%p OOOPS!\n", srb, srb->cmd); else dprintkl(KERN_INFO, "dump: srb=%p cmd=%p " "cmnd=0x%02x <%02i-%i>\n", srb, srb->cmd, srb->cmd->cmnd[0], srb->cmd->device->id, srb->cmd->device->lun); printk(" sglist=%p cnt=%i idx=%i len=%zu\n", srb->segment_x, srb->sg_count, srb->sg_index, srb->total_xfer_length); printk(" state=0x%04x status=0x%02x phase=0x%02x (%sconn.)\n", srb->state, srb->status, srb->scsi_phase, (acb->active_dcb) ? 
"" : "not"); } dprintkl(KERN_INFO, "dump: SCSI{status=0x%04x fifocnt=0x%02x " "signals=0x%02x irqstat=0x%02x sync=0x%02x target=0x%02x " "rselid=0x%02x ctr=0x%08x irqen=0x%02x config=0x%04x " "config2=0x%02x cmd=0x%02x selto=0x%02x}\n", DC395x_read16(acb, TRM_S1040_SCSI_STATUS), DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT), DC395x_read8(acb, TRM_S1040_SCSI_SIGNAL), DC395x_read8(acb, TRM_S1040_SCSI_INTSTATUS), DC395x_read8(acb, TRM_S1040_SCSI_SYNC), DC395x_read8(acb, TRM_S1040_SCSI_TARGETID), DC395x_read8(acb, TRM_S1040_SCSI_IDMSG), DC395x_read32(acb, TRM_S1040_SCSI_COUNTER), DC395x_read8(acb, TRM_S1040_SCSI_INTEN), DC395x_read16(acb, TRM_S1040_SCSI_CONFIG0), DC395x_read8(acb, TRM_S1040_SCSI_CONFIG2), DC395x_read8(acb, TRM_S1040_SCSI_COMMAND), DC395x_read8(acb, TRM_S1040_SCSI_TIMEOUT)); dprintkl(KERN_INFO, "dump: DMA{cmd=0x%04x fifocnt=0x%02x fstat=0x%02x " "irqstat=0x%02x irqen=0x%02x cfg=0x%04x tctr=0x%08x " "ctctr=0x%08x addr=0x%08x:0x%08x}\n", DC395x_read16(acb, TRM_S1040_DMA_COMMAND), DC395x_read8(acb, TRM_S1040_DMA_FIFOCNT), DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT), DC395x_read8(acb, TRM_S1040_DMA_STATUS), DC395x_read8(acb, TRM_S1040_DMA_INTEN), DC395x_read16(acb, TRM_S1040_DMA_CONFIG), DC395x_read32(acb, TRM_S1040_DMA_XCNT), DC395x_read32(acb, TRM_S1040_DMA_CXCNT), DC395x_read32(acb, TRM_S1040_DMA_XHIGHADDR), DC395x_read32(acb, TRM_S1040_DMA_XLOWADDR)); dprintkl(KERN_INFO, "dump: gen{gctrl=0x%02x gstat=0x%02x gtmr=0x%02x} " "pci{status=0x%04x}\n", DC395x_read8(acb, TRM_S1040_GEN_CONTROL), DC395x_read8(acb, TRM_S1040_GEN_STATUS), DC395x_read8(acb, TRM_S1040_GEN_TIMER), pstat); } static inline void clear_fifo(struct AdapterCtlBlk *acb, char *txt) { #if debug_enabled(DBG_FIFO) u8 lines = DC395x_read8(acb, TRM_S1040_SCSI_SIGNAL); u8 fifocnt = DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT); if (!(fifocnt & 0x40)) dprintkdbg(DBG_FIFO, "clear_fifo: (%i bytes) on phase %02x in %s\n", fifocnt & 0x3f, lines, txt); #endif DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, 
DO_CLRFIFO); } static void reset_dev_param(struct AdapterCtlBlk *acb) { struct DeviceCtlBlk *dcb; struct NvRamType *eeprom = &acb->eeprom; dprintkdbg(DBG_0, "reset_dev_param: acb=%p\n", acb); list_for_each_entry(dcb, &acb->dcb_list, list) { u8 period_index; dcb->sync_mode &= ~(SYNC_NEGO_DONE + WIDE_NEGO_DONE); dcb->sync_period = 0; dcb->sync_offset = 0; dcb->dev_mode = eeprom->target[dcb->target_id].cfg0; period_index = eeprom->target[dcb->target_id].period & 0x07; dcb->min_nego_period = clock_period[period_index]; if (!(dcb->dev_mode & NTC_DO_WIDE_NEGO) || !(acb->config & HCC_WIDE_CARD)) dcb->sync_mode &= ~WIDE_NEGO_ENABLE; } } /* * perform a hard reset on the SCSI bus * @cmd - some command for this host (for fetching hooks) * Returns: SUCCESS (0x2002) on success, else FAILED (0x2003). */ static int __dc395x_eh_bus_reset(struct scsi_cmnd *cmd) { struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)cmd->device->host->hostdata; dprintkl(KERN_INFO, "eh_bus_reset: (0%p) target=<%02i-%i> cmd=%p\n", cmd, cmd->device->id, cmd->device->lun, cmd); if (timer_pending(&acb->waiting_timer)) del_timer(&acb->waiting_timer); /* * disable interrupt */ DC395x_write8(acb, TRM_S1040_DMA_INTEN, 0x00); DC395x_write8(acb, TRM_S1040_SCSI_INTEN, 0x00); DC395x_write8(acb, TRM_S1040_SCSI_CONTROL, DO_RSTMODULE); DC395x_write8(acb, TRM_S1040_DMA_CONTROL, DMARESETMODULE); reset_scsi_bus(acb); udelay(500); /* We may be in serious trouble. 
Wait some seconds */ acb->scsi_host->last_reset = jiffies + 3 * HZ / 2 + HZ * acb->eeprom.delay_time; /* * re-enable interrupt */ /* Clear SCSI FIFO */ DC395x_write8(acb, TRM_S1040_DMA_CONTROL, CLRXFIFO); clear_fifo(acb, "eh_bus_reset"); /* Delete pending IRQ */ DC395x_read8(acb, TRM_S1040_SCSI_INTSTATUS); set_basic_config(acb); reset_dev_param(acb); doing_srb_done(acb, DID_RESET, cmd, 0); acb->active_dcb = NULL; acb->acb_flag = 0; /* RESET_DETECT, RESET_DONE ,RESET_DEV */ waiting_process_next(acb); return SUCCESS; } static int dc395x_eh_bus_reset(struct scsi_cmnd *cmd) { int rc; spin_lock_irq(cmd->device->host->host_lock); rc = __dc395x_eh_bus_reset(cmd); spin_unlock_irq(cmd->device->host->host_lock); return rc; } /* * abort an errant SCSI command * @cmd - command to be aborted * Returns: SUCCESS (0x2002) on success, else FAILED (0x2003). */ static int dc395x_eh_abort(struct scsi_cmnd *cmd) { /* * Look into our command queues: If it has not been sent already, * we remove it and return success. Otherwise fail. 
*/ struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)cmd->device->host->hostdata; struct DeviceCtlBlk *dcb; struct ScsiReqBlk *srb; dprintkl(KERN_INFO, "eh_abort: (0x%p) target=<%02i-%i> cmd=%p\n", cmd, cmd->device->id, cmd->device->lun, cmd); dcb = find_dcb(acb, cmd->device->id, cmd->device->lun); if (!dcb) { dprintkl(KERN_DEBUG, "eh_abort: No such device\n"); return FAILED; } srb = find_cmd(cmd, &dcb->srb_waiting_list); if (srb) { srb_waiting_remove(dcb, srb); pci_unmap_srb_sense(acb, srb); pci_unmap_srb(acb, srb); free_tag(dcb, srb); srb_free_insert(acb, srb); dprintkl(KERN_DEBUG, "eh_abort: Command was waiting\n"); cmd->result = DID_ABORT << 16; return SUCCESS; } srb = find_cmd(cmd, &dcb->srb_going_list); if (srb) { dprintkl(KERN_DEBUG, "eh_abort: Command in progress\n"); /* XXX: Should abort the command here */ } else { dprintkl(KERN_DEBUG, "eh_abort: Command not found\n"); } return FAILED; } /* SDTR */ static void build_sdtr(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { u8 *ptr = srb->msgout_buf + srb->msg_count; if (srb->msg_count > 1) { dprintkl(KERN_INFO, "build_sdtr: msgout_buf BUSY (%i: %02x %02x)\n", srb->msg_count, srb->msgout_buf[0], srb->msgout_buf[1]); return; } if (!(dcb->dev_mode & NTC_DO_SYNC_NEGO)) { dcb->sync_offset = 0; dcb->min_nego_period = 200 >> 2; } else if (dcb->sync_offset == 0) dcb->sync_offset = SYNC_NEGO_OFFSET; *ptr++ = MSG_EXTENDED; /* (01h) */ *ptr++ = 3; /* length */ *ptr++ = EXTENDED_SDTR; /* (01h) */ *ptr++ = dcb->min_nego_period; /* Transfer period (in 4ns) */ *ptr++ = dcb->sync_offset; /* Transfer period (max. REQ/ACK dist) */ srb->msg_count += 5; srb->state |= SRB_DO_SYNC_NEGO; } /* WDTR */ static void build_wdtr(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { u8 wide = ((dcb->dev_mode & NTC_DO_WIDE_NEGO) & (acb->config & HCC_WIDE_CARD)) ? 
1 : 0; u8 *ptr = srb->msgout_buf + srb->msg_count; if (srb->msg_count > 1) { dprintkl(KERN_INFO, "build_wdtr: msgout_buf BUSY (%i: %02x %02x)\n", srb->msg_count, srb->msgout_buf[0], srb->msgout_buf[1]); return; } *ptr++ = MSG_EXTENDED; /* (01h) */ *ptr++ = 2; /* length */ *ptr++ = EXTENDED_WDTR; /* (03h) */ *ptr++ = wide; srb->msg_count += 4; srb->state |= SRB_DO_WIDE_NEGO; } #if 0 /* Timer to work around chip flaw: When selecting and the bus is * busy, we sometimes miss a Selection timeout IRQ */ void selection_timeout_missed(unsigned long ptr); /* Sets the timer to wake us up */ static void selto_timer(struct AdapterCtlBlk *acb) { if (timer_pending(&acb->selto_timer)) return; acb->selto_timer.function = selection_timeout_missed; acb->selto_timer.data = (unsigned long) acb; if (time_before (jiffies + HZ, acb->scsi_host->last_reset + HZ / 2)) acb->selto_timer.expires = acb->scsi_host->last_reset + HZ / 2 + 1; else acb->selto_timer.expires = jiffies + HZ + 1; add_timer(&acb->selto_timer); } void selection_timeout_missed(unsigned long ptr) { unsigned long flags; struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)ptr; struct ScsiReqBlk *srb; dprintkl(KERN_DEBUG, "Chip forgot to produce SelTO IRQ!\n"); if (!acb->active_dcb || !acb->active_dcb->active_srb) { dprintkl(KERN_DEBUG, "... but no cmd pending? 
Oops!\n"); return; } DC395x_LOCK_IO(acb->scsi_host, flags); srb = acb->active_dcb->active_srb; disconnect(acb); DC395x_UNLOCK_IO(acb->scsi_host, flags); } #endif static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb, struct ScsiReqBlk* srb) { u16 s_stat2, return_code; u8 s_stat, scsicommand, i, identify_message; u8 *ptr; dprintkdbg(DBG_0, "start_scsi: (0x%p) <%02i-%i> srb=%p\n", dcb->target_id, dcb->target_lun, srb); srb->tag_number = TAG_NONE; /* acb->tag_max_num: had error read in eeprom */ s_stat = DC395x_read8(acb, TRM_S1040_SCSI_SIGNAL); s_stat2 = 0; s_stat2 = DC395x_read16(acb, TRM_S1040_SCSI_STATUS); #if 1 if (s_stat & 0x20 /* s_stat2 & 0x02000 */ ) { dprintkdbg(DBG_KG, "start_scsi: (0x%p) BUSY %02x %04x\n", s_stat, s_stat2); /* * Try anyway? * * We could, BUT: Sometimes the TRM_S1040 misses to produce a Selection * Timeout, a Disconnect or a Reselection IRQ, so we would be screwed! * (This is likely to be a bug in the hardware. Obviously, most people * only have one initiator per SCSI bus.) * Instead let this fail and have the timer make sure the command is * tried again after a short time */ /*selto_timer (acb); */ return 1; } #endif if (acb->active_dcb) { dprintkl(KERN_DEBUG, "start_scsi: (0x%p) Attempt to start a" "command while another command (0x%p) is active.", srb->cmd, acb->active_dcb->active_srb ? 
acb->active_dcb->active_srb->cmd : 0); return 1; } if (DC395x_read16(acb, TRM_S1040_SCSI_STATUS) & SCSIINTERRUPT) { dprintkdbg(DBG_KG, "start_scsi: (0x%p) Failed (busy)\n", srb->cmd); return 1; } /* Allow starting of SCSI commands half a second before we allow the mid-level * to queue them again after a reset */ if (time_before(jiffies, acb->scsi_host->last_reset - HZ / 2)) { dprintkdbg(DBG_KG, "start_scsi: Refuse cmds (reset wait)\n"); return 1; } /* Flush FIFO */ clear_fifo(acb, "start_scsi"); DC395x_write8(acb, TRM_S1040_SCSI_HOSTID, acb->scsi_host->this_id); DC395x_write8(acb, TRM_S1040_SCSI_TARGETID, dcb->target_id); DC395x_write8(acb, TRM_S1040_SCSI_SYNC, dcb->sync_period); DC395x_write8(acb, TRM_S1040_SCSI_OFFSET, dcb->sync_offset); srb->scsi_phase = PH_BUS_FREE; /* initial phase */ identify_message = dcb->identify_msg; /*DC395x_TRM_write8(TRM_S1040_SCSI_IDMSG, identify_message); */ /* Don't allow disconnection for AUTO_REQSENSE: Cont.All.Cond.! */ if (srb->flag & AUTO_REQSENSE) identify_message &= 0xBF; if (((srb->cmd->cmnd[0] == INQUIRY) || (srb->cmd->cmnd[0] == REQUEST_SENSE) || (srb->flag & AUTO_REQSENSE)) && (((dcb->sync_mode & WIDE_NEGO_ENABLE) && !(dcb->sync_mode & WIDE_NEGO_DONE)) || ((dcb->sync_mode & SYNC_NEGO_ENABLE) && !(dcb->sync_mode & SYNC_NEGO_DONE))) && (dcb->target_lun == 0)) { srb->msgout_buf[0] = identify_message; srb->msg_count = 1; scsicommand = SCMD_SEL_ATNSTOP; srb->state = SRB_MSGOUT; #ifndef SYNC_FIRST if (dcb->sync_mode & WIDE_NEGO_ENABLE && dcb->inquiry7 & SCSI_INQ_WBUS16) { build_wdtr(acb, dcb, srb); goto no_cmd; } #endif if (dcb->sync_mode & SYNC_NEGO_ENABLE && dcb->inquiry7 & SCSI_INQ_SYNC) { build_sdtr(acb, dcb, srb); goto no_cmd; } if (dcb->sync_mode & WIDE_NEGO_ENABLE && dcb->inquiry7 & SCSI_INQ_WBUS16) { build_wdtr(acb, dcb, srb); goto no_cmd; } srb->msg_count = 0; } /* Send identify message */ DC395x_write8(acb, TRM_S1040_SCSI_FIFO, identify_message); scsicommand = SCMD_SEL_ATN; srb->state = SRB_START_; #ifndef 
DC395x_NO_TAGQ if ((dcb->sync_mode & EN_TAG_QUEUEING) && (identify_message & 0xC0)) { /* Send Tag message */ u32 tag_mask = 1; u8 tag_number = 0; while (tag_mask & dcb->tag_mask && tag_number < dcb->max_command) { tag_mask = tag_mask << 1; tag_number++; } if (tag_number >= dcb->max_command) { dprintkl(KERN_WARNING, "start_scsi: (0x%p) " "Out of tags target=<%02i-%i>)\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); srb->state = SRB_READY; DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_HWRESELECT); return 1; } /* Send Tag id */ DC395x_write8(acb, TRM_S1040_SCSI_FIFO, MSG_SIMPLE_QTAG); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, tag_number); dcb->tag_mask |= tag_mask; srb->tag_number = tag_number; scsicommand = SCMD_SEL_ATN3; srb->state = SRB_START_; } #endif /*polling:*/ /* Send CDB ..command block ......... */ dprintkdbg(DBG_KG, "start_scsi: (0x%p) <%02i-%i> cmnd=0x%02x tag=%i\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun, srb->cmd->cmnd[0], srb->tag_number); if (srb->flag & AUTO_REQSENSE) { DC395x_write8(acb, TRM_S1040_SCSI_FIFO, REQUEST_SENSE); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, (dcb->target_lun << 5)); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, SCSI_SENSE_BUFFERSIZE); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); } else { ptr = (u8 *)srb->cmd->cmnd; for (i = 0; i < srb->cmd->cmd_len; i++) DC395x_write8(acb, TRM_S1040_SCSI_FIFO, *ptr++); } no_cmd: DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_HWRESELECT | DO_DATALATCH); if (DC395x_read16(acb, TRM_S1040_SCSI_STATUS) & SCSIINTERRUPT) { /* * If start_scsi return 1: * we caught an interrupt (must be reset or reselection ... ) * : Let's process it first! 
*/ dprintkdbg(DBG_0, "start_scsi: (0x%p) <%02i-%i> Failed - busy\n", srb->cmd, dcb->target_id, dcb->target_lun); srb->state = SRB_READY; free_tag(dcb, srb); srb->msg_count = 0; return_code = 1; /* This IRQ should NOT get lost, as we did not acknowledge it */ } else { /* * If start_scsi returns 0: * we know that the SCSI processor is free */ srb->scsi_phase = PH_BUS_FREE; /* initial phase */ dcb->active_srb = srb; acb->active_dcb = dcb; return_code = 0; /* it's important for atn stop */ DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH | DO_HWRESELECT); /* SCSI command */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, scsicommand); } return return_code; } #define DC395x_ENABLE_MSGOUT \ DC395x_write16 (acb, TRM_S1040_SCSI_CONTROL, DO_SETATN); \ srb->state |= SRB_MSGOUT /* abort command */ static inline void enable_msgout_abort(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { srb->msgout_buf[0] = ABORT; srb->msg_count = 1; DC395x_ENABLE_MSGOUT; srb->state &= ~SRB_MSGIN; srb->state |= SRB_MSGOUT; } /** * dc395x_handle_interrupt - Handle an interrupt that has been confirmed to * have been triggered for this card. * * @acb: a pointer to the adpter control block * @scsi_status: the status return when we checked the card **/ static void dc395x_handle_interrupt(struct AdapterCtlBlk *acb, u16 scsi_status) { struct DeviceCtlBlk *dcb; struct ScsiReqBlk *srb; u16 phase; u8 scsi_intstatus; unsigned long flags; void (*dc395x_statev)(struct AdapterCtlBlk *, struct ScsiReqBlk *, u16 *); DC395x_LOCK_IO(acb->scsi_host, flags); /* This acknowledges the IRQ */ scsi_intstatus = DC395x_read8(acb, TRM_S1040_SCSI_INTSTATUS); if ((scsi_status & 0x2007) == 0x2002) dprintkl(KERN_DEBUG, "COP after COP completed? 
%04x\n", scsi_status); if (debug_enabled(DBG_KG)) { if (scsi_intstatus & INT_SELTIMEOUT) dprintkdbg(DBG_KG, "handle_interrupt: Selection timeout\n"); } /*dprintkl(KERN_DEBUG, "handle_interrupt: intstatus = 0x%02x ", scsi_intstatus); */ if (timer_pending(&acb->selto_timer)) del_timer(&acb->selto_timer); if (scsi_intstatus & (INT_SELTIMEOUT | INT_DISCONNECT)) { disconnect(acb); /* bus free interrupt */ goto out_unlock; } if (scsi_intstatus & INT_RESELECTED) { reselect(acb); goto out_unlock; } if (scsi_intstatus & INT_SELECT) { dprintkl(KERN_INFO, "Host does not support target mode!\n"); goto out_unlock; } if (scsi_intstatus & INT_SCSIRESET) { scsi_reset_detect(acb); goto out_unlock; } if (scsi_intstatus & (INT_BUSSERVICE | INT_CMDDONE)) { dcb = acb->active_dcb; if (!dcb) { dprintkl(KERN_DEBUG, "Oops: BusService (%04x %02x) w/o ActiveDCB!\n", scsi_status, scsi_intstatus); goto out_unlock; } srb = dcb->active_srb; if (dcb->flag & ABORT_DEV_) { dprintkdbg(DBG_0, "MsgOut Abort Device.....\n"); enable_msgout_abort(acb, srb); } /* software sequential machine */ phase = (u16)srb->scsi_phase; /* * 62037 or 62137 * call dc395x_scsi_phase0[]... "phase entry" * handle every phase before start transfer */ /* data_out_phase0, phase:0 */ /* data_in_phase0, phase:1 */ /* command_phase0, phase:2 */ /* status_phase0, phase:3 */ /* nop0, phase:4 PH_BUS_FREE .. initial phase */ /* nop0, phase:5 PH_BUS_FREE .. initial phase */ /* msgout_phase0, phase:6 */ /* msgin_phase0, phase:7 */ dc395x_statev = dc395x_scsi_phase0[phase]; dc395x_statev(acb, srb, &scsi_status); /* * if there were any exception occurred scsi_status * will be modify to bus free phase new scsi_status * transfer out from ... previous dc395x_statev */ srb->scsi_phase = scsi_status & PHASEMASK; phase = (u16)scsi_status & PHASEMASK; /* * call dc395x_scsi_phase1[]... 
"phase entry" handle * every phase to do transfer */ /* data_out_phase1, phase:0 */ /* data_in_phase1, phase:1 */ /* command_phase1, phase:2 */ /* status_phase1, phase:3 */ /* nop1, phase:4 PH_BUS_FREE .. initial phase */ /* nop1, phase:5 PH_BUS_FREE .. initial phase */ /* msgout_phase1, phase:6 */ /* msgin_phase1, phase:7 */ dc395x_statev = dc395x_scsi_phase1[phase]; dc395x_statev(acb, srb, &scsi_status); } out_unlock: DC395x_UNLOCK_IO(acb->scsi_host, flags); } static irqreturn_t dc395x_interrupt(int irq, void *dev_id) { struct AdapterCtlBlk *acb = dev_id; u16 scsi_status; u8 dma_status; irqreturn_t handled = IRQ_NONE; /* * Check for pending interrupt */ scsi_status = DC395x_read16(acb, TRM_S1040_SCSI_STATUS); dma_status = DC395x_read8(acb, TRM_S1040_DMA_STATUS); if (scsi_status & SCSIINTERRUPT) { /* interrupt pending - let's process it! */ dc395x_handle_interrupt(acb, scsi_status); handled = IRQ_HANDLED; } else if (dma_status & 0x20) { /* Error from the DMA engine */ dprintkl(KERN_INFO, "Interrupt from DMA engine: 0x%02x!\n", dma_status); #if 0 dprintkl(KERN_INFO, "This means DMA error! Try to handle ...\n"); if (acb->active_dcb) { acb->active_dcb-> flag |= ABORT_DEV_; if (acb->active_dcb->active_srb) enable_msgout_abort(acb, acb->active_dcb->active_srb); } DC395x_write8(acb, TRM_S1040_DMA_CONTROL, ABORTXFER | CLRXFIFO); #else dprintkl(KERN_INFO, "Ignoring DMA error (probably a bad thing) ...\n"); acb = NULL; #endif handled = IRQ_HANDLED; } return handled; } static void msgout_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { dprintkdbg(DBG_0, "msgout_phase0: (0x%p)\n", srb->cmd); if (srb->state & (SRB_UNEXPECT_RESEL + SRB_ABORT_SENT)) *pscsi_status = PH_BUS_FREE; /*.. 
initial phase */ DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* it's important for atn stop */ srb->state &= ~SRB_MSGOUT; } static void msgout_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { u16 i; u8 *ptr; dprintkdbg(DBG_0, "msgout_phase1: (0x%p)\n", srb->cmd); clear_fifo(acb, "msgout_phase1"); if (!(srb->state & SRB_MSGOUT)) { srb->state |= SRB_MSGOUT; dprintkl(KERN_DEBUG, "msgout_phase1: (0x%p) Phase unexpected\n", srb->cmd); /* So what ? */ } if (!srb->msg_count) { dprintkdbg(DBG_0, "msgout_phase1: (0x%p) NOP msg\n", srb->cmd); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, MSG_NOP); DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* it's important for atn stop */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_OUT); return; } ptr = (u8 *)srb->msgout_buf; for (i = 0; i < srb->msg_count; i++) DC395x_write8(acb, TRM_S1040_SCSI_FIFO, *ptr++); srb->msg_count = 0; if (srb->msgout_buf[0] == MSG_ABORT) srb->state = SRB_ABORT_SENT; DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_OUT); } static void command_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { dprintkdbg(DBG_0, "command_phase0: (0x%p)\n", srb->cmd); DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); } static void command_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { struct DeviceCtlBlk *dcb; u8 *ptr; u16 i; dprintkdbg(DBG_0, "command_phase1: (0x%p)\n", srb->cmd); clear_fifo(acb, "command_phase1"); DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_CLRATN); if (!(srb->flag & AUTO_REQSENSE)) { ptr = (u8 *)srb->cmd->cmnd; for (i = 0; i < srb->cmd->cmd_len; i++) { DC395x_write8(acb, TRM_S1040_SCSI_FIFO, *ptr); ptr++; } } else { DC395x_write8(acb, TRM_S1040_SCSI_FIFO, REQUEST_SENSE); dcb = acb->active_dcb; /* target id */ DC395x_write8(acb, TRM_S1040_SCSI_FIFO, (dcb->target_lun << 5)); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); 
DC395x_write8(acb, TRM_S1040_SCSI_FIFO, SCSI_SENSE_BUFFERSIZE); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); } srb->state |= SRB_COMMAND; /* it's important for atn stop */ DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* SCSI command */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_OUT); } /* * Verify that the remaining space in the hw sg lists is the same as * the count of remaining bytes in srb->total_xfer_length */ static void sg_verify_length(struct ScsiReqBlk *srb) { if (debug_enabled(DBG_SG)) { unsigned len = 0; unsigned idx = srb->sg_index; struct SGentry *psge = srb->segment_x + idx; for (; idx < srb->sg_count; psge++, idx++) len += psge->length; if (len != srb->total_xfer_length) dprintkdbg(DBG_SG, "Inconsistent SRB S/G lengths (Tot=%i, Count=%i) !!\n", srb->total_xfer_length, len); } } /* * Compute the next Scatter Gather list index and adjust its length * and address if necessary */ static void sg_update_list(struct ScsiReqBlk *srb, u32 left) { u8 idx; u32 xferred = srb->total_xfer_length - left; /* bytes transferred */ struct SGentry *psge = srb->segment_x + srb->sg_index; dprintkdbg(DBG_0, "sg_update_list: Transferred %i of %i bytes, %i remain\n", xferred, srb->total_xfer_length, left); if (xferred == 0) { /* nothing to update since we did not transfer any data */ return; } sg_verify_length(srb); srb->total_xfer_length = left; /* update remaining count */ for (idx = srb->sg_index; idx < srb->sg_count; idx++) { if (xferred >= psge->length) { /* Complete SG entries done */ xferred -= psge->length; } else { /* Partial SG entry done */ psge->length -= xferred; psge->address += xferred; srb->sg_index = idx; pci_dma_sync_single_for_device(srb->dcb-> acb->dev, srb->sg_bus_addr, SEGMENTX_LEN, PCI_DMA_TODEVICE); break; } psge++; } sg_verify_length(srb); } /* * We have transferred a single byte (PIO mode?) 
and need to update * the count of bytes remaining (total_xfer_length) and update the sg * entry to either point to next byte in the current sg entry, or of * already at the end to point to the start of the next sg entry */ static void sg_subtract_one(struct ScsiReqBlk *srb) { sg_update_list(srb, srb->total_xfer_length - 1); } /* * cleanup_after_transfer * * Makes sure, DMA and SCSI engine are empty, after the transfer has finished * KG: Currently called from StatusPhase1 () * Should probably also be called from other places * Best might be to call it in DataXXPhase0, if new phase will differ */ static void cleanup_after_transfer(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { /*DC395x_write8 (TRM_S1040_DMA_STATUS, FORCEDMACOMP); */ if (DC395x_read16(acb, TRM_S1040_DMA_COMMAND) & 0x0001) { /* read */ if (!(DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x40)) clear_fifo(acb, "cleanup/in"); if (!(DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT) & 0x80)) DC395x_write8(acb, TRM_S1040_DMA_CONTROL, CLRXFIFO); } else { /* write */ if (!(DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT) & 0x80)) DC395x_write8(acb, TRM_S1040_DMA_CONTROL, CLRXFIFO); if (!(DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x40)) clear_fifo(acb, "cleanup/out"); } DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); } /* * Those no of bytes will be transferred w/ PIO through the SCSI FIFO * Seems to be needed for unknown reasons; could be a hardware bug :-( */ #define DC395x_LASTPIO 4 static void data_out_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { struct DeviceCtlBlk *dcb = srb->dcb; u16 scsi_status = *pscsi_status; u32 d_left_counter = 0; dprintkdbg(DBG_0, "data_out_phase0: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); /* * KG: We need to drain the buffers before we draw any conclusions! * This means telling the DMA to push the rest into SCSI, telling * SCSI to push the rest to the bus. 
* However, the device might have been the one to stop us (phase * change), and the data in transit just needs to be accounted so * it can be retransmitted.) */ /* * KG: Stop DMA engine pushing more data into the SCSI FIFO * If we need more data, the DMA SG list will be freshly set up, anyway */ dprintkdbg(DBG_PIO, "data_out_phase0: " "DMA{fifocnt=0x%02x fifostat=0x%02x} " "SCSI{fifocnt=0x%02x cnt=0x%06x status=0x%04x} total=0x%06x\n", DC395x_read8(acb, TRM_S1040_DMA_FIFOCNT), DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT), DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT), DC395x_read32(acb, TRM_S1040_SCSI_COUNTER), scsi_status, srb->total_xfer_length); DC395x_write8(acb, TRM_S1040_DMA_CONTROL, STOPDMAXFER | CLRXFIFO); if (!(srb->state & SRB_XFERPAD)) { if (scsi_status & PARITYERROR) srb->status |= PARITY_ERROR; /* * KG: Right, we can't just rely on the SCSI_COUNTER, because this * is the no of bytes it got from the DMA engine not the no it * transferred successfully to the device. (And the difference could * be as much as the FIFO size, I guess ...) */ if (!(scsi_status & SCSIXFERDONE)) { /* * when data transfer from DMA FIFO to SCSI FIFO * if there was some data left in SCSI FIFO */ d_left_counter = (u32)(DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x1F); if (dcb->sync_period & WIDE_SYNC) d_left_counter <<= 1; dprintkdbg(DBG_KG, "data_out_phase0: FIFO contains %i %s\n" "SCSI{fifocnt=0x%02x cnt=0x%08x} " "DMA{fifocnt=0x%04x cnt=0x%02x ctr=0x%08x}\n", DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT), (dcb->sync_period & WIDE_SYNC) ? "words" : "bytes", DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT), DC395x_read32(acb, TRM_S1040_SCSI_COUNTER), DC395x_read8(acb, TRM_S1040_DMA_FIFOCNT), DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT), DC395x_read32(acb, TRM_S1040_DMA_CXCNT)); } /* * calculate all the residue data that not yet tranfered * SCSI transfer counter + left in SCSI FIFO data * * .....TRM_S1040_SCSI_COUNTER (24bits) * The counter always decrement by one for every SCSI byte transfer. 
* .....TRM_S1040_SCSI_FIFOCNT ( 5bits) * The counter is SCSI FIFO offset counter (in units of bytes or! words) */ if (srb->total_xfer_length > DC395x_LASTPIO) d_left_counter += DC395x_read32(acb, TRM_S1040_SCSI_COUNTER); /* Is this a good idea? */ /*clear_fifo(acb, "DOP1"); */ /* KG: What is this supposed to be useful for? WIDE padding stuff? */ if (d_left_counter == 1 && dcb->sync_period & WIDE_SYNC && scsi_bufflen(srb->cmd) % 2) { d_left_counter = 0; dprintkl(KERN_INFO, "data_out_phase0: Discard 1 byte (0x%02x)\n", scsi_status); } /* * KG: Oops again. Same thinko as above: The SCSI might have been * faster than the DMA engine, so that it ran out of data. * In that case, we have to do just nothing! * But: Why the interrupt: No phase change. No XFERCNT_2_ZERO. Or? */ /* * KG: This is nonsense: We have been WRITING data to the bus * If the SCSI engine has no bytes left, how should the DMA engine? */ if (d_left_counter == 0) { srb->total_xfer_length = 0; } else { /* * if transfer not yet complete * there were some data residue in SCSI FIFO or * SCSI transfer counter not empty */ long oldxferred = srb->total_xfer_length - d_left_counter; const int diff = (dcb->sync_period & WIDE_SYNC) ? 2 : 1; sg_update_list(srb, d_left_counter); /* KG: Most ugly hack! 
Apparently, this works around a chip bug */ if ((srb->segment_x[srb->sg_index].length == diff && scsi_sg_count(srb->cmd)) || ((oldxferred & ~PAGE_MASK) == (PAGE_SIZE - diff)) ) { dprintkl(KERN_INFO, "data_out_phase0: " "Work around chip bug (%i)?\n", diff); d_left_counter = srb->total_xfer_length - diff; sg_update_list(srb, d_left_counter); /*srb->total_xfer_length -= diff; */ /*srb->virt_addr += diff; */ /*if (srb->cmd->use_sg) */ /* srb->sg_index++; */ } } } if ((*pscsi_status & PHASEMASK) != PH_DATA_OUT) { cleanup_after_transfer(acb, srb); } } static void data_out_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { dprintkdbg(DBG_0, "data_out_phase1: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); clear_fifo(acb, "data_out_phase1"); /* do prepare before transfer when data out phase */ data_io_transfer(acb, srb, XFERDATAOUT); } static void data_in_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { u16 scsi_status = *pscsi_status; dprintkdbg(DBG_0, "data_in_phase0: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); /* * KG: DataIn is much more tricky than DataOut. When the device is finished * and switches to another phase, the SCSI engine should be finished too. * But: There might still be bytes left in its FIFO to be fetched by the DMA * engine and transferred to memory. * We should wait for the FIFOs to be emptied by that (is there any way to * enforce this?) and then stop the DMA engine, because it might think, that * there are more bytes to follow. Yes, the device might disconnect prior to * having all bytes transferred! * Also we should make sure that all data from the DMA engine buffer's really * made its way to the system memory! Some documentation on this would not * seem to be a bad idea, actually. 
*/ if (!(srb->state & SRB_XFERPAD)) { u32 d_left_counter; unsigned int sc, fc; if (scsi_status & PARITYERROR) { dprintkl(KERN_INFO, "data_in_phase0: (0x%p) " "Parity Error\n", srb->cmd); srb->status |= PARITY_ERROR; } /* * KG: We should wait for the DMA FIFO to be empty ... * but: it would be better to wait first for the SCSI FIFO and then the * the DMA FIFO to become empty? How do we know, that the device not already * sent data to the FIFO in a MsgIn phase, eg.? */ if (!(DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT) & 0x80)) { #if 0 int ctr = 6000000; dprintkl(KERN_DEBUG, "DIP0: Wait for DMA FIFO to flush ...\n"); /*DC395x_write8 (TRM_S1040_DMA_CONTROL, STOPDMAXFER); */ /*DC395x_write32 (TRM_S1040_SCSI_COUNTER, 7); */ /*DC395x_write8 (TRM_S1040_SCSI_COMMAND, SCMD_DMA_IN); */ while (! (DC395x_read16(acb, TRM_S1040_DMA_FIFOSTAT) & 0x80) && --ctr); if (ctr < 6000000 - 1) dprintkl(KERN_DEBUG "DIP0: Had to wait for DMA ...\n"); if (!ctr) dprintkl(KERN_ERR, "Deadlock in DIP0 waiting for DMA FIFO empty!!\n"); /*DC395x_write32 (TRM_S1040_SCSI_COUNTER, 0); */ #endif dprintkdbg(DBG_KG, "data_in_phase0: " "DMA{fifocnt=0x%02x fifostat=0x%02x}\n", DC395x_read8(acb, TRM_S1040_DMA_FIFOCNT), DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT)); } /* Now: Check remainig data: The SCSI counters should tell us ... */ sc = DC395x_read32(acb, TRM_S1040_SCSI_COUNTER); fc = DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT); d_left_counter = sc + ((fc & 0x1f) << ((srb->dcb->sync_period & WIDE_SYNC) ? 1 : 0)); dprintkdbg(DBG_KG, "data_in_phase0: " "SCSI{fifocnt=0x%02x%s ctr=0x%08x} " "DMA{fifocnt=0x%02x fifostat=0x%02x ctr=0x%08x} " "Remain{totxfer=%i scsi_fifo+ctr=%i}\n", fc, (srb->dcb->sync_period & WIDE_SYNC) ? "words" : "bytes", sc, fc, DC395x_read8(acb, TRM_S1040_DMA_FIFOSTAT), DC395x_read32(acb, TRM_S1040_DMA_CXCNT), srb->total_xfer_length, d_left_counter); #if DC395x_LASTPIO /* KG: Less than or equal to 4 bytes can not be transferred via DMA, it seems. 
*/ if (d_left_counter && srb->total_xfer_length <= DC395x_LASTPIO) { size_t left_io = srb->total_xfer_length; /*u32 addr = (srb->segment_x[srb->sg_index].address); */ /*sg_update_list (srb, d_left_counter); */ dprintkdbg(DBG_PIO, "data_in_phase0: PIO (%i %s) " "for remaining %i bytes:", fc & 0x1f, (srb->dcb->sync_period & WIDE_SYNC) ? "words" : "bytes", srb->total_xfer_length); if (srb->dcb->sync_period & WIDE_SYNC) DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, CFG2_WIDEFIFO); while (left_io) { unsigned char *virt, *base = NULL; unsigned long flags = 0; size_t len = left_io; size_t offset = srb->request_length - left_io; local_irq_save(flags); /* Assumption: it's inside one page as it's at most 4 bytes and I just assume it's on a 4-byte boundary */ base = scsi_kmap_atomic_sg(scsi_sglist(srb->cmd), srb->sg_count, &offset, &len); virt = base + offset; left_io -= len; while (len) { u8 byte; byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); *virt++ = byte; if (debug_enabled(DBG_PIO)) printk(" %02x", byte); d_left_counter--; sg_subtract_one(srb); len--; fc = DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT); if (fc == 0x40) { left_io = 0; break; } } WARN_ON((fc != 0x40) == !d_left_counter); if (fc == 0x40 && (srb->dcb->sync_period & WIDE_SYNC)) { /* Read the last byte ... */ if (srb->total_xfer_length > 0) { u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); *virt++ = byte; srb->total_xfer_length--; if (debug_enabled(DBG_PIO)) printk(" %02x", byte); } DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0); } scsi_kunmap_atomic_sg(base); local_irq_restore(flags); } /*printk(" %08x", *(u32*)(bus_to_virt (addr))); */ /*srb->total_xfer_length = 0; */ if (debug_enabled(DBG_PIO)) printk("\n"); } #endif /* DC395x_LASTPIO */ #if 0 /* * KG: This was in DATAOUT. Does it also belong here? * Nobody seems to know what counter and fifo_cnt count exactly ... 
*/ if (!(scsi_status & SCSIXFERDONE)) { /* * when data transfer from DMA FIFO to SCSI FIFO * if there was some data left in SCSI FIFO */ d_left_counter = (u32)(DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x1F); if (srb->dcb->sync_period & WIDE_SYNC) d_left_counter <<= 1; /* * if WIDE scsi SCSI FIFOCNT unit is word !!! * so need to *= 2 * KG: Seems to be correct ... */ } #endif /* KG: This should not be needed any more! */ if (d_left_counter == 0 || (scsi_status & SCSIXFERCNT_2_ZERO)) { #if 0 int ctr = 6000000; u8 TempDMAstatus; do { TempDMAstatus = DC395x_read8(acb, TRM_S1040_DMA_STATUS); } while (!(TempDMAstatus & DMAXFERCOMP) && --ctr); if (!ctr) dprintkl(KERN_ERR, "Deadlock in DataInPhase0 waiting for DMA!!\n"); srb->total_xfer_length = 0; #endif srb->total_xfer_length = d_left_counter; } else { /* phase changed */ /* * parsing the case: * when a transfer not yet complete * but be disconnected by target * if transfer not yet complete * there were some data residue in SCSI FIFO or * SCSI transfer counter not empty */ sg_update_list(srb, d_left_counter); } } /* KG: The target may decide to disconnect: Empty FIFO before! */ if ((*pscsi_status & PHASEMASK) != PH_DATA_IN) { cleanup_after_transfer(acb, srb); } } static void data_in_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { dprintkdbg(DBG_0, "data_in_phase1: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); data_io_transfer(acb, srb, XFERDATAIN); } static void data_io_transfer(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 io_dir) { struct DeviceCtlBlk *dcb = srb->dcb; u8 bval; dprintkdbg(DBG_0, "data_io_transfer: (0x%p) <%02i-%i> %c len=%i, sg=(%i/%i)\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun, ((io_dir & DMACMD_DIR) ? 'r' : 'w'), srb->total_xfer_length, srb->sg_index, srb->sg_count); if (srb == acb->tmp_srb) dprintkl(KERN_ERR, "data_io_transfer: Using tmp_srb!\n"); if (srb->sg_index >= srb->sg_count) { /* can't happen? 
out of bounds error */ return; } if (srb->total_xfer_length > DC395x_LASTPIO) { u8 dma_status = DC395x_read8(acb, TRM_S1040_DMA_STATUS); /* * KG: What should we do: Use SCSI Cmd 0x90/0x92? * Maybe, even ABORTXFER would be appropriate */ if (dma_status & XFERPENDING) { dprintkl(KERN_DEBUG, "data_io_transfer: Xfer pending! " "Expect trouble!\n"); dump_register_info(acb, dcb, srb); DC395x_write8(acb, TRM_S1040_DMA_CONTROL, CLRXFIFO); } /* clear_fifo(acb, "IO"); */ /* * load what physical address of Scatter/Gather list table * want to be transfer */ srb->state |= SRB_DATA_XFER; DC395x_write32(acb, TRM_S1040_DMA_XHIGHADDR, 0); if (scsi_sg_count(srb->cmd)) { /* with S/G */ io_dir |= DMACMD_SG; DC395x_write32(acb, TRM_S1040_DMA_XLOWADDR, srb->sg_bus_addr + sizeof(struct SGentry) * srb->sg_index); /* load how many bytes in the sg list table */ DC395x_write32(acb, TRM_S1040_DMA_XCNT, ((u32)(srb->sg_count - srb->sg_index) << 3)); } else { /* without S/G */ io_dir &= ~DMACMD_SG; DC395x_write32(acb, TRM_S1040_DMA_XLOWADDR, srb->segment_x[0].address); DC395x_write32(acb, TRM_S1040_DMA_XCNT, srb->segment_x[0].length); } /* load total transfer length (24bits) max value 16Mbyte */ DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, srb->total_xfer_length); DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* it's important for atn stop */ if (io_dir & DMACMD_DIR) { /* read */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_DMA_IN); DC395x_write16(acb, TRM_S1040_DMA_COMMAND, io_dir); } else { DC395x_write16(acb, TRM_S1040_DMA_COMMAND, io_dir); DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_DMA_OUT); } } #if DC395x_LASTPIO else if (srb->total_xfer_length > 0) { /* The last four bytes: Do PIO */ /* * load what physical address of Scatter/Gather list table * want to be transfer */ srb->state |= SRB_DATA_XFER; /* load total transfer length (24bits) max value 16Mbyte */ DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, srb->total_xfer_length); DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, 
DO_DATALATCH); /* it's important for atn stop */ if (io_dir & DMACMD_DIR) { /* read */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_IN); } else { /* write */ int ln = srb->total_xfer_length; size_t left_io = srb->total_xfer_length; if (srb->dcb->sync_period & WIDE_SYNC) DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, CFG2_WIDEFIFO); while (left_io) { unsigned char *virt, *base = NULL; unsigned long flags = 0; size_t len = left_io; size_t offset = srb->request_length - left_io; local_irq_save(flags); /* Again, max 4 bytes */ base = scsi_kmap_atomic_sg(scsi_sglist(srb->cmd), srb->sg_count, &offset, &len); virt = base + offset; left_io -= len; while (len--) { if (debug_enabled(DBG_PIO)) printk(" %02x", *virt); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, *virt++); sg_subtract_one(srb); } scsi_kunmap_atomic_sg(base); local_irq_restore(flags); } if (srb->dcb->sync_period & WIDE_SYNC) { if (ln % 2) { DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0); if (debug_enabled(DBG_PIO)) printk(" |00"); } DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0); } /*DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, ln); */ if (debug_enabled(DBG_PIO)) printk("\n"); DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_OUT); } } #endif /* DC395x_LASTPIO */ else { /* xfer pad */ u8 data = 0, data2 = 0; if (srb->sg_count) { srb->adapter_status = H_OVER_UNDER_RUN; srb->status |= OVER_RUN; } /* * KG: despite the fact that we are using 16 bits I/O ops * the SCSI FIFO is only 8 bits according to the docs * (we can set bit 1 in 0x8f to serialize FIFO access ...) 
*/ if (dcb->sync_period & WIDE_SYNC) { DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, 2); DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, CFG2_WIDEFIFO); if (io_dir & DMACMD_DIR) { data = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); data2 = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); } else { /* Danger, Robinson: If you find KGs * scattered over the wide disk, the driver * or chip is to blame :-( */ DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 'K'); DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 'G'); } DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0); } else { DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, 1); /* Danger, Robinson: If you find a collection of Ks on your disk * something broke :-( */ if (io_dir & DMACMD_DIR) data = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); else DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 'K'); } srb->state |= SRB_XFERPAD; DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* it's important for atn stop */ /* SCSI command */ bval = (io_dir & DMACMD_DIR) ? SCMD_FIFO_IN : SCMD_FIFO_OUT; DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, bval); } } static void status_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { dprintkdbg(DBG_0, "status_phase0: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); srb->target_status = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); srb->end_message = DC395x_read8(acb, TRM_S1040_SCSI_FIFO); /* get message */ srb->state = SRB_COMPLETED; *pscsi_status = PH_BUS_FREE; /*.. 
initial phase */ DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* it's important for atn stop */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_MSGACCEPT); } static void status_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb, u16 *pscsi_status) { dprintkdbg(DBG_0, "status_phase1: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); srb->state = SRB_STATUS; DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH); /* it's important for atn stop */ DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_COMP); } /* Check if the message is complete */ static inline u8 msgin_completed(u8 * msgbuf, u32 len) { if (*msgbuf == EXTENDED_MESSAGE) { if (len < 2) return 0; if (len < msgbuf[1] + 2) return 0; } else if (*msgbuf >= 0x20 && *msgbuf <= 0x2f) /* two byte messages */ if (len < 2) return 0; return 1; } /* reject_msg */ static inline void msgin_reject(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { srb->msgout_buf[0] = MESSAGE_REJECT; srb->msg_count = 1; DC395x_ENABLE_MSGOUT; srb->state &= ~SRB_MSGIN; srb->state |= SRB_MSGOUT; dprintkl(KERN_INFO, "msgin_reject: 0x%02x <%02i-%i>\n", srb->msgin_buf[0], srb->dcb->target_id, srb->dcb->target_lun); } static struct ScsiReqBlk *msgin_qtag(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, u8 tag) { struct ScsiReqBlk *srb = NULL; struct ScsiReqBlk *i; dprintkdbg(DBG_0, "msgin_qtag: (0x%p) tag=%i srb=%p\n", srb->cmd, tag, srb); if (!(dcb->tag_mask & (1 << tag))) dprintkl(KERN_DEBUG, "msgin_qtag: tag_mask=0x%08x does not reserve tag %i!\n", dcb->tag_mask, tag); if (list_empty(&dcb->srb_going_list)) goto mingx0; list_for_each_entry(i, &dcb->srb_going_list, list) { if (i->tag_number == tag) { srb = i; break; } } if (!srb) goto mingx0; dprintkdbg(DBG_0, "msgin_qtag: (0x%p) <%02i-%i>\n", srb->cmd, srb->dcb->target_id, srb->dcb->target_lun); if (dcb->flag & ABORT_DEV_) { /*srb->state = SRB_ABORT_SENT; */ enable_msgout_abort(acb, srb); } if (!(srb->state & SRB_DISCONNECT)) goto mingx0; 
memcpy(srb->msgin_buf, dcb->active_srb->msgin_buf, acb->msg_len); srb->state |= dcb->active_srb->state; srb->state |= SRB_DATA_XFER; dcb->active_srb = srb; /* How can we make the DORS happy? */ return srb; mingx0: srb = acb->tmp_srb; srb->state = SRB_UNEXPECT_RESEL; dcb->active_srb = srb; srb->msgout_buf[0] = MSG_ABORT_TAG; srb->msg_count = 1; DC395x_ENABLE_MSGOUT; dprintkl(KERN_DEBUG, "msgin_qtag: Unknown tag %i - abort\n", tag); return srb; } static inline void reprogram_regs(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb) { DC395x_write8(acb, TRM_S1040_SCSI_TARGETID, dcb->target_id); DC395x_write8(acb, TRM_S1040_SCSI_SYNC, dcb->sync_period); DC395x_write8(acb, TRM_S1040_SCSI_OFFSET, dcb->sync_offset); set_xfer_rate(acb, dcb); } /* set async transfer mode */ static void msgin_set_async(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { struct DeviceCtlBlk *dcb = srb->dcb; dprintkl(KERN_DEBUG, "msgin_set_async: No sync transfers <%02i-%i>\n", dcb->target_id, dcb->target_lun); dcb->sync_mode &= ~(SYNC_NEGO_ENABLE); dcb->sync_mode |= SYNC_NEGO_DONE; /*dcb->sync_period &= 0; */ dcb->sync_offset = 0; dcb->min_nego_period = 200 >> 2; /* 200ns <=> 5 MHz */ srb->state &= ~SRB_DO_SYNC_NEGO; reprogram_regs(acb, dcb); if ((dcb->sync_mode & WIDE_NEGO_ENABLE) && !(dcb->sync_mode & WIDE_NEGO_DONE)) { build_wdtr(acb, dcb, srb); DC395x_ENABLE_MSGOUT; dprintkdbg(DBG_0, "msgin_set_async(rej): Try WDTR anyway\n"); } } /* set sync transfer mode */ static void msgin_set_sync(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { struct DeviceCtlBlk *dcb = srb->dcb; u8 bval; int fact; dprintkdbg(DBG_1, "msgin_set_sync: <%02i> Sync: %ins " "(%02i.%01i MHz) Offset %i\n", dcb->target_id, srb->msgin_buf[3] << 2, (250 / srb->msgin_buf[3]), ((250 % srb->msgin_buf[3]) * 10) / srb->msgin_buf[3], srb->msgin_buf[4]); if (srb->msgin_buf[4] > 15) srb->msgin_buf[4] = 15; if (!(dcb->dev_mode & NTC_DO_SYNC_NEGO)) dcb->sync_offset = 0; else if (dcb->sync_offset == 0) dcb->sync_offset = 
srb->msgin_buf[4];
	/* Agree on the smaller of the target's offset and our own */
	if (srb->msgin_buf[4] > dcb->sync_offset)
		srb->msgin_buf[4] = dcb->sync_offset;
	else
		dcb->sync_offset = srb->msgin_buf[4];
	/*
	 * Walk clock_period[] until an entry is at least as large as both
	 * the requested period and our own minimum; the index stops at 7.
	 */
	bval = 0;
	while (bval < 7 && (srb->msgin_buf[3] > clock_period[bval]
			    || dcb->min_nego_period >
			    clock_period[bval]))
		bval++;
	if (srb->msgin_buf[3] < clock_period[bval])
		dprintkl(KERN_INFO,
			"msgin_set_sync: Increase sync nego period to %ins\n",
			clock_period[bval] << 2);
	srb->msgin_buf[3] = clock_period[bval];
	/* Keep the upper nibble of sync_period, store the table index below */
	dcb->sync_period &= 0xf0;
	dcb->sync_period |= ALT_SYNC | bval;
	dcb->min_nego_period = srb->msgin_buf[3];

	/* Factor used only for the MB/s figure in the log line below */
	if (dcb->sync_period & WIDE_SYNC)
		fact = 500;
	else
		fact = 250;

	dprintkl(KERN_INFO,
		"Target %02i: %s Sync: %ins Offset %i (%02i.%01i MB/s)\n",
		dcb->target_id, (fact == 500) ? "Wide16" : "",
		dcb->min_nego_period << 2, dcb->sync_offset,
		(fact / dcb->min_nego_period),
		((fact % dcb->min_nego_period) * 10 +
		dcb->min_nego_period / 2) / dcb->min_nego_period);

	if (!(srb->state & SRB_DO_SYNC_NEGO)) {
		/* Reply with corrected SDTR Message */
		dprintkl(KERN_DEBUG, "msgin_set_sync: answer w/%ins %i\n",
			srb->msgin_buf[3] << 2, srb->msgin_buf[4]);

		memcpy(srb->msgout_buf, srb->msgin_buf, 5);
		srb->msg_count = 5;
		DC395x_ENABLE_MSGOUT;
		dcb->sync_mode |= SYNC_NEGO_DONE;
	} else {
		/* SRB_DO_SYNC_NEGO was set: no reply, maybe follow up with WDTR */
		if ((dcb->sync_mode & WIDE_NEGO_ENABLE)
		    && !(dcb->sync_mode & WIDE_NEGO_DONE)) {
			build_wdtr(acb, dcb, srb);
			DC395x_ENABLE_MSGOUT;
			dprintkdbg(DBG_0, "msgin_set_sync: Also try WDTR\n");
		}
	}
	srb->state &= ~SRB_DO_SYNC_NEGO;
	dcb->sync_mode |= SYNC_NEGO_DONE | SYNC_NEGO_ENABLE;

	reprogram_regs(acb, dcb);
}


/*
 * msgin_set_nowide - fall back to narrow transfers.
 *
 * Clears WIDE_SYNC and WIDE_NEGO_ENABLE, marks wide negotiation as done
 * and reprograms the chip.  If sync negotiation is enabled but not done
 * yet, an SDTR message is built and message out is raised.
 */
static inline void msgin_set_nowide(struct AdapterCtlBlk *acb,
		struct ScsiReqBlk *srb)
{
	struct DeviceCtlBlk *dcb = srb->dcb;
	dprintkdbg(DBG_1, "msgin_set_nowide: <%02i>\n", dcb->target_id);

	dcb->sync_period &= ~WIDE_SYNC;
	dcb->sync_mode &= ~(WIDE_NEGO_ENABLE);
	dcb->sync_mode |= WIDE_NEGO_DONE;
	srb->state &= ~SRB_DO_WIDE_NEGO;
	reprogram_regs(acb, dcb);
	if ((dcb->sync_mode & SYNC_NEGO_ENABLE)
	    && !(dcb->sync_mode & SYNC_NEGO_DONE)) {
		build_sdtr(acb, dcb, srb);
		DC395x_ENABLE_MSGOUT;
		dprintkdbg(DBG_0, "msgin_set_nowide: Rejected. Try SDTR anyway\n");
	}
}


/*
 * msgin_set_wide - evaluate a received WDTR message.
 *
 * srb->msgin_buf[3] carries the requested width; it is limited to 1 when
 * both the device mode (NTC_DO_WIDE_NEGO) and the adapter configuration
 * (HCC_WIDE_CARD) allow wide transfers and to 0 otherwise.  When
 * SRB_DO_WIDE_NEGO is not set in srb->state the (possibly corrected)
 * four byte message is queued as the reply.  WIDE_SYNC in
 * dcb->sync_period is then set or cleared according to the result, the
 * chip is reprogrammed, and an SDTR is queued if sync negotiation is
 * enabled but still outstanding.
 */
static void msgin_set_wide(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb)
{
	struct DeviceCtlBlk *dcb = srb->dcb;
	u8 wide = (dcb->dev_mode & NTC_DO_WIDE_NEGO
		   && acb->config & HCC_WIDE_CARD) ? 1 : 0;
	dprintkdbg(DBG_1, "msgin_set_wide: <%02i>\n", dcb->target_id);

	if (srb->msgin_buf[3] > wide)
		srb->msgin_buf[3] = wide;
	/* Completed */
	if (!(srb->state & SRB_DO_WIDE_NEGO)) {
		dprintkl(KERN_DEBUG,
			"msgin_set_wide: Wide nego initiated <%02i>\n",
			dcb->target_id);
		memcpy(srb->msgout_buf, srb->msgin_buf, 4);
		srb->msg_count = 4;
		srb->state |= SRB_DO_WIDE_NEGO;
		DC395x_ENABLE_MSGOUT;
	}

	dcb->sync_mode |= (WIDE_NEGO_ENABLE | WIDE_NEGO_DONE);
	if (srb->msgin_buf[3] > 0)
		dcb->sync_period |= WIDE_SYNC;
	else
		dcb->sync_period &= ~WIDE_SYNC;
	srb->state &= ~SRB_DO_WIDE_NEGO;
	/*dcb->sync_mode &= ~(WIDE_NEGO_ENABLE+WIDE_NEGO_DONE); */
	dprintkdbg(DBG_1,
		"msgin_set_wide: Wide (%i bit) negotiated <%02i>\n",
		(8 << srb->msgin_buf[3]), dcb->target_id);
	reprogram_regs(acb, dcb);
	if ((dcb->sync_mode & SYNC_NEGO_ENABLE)
	    && !(dcb->sync_mode & SYNC_NEGO_DONE)) {
		build_sdtr(acb, dcb, srb);
		DC395x_ENABLE_MSGOUT;
		dprintkdbg(DBG_0, "msgin_set_wide: Also try SDTR.\n");
	}
}


/*
 * extended message codes:
 *
 *	code	description
 *
 *	02h	Reserved
 *	00h	MODIFY DATA POINTER
 *	01h	SYNCHRONOUS DATA TRANSFER REQUEST
 *	03h	WIDE DATA TRANSFER REQUEST
 *   04h - 7Fh	Reserved
 *   80h - FFh	Vendor specific
 */
/*
 * msgin_phase0 - take one message byte from the SCSI FIFO, append it to
 * srb->msgin_buf and, once msgin_completed() reports a whole message,
 * dispatch on its first byte.
 *
 * NOTE(review): acb->msg_len is incremented without a visible bound
 * check here -- confirm it cannot exceed the capacity of msgin_buf for
 * long extended messages.
 */
static void msgin_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
		u16 *pscsi_status)
{
	struct DeviceCtlBlk *dcb = acb->active_dcb;
	dprintkdbg(DBG_0, "msgin_phase0: (0x%p)\n", srb->cmd);

	srb->msgin_buf[acb->msg_len++] = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
	if (msgin_completed(srb->msgin_buf, acb->msg_len)) {
		/* Now eval the msg */
		switch (srb->msgin_buf[0]) {
		case DISCONNECT:
			srb->state = SRB_DISCONNECT;
			break;

		case SIMPLE_QUEUE_TAG:
		case HEAD_OF_QUEUE_TAG:
		case ORDERED_QUEUE_TAG:
			/* From here on work with the SRB owning the tag */
			srb =
			    msgin_qtag(acb, dcb,
srb->msgin_buf[1]);
			break;

		case MESSAGE_REJECT:
			DC395x_write16(acb, TRM_S1040_SCSI_CONTROL,
				       DO_CLRATN | DO_DATALATCH);
			/* A sync nego message was rejected ! */
			if (srb->state & SRB_DO_SYNC_NEGO) {
				msgin_set_async(acb, srb);
				break;
			}
			/* A wide nego message was rejected ! */
			if (srb->state & SRB_DO_WIDE_NEGO) {
				msgin_set_nowide(acb, srb);
				break;
			}
			/* Neither negotiation was pending: abort */
			enable_msgout_abort(acb, srb);
			/*srb->state |= SRB_ABORT_SENT */
			break;

		case EXTENDED_MESSAGE:
			/* SDTR */
			if (srb->msgin_buf[1] == 3
			    && srb->msgin_buf[2] == EXTENDED_SDTR) {
				msgin_set_sync(acb, srb);
				break;
			}
			/* WDTR */
			if (srb->msgin_buf[1] == 2
			    && srb->msgin_buf[2] == EXTENDED_WDTR
			    && srb->msgin_buf[3] <= 2) { /* sanity check ... */
				msgin_set_wide(acb, srb);
				break;
			}
			/* Any other extended message is rejected */
			msgin_reject(acb, srb);
			break;

		case MSG_IGNOREWIDE:
			/* Discard  wide residual */
			dprintkdbg(DBG_0, "msgin_phase0: Ignore Wide Residual!\n");
			break;

		case COMMAND_COMPLETE:
			/* nothing has to be done */
			break;

		case SAVE_POINTERS:
			/*
			 * SAVE POINTER may be ignored as we have the struct
			 * ScsiReqBlk* associated with the scsi command.
			 */
			dprintkdbg(DBG_0, "msgin_phase0: (0x%p) "
				"SAVE POINTER rem=%i Ignore\n",
				srb->cmd, srb->total_xfer_length);
			break;

		case RESTORE_POINTERS:
			dprintkdbg(DBG_0, "msgin_phase0: RESTORE POINTER. Ignore\n");
			break;

		case ABORT:
			dprintkdbg(DBG_0, "msgin_phase0: (0x%p) "
				"<%02i-%i> ABORT msg\n",
				srb->cmd, dcb->target_id,
				dcb->target_lun);
			dcb->flag |= ABORT_DEV_;
			enable_msgout_abort(acb, srb);
			break;

		default:
			/* reject unknown messages */
			/*
			 * NOTE(review): the break below is commented out, so
			 * an Identify message also reaches msgin_reject();
			 * confirm this is intended.
			 */
			if (srb->msgin_buf[0] & IDENTIFY_BASE) {
				dprintkdbg(DBG_0, "msgin_phase0: Identify msg\n");
				srb->msg_count = 1;
				srb->msgout_buf[0] = dcb->identify_msg;
				DC395x_ENABLE_MSGOUT;
				srb->state |= SRB_MSGOUT;
				/*break; */
			}
			msgin_reject(acb, srb);
		}

		/* Clear counter and MsgIn state */
		srb->state &= ~SRB_MSGIN;
		acb->msg_len = 0;
	}
	*pscsi_status = PH_BUS_FREE;
	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);	/* it's important ... you know! */
	DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_MSGACCEPT);
}


/*
 * msgin_phase1 - prepare reception of one message byte: clear the FIFO,
 * set the SCSI counter to 1 and issue SCMD_FIFO_IN.  When SRB_MSGIN was
 * not yet set, SRB_DISCONNECT is cleared and SRB_MSGIN set.
 * pscsi_status is not used.
 */
static void msgin_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
		u16 *pscsi_status)
{
	dprintkdbg(DBG_0, "msgin_phase1: (0x%p)\n", srb->cmd);
	clear_fifo(acb, "msgin_phase1");
	DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, 1);
	if (!(srb->state & SRB_MSGIN)) {
		srb->state &= ~SRB_DISCONNECT;
		srb->state |= SRB_MSGIN;
	}
	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);	/* it's important for atn stop */
	/* SCSI command */
	DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_IN);
}


/* nop0 - phase handler that intentionally does nothing */
static void nop0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
		u16 *pscsi_status)
{
}


/* nop1 - phase handler that intentionally does nothing */
static void nop1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
		u16 *pscsi_status)
{
}


/*
 * set_xfer_rate - copy the negotiated sync period, offset, mode and
 * minimum period of @dcb to every entry of acb->dcb_list that has the
 * same target_id.
 *
 * Nothing is copied when the low three bits of dcb->identify_msg are
 * non-zero.  While acb->scan_devices is set only current_sync_offset is
 * recorded.
 */
static void set_xfer_rate(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb)
{
	struct DeviceCtlBlk *i;

	/* set all lun device's  period, offset */
	if (dcb->identify_msg & 0x07)
		return;

	if (acb->scan_devices) {
		current_sync_offset = dcb->sync_offset;
		return;
	}

	list_for_each_entry(i, &acb->dcb_list, list)
		if (i->target_id == dcb->target_id) {
			i->sync_period = dcb->sync_period;
			i->sync_offset = dcb->sync_offset;
			i->sync_mode = dcb->sync_mode;
			i->min_nego_period = dcb->min_nego_period;
		}
}


/*
 * disconnect - handle the bus going free for the adapter's active
 * device (acb->active_dcb).
 */
static void disconnect(struct AdapterCtlBlk *acb)
{
	struct DeviceCtlBlk *dcb = acb->active_dcb;
	struct ScsiReqBlk *srb;

	if (!dcb) {
		/* No active device: back off, clear the FIFO and return */
		dprintkl(KERN_ERR, "disconnect: No such device\n");
		udelay(500);
		/* Suspend queue for a while */
		acb->scsi_host->last_reset =
		    jiffies + HZ / 2 +
		    HZ * acb->eeprom.delay_time;
		clear_fifo(acb, "disconnectEx");
		DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_HWRESELECT);
		return;
	}
	srb = dcb->active_srb;
	acb->active_dcb = NULL;
	dprintkdbg(DBG_0, "disconnect: (0x%p)\n", srb->cmd);

	srb->scsi_phase = PH_BUS_FREE;	/* initial phase */
	clear_fifo(acb, "disconnect");
	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_HWRESELECT);
	if (srb->state & SRB_UNEXPECT_RESEL) {
		dprintkl(KERN_ERR,
			"disconnect: Unexpected reselection <%02i-%i>\n",
dcb->target_id, dcb->target_lun);
		srb->state = 0;
		waiting_process_next(acb);
	} else if (srb->state & SRB_ABORT_SENT) {
		dcb->flag &= ~ABORT_DEV_;
		acb->scsi_host->last_reset = jiffies + HZ / 2 + 1;
		dprintkl(KERN_ERR, "disconnect: SRB_ABORT_SENT\n");
		doing_srb_done(acb, DID_ABORT, srb->cmd, 1);
		waiting_process_next(acb);
	} else {
		if ((srb->state & (SRB_START_ + SRB_MSGOUT))
		    || !(srb->
			 state & (SRB_DISCONNECT + SRB_COMPLETED))) {
			/*
			 * Selection time out
			 * SRB_START_ || SRB_MSGOUT || (!SRB_DISCONNECT && !SRB_COMPLETED)
			 */
			/* Unexp. Disc / Sel Timeout */
			if (srb->state != SRB_START_
			    && srb->state != SRB_MSGOUT) {
				srb->state = SRB_READY;
				dprintkl(KERN_DEBUG,
					"disconnect: (0x%p) Unexpected\n",
					srb->cmd);
				srb->target_status = SCSI_STAT_SEL_TIMEOUT;
				goto disc1;
			} else {
				/* Normal selection timeout */
				dprintkdbg(DBG_KG, "disconnect: (0x%p) "
					"<%02i-%i> SelTO\n", srb->cmd,
					dcb->target_id, dcb->target_lun);
				/* Give up once the retry budget is used or while scanning */
				if (srb->retry_count++ > DC395x_MAX_RETRIES
				    || acb->scan_devices) {
					srb->target_status =
					    SCSI_STAT_SEL_TIMEOUT;
					goto disc1;
				}
				/* Otherwise requeue the command and retry shortly */
				free_tag(dcb, srb);
				srb_going_to_waiting_move(dcb, srb);
				dprintkdbg(DBG_KG,
					"disconnect: (0x%p) Retry\n",
					srb->cmd);
				waiting_set_timer(acb, HZ / 20);
			}
		} else if (srb->state & SRB_DISCONNECT) {
			u8 bval = DC395x_read8(acb, TRM_S1040_SCSI_SIGNAL);
			/*
			 * SRB_DISCONNECT (This is what we expect!)
			 */
			if (bval & 0x40) {
				dprintkdbg(DBG_0, "disconnect: SCSI bus stat "
					" 0x%02x: ACK set! Other controllers?\n",
					bval);
				/* It could come from another initiator, therefore don't do much! */
			} else
				waiting_process_next(acb);
		} else if (srb->state & SRB_COMPLETED) {
disc1:
			/*
			 * SRB_COMPLETED
			 */
			free_tag(dcb, srb);
			dcb->active_srb = NULL;
			srb->state = SRB_FREE;
			srb_done(acb, dcb, srb);
		}
	}
}


/*
 * reselect - handle a reselection of the adapter by a target.
 *
 * Reads the reselecting target ID and LUN from the chip. If a device was
 * still marked active (we lost arbitration to the reselecting target), its
 * active command is put back on the waiting list and a retry timer is armed.
 * The device block for the reselecting <id, lun> is then looked up, the SRB
 * to continue with is chosen, and the chip is programmed with the host ID,
 * target ID and transfer parameters before the message is accepted.
 *
 * @acb: The adapter that was reselected.
 */
static void reselect(struct AdapterCtlBlk *acb)
{
	struct DeviceCtlBlk *dcb = acb->active_dcb;
	struct ScsiReqBlk *srb = NULL;
	u16 rsel_tar_lun_id;
	u8 id, lun;
	/* Set below when arbitration was lost; never read in this function */
	u8 arblostflag = 0;
	dprintkdbg(DBG_0, "reselect: acb=%p\n", acb);

	clear_fifo(acb, "reselect");
	/*DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_HWRESELECT | DO_DATALATCH); */
	/* Read Reselected Target ID and LUN */
	rsel_tar_lun_id = DC395x_read16(acb, TRM_S1040_SCSI_TARGETID);
	if (dcb) {		/* Arbitration lost but Reselection win */
		srb = dcb->active_srb;
		if (!srb) {
			dprintkl(KERN_DEBUG, "reselect: Arb lost Resel won, "
				"but active_srb == NULL\n");
			DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);	/* it's important for atn stop */
			return;
		}
		/* Why the if ? */
		if (!acb->scan_devices) {
			dprintkdbg(DBG_KG, "reselect: (0x%p) <%02i-%i> "
				"Arb lost but Resel win rsel=%i stat=0x%04x\n",
				srb->cmd, dcb->target_id,
				dcb->target_lun, rsel_tar_lun_id,
				DC395x_read16(acb, TRM_S1040_SCSI_STATUS));
			arblostflag = 1;
			/*srb->state |= SRB_DISCONNECT; */

			/* Requeue the command that lost arbitration and retry later */
			srb->state = SRB_READY;
			free_tag(dcb, srb);
			srb_going_to_waiting_move(dcb, srb);
			waiting_set_timer(acb, HZ / 20);

			/* return; */
		}
	}
	/* Read Reselected Target Id and LUN */
	if (!(rsel_tar_lun_id & (IDENTIFY_BASE << 8)))
		dprintkl(KERN_DEBUG, "reselect: Expects identify msg. "
			"Got %i!\n", rsel_tar_lun_id);
	/* Low byte carries the target ID, bits 8..10 the LUN */
	id = rsel_tar_lun_id & 0xff;
	lun = (rsel_tar_lun_id >> 8) & 7;
	dcb = find_dcb(acb, id, lun);
	if (!dcb) {
		dprintkl(KERN_ERR, "reselect: From non existent device "
			"<%02i-%i>\n", id, lun);
		DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);	/* it's important for atn stop */
		return;
	}
	acb->active_dcb = dcb;

	if (!(dcb->dev_mode & NTC_DO_DISCONNECT))
		dprintkl(KERN_DEBUG, "reselect: in spite of forbidden "
			"disconnection? <%02i-%i>\n",
			dcb->target_id, dcb->target_lun);

	if (dcb->sync_mode & EN_TAG_QUEUEING /*&& !arblostflag */) {
		/* With tagged queueing the adapter's temporary SRB becomes active here */
		srb = acb->tmp_srb;
		dcb->active_srb = srb;
	} else {
		/* There can be only one! */
		srb = dcb->active_srb;
		if (!srb || !(srb->state & SRB_DISCONNECT)) {
			/*
			 * abort command
			 */
			dprintkl(KERN_DEBUG,
				"reselect: w/o disconnected cmds <%02i-%i>\n",
				dcb->target_id, dcb->target_lun);
			srb = acb->tmp_srb;
			srb->state = SRB_UNEXPECT_RESEL;
			dcb->active_srb = srb;
			enable_msgout_abort(acb, srb);
		} else {
			if (dcb->flag & ABORT_DEV_) {
				/*srb->state = SRB_ABORT_SENT; */
				enable_msgout_abort(acb, srb);
			} else
				srb->state = SRB_DATA_XFER;

		}
	}
	srb->scsi_phase = PH_BUS_FREE;	/* initial phase */

	/* Program HA ID, target ID, period and offset */
	dprintkdbg(DBG_0, "reselect: select <%i>\n", dcb->target_id);
	DC395x_write8(acb, TRM_S1040_SCSI_HOSTID, acb->scsi_host->this_id);	/* host   ID */
	DC395x_write8(acb, TRM_S1040_SCSI_TARGETID, dcb->target_id);		/* target ID */
	DC395x_write8(acb, TRM_S1040_SCSI_OFFSET, dcb->sync_offset);		/* offset    */
	DC395x_write8(acb, TRM_S1040_SCSI_SYNC, dcb->sync_period);		/* sync period, wide */
	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);		/* it's important for atn stop */
	/* SCSI command */
	DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_MSGACCEPT);
}


/*
 * tagq_blacklist - tell whether tagged queueing must be avoided for a device.
 *
 * With DC395x_NO_TAGQ defined this always returns 1. Otherwise the name
 * comparison against DC395x_baddevname1 is compiled out (#if 0), so it
 * always returns 0 and @name is unused.
 *
 * @name: Device name bytes taken from the INQUIRY data.
 */
static inline u8 tagq_blacklist(char *name)
{
#ifndef DC395x_NO_TAGQ
#if 0
	u8 i;
	for (i = 0; i < BADDEVCNT; i++)
		if (memcmp(name, DC395x_baddevname1[i], 28) == 0)
			return 1;
#endif
	return 0;
#else
	return 1;
#endif
}


/*
 * disc_tagq_set - decide from INQUIRY data whether to use tagged queueing.
 *
 * Acts only when the INQUIRY version / response data format fields pass the
 * check below. Tagged queueing is then enabled in dcb->sync_mode when the
 * device reports command queueing, the configured device mode allows it and
 * the device is not blacklisted; otherwise max_command is limited to 1.
 */
static void disc_tagq_set(struct DeviceCtlBlk *dcb, struct ScsiInqData *ptr)
{
	/* Check for SCSI format (ANSI and Response data format) */
	if ((ptr->Vers & 0x07) >= 2 || (ptr->RDF & 0x0F) == 2) {
		if ((ptr->Flags & SCSI_INQ_CMDQUEUE)
		    && (dcb->dev_mode & NTC_DO_TAG_QUEUEING) &&
		    /*(dcb->dev_mode & NTC_DO_DISCONNECT) */
		    /* ((dcb->dev_type == TYPE_DISK)
		       || (dcb->dev_type == TYPE_MOD)) && */
		    !tagq_blacklist(((char *)ptr) + 8)) {
			/* Only raise the limit if it is still at its initial value */
			if (dcb->max_command == 1)
				dcb->max_command =
				    dcb->acb->tag_max_num;
dcb->sync_mode |= EN_TAG_QUEUEING; /*dcb->tag_mask = 0; */ } else dcb->max_command = 1; } } static void add_dev(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, struct ScsiInqData *ptr) { u8 bval1 = ptr->DevType & SCSI_DEVTYPE; dcb->dev_type = bval1; /* if (bval1 == TYPE_DISK || bval1 == TYPE_MOD) */ disc_tagq_set(dcb, ptr); } /* unmap mapped pci regions from SRB */ static void pci_unmap_srb(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { struct scsi_cmnd *cmd = srb->cmd; enum dma_data_direction dir = cmd->sc_data_direction; if (scsi_sg_count(cmd) && dir != PCI_DMA_NONE) { /* unmap DC395x SG list */ dprintkdbg(DBG_SG, "pci_unmap_srb: list=%08x(%05x)\n", srb->sg_bus_addr, SEGMENTX_LEN); pci_unmap_single(acb->dev, srb->sg_bus_addr, SEGMENTX_LEN, PCI_DMA_TODEVICE); dprintkdbg(DBG_SG, "pci_unmap_srb: segs=%i buffer=%p\n", scsi_sg_count(cmd), scsi_bufflen(cmd)); /* unmap the sg segments */ scsi_dma_unmap(cmd); } } /* unmap mapped pci sense buffer from SRB */ static void pci_unmap_srb_sense(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb) { if (!(srb->flag & AUTO_REQSENSE)) return; /* Unmap sense buffer */ dprintkdbg(DBG_SG, "pci_unmap_srb_sense: buffer=%08x\n", srb->segment_x[0].address); pci_unmap_single(acb->dev, srb->segment_x[0].address, srb->segment_x[0].length, PCI_DMA_FROMDEVICE); /* Restore SG stuff */ srb->total_xfer_length = srb->xferred; srb->segment_x[0].address = srb->segment_x[DC395x_MAX_SG_LISTENTRY - 1].address; srb->segment_x[0].length = srb->segment_x[DC395x_MAX_SG_LISTENTRY - 1].length; } /* * Complete execution of a SCSI command * Signal completion to the generic SCSI driver */ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { u8 tempcnt, status; struct scsi_cmnd *cmd = srb->cmd; enum dma_data_direction dir = cmd->sc_data_direction; int ckc_only = 1; dprintkdbg(DBG_1, "srb_done: (0x%p) <%02i-%i>\n", srb->cmd, srb->cmd->device->id, srb->cmd->device->lun); dprintkdbg(DBG_SG, "srb_done: srb=%p 
sg=%i(%i/%i) buf=%p\n", srb, scsi_sg_count(cmd), srb->sg_index, srb->sg_count, scsi_sgtalbe(cmd)); status = srb->target_status; if (srb->flag & AUTO_REQSENSE) { dprintkdbg(DBG_0, "srb_done: AUTO_REQSENSE1\n"); pci_unmap_srb_sense(acb, srb); /* ** target status.......................... */ srb->flag &= ~AUTO_REQSENSE; srb->adapter_status = 0; srb->target_status = CHECK_CONDITION << 1; if (debug_enabled(DBG_1)) { switch (cmd->sense_buffer[2] & 0x0f) { case NOT_READY: dprintkl(KERN_DEBUG, "ReqSense: NOT_READY cmnd=0x%02x <%02i-%i> stat=%i scan=%i ", cmd->cmnd[0], dcb->target_id, dcb->target_lun, status, acb->scan_devices); break; case UNIT_ATTENTION: dprintkl(KERN_DEBUG, "ReqSense: UNIT_ATTENTION cmnd=0x%02x <%02i-%i> stat=%i scan=%i ", cmd->cmnd[0], dcb->target_id, dcb->target_lun, status, acb->scan_devices); break; case ILLEGAL_REQUEST: dprintkl(KERN_DEBUG, "ReqSense: ILLEGAL_REQUEST cmnd=0x%02x <%02i-%i> stat=%i scan=%i ", cmd->cmnd[0], dcb->target_id, dcb->target_lun, status, acb->scan_devices); break; case MEDIUM_ERROR: dprintkl(KERN_DEBUG, "ReqSense: MEDIUM_ERROR cmnd=0x%02x <%02i-%i> stat=%i scan=%i ", cmd->cmnd[0], dcb->target_id, dcb->target_lun, status, acb->scan_devices); break; case HARDWARE_ERROR: dprintkl(KERN_DEBUG, "ReqSense: HARDWARE_ERROR cmnd=0x%02x <%02i-%i> stat=%i scan=%i ", cmd->cmnd[0], dcb->target_id, dcb->target_lun, status, acb->scan_devices); break; } if (cmd->sense_buffer[7] >= 6) printk("sense=0x%02x ASC=0x%02x ASCQ=0x%02x " "(0x%08x 0x%08x)\n", cmd->sense_buffer[2], cmd->sense_buffer[12], cmd->sense_buffer[13], *((unsigned int *)(cmd->sense_buffer + 3)), *((unsigned int *)(cmd->sense_buffer + 8))); else printk("sense=0x%02x No ASC/ASCQ (0x%08x)\n", cmd->sense_buffer[2], *((unsigned int *)(cmd->sense_buffer + 3))); } if (status == (CHECK_CONDITION << 1)) { cmd->result = DID_BAD_TARGET << 16; goto ckc_e; } dprintkdbg(DBG_0, "srb_done: AUTO_REQSENSE2\n"); if (srb->total_xfer_length && srb->total_xfer_length >= cmd->underflow) cmd->result = 
MK_RES_LNX(DRIVER_SENSE, DID_OK, srb->end_message, CHECK_CONDITION); /*SET_RES_DID(cmd->result,DID_OK) */ else cmd->result = MK_RES_LNX(DRIVER_SENSE, DID_OK, srb->end_message, CHECK_CONDITION); goto ckc_e; } /*************************************************************/ if (status) { /* * target status.......................... */ if (status_byte(status) == CHECK_CONDITION) { request_sense(acb, dcb, srb); return; } else if (status_byte(status) == QUEUE_FULL) { tempcnt = (u8)list_size(&dcb->srb_going_list); dprintkl(KERN_INFO, "QUEUE_FULL for dev <%02i-%i> with %i cmnds\n", dcb->target_id, dcb->target_lun, tempcnt); if (tempcnt > 1) tempcnt--; dcb->max_command = tempcnt; free_tag(dcb, srb); srb_going_to_waiting_move(dcb, srb); waiting_set_timer(acb, HZ / 20); srb->adapter_status = 0; srb->target_status = 0; return; } else if (status == SCSI_STAT_SEL_TIMEOUT) { srb->adapter_status = H_SEL_TIMEOUT; srb->target_status = 0; cmd->result = DID_NO_CONNECT << 16; } else { srb->adapter_status = 0; SET_RES_DID(cmd->result, DID_ERROR); SET_RES_MSG(cmd->result, srb->end_message); SET_RES_TARGET(cmd->result, status); } } else { /* ** process initiator status.......................... 
*/ status = srb->adapter_status; if (status & H_OVER_UNDER_RUN) { srb->target_status = 0; SET_RES_DID(cmd->result, DID_OK); SET_RES_MSG(cmd->result, srb->end_message); } else if (srb->status & PARITY_ERROR) { SET_RES_DID(cmd->result, DID_PARITY); SET_RES_MSG(cmd->result, srb->end_message); } else { /* No error */ srb->adapter_status = 0; srb->target_status = 0; SET_RES_DID(cmd->result, DID_OK); } } if (dir != PCI_DMA_NONE && scsi_sg_count(cmd)) pci_dma_sync_sg_for_cpu(acb->dev, scsi_sglist(cmd), scsi_sg_count(cmd), dir); ckc_only = 0; /* Check Error Conditions */ ckc_e: if (cmd->cmnd[0] == INQUIRY) { unsigned char *base = NULL; struct ScsiInqData *ptr; unsigned long flags = 0; struct scatterlist* sg = scsi_sglist(cmd); size_t offset = 0, len = sizeof(struct ScsiInqData); local_irq_save(flags); base = scsi_kmap_atomic_sg(sg, scsi_sg_count(cmd), &offset, &len); ptr = (struct ScsiInqData *)(base + offset); if (!ckc_only && (cmd->result & RES_DID) == 0 && cmd->cmnd[2] == 0 && scsi_bufflen(cmd) >= 8 && dir != PCI_DMA_NONE && ptr && (ptr->Vers & 0x07) >= 2) dcb->inquiry7 = ptr->Flags; /*if( srb->cmd->cmnd[0] == INQUIRY && */ /* (host_byte(cmd->result) == DID_OK || status_byte(cmd->result) & CHECK_CONDITION) ) */ if ((cmd->result == (DID_OK << 16) || status_byte(cmd->result) & CHECK_CONDITION)) { if (!dcb->init_tcq_flag) { add_dev(acb, dcb, ptr); dcb->init_tcq_flag = 1; } } scsi_kunmap_atomic_sg(base); local_irq_restore(flags); } /* Here is the info for Doug Gilbert's sg3 ... */ scsi_set_resid(cmd, srb->total_xfer_length); /* This may be interpreted by sb. or not ... 
*/ cmd->SCp.this_residual = srb->total_xfer_length; cmd->SCp.buffers_residual = 0; if (debug_enabled(DBG_KG)) { if (srb->total_xfer_length) dprintkdbg(DBG_KG, "srb_done: (0x%p) <%02i-%i> " "cmnd=0x%02x Missed %i bytes\n", cmd, cmd->device->id, cmd->device->lun, cmd->cmnd[0], srb->total_xfer_length); } srb_going_remove(dcb, srb); /* Add to free list */ if (srb == acb->tmp_srb) dprintkl(KERN_ERR, "srb_done: ERROR! Completed cmd with tmp_srb\n"); else { dprintkdbg(DBG_0, "srb_done: (0x%p) done result=0x%08x\n", cmd, cmd->result); srb_free_insert(acb, srb); } pci_unmap_srb(acb, srb); cmd->scsi_done(cmd); waiting_process_next(acb); } /* abort all cmds in our queues */ static void doing_srb_done(struct AdapterCtlBlk *acb, u8 did_flag, struct scsi_cmnd *cmd, u8 force) { struct DeviceCtlBlk *dcb; dprintkl(KERN_INFO, "doing_srb_done: pids "); list_for_each_entry(dcb, &acb->dcb_list, list) { struct ScsiReqBlk *srb; struct ScsiReqBlk *tmp; struct scsi_cmnd *p; list_for_each_entry_safe(srb, tmp, &dcb->srb_going_list, list) { enum dma_data_direction dir; int result; p = srb->cmd; dir = p->sc_data_direction; result = MK_RES(0, did_flag, 0, 0); printk("G:%p(%02i-%i) ", p, p->device->id, p->device->lun); srb_going_remove(dcb, srb); free_tag(dcb, srb); srb_free_insert(acb, srb); p->result = result; pci_unmap_srb_sense(acb, srb); pci_unmap_srb(acb, srb); if (force) { /* For new EH, we normally don't need to give commands back, * as they all complete or all time out */ p->scsi_done(p); } } if (!list_empty(&dcb->srb_going_list)) dprintkl(KERN_DEBUG, "How could the ML send cmnds to the Going queue? 
<%02i-%i>\n", dcb->target_id, dcb->target_lun); if (dcb->tag_mask) dprintkl(KERN_DEBUG, "tag_mask for <%02i-%i> should be empty, is %08x!\n", dcb->target_id, dcb->target_lun, dcb->tag_mask); /* Waiting queue */ list_for_each_entry_safe(srb, tmp, &dcb->srb_waiting_list, list) { int result; p = srb->cmd; result = MK_RES(0, did_flag, 0, 0); printk("W:%p<%02i-%i>", p, p->device->id, p->device->lun); srb_waiting_remove(dcb, srb); srb_free_insert(acb, srb); p->result = result; pci_unmap_srb_sense(acb, srb); pci_unmap_srb(acb, srb); if (force) { /* For new EH, we normally don't need to give commands back, * as they all complete or all time out */ cmd->scsi_done(cmd); } } if (!list_empty(&dcb->srb_waiting_list)) dprintkl(KERN_DEBUG, "ML queued %i cmnds again to <%02i-%i>\n", list_size(&dcb->srb_waiting_list), dcb->target_id, dcb->target_lun); dcb->flag &= ~ABORT_DEV_; } printk("\n"); } static void reset_scsi_bus(struct AdapterCtlBlk *acb) { dprintkdbg(DBG_0, "reset_scsi_bus: acb=%p\n", acb); acb->acb_flag |= RESET_DEV; /* RESET_DETECT, RESET_DONE, RESET_DEV */ DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_RSTSCSI); while (!(DC395x_read8(acb, TRM_S1040_SCSI_INTSTATUS) & INT_SCSIRESET)) /* nothing */; } static void set_basic_config(struct AdapterCtlBlk *acb) { u8 bval; u16 wval; DC395x_write8(acb, TRM_S1040_SCSI_TIMEOUT, acb->sel_timeout); if (acb->config & HCC_PARITY) bval = PHASELATCH | INITIATOR | BLOCKRST | PARITYCHECK; else bval = PHASELATCH | INITIATOR | BLOCKRST; DC395x_write8(acb, TRM_S1040_SCSI_CONFIG0, bval); /* program configuration 1: Act_Neg (+ Act_Neg_Enh? + Fast_Filter? + DataDis?) 
*/ DC395x_write8(acb, TRM_S1040_SCSI_CONFIG1, 0x03); /* was 0x13: default */ /* program Host ID */ DC395x_write8(acb, TRM_S1040_SCSI_HOSTID, acb->scsi_host->this_id); /* set ansynchronous transfer */ DC395x_write8(acb, TRM_S1040_SCSI_OFFSET, 0x00); /* Turn LED control off */ wval = DC395x_read16(acb, TRM_S1040_GEN_CONTROL) & 0x7F; DC395x_write16(acb, TRM_S1040_GEN_CONTROL, wval); /* DMA config */ wval = DC395x_read16(acb, TRM_S1040_DMA_CONFIG) & ~DMA_FIFO_CTRL; wval |= DMA_FIFO_HALF_HALF | DMA_ENHANCE /*| DMA_MEM_MULTI_READ */ ; DC395x_write16(acb, TRM_S1040_DMA_CONFIG, wval); /* Clear pending interrupt status */ DC395x_read8(acb, TRM_S1040_SCSI_INTSTATUS); /* Enable SCSI interrupt */ DC395x_write8(acb, TRM_S1040_SCSI_INTEN, 0x7F); DC395x_write8(acb, TRM_S1040_DMA_INTEN, EN_SCSIINTR | EN_DMAXFERERROR /*| EN_DMAXFERABORT | EN_DMAXFERCOMP | EN_FORCEDMACOMP */ ); } static void scsi_reset_detect(struct AdapterCtlBlk *acb) { dprintkl(KERN_INFO, "scsi_reset_detect: acb=%p\n", acb); /* delay half a second */ if (timer_pending(&acb->waiting_timer)) del_timer(&acb->waiting_timer); DC395x_write8(acb, TRM_S1040_SCSI_CONTROL, DO_RSTMODULE); DC395x_write8(acb, TRM_S1040_DMA_CONTROL, DMARESETMODULE); /*DC395x_write8(acb, TRM_S1040_DMA_CONTROL,STOPDMAXFER); */ udelay(500); /* Maybe we locked up the bus? Then lets wait even longer ... 
*/ acb->scsi_host->last_reset = jiffies + 5 * HZ / 2 + HZ * acb->eeprom.delay_time; clear_fifo(acb, "scsi_reset_detect"); set_basic_config(acb); /*1.25 */ /*DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_HWRESELECT); */ if (acb->acb_flag & RESET_DEV) { /* RESET_DETECT, RESET_DONE, RESET_DEV */ acb->acb_flag |= RESET_DONE; } else { acb->acb_flag |= RESET_DETECT; reset_dev_param(acb); doing_srb_done(acb, DID_RESET, NULL, 1); /*DC395x_RecoverSRB( acb ); */ acb->active_dcb = NULL; acb->acb_flag = 0; waiting_process_next(acb); } } static void request_sense(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb) { struct scsi_cmnd *cmd = srb->cmd; dprintkdbg(DBG_1, "request_sense: (0x%p) <%02i-%i>\n", cmd, cmd->device->id, cmd->device->lun); srb->flag |= AUTO_REQSENSE; srb->adapter_status = 0; srb->target_status = 0; /* KG: Can this prevent crap sense data ? */ memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); /* Save some data */ srb->segment_x[DC395x_MAX_SG_LISTENTRY - 1].address = srb->segment_x[0].address; srb->segment_x[DC395x_MAX_SG_LISTENTRY - 1].length = srb->segment_x[0].length; srb->xferred = srb->total_xfer_length; /* srb->segment_x : a one entry of S/G list table */ srb->total_xfer_length = SCSI_SENSE_BUFFERSIZE; srb->segment_x[0].length = SCSI_SENSE_BUFFERSIZE; /* Map sense buffer */ srb->segment_x[0].address = pci_map_single(acb->dev, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE); dprintkdbg(DBG_SG, "request_sense: map buffer %p->%08x(%05x)\n", cmd->sense_buffer, srb->segment_x[0].address, SCSI_SENSE_BUFFERSIZE); srb->sg_count = 1; srb->sg_index = 0; if (start_scsi(acb, dcb, srb)) { /* Should only happen, if sb. else grabs the bus */ dprintkl(KERN_DEBUG, "request_sense: (0x%p) failed <%02i-%i>\n", srb->cmd, dcb->target_id, dcb->target_lun); srb_going_to_waiting_move(dcb, srb); waiting_set_timer(acb, HZ / 100); } } /** * device_alloc - Allocate a new device instance. 
This create the * devices instance and sets up all the data items. The adapter * instance is required to obtain confiuration information for this * device. This does *not* add this device to the adapters device * list. * * @acb: The adapter to obtain configuration information from. * @target: The target for the new device. * @lun: The lun for the new device. * * Return the new device if successful or NULL on failure. **/ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb, u8 target, u8 lun) { struct NvRamType *eeprom = &acb->eeprom; u8 period_index = eeprom->target[target].period & 0x07; struct DeviceCtlBlk *dcb; dcb = kmalloc(sizeof(struct DeviceCtlBlk), GFP_ATOMIC); dprintkdbg(DBG_0, "device_alloc: <%02i-%i>\n", target, lun); if (!dcb) return NULL; dcb->acb = NULL; INIT_LIST_HEAD(&dcb->srb_going_list); INIT_LIST_HEAD(&dcb->srb_waiting_list); dcb->active_srb = NULL; dcb->tag_mask = 0; dcb->max_command = 1; dcb->target_id = target; dcb->target_lun = lun; #ifndef DC395x_NO_DISCONNECT dcb->identify_msg = IDENTIFY(dcb->dev_mode & NTC_DO_DISCONNECT, lun); #else dcb->identify_msg = IDENTIFY(0, lun); #endif dcb->dev_mode = eeprom->target[target].cfg0; dcb->inquiry7 = 0; dcb->sync_mode = 0; dcb->min_nego_period = clock_period[period_index]; dcb->sync_period = 0; dcb->sync_offset = 0; dcb->flag = 0; #ifndef DC395x_NO_WIDE if ((dcb->dev_mode & NTC_DO_WIDE_NEGO) && (acb->config & HCC_WIDE_CARD)) dcb->sync_mode |= WIDE_NEGO_ENABLE; #endif #ifndef DC395x_NO_SYNC if (dcb->dev_mode & NTC_DO_SYNC_NEGO) if (!(lun) || current_sync_offset) dcb->sync_mode |= SYNC_NEGO_ENABLE; #endif if (dcb->target_lun != 0) { /* Copy settings */ struct DeviceCtlBlk *p; list_for_each_entry(p, &acb->dcb_list, list) if (p->target_id == dcb->target_id) break; dprintkdbg(DBG_1, "device_alloc: <%02i-%i> copy from <%02i-%i>\n", dcb->target_id, dcb->target_lun, p->target_id, p->target_lun); dcb->sync_mode = p->sync_mode; dcb->sync_period = p->sync_period; dcb->min_nego_period = 
p->min_nego_period; dcb->sync_offset = p->sync_offset; dcb->inquiry7 = p->inquiry7; } return dcb; } /** * adapter_add_device - Adds the device instance to the adaptor instance. * * @acb: The adapter device to be updated * @dcb: A newly created and initialised device instance to add. **/ static void adapter_add_device(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb) { /* backpointer to adapter */ dcb->acb = acb; /* set run_robin to this device if it is currently empty */ if (list_empty(&acb->dcb_list)) acb->dcb_run_robin = dcb; /* add device to list */ list_add_tail(&dcb->list, &acb->dcb_list); /* update device maps */ acb->dcb_map[dcb->target_id] |= (1 << dcb->target_lun); acb->children[dcb->target_id][dcb->target_lun] = dcb; } /** * adapter_remove_device - Removes the device instance from the adaptor * instance. The device instance is not check in any way or freed by this. * The caller is expected to take care of that. This will simply remove the * device from the adapters data strcutures. * * @acb: The adapter device to be updated * @dcb: A device that has previously been added to the adapter. **/ static void adapter_remove_device(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb) { struct DeviceCtlBlk *i; struct DeviceCtlBlk *tmp; dprintkdbg(DBG_0, "adapter_remove_device: <%02i-%i>\n", dcb->target_id, dcb->target_lun); /* fix up any pointers to this device that we have in the adapter */ if (acb->active_dcb == dcb) acb->active_dcb = NULL; if (acb->dcb_run_robin == dcb) acb->dcb_run_robin = dcb_get_next(&acb->dcb_list, dcb); /* unlink from list */ list_for_each_entry_safe(i, tmp, &acb->dcb_list, list) if (dcb == i) { list_del(&i->list); break; } /* clear map and children */ acb->dcb_map[dcb->target_id] &= ~(1 << dcb->target_lun); acb->children[dcb->target_id][dcb->target_lun] = NULL; dcb->acb = NULL; } /** * adapter_remove_and_free_device - Removes a single device from the adapter * and then frees the device information. 
* * @acb: The adapter device to be updated * @dcb: A device that has previously been added to the adapter. */ static void adapter_remove_and_free_device(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb) { if (list_size(&dcb->srb_going_list) > 1) { dprintkdbg(DBG_1, "adapter_remove_and_free_device: <%02i-%i> " "Won't remove because of %i active requests.\n", dcb->target_id, dcb->target_lun, list_size(&dcb->srb_going_list)); return; } adapter_remove_device(acb, dcb); kfree(dcb); } /** * adapter_remove_and_free_all_devices - Removes and frees all of the * devices associated with the specified adapter. * * @acb: The adapter from which all devices should be removed. **/ static void adapter_remove_and_free_all_devices(struct AdapterCtlBlk* acb) { struct DeviceCtlBlk *dcb; struct DeviceCtlBlk *tmp; dprintkdbg(DBG_1, "adapter_remove_and_free_all_devices: num=%i\n", list_size(&acb->dcb_list)); list_for_each_entry_safe(dcb, tmp, &acb->dcb_list, list) adapter_remove_and_free_device(acb, dcb); } /** * dc395x_slave_alloc - Called by the scsi mid layer to tell us about a new * scsi device that we need to deal with. We allocate a new device and then * insert that device into the adapters device list. * * @scsi_device: The new scsi device that we need to handle. **/ static int dc395x_slave_alloc(struct scsi_device *scsi_device) { struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)scsi_device->host->hostdata; struct DeviceCtlBlk *dcb; dcb = device_alloc(acb, scsi_device->id, scsi_device->lun); if (!dcb) return -ENOMEM; adapter_add_device(acb, dcb); return 0; } /** * dc395x_slave_destroy - Called by the scsi mid layer to tell us about a * device that is going away. * * @scsi_device: The new scsi device that we need to handle. 
**/ static void dc395x_slave_destroy(struct scsi_device *scsi_device) { struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)scsi_device->host->hostdata; struct DeviceCtlBlk *dcb = find_dcb(acb, scsi_device->id, scsi_device->lun); if (dcb) adapter_remove_and_free_device(acb, dcb); } /** * trms1040_wait_30us: wait for 30 us * * Waits for 30us (using the chip by the looks of it..) * * @io_port: base I/O address **/ static void __devinit trms1040_wait_30us(unsigned long io_port) { /* ScsiPortStallExecution(30); wait 30 us */ outb(5, io_port + TRM_S1040_GEN_TIMER); while (!(inb(io_port + TRM_S1040_GEN_STATUS) & GTIMEOUT)) /* nothing */ ; } /** * trms1040_write_cmd - write the secified command and address to * chip * * @io_port: base I/O address * @cmd: SB + op code (command) to send * @addr: address to send **/ static void __devinit trms1040_write_cmd(unsigned long io_port, u8 cmd, u8 addr) { int i; u8 send_data; /* program SB + OP code */ for (i = 0; i < 3; i++, cmd <<= 1) { send_data = NVR_SELECT; if (cmd & 0x04) /* Start from bit 2 */ send_data |= NVR_BITOUT; outb(send_data, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); outb((send_data | NVR_CLOCK), io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); } /* send address */ for (i = 0; i < 7; i++, addr <<= 1) { send_data = NVR_SELECT; if (addr & 0x40) /* Start from bit 6 */ send_data |= NVR_BITOUT; outb(send_data, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); outb((send_data | NVR_CLOCK), io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); } outb(NVR_SELECT, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); } /** * trms1040_set_data - store a single byte in the eeprom * * Called from write all to write a single byte into the SSEEPROM * Which is done one bit at a time. 
* * @io_port: base I/O address * @addr: offset into EEPROM * @byte: bytes to write **/ static void __devinit trms1040_set_data(unsigned long io_port, u8 addr, u8 byte) { int i; u8 send_data; /* Send write command & address */ trms1040_write_cmd(io_port, 0x05, addr); /* Write data */ for (i = 0; i < 8; i++, byte <<= 1) { send_data = NVR_SELECT; if (byte & 0x80) /* Start from bit 7 */ send_data |= NVR_BITOUT; outb(send_data, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); outb((send_data | NVR_CLOCK), io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); } outb(NVR_SELECT, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); /* Disable chip select */ outb(0, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); outb(NVR_SELECT, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); /* Wait for write ready */ while (1) { outb((NVR_SELECT | NVR_CLOCK), io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); outb(NVR_SELECT, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); if (inb(io_port + TRM_S1040_GEN_NVRAM) & NVR_BITIN) break; } /* Disable chip select */ outb(0, io_port + TRM_S1040_GEN_NVRAM); } /** * trms1040_write_all - write 128 bytes to the eeprom * * Write the supplied 128 bytes to the chips SEEPROM * * @eeprom: the data to write * @io_port: the base io port **/ static void __devinit trms1040_write_all(struct NvRamType *eeprom, unsigned long io_port) { u8 *b_eeprom = (u8 *)eeprom; u8 addr; /* Enable SEEPROM */ outb((inb(io_port + TRM_S1040_GEN_CONTROL) | EN_EEPROM), io_port + TRM_S1040_GEN_CONTROL); /* write enable */ trms1040_write_cmd(io_port, 0x04, 0xFF); outb(0, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); /* write */ for (addr = 0; addr < 128; addr++, b_eeprom++) trms1040_set_data(io_port, addr, *b_eeprom); /* write disable */ trms1040_write_cmd(io_port, 0x04, 0x00); outb(0, io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); /* Disable SEEPROM */ outb((inb(io_port + 
TRM_S1040_GEN_CONTROL) & ~EN_EEPROM), io_port + TRM_S1040_GEN_CONTROL); } /** * trms1040_get_data - get a single byte from the eeprom * * Called from read all to read a single byte into the SSEEPROM * Which is done one bit at a time. * * @io_port: base I/O address * @addr: offset into SEEPROM * * Returns the byte read. **/ static u8 __devinit trms1040_get_data(unsigned long io_port, u8 addr) { int i; u8 read_byte; u8 result = 0; /* Send read command & address */ trms1040_write_cmd(io_port, 0x06, addr); /* read data */ for (i = 0; i < 8; i++) { outb((NVR_SELECT | NVR_CLOCK), io_port + TRM_S1040_GEN_NVRAM); trms1040_wait_30us(io_port); outb(NVR_SELECT, io_port + TRM_S1040_GEN_NVRAM); /* Get data bit while falling edge */ read_byte = inb(io_port + TRM_S1040_GEN_NVRAM); result <<= 1; if (read_byte & NVR_BITIN) result |= 1; trms1040_wait_30us(io_port); } /* Disable chip select */ outb(0, io_port + TRM_S1040_GEN_NVRAM); return result; } /** * trms1040_read_all - read all bytes from the eeprom * * Read the 128 bytes from the SEEPROM. * * @eeprom: where to store the data * @io_port: the base io port **/ static void __devinit trms1040_read_all(struct NvRamType *eeprom, unsigned long io_port) { u8 *b_eeprom = (u8 *)eeprom; u8 addr; /* Enable SEEPROM */ outb((inb(io_port + TRM_S1040_GEN_CONTROL) | EN_EEPROM), io_port + TRM_S1040_GEN_CONTROL); /* read details */ for (addr = 0; addr < 128; addr++, b_eeprom++) *b_eeprom = trms1040_get_data(io_port, addr); /* Disable SEEPROM */ outb((inb(io_port + TRM_S1040_GEN_CONTROL) & ~EN_EEPROM), io_port + TRM_S1040_GEN_CONTROL); } /** * check_eeprom - get and check contents of the eeprom * * Read seeprom 128 bytes into the memory provider in eeprom. * Checks the checksum and if it's not correct it uses a set of default * values. 
*
 * @eeprom:	caller allocated structure to read the eeprom data into
 * @io_port:	io port to read from
 **/
static void __devinit check_eeprom(struct NvRamType *eeprom, unsigned long io_port)
{
	u16 *w_eeprom = (u16 *)eeprom;
	u16 w_addr;
	u16 cksum;
	u32 d_addr;
	u32 *d_eeprom;

	trms1040_read_all(eeprom, io_port);	/* read eeprom */

	/* The 64 16-bit words of a valid image sum to 0x1234 */
	cksum = 0;
	for (w_addr = 0, w_eeprom = (u16 *)eeprom; w_addr < 64;
	     w_addr++, w_eeprom++)
		cksum += *w_eeprom;
	if (cksum != 0x1234) {
		/*
		 * Checksum is wrong.
		 * Load a set of defaults into the eeprom buffer
		 */
		dprintkl(KERN_WARNING,
			"EEProm checksum error: using default values and options.\n");
		eeprom->sub_vendor_id[0] = (u8)PCI_VENDOR_ID_TEKRAM;
		eeprom->sub_vendor_id[1] = (u8)(PCI_VENDOR_ID_TEKRAM >> 8);
		eeprom->sub_sys_id[0] = (u8)PCI_DEVICE_ID_TEKRAM_TRMS1040;
		eeprom->sub_sys_id[1] =
		    (u8)(PCI_DEVICE_ID_TEKRAM_TRMS1040 >> 8);
		eeprom->sub_class = 0x00;
		eeprom->vendor_id[0] = (u8)PCI_VENDOR_ID_TEKRAM;
		eeprom->vendor_id[1] = (u8)(PCI_VENDOR_ID_TEKRAM >> 8);
		eeprom->device_id[0] = (u8)PCI_DEVICE_ID_TEKRAM_TRMS1040;
		eeprom->device_id[1] =
		    (u8)(PCI_DEVICE_ID_TEKRAM_TRMS1040 >> 8);
		eeprom->reserved = 0x00;

		/*
		 * d_eeprom keeps advancing through the buffer: 16 target
		 * entries first, then the two words and 12 zero words below.
		 */
		for (d_addr = 0, d_eeprom = (u32 *)eeprom->target;
		     d_addr < 16; d_addr++, d_eeprom++)
			*d_eeprom = 0x00000077;	/* cfg3,cfg2,period,cfg0 */

		*d_eeprom++ = 0x04000F07;	/* max_tag,delay_time,channel_cfg,scsi_id */
		*d_eeprom++ = 0x00000015;	/* reserved1,boot_lun,boot_target,reserved0 */
		for (d_addr = 0; d_addr < 12; d_addr++, d_eeprom++)
			*d_eeprom = 0x00;

		/* Now load defaults (maybe set by boot/module params) */
		set_safe_settings();
		fix_settings();
		eeprom_override(eeprom);

		/*
		 * Recompute the checksum: sum the first 63 words; the loop
		 * leaves w_eeprom on the 64th word, which receives the value
		 * that brings the total to 0x1234.
		 */
		eeprom->cksum = 0x00;
		for (w_addr = 0, cksum = 0, w_eeprom = (u16 *)eeprom;
		     w_addr < 63; w_addr++, w_eeprom++)
			cksum += *w_eeprom;

		*w_eeprom = 0x1234 - cksum;
		trms1040_write_all(eeprom, io_port);
		eeprom->delay_time = cfg_data[CFG_RESET_DELAY].value;
	} else {
		/* Checksum is good: keep the stored values, apply overrides */
		set_safe_settings();
		eeprom_index_to_delay(eeprom);
		eeprom_override(eeprom);
	}
}


/**
 * print_eeprom_settings - output the eeprom settings
 * to the kernel log so people can see what they were.
 *
 * @eeprom: The eeprom data structure to show details for.
 **/
static void __devinit print_eeprom_settings(struct NvRamType *eeprom)
{
	/* Speed is printed as <whole>.<tenth> from the clock_speed table entry */
	dprintkl(KERN_INFO, "Used settings: AdapterID=%02i, Speed=%i(%02i.%01iMHz), dev_mode=0x%02x\n",
		eeprom->scsi_id,
		eeprom->target[0].period,
		clock_speed[eeprom->target[0].period] / 10,
		clock_speed[eeprom->target[0].period] % 10,
		eeprom->target[0].cfg0);
	dprintkl(KERN_INFO, "               AdaptMode=0x%02x, Tags=%i(%02i), DelayReset=%is\n",
		eeprom->channel_cfg, eeprom->max_tag,
		1 << eeprom->max_tag, eeprom->delay_time);
}


/*
 * Free SG tables
 *
 * The tables were allocated one page at a time, so only the first SRB of
 * each page holds a pointer that was returned by kmalloc().
 */
static void adapter_sg_tables_free(struct AdapterCtlBlk *acb)
{
	int i;
	const unsigned srbs_per_page = PAGE_SIZE/SEGMENTX_LEN;

	for (i = 0; i < DC395x_MAX_SRB_CNT; i += srbs_per_page)
		kfree(acb->srb_array[i].segment_x);
}


/*
 * Allocate SG tables; as we have to pci_map them, an SG list (struct SGentry*)
 * should never cross a page boundary
 *
 * Enough pages are allocated for DC395x_MAX_SRB_CNT + 1 tables: one per
 * entry of srb_array plus one for acb->srb. Returns 0 on success and 1 if
 * an allocation failed (pages allocated so far are freed again).
 */
static int __devinit adapter_sg_tables_alloc(struct AdapterCtlBlk *acb)
{
	const unsigned mem_needed = (DC395x_MAX_SRB_CNT+1)
	                            *SEGMENTX_LEN;
	int pages = (mem_needed+(PAGE_SIZE-1))/PAGE_SIZE;
	const unsigned srbs_per_page = PAGE_SIZE/SEGMENTX_LEN;
	int srb_idx = 0;
	unsigned i = 0;
	struct SGentry *uninitialized_var(ptr);

	/* Start from NULL pointers so a partial allocation can be freed safely */
	for (i = 0; i < DC395x_MAX_SRB_CNT; i++)
		acb->srb_array[i].segment_x = NULL;

	dprintkdbg(DBG_1, "Allocate %i pages for SG tables\n", pages);
	while (pages--) {
		ptr = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!ptr) {
			adapter_sg_tables_free(acb);
			return 1;
		}
		dprintkdbg(DBG_1, "Allocate %li bytes at %p for SG segments %i\n",
			PAGE_SIZE, ptr, srb_idx);
		/* Hand out consecutive table-sized slices of this page */
		i = 0;
		while (i < srbs_per_page && srb_idx < DC395x_MAX_SRB_CNT)
			acb->srb_array[srb_idx++].segment_x =
			    ptr + (i++ * DC395x_MAX_SG_LISTENTRY);
	}
	/* The next free slice of the last page, if any, goes to acb->srb */
	if (i < srbs_per_page)
		acb->srb.segment_x =
		    ptr + (i * DC395x_MAX_SG_LISTENTRY);
	else
		dprintkl(KERN_DEBUG, "No space for tmsrb SG table reserved?!\n");
	return 0;
}



/**
 * adapter_print_config - print adapter connection and termination
 * config
 *
 * The io port in the adapter needs to have been set before calling
 * this function.
 *
 * @acb: The adapter to print the information for.
 **/
static void __devinit adapter_print_config(struct AdapterCtlBlk *acb)
{
	u8 bval;

	bval = DC395x_read8(acb, TRM_S1040_GEN_STATUS);
	dprintkl(KERN_INFO, "%sConnectors: ",
		((bval & WIDESCSI) ? "(Wide) " : ""));
	if (!(bval & CON5068))
		printk("ext%s ", !(bval & EXT68HIGH) ? "68" : "50");
	if (!(bval & CON68))
		printk("int68%s ", !(bval & INT68HIGH) ? "" : "(50)");
	if (!(bval & CON50))
		printk("int50 ");
	if ((bval & (CON5068 | CON50 | CON68)) ==
	    0 /*(CON5068 | CON50 | CON68) */ )
		printk(" Oops! (All 3?) ");
	bval = DC395x_read8(acb, TRM_S1040_GEN_CONTROL);
	printk(" Termination: ");
	if (bval & DIS_TERM)
		printk("Disabled\n");
	else {
		if (bval & AUTOTERM)
			printk("Auto ");
		if (bval & LOW8TERM)
			printk("Low ");