aboutsummaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
blob: f21f4a1d6a1ce144d2ce45c30123eb2010f93bb8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 *   inode->i_alloc_sem (vmtruncate_range)
 *   mm->mmap_sem
 *     page->flags PG_locked (lock_page)
 *       mapping->i_mmap_lock
 *         anon_vma->lock
 *           mm->page_table_lock or pte_lock
 *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
 *             swap_lock (in swap_duplicate, swap_info_get)
 *               mmlist_lock (in mmput, drain_mmlist and others)
 *               mapping->private_lock (in __set_page_dirty_buffers)
 *               inode_lock (in set_page_dirty's __mark_inode_dirty)
 *                 sb_lock (within inode_lock in fs/fs-writeback.c)
 *                 mapping->tree_lock (widely used, in set_page_dirty,
 *                           in arch-dependent flush_dcache_mmap_lock,
 *                           within inode_lock in __sync_single_inode)
 *
 * (code doesn't rely on that order so it could be switched around)
 * ->tasklist_lock
 *   anon_vma->lock      (memory_failure, collect_procs_anon)
 *     pte map lock
 */

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>

#include <asm/tlbflush.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

/*
 * Get a fresh anon_vma from the anon_vma slab cache using GFP_KERNEL.
 * Callers in this file treat a NULL result as allocation failure.
 */
static inline struct anon_vma *anon_vma_alloc(void)
{
	struct anon_vma *new_anon_vma;

	new_anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
	return new_anon_vma;
}

/*
 * Hand @anon_vma back to the slab cache that anon_vma_alloc() draws from.
 */
void anon_vma_free(struct anon_vma *anon_vma)
{
	struct kmem_cache *cache = anon_vma_cachep;

	kmem_cache_free(cache, anon_vma);
}

/*
 * Get a fresh anon_vma_chain from its slab cache using GFP_KERNEL.
 * Callers in this file treat a NULL result as allocation failure.
 */
static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
{
	struct anon_vma_chain *new_chain;

	new_chain = kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
	return new_chain;
}

/*
 * Hand @anon_vma_chain back to the slab cache that
 * anon_vma_chain_alloc() draws from.
 */
static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	struct kmem_cache *cache = anon_vma_chain_cachep;

	kmem_cache_free(cache, anon_vma_chain);
}

/**
 * anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, but if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma()
 * and that may actually touch the spinlock even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_sem held for reading.
 *
 * Returns 0 on success (including when @vma already had an anon_vma),
 * or -ENOMEM if the anon_vma_chain or a new anon_vma could not be
 * allocated.
 */
int anon_vma_prepare(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	struct anon_vma_chain *avc;

	might_sleep();
	if (unlikely(!anon_vma)) {
		struct mm_struct *mm = vma->vm_mm;
		struct anon_vma *allocated;

		/* Allocate the chain link first, before any lock is taken. */
		avc = anon_vma_chain_alloc();
		if (!avc)
			goto out_enomem;

		/* Prefer re-using a neighbour's anon_vma over allocating. */
		anon_vma = find_mergeable_anon_vma(vma);
		allocated = NULL;
		if (!anon_vma) {
			anon_vma = anon_vma_alloc();
			if (unlikely(!anon_vma))
				goto out_enomem_free_avc;
			/* Remember it so it can be freed if we lose the race. */
			allocated = anon_vma;
			/*
			 * This VMA had no anon_vma yet.  This anon_vma is
			 * the root of any anon_vma tree that might form.
			 */
			anon_vma->root = anon_vma;
		}

		anon_vma_lock(anon_vma);
		/* page_table_lock to protect against threads */
		spin_lock(&mm->page_table_lock);
		/* Recheck under the locks: someone may have beaten us to it. */
		if (likely(!vma->anon_vma)) {
			vma->anon_vma = anon_vma;
			avc->anon_vma = anon_vma;
			avc->vma = vma;
			list_add(&avc->same_vma, &vma->anon_vma_chain);
			list_add_tail(&avc->same_anon_vma, &anon_vma->head);
			/* Both objects are now in use; do not free them below. */
			allocated = NULL;
			avc = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		anon_vma_unlock(anon_vma);

		/* Release whatever was allocated here but not installed. */
		if (unlikely(allocated))
			anon_vma_free(allocated);
		if (unlikely(avc))
			anon_vma_chain_free(avc);
	}
	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}

/*
 * Tie @avc to both @vma and @anon_vma: record the two back-pointers,
 * put @avc on @vma's chain list, then, under the anon_vma lock, append
 * it to @anon_vma's list.
 */
static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->anon_vma = anon_vma;
	avc->vma = vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);

	/*
	 * New vmas have to go at the tail of the anon_vma's list; the
	 * comment in huge_memory.c:__split_huge_page() explains why
	 * this is critical.
	 */
	anon_vma_lock(anon_vma);
	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
	anon_vma_unlock(anon_vma);
}

/*
 * Link @dst to every anon_vma that @src is linked to, walking @src's
 * chain list in reverse and allocating one new chain entry per anon_vma.
 *
 * Returns 0 on success.  If an allocation fails, everything already
 * linked to @dst is unlinked again and -ENOMEM is returned.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *new_link, *src_link;

	list_for_each_entry_reverse(src_link, &src->anon_vma_chain, same_vma) {
		new_link = anon_vma_chain_alloc();
		if (!new_link) {
			/* Undo the partial clone before reporting failure. */
			unlink_anon_vmas(dst);
			return -ENOMEM;
		}
		anon_vma_chain_link(dst, new_link, src_link->anon_vma);
	}

	return 0;
}

/*
 * Set up the anon_vma linkage of a child @vma from the parent's @pvma:
 * @vma is attached to all of the parent's anon_vmas and then to a newly
 * allocated anon_vma of its own.
 *
 * Returns 0 on success (also when the parent has no anon_vma, in which
 * case nothing is done), or -ENOMEM on allocation failure, after any
 * links already made for @vma have been removed.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
	struct anon_vma *child_anon_vma;
	struct anon_vma_chain *own_link;

	/* Nothing to inherit if the parent has no anon_vma here. */
	if (!pvma->anon_vma)
		return 0;

	/*
	 * Attach to the parent's anon_vmas first, so that rmap can find
	 * pages in the child that have not been COWed.
	 */
	if (anon_vma_clone(vma, pvma))
		return -ENOMEM;

	/* Now build the child's own anon_vma and the link to it. */
	child_anon_vma = anon_vma_alloc();
	if (!child_anon_vma) {
		unlink_anon_vmas(vma);
		return -ENOMEM;
	}

	own_link = anon_vma_chain_alloc();
	if (!own_link) {
		anon_vma_free(child_anon_vma);
		unlink_anon_vmas(vma);
		return -ENOMEM;
	}

	/*
	 * Locking any anon_vma in this tree really takes the root's
	 * spinlock, so the child shares the parent's root.
	 */
	child_anon_vma->root = pvma->anon_vma->root;

	/*
	 * Because of KSM refcounts an anon_vma may outlive its process.
	 * Pin the root until this anon_vma is freed: the lock lives there.
	 */
	get_anon_vma(child_anon_vma->root);

	/* New (COWed) pages of this vma belong to the child's anon_vma. */
	vma->anon_vma = child_anon_vma;
	anon_vma_chain_link(vma, own_link, child_anon_vma);

	return 0;
}

/*
 * Take @anon_vma_chain off its anon_vma's list and free the anon_vma if
 * that left it with an empty list and no external references.
 */
static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
{
	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
	int unused;

	/* A failed anon_vma_fork can leave a chain entry with no anon_vma. */
	if (!anon_vma)
		return;

	anon_vma_lock(anon_vma);
	list_del(&anon_vma_chain->same_anon_vma);
	/* Decide under the lock whether the anon_vma must be collected. */
	unused = list_empty(&anon_vma->head) &&
		 !anonvma_external_refcount(anon_vma);
	anon_vma_unlock(anon_vma);

	if (!unused)
		return;

	/* A non-root anon_vma also gives up its reference on the root. */
	if (anon_vma->root != anon_vma)
		drop_anon_vma(anon_vma->root);
	anon_vma_free(anon_vma);
}

/*
 * Detach @vma from every anon_vma it is chained to and free the chain
 * entries.
 */
void unlink_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *link, *tmp;

	/*
	 * The chain list runs from newest to oldest, so walking it in
	 * order makes sure the root anon_vma is the last one freed.
	 */
	list_for_each_entry_safe(link, tmp, &vma->anon_vma_chain, same_vma) {
		anon_vma_unlink(link);
		list_del(&link->same_vma);
		anon_vma_chain_free(link);
	}
}

/*
 * Slab constructor for the anon_vma cache: sets up the list head, the
 * spinlock and the external refcount of the object at @data.
 */
static void anon_vma_ctor(void *data)
{
	struct anon_vma *obj = data;

	INIT_LIST_HEAD(&obj->head);
	spin_lock_init(&obj->lock);
	anonvma_external_refcount_init(obj);
}

/*
 * Create the two slab caches used by this file.
 *
 * The anon_vma cache is created with SLAB_DESTROY_BY_RCU and the
 * anon_vma_ctor constructor; the comment in __page_lock_anon_vma()
 * relies on that flag for its optimistic spin_lock.  Both caches pass
 * SLAB_PANIC and their results are not checked here (presumably because
 * SLAB_PANIC makes a creation failure fatal - confirm against slab docs).
 */
void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is
 * tricky: page_lock_anon_vma relies on RCU to guard against the races.
 *
 * On success, returns the page's anon_vma with the root anon_vma's
 * spinlock held and the RCU read-side section still open; the caller
 * releases both with page_unlock_anon_vma().  Returns NULL, with nothing
 * held, if @page does not have an anon mapping or is not mapped.
 */
struct anon_vma *__page_lock_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma, *root_anon_vma;
	unsigned long anon_mapping;

	rcu_read_lock();
	/* Sample page->mapping once; it may change underneath us. */
	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	/* Strip the PAGE_MAPPING_ANON tag to recover the pointer. */
	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	/* Remember which root was locked so the same one gets unlocked. */
	root_anon_vma = ACCESS_ONCE(anon_vma->root);
	spin_lock(&root_anon_vma->lock);

	/*
	 * If this page is still mapped, then its anon_vma cannot have been
	 * freed.  But if it has been unmapped, we have no security against
	 * the anon_vma structure being freed and reused (for another anon_vma:
	 * SLAB_DESTROY_BY_RCU guarantees that - so the spin_lock above cannot
	 * corrupt): with anon_vma_prepare() or anon_vma_fork() redirecting
	 * anon_vma->root before page_unlock_anon_vma() is called to unlock.
	 */
	if (page_mapped(page))
		return anon_vma;

	spin_unlock(&root_anon_vma->lock);
out:
	rcu_read_unlock();
	return NULL;
}

/*
 * Counterpart of __page_lock_anon_vma(): release the anon_vma lock
 * (annotated below as the root's lock) and then leave the RCU read-side
 * section that the lock function left open.
 */
void page_unlock_anon_vma(struct anon_vma *anon_vma)
	__releases(&anon_vma->root->lock)
	__releases(RCU)
{
	anon_vma_unlock(anon_vma);
	rcu_read_unlock();
}

/*
 * At what user virtual address is page expected in @vma?
 *
 * The page's file offset is derived from page->index (scaled by the
 * huge page order for hugetlb vmas) and translated through @vma's
 * vm_pgoff and vm_start.
 *
 * Returns the virtual address, or -EFAULT if the page's index/offset is
 * not within the range mapped by @vma.
 */
inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
	unsigned long address;
	pgoff_t pgoff;

	if (unlikely(is_vm_hugetlb_page(vma)))
		pgoff = page->index << huge_page_order(page_hstate(page));
	else
		pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);

	/* page should be within @vma mapping range */
	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
		return -EFAULT;

	return address;
}

/*
 * At what user virtual address is page expected in vma?
 * Caller should check the page is actually part of the vma.
 *
 * Returns -EFAULT when @page cannot belong to @vma: an anon page whose
 * anon_vma root differs from the vma's, a file page whose mapping is not
 * the vma's file mapping, a page with no mapping, or a VM_NONLINEAR vma.
 * Otherwise returns whatever vma_address() computes.
 */
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
	if (PageAnon(page)) {
		struct anon_vma *page__anon_vma = page_anon_vma(page);

		/*
		 * Note: this check makes swapoff's unuse_vma() more
		 * efficient, and it is needed to match anon_vma when KSM
		 * is active.
		 */
		if (!vma->anon_vma || !page__anon_vma ||
		    vma->anon_vma->root != page__anon_vma->root)
			return -EFAULT;

		return vma_address(page, vma);
	}

	/* Not anon: only a linear file mapping can be resolved here. */
	if (!page->mapping || (vma->vm_flags & VM_NONLINEAR))
		return -EFAULT;

	if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
		return -EFAULT;

	return vma_address(page, vma);
}

/*
 * Check that @page is mapped at @address into @mm.
 *
 * @page:    the page expected at @address
 * @mm:      the address space whose page tables are walked
 * @address: user virtual address to look up
 * @ptlp:    on success, receives the lock that is held on return
 * @sync:    if zero, a not-present pte is rejected before taking the lock
 *
 * If @sync is false, page_check_address may perform a racy check to avoid
 * the page table lock when the pte is not present (helpful when reclaiming
 * highly shared pages).
 *
 * On success returns with pte mapped and locked.  Returns NULL, with
 * nothing left mapped or locked, when no present pte at @address maps
 * @page.
 */
pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
			  unsigned long address, spinlock_t **ptlp, int sync)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	if (unlikely(PageHuge(page))) {
		/*
		 * huge_pte_offset() returns NULL when the upper levels of
		 * the page table are not populated for @address; bail out
		 * instead of dereferencing it at the check below.
		 */
		pte = huge_pte_offset(mm, address);
		if (!pte)
			return NULL;

		ptl = &mm->page_table_lock;
		goto check;
	}

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return NULL;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return NULL;
	/* A transparent huge pmd has no pte level to inspect. */
	if (pmd_trans_huge(*pmd))
		return NULL;

	pte = pte_offset_map(pmd, address);
	/* Make a quick check before getting the lock */
	if (!sync && !pte_present(*pte)) {
		pte_unmap(pte);
		return NULL;
	}

	ptl = pte_lockptr(mm, pmd);
check:
	spin_lock(ptl);
	/* Only a present pte pointing at this very page frame counts. */
	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
		*ptlp = ptl;
		return pte;
	}
	pte_unmap_unlock(pte, ptl);
	return NULL;
}

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Computes where @page would sit in @vma and looks for it in the page
 * tables of @vma's mm.  Returns 1 if a mapping is found there, 0 if the
 * page lies outside the VMA's range or is not mapped in that mm.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	unsigned long addr = vma_address(page, vma);
	spinlock_t *ptl;
	pte_t *ptep;

	/* out of vma range */
	if (addr == -EFAULT)
		return 0;

	ptep = page_check_address(page, vma->vm_mm, addr, &ptl, 1);
	if (ptep) {
		/* Found it; only the answer was wanted, so let go again. */
		pte_unmap_unlock(ptep, ptl);
		return 1;
	}

	/* the page is not in this mm */
	return 0;
}

/*
 * Subfunctions of page_referenced: page_referenced_one called
 * repeatedly from either page_referenced_anon or page_referenced_file.
 *
 * Checks (and clears) the young bit of the single mapping of @page at
 * @address in @vma.
 *
 * @mapcount: the caller's count of mappings still to visit.  It is
 *            decremented once the mapping has been examined, and forced
 *            to 0 for a VM_LOCKED vma so that the caller's loop stops.
 *            It is left untouched when no pte for the page is found.
 * @vm_flags: VM_LOCKED is or-ed in for a VM_LOCKED vma; otherwise the
 *            vma's flags are or-ed in when a reference was counted.
 *
 * Returns the number of references counted for this mapping.
 */
int page_referenced_one(struct page *page, struct vm_area_struct *vma,
			unsigned long address, unsigned int *mapcount,
			unsigned long *vm_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	int referenced = 0;

	/*
	 * Don't want to elevate referenced for mlocked page that gets this far,
	 * in order that it progresses to try_to_unmap and is moved to the
	 * unevictable list.
	 */
	if (vma->vm_flags & VM_LOCKED) {
		*mapcount = 0;	/* break early from loop */
		*vm_flags |= VM_LOCKED;
		goto out;
	}

	/* Pretend the page is referenced if the task has the
	   swap token and is in the middle of a page fault. */
	if (mm != current->mm && has_swap_token(mm) &&
			rwsem_is_locked(&mm->mmap_sem))
		referenced++;

	if (unlikely(PageTransHuge(page))) {
		pmd_t *pmd;

		/* Huge pages are checked at pmd level under page_table_lock. */
		spin_lock(&mm->page_table_lock);
		pmd = page_check_address_pmd(page, mm, address,
					     PAGE_CHECK_ADDRESS_PMD_FLAG);
		if (pmd && !pmd_trans_splitting(*pmd) &&
		    pmdp_clear_flush_young_notify(vma, address, pmd))
			referenced++;
		spin_unlock(&mm->page_table_lock);
	} else {
		pte_t *pte;
		spinlock_t *ptl;

		/* sync == 0: allow the racy not-present shortcut. */
		pte = page_check_address(page, mm, address, &ptl, 0);
		if (!pte)
			goto out;

		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			/*
			 * Don't treat a reference through a sequentially read
			 * mapping as such.  If the page has been used in
			 * another mapping, we will catch it; if this other
			 * mapping is already gone, the unmap path will have
			 * set PG_referenced or activated the page.
			 */
			if (likely(!VM_SequentialReadHint(vma)))
				referenced++;
		}
		pte_unmap_unlock(pte, ptl);
	}

	/* One mapping examined; one fewer for the caller to look for. */
	(*mapcount)--;

	if (referenced)
		*vm_flags |= vma->vm_flags;
out:
	return referenced;
}

/*
 * Referenced check for an anonymous page: with the page's anon_vma
 * locked, visit each vma on its list and sum what page_referenced_one()
 * reports, stopping once the mapcount noted at the start is used up.
 *
 * Returns the number of references found; 0 if the anon_vma could not
 * be locked.
 */
static int page_referenced_anon(struct page *page,
				struct mem_cgroup *mem_cont,
				unsigned long *vm_flags)
{
	struct anon_vma_chain *link;
	struct anon_vma *anon_vma;
	unsigned int remaining;
	int nr_refs = 0;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return 0;

	remaining = page_mapcount(page);
	list_for_each_entry(link, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = link->vma;
		unsigned long address = vma_address(page, vma);

		/*
		 * Skip vmas that do not cover the page, and - when
		 * reclaiming on behalf of a cgroup - references that
		 * come from a different cgroup.
		 */
		if (address == -EFAULT ||
		    (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)))
			continue;

		nr_refs += page_referenced_one(page, vma, address,
					       &remaining, vm_flags);
		if (!remaining)
			break;
	}

	page_unlock_anon_vma(anon_vma);
	return nr_refs;
}

/**
 * page_referenced_file - referenced check for object-based rmap
 * @page: the page we're checking references on.
 * @mem_cont: target memory controller
 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
 *
 * For a page mapped from an object, walk the vmas registered in the
 * page->mapping prio tree at the page's offset and check/clear the
 * referenced state in each.  Returns the number of references found.
 *
 * Only called from page_referenced, and only for object-based pages.
 */
static int page_referenced_file(struct page *page,
				struct mem_cgroup *mem_cont,
				unsigned long *vm_flags)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct prio_tree_iter iter;
	struct vm_area_struct *vma;
	unsigned int remaining;
	int nr_refs = 0;

	/*
	 * The caller has checked page->mapping and !PageAnon, so this
	 * must be a file page; checking page->mapping also rules out the
	 * window just before it is set on an anon page.
	 */
	BUG_ON(PageAnon(page));

	/*
	 * Holding the page lock keeps truncation from NULLifying
	 * page->mapping and keeps the structure at mapping from being
	 * freed and reused, which makes taking mapping->i_mmap_lock safe.
	 */
	BUG_ON(!PageLocked(page));

	spin_lock(&mapping->i_mmap_lock);

	/*
	 * i_mmap_lock does nothing to stabilize mapcount, but sampling
	 * it after spinning makes it more likely to be accurate.
	 */
	remaining = page_mapcount(page);

	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);

		/*
		 * Skip vmas that do not cover the page, and - when
		 * reclaiming on behalf of a cgroup - references that
		 * come from a different cgroup.
		 */
		if (address == -EFAULT ||
		    (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)))
			continue;

		nr_refs += page_referenced_one(page, vma, address,
					       &remaining, vm_flags);
		if (!remaining)
			break;
	}

	spin_unlock(&mapping->i_mmap_lock);
	return nr_refs;
}

/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 * @is_locked: caller holds lock on the page
 * @mem_cont: target memory controller
 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 *
 * *@vm_flags is reset to 0 on entry.  If the page lock is needed but
 * cannot be taken without waiting, the page is counted as referenced
 * once and the mappings are not walked.
 */
int page_referenced(struct page *page,
		    int is_locked,
		    struct mem_cgroup *mem_cont,
		    unsigned long *vm_flags)
{
	int referenced = 0;
	int we_locked = 0;

	*vm_flags = 0;
	if (page_mapped(page) && page_rmapping(page)) {
		/*
		 * File and KSM pages are walked under the page lock; take
		 * it here if the caller does not already hold it.
		 */
		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
			we_locked = trylock_page(page);
			if (!we_locked) {
				/* Lock is contended: assume referenced. */
				referenced++;
				goto out;
			}
		}
		/* Dispatch on the kind of page; KSM is tested first. */
		if (unlikely(PageKsm(page)))
			referenced += page_referenced_ksm(page, mem_cont,
								vm_flags);
		else if (PageAnon(page))
			referenced += page_referenced_anon(page, mem_cont,
								vm_flags);
		else if (page->mapping)
			referenced += page_referenced_file(page, mem_cont,
								vm_flags);
		/* Only drop the page lock if it was taken here. */
		if (we_locked)
			unlock_page(page);
	}
out:
	/* Runs on every path, including the contended-lock shortcut. */
	if (page_test_and_clear_young(page))
		referenced++;

	return referenced;
}

/*
 * Write-protect and clean the pte that maps @page at @address in @vma.
 *
 * Returns 1 if the pte was dirty or writable and has been rewritten,
 * 0 if the page is not mapped there or the pte needed no change.
 */
static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
			    unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	pte_t *ptep;
	pte_t entry;

	ptep = page_check_address(page, mm, address, &ptl, 1);
	if (!ptep)
		return 0;

	/* Neither dirty nor writable: nothing to do for this mapping. */
	if (!pte_dirty(*ptep) && !pte_write(*ptep)) {
		pte_unmap_unlock(ptep, ptl);
		return 0;
	}

	flush_cache_page(vma, address, pte_pfn(*ptep));
	/* Clear and flush the old pte, then install the cleaned copy. */
	entry = ptep_clear_flush_notify(vma, address, ptep);
	entry = pte_mkclean(pte_wrprotect(entry));
	set_pte_at(mm, address, ptep, entry);

	pte_unmap_unlock(ptep, ptl);
	return 1;
}

/*
 * Run page_mkclean_one() on every VM_SHARED vma of @mapping that covers
 * @page, holding mapping->i_mmap_lock across the walk.
 *
 * Returns the number of mappings that were cleaned.
 */
static int page_mkclean_file(struct address_space *mapping, struct page *page)
{
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct prio_tree_iter iter;
	struct vm_area_struct *vma;
	int cleaned = 0;

	BUG_ON(PageAnon(page));

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address;

		/* Only shared mappings are considered. */
		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;

		cleaned += page_mkclean_one(page, vma, address);
	}
	spin_unlock(&mapping->i_mmap_lock);

	return cleaned;
}

/*
 * Clean and write-protect all shared mappings of a locked, mapped page
 * that has an address_space.
 *
 * Returns the count from page_mkclean_file(), or 1 if page_test_dirty()
 * reported the page dirty (that state is then cleared); 0 if the page
 * is not mapped or has no mapping.  The page must be locked.
 */
int page_mkclean(struct page *page)
{
	struct address_space *mapping;
	int ret;

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	ret = page_mkclean_file(mapping, page);
	if (page_test_dirty(page)) {
		page_clear_dirty(page, 1);
		ret = 1;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(page_mkclean);

/**
 * page_move_anon_rmap - move a page to our anon_vma
 * @page:	the page to move to our anon_vma
 * @vma:	the vma the page belongs to
 * @address:	the user virtual address mapped
 *
 * After a COW event a page may belong to one process only.  Such a page
 * can be pointed at the anon_vma of just that process, which saves the
 * rmap code from searching the parent or sibling processes.
 */
void page_move_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	void *tagged;

	VM_BUG_ON(!PageLocked(page));
	VM_BUG_ON(!anon_vma);
	VM_BUG_ON(page->index != linear_page_index(vma, address));

	/* page->mapping holds the anon_vma pointer plus PAGE_MAPPING_ANON. */
	tagged = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) tagged;
}

/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page:	Page to add to rmap
 * @vma:	VM area to add page to.
 * @address:	User virtual address of the mapping
 * @exclusive:	the page is exclusively owned by the current process
 *
 * Does nothing if @page is already PageAnon.  Otherwise stores the
 * tagged anon_vma pointer in page->mapping and sets page->index.
 */
static void __page_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	void *tagged;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;

	/*
	 * A page that is not exclusively mapped into this vma has to be
	 * tied to the _oldest_ possible anon_vma, i.e. the root.
	 */
	tagged = (void *) (exclusive ? anon_vma : anon_vma->root) +
		 PAGE_MAPPING_ANON;

	page->mapping = (struct address_space *) tagged;
	page->index = linear_page_index(vma, address);
}

/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * The checks are compiled in only under CONFIG_DEBUG_VM; without it the
 * function body is empty.
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * The page's anon-rmap details (mapping and index) are guaranteed to
	 * be set up correctly at this point.
	 *
	 * We have exclusion against page_add_anon_rmap because the caller
	 * always holds the page locked, except if called from page_dup_rmap,
	 * in which case the page is already known to be setup.
	 *
	 * We have exclusion against page_add_new_anon_rmap because those pages
	 * are initially only visible via the pagetables, and the pte is locked
	 * over the call to page_add_new_anon_rmap.
	 */
	/* The page and the vma must hang off the same anon_vma root ... */
	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
	/* ... and the page's index must match its position in the vma. */
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}

/**
 * page_add_anon_rmap - add pte mapping to an anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * The caller needs to hold the pte lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting,
 * and to ensure that PageAnon is not being upgraded racily to PageKsm
 * (but PageKsm is never downgraded to PageAnon).
 *
 * This is do_page_add_anon_rmap() with @exclusive fixed to 0.
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	/* exclusive == 0: the page is not assumed to be exclusively owned. */
	do_page_add_anon_rmap(page, vma, address, 0);
}

/*
 * Variant of page_add_anon_rmap() for do_swap_page, which often meets
 * pages that are exclusively owned by the current process and passes
 * that fact in @exclusive.  All other callers should keep using
 * page_add_anon_rmap().
 */
void do_page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	int first = atomic_inc_and_test(&page->_mapcount);

	/* Only the first mapping of the page bumps the zone counter. */
	if (first) {
		if (PageTransHuge(page))
			__inc_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
		else
			__inc_zone_page_state(page, NR_ANON_PAGES);
	}

	/* KSM pages get the mapcount and accounting above, nothing more. */
	if (unlikely(PageKsm(page)))
		return;

	VM_BUG_ON(!PageLocked(page));
	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);

	if (!first) {
		__page_check_anon_rmap(page, vma, address);
		return;
	}
	__page_set_anon_rmap(page, vma, address, exclusive);
}

/**
 * page_add_new_anon_rmap - add the first pte mapping of a new anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * Like page_add_anon_rmap, but restricted to *new* pages, which lets the
 * mapcount be stored directly instead of using inc-and-test.
 * The page need not be locked.
 */
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);

	SetPageSwapBacked(page);
	/* _mapcount starts at -1, so storing 0 records the first mapping */
	atomic_set(&page->_mapcount, 0);
	__inc_zone_page_state(page, PageTransHuge(page) ?
				NR_ANON_TRANSPARENT_HUGEPAGES : NR_ANON_PAGES);
	__page_set_anon_rmap(page, vma, address, 1);

	/* Finally put the page on the list matching its evictability. */
	if (!page_evictable(page, vma)) {
		add_page_to_unevictable_list(page);
		return;
	}
	lru_cache_add_lru(page, LRU_ACTIVE_ANON);
}

/**
 * page_add_file_rmap - account one more pte mapping of a file page
 * @page: the page to add the mapping to
 *
 * Must be called with the pte lock held.
 */
void page_add_file_rmap(struct page *page)
{
	/* Further mappings of an already mapped page only bump the count. */
	if (!atomic_inc_and_test(&page->_mapcount))
		return;

	/* First mapping: the page now counts as file-mapped. */
	__inc_zone_page_state(page, NR_FILE_MAPPED);
	mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
}

/**
 * page_remove_rmap - drop one pte mapping from a page
 * @page: page to remove mapping from
 *
 * Must be called with the pte lock held.
 */
void page_remove_rmap(struct page *page)
{
	/* Other ptes still map the page: only the count needed adjusting. */
	if (!atomic_add_negative(-1, &page->_mapcount))
		return;

	/*
	 * The last pte is gone, so on s390 the dirty state has to move from
	 * the storage key into struct page.  For an anon page this can
	 * usually be skipped because the page is about to be freed - unless
	 * it sits in swapcache, where another pte slot may still hold the
	 * swap entry while the page has not been written to swap yet.
	 */
	if (!PageAnon(page) || PageSwapCache(page)) {
		if (page_test_dirty(page)) {
			page_clear_dirty(page, 1);
			set_page_dirty(page);
		}
	}

	/*
	 * Hugepages are, for now, neither counted in NR_ANON_PAGES or
	 * NR_FILE_MAPPED nor charged by memcg.
	 */
	if (unlikely(PageHuge(page)))
		return;

	if (!PageAnon(page)) {
		__dec_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
	} else {
		mem_cgroup_uncharge_page(page);
		__dec_zone_page_state(page, PageTransHuge(page) ?
				NR_ANON_TRANSPARENT_HUGEPAGES : NR_ANON_PAGES);
	}

	/*
	 * Resetting the PageAnon mapping here would be tidy, but it could
	 * clobber a racing page_add_anon_rmap that increments mapcount after
	 * us yet sets mapping before us.  The reset is therefore left to
	 * free_hot_cold_page, and the mapping must only be trusted while the
	 * page is mapped.  Keeping it also lets swapoff reinstate ptes
	 * faster for pages that are still in swapcache.
	 */
}

/*
 * Subfunctions of try_to_unmap: try_to_unmap_one called
 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
 *
 * try_to_unmap_one - deal with one pte mapping of @page
 * @page:	page whose mapping is examined
 * @vma:	vm area expected to map the page
 * @address:	user virtual address at which @vma would map the page
 * @flags:	TTU_* action and modifier flags
 *
 * Depending on @flags and the vma, the pte is either replaced by a hwpoison,
 * swap or migration entry (or just left cleared for a plain file page), or
 * it is left in place and the page is mlocked instead.
 *
 * Returns SWAP_AGAIN by default (also when no pte is found), SWAP_FAIL when
 * the pte was recently referenced or swap_duplicate() failed, and SWAP_MLOCK
 * when the vma is VM_LOCKED and the page was mlocked under mmap_sem.
 */
int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		     unsigned long address, enum ttu_flags flags)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	int ret = SWAP_AGAIN;

	/*
	 * On success the pte comes back with *ptl taken; every path below
	 * releases it through pte_unmap_unlock().
	 */
	pte = page_check_address(page, mm, address, &ptl, 0);
	if (!pte)
		goto out;

	/*
	 * If the page is mlock()d, we cannot swap it out.
	 * If it's recently referenced (perhaps page_referenced
	 * skipped over this mm) then we should reactivate it.
	 */
	if (!(flags & TTU_IGNORE_MLOCK)) {
		if (vma->vm_flags & VM_LOCKED)
			goto out_mlock;

		/* munlock only wanted the VM_LOCKED check: never unmap. */
		if (TTU_ACTION(flags) == TTU_MUNLOCK)
			goto out_unmap;
	}
	if (!(flags & TTU_IGNORE_ACCESS)) {
		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			ret = SWAP_FAIL;
			goto out_unmap;
		}
  	}

	/* Nuke the page table entry. */
	flush_cache_page(vma, address, page_to_pfn(page));
	pteval = ptep_clear_flush_notify(vma, address, pte);

	/* Move the dirty bit to the physical page now the pte is gone. */
	if (pte_dirty(pteval))
		set_page_dirty(page);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
		/* Poisoned page: leave a hwpoison entry in place of the pte. */
		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
		set_pte_at(mm, address, pte,
				swp_entry_to_pte(make_hwpoison_entry(page)));
	} else if (PageAnon(page)) {
		swp_entry_t entry = { .val = page_private(page) };

		if (PageSwapCache(page)) {
			/*
			 * Store the swap location in the pte.
			 * See handle_pte_fault() ...
			 */
			if (swap_duplicate(entry) < 0) {
				/* Undo the unmap: put the old pte back. */
				set_pte_at(mm, address, pte, pteval);
				ret = SWAP_FAIL;
				goto out_unmap;
			}
			/*
			 * Put the mm on init_mm's mmlist if it is on no list
			 * yet; the unlocked test is repeated under
			 * mmlist_lock before adding.
			 */
			if (list_empty(&mm->mmlist)) {
				spin_lock(&mmlist_lock);
				if (list_empty(&mm->mmlist))
					list_add(&mm->mmlist, &init_mm.mmlist);
				spin_unlock(&mmlist_lock);
			}
			/* One anon page of rss became one swap entry. */
			dec_mm_counter(mm, MM_ANONPAGES);
			inc_mm_counter(mm, MM_SWAPENTS);
		} else if (PAGE_MIGRATION) {
			/*
			 * Store the pfn of the page in a special migration
			 * pte. do_swap_page() will wait until the migration
			 * pte is removed and then restart fault handling.
			 */
			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
			entry = make_migration_entry(page, pte_write(pteval));
		}
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
		BUG_ON(pte_file(*pte));
	} else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
		/* Establish migration entry for a file page */
		swp_entry_t entry;
		entry = make_migration_entry(page, pte_write(pteval));
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
	} else
		/* Plain file page: the pte simply stays cleared. */
		dec_mm_counter(mm, MM_FILEPAGES);

	/*
	 * The pte no longer maps the page: drop its mapcount contribution
	 * and a page reference (presumably the one held for this mapping).
	 */
	page_remove_rmap(page);
	page_cache_release(page);

out_unmap:
	pte_unmap_unlock(pte, ptl);
out:
	return ret;

out_mlock:
	/* The pte itself is left untouched on this path. */
	pte_unmap_unlock(pte, ptl);


	/*
	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
	 * unstable result and race. Plus, We can't wait here because
	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
	 * if trylock failed, the page remain in evictable lru and later
	 * vmscan could retry to move the page to unevictable lru if the
	 * page is actually mlocked.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		/* Recheck VM_LOCKED now that mmap_sem is held. */
		if (vma->vm_flags & VM_LOCKED) {
			mlock_vma_page(page);
			ret = SWAP_MLOCK;
		}
		up_read(&vma->vm_mm->mmap_sem);
	}
	return ret;
}

/*
 * objrmap doesn't work for nonlinear VMAs because the assumption that
 * offset-into-file correlates with offset-into-virtual-addresses does not hold.
 * Consequently, given a particular page and its ->index, we cannot locate the
 * ptes which are mapping that page without an exhaustive linear search.
 *
 * So what this code does is a mini "virtual scan" of each nonlinear VMA which
 * maps the file to which the target page belongs.  The ->vm_private_data field
 * holds the current cursor into that scan.  Successive searches will circulate
 * around the vma's virtual address space.
 *
 * So as more replacement pressure is applied to the pages in a nonlinear VMA,
 * more scanning pressure is placed against them as well.   Eventually pages
 * will become fully unmapped and are eligible for eviction.
 *
 * For very sparsely populated VMAs this is a little inefficient - chances are
 * there won't be many ptes located within the scan cluster.  In this case
 * maybe we could scan further - to the end of the pte page, perhaps.
 *
 * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
 * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
 * rather than unmapping them.  If we encounter the "check_page" that vmscan is
 * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
 */
/*
 * CLUSTER_SIZE is the span of virtual address space handled by one
 * try_to_unmap_cluster() call: the smaller of 32 pages and PMD_SIZE.
 * CLUSTER_MASK clears the offset-within-cluster bits, i.e. rounds a value
 * down to a CLUSTER_SIZE boundary.
 */
#define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))

/*
 * try_to_unmap_cluster - scan one cluster of a nonlinear vma
 * @cursor:	byte offset into @vma selecting the cluster to scan
 * @mapcount:	decremented once for every pte unmapped here
 * @vma:	the nonlinear vm area being scanned
 * @check_page:	page the caller is really interested in
 *
 * Walks the ptes of the CLUSTER_SIZE-aligned window that contains
 * vma->vm_start + @cursor, clamped to the vma.  If the vma is VM_LOCKED and
 * mmap_sem could be taken for read, present pages are mlocked instead of
 * unmapped.  Otherwise every present pte that was not recently referenced
 * is unmapped.
 *
 * Returns SWAP_MLOCK if @check_page was found in a locked vma, else
 * SWAP_AGAIN.
 */
static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
		struct vm_area_struct *vma, struct page *check_page)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	struct page *page;
	unsigned long address;
	unsigned long end;
	int ret = SWAP_AGAIN;
	int locked_vma = 0;

	/* Cluster-aligned window around the cursor, clamped to the vma. */
	address = (vma->vm_start + cursor) & CLUSTER_MASK;
	end = address + CLUSTER_SIZE;
	if (address < vma->vm_start)
		address = vma->vm_start;
	if (end > vma->vm_end)
		end = vma->vm_end;

	/* Walk down to the pmd; give up if any level is not present. */
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return ret;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return ret;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return ret;

	/*
	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
	 * keep the sem while scanning the cluster for mlocking pages.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		locked_vma = (vma->vm_flags & VM_LOCKED);
		if (!locked_vma)
			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
	}

	pte = pte_offset_map_lock(mm, pmd, address, &ptl);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	for (; address < end; pte++, address += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, address, *pte);
		/* Only normal, non-anonymous pages are expected here. */
		BUG_ON(!page || PageAnon(page));

		if (locked_vma) {
			mlock_vma_page(page);   /* no-op if already mlocked */
			if (page == check_page)
				ret = SWAP_MLOCK;
			continue;	/* don't unmap */
		}

		/* Recently referenced: leave this pte mapped for now. */
		if (ptep_clear_flush_young_notify(vma, address, pte))
			continue;

		/* Nuke the page table entry. */
		flush_cache_page(vma, address, pte_pfn(*pte));
		pteval = ptep_clear_flush_notify(vma, address, pte);

		/* If nonlinear, store the file page offset in the pte. */
		if (page->index != linear_page_index(vma, address))
			set_pte_at(mm, address, pte, pgoff_to_pte(page->index));

		/* Move the dirty bit to the physical page now the pte is gone. */
		if (pte_dirty(pteval))
			set_page_dirty(page);

		page_remove_rmap(page);
		page_cache_release(page);
		dec_mm_counter(mm, MM_FILEPAGES);
		(*mapcount)--;
	}
	/* The loop left pte one entry past the last one scanned. */
	pte_unmap_unlock(pte - 1, ptl);
	if (locked_vma)
		up_read(&vma->vm_mm->mmap_sem);
	return ret;
}

/*
 * Report whether @vma is a growable (stack-like) area that still carries
 * every VM_STACK_INCOMPLETE_SETUP flag.
 */
bool is_vma_temporary_stack(struct vm_area_struct *vma)
{
	/* An area that grows in neither direction is not a stack at all. */
	if (!(vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP)))
		return false;

	/* All of the incomplete-setup bits must be set, not just some. */
	return (vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
						VM_STACK_INCOMPLETE_SETUP;
}

/**
 * try_to_unmap_anon - unmap or unlock an anonymous page via object-based rmap
 * @page: the page to unmap/unlock
 * @flags: action and flags
 *
 * Visits every vma chained on the anon_vma that the page's mapping pointer
 * leads to, and hands each possible mapping to try_to_unmap_one().
 *
 * Only try_to_unmap/try_to_munlock call this, and only for anonymous pages.
 * In the try_to_munlock() case the mmap_sem of the mm owning the vma where
 * the page was found is held for write, so vm_flags are not rechecked for
 * that vma.  That should be fine since that vma should not be VM_LOCKED.
 */
static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
	struct anon_vma *locked_av;
	struct anon_vma_chain *link;
	int ret = SWAP_AGAIN;

	locked_av = page_lock_anon_vma(page);
	if (!locked_av)
		return ret;

	list_for_each_entry(link, &locked_av->head, same_anon_vma) {
		struct vm_area_struct *candidate = link->vma;
		unsigned long address;

		/*
		 * exec() sets up a temporary VMA and moves it later.  The
		 * move happens under the anon_vma lock, the page table move
		 * does not, so migration could fail to find its migration
		 * ptes.  Instead of making exec() take more locks, migration
		 * leaves temporary VMAs alone until exec() has finished.
		 */
		if (PAGE_MIGRATION && (flags & TTU_MIGRATION) &&
				is_vma_temporary_stack(candidate))
			continue;

		address = vma_address(page, candidate);
		if (address != -EFAULT) {
			ret = try_to_unmap_one(page, candidate, address, flags);
			/* Go on only while told to and while still mapped. */
			if (!(ret == SWAP_AGAIN && page_mapped(page)))
				break;
		}
	}

	page_unlock_anon_vma(locked_av);
	return ret;
}

/**
 * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
 * @page: the page to unmap/unlock
 * @flags: action and flags
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
 * This function is only called from try_to_unmap/try_to_munlock for
 * object-based pages.
 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * 'LOCKED.
 *
 * The vmas in the i_mmap prio tree are tried first, one try_to_unmap_one()
 * call each.  If the page is still mapped afterwards and the file has
 * nonlinear vmas, those are scanned cluster by cluster without looking for
 * @page in particular.
 *
 * Returns the first try_to_unmap_one() result that is not SWAP_AGAIN,
 * SWAP_FAIL if the nonlinear vmas have no size to scan, SWAP_MLOCK if a
 * cluster scan reported it, and SWAP_AGAIN otherwise.
 */
static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;
	int ret = SWAP_AGAIN;
	unsigned long cursor;
	unsigned long max_nl_cursor = 0;
	unsigned long max_nl_size = 0;
	unsigned int mapcount;

	spin_lock(&mapping->i_mmap_lock);
	/* First pass: the vmas in the prio tree that cover pgoff. */
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		ret = try_to_unmap_one(page, vma, address, flags);
		if (ret != SWAP_AGAIN || !page_mapped(page))
			goto out;
	}

	if (list_empty(&mapping->i_mmap_nonlinear))
		goto out;

	/*
	 * We don't bother to try to find the munlocked page in nonlinears.
	 * It's costly. Instead, later, page reclaim logic may call
	 * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
	 */
	if (TTU_ACTION(flags) == TTU_MUNLOCK)
		goto out;

	/*
	 * Find the furthest saved scan cursor and the largest vma size among
	 * the nonlinear vmas ("cursor" doubles as a scratch variable for the
	 * size here).
	 */
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
						shared.vm_set.list) {
		cursor = (unsigned long) vma->vm_private_data;
		if (cursor > max_nl_cursor)
			max_nl_cursor = cursor;
		cursor = vma->vm_end - vma->vm_start;
		if (cursor > max_nl_size)
			max_nl_size = cursor;
	}

	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
		ret = SWAP_FAIL;
		goto out;
	}

	/*
	 * We don't try to search for this page in the nonlinear vmas,
	 * and page_referenced wouldn't have found it anyway.  Instead
	 * just walk the nonlinear vmas trying to age and unmap some.
	 * The mapcount of the page we came in with is irrelevant,
	 * but even so use it as a guide to how hard we should try?
	 */
	mapcount = page_mapcount(page);
	if (!mapcount)
		goto out;
	cond_resched_lock(&mapping->i_mmap_lock);

	/* Round the scan limit up to a whole number of clusters. */
	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
	if (max_nl_cursor == 0)
		max_nl_cursor = CLUSTER_SIZE;

	/*
	 * Each round brings every vma's cursor up to max_nl_cursor, then the
	 * limit moves on by one cluster, until the largest vma is covered.
	 */
	do {
		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
						shared.vm_set.list) {
			cursor = (unsigned long) vma->vm_private_data;
			while ( cursor < max_nl_cursor &&
				cursor < vma->vm_end - vma->vm_start) {
				if (try_to_unmap_cluster(cursor, &mapcount,
						vma, page) == SWAP_MLOCK)
					ret = SWAP_MLOCK;
				cursor += CLUSTER_SIZE;
				/* Save progress so a later call resumes here. */
				vma->vm_private_data = (void *) cursor;
				/*
				 * mapcount is unsigned and is decremented by
				 * try_to_unmap_cluster(); the cast also
				 * catches it having gone below zero.
				 */
				if ((int)mapcount <= 0)
					goto out;
			}
			vma->vm_private_data = (void *) max_nl_cursor;
		}
		cond_resched_lock(&mapping->i_mmap_lock);
		max_nl_cursor += CLUSTER_SIZE;
	} while (max_nl_cursor <= max_nl_size);

	/*
	 * Don't loop forever (perhaps all the remaining pages are
	 * in locked vmas).  Reset cursor on all unreserved nonlinear
	 * vmas, now forgetting on which ones it had fallen behind.
	 */
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
		vma->vm_private_data = NULL;
out:
	spin_unlock(&mapping->i_mmap_lock);
	return ret;
}

/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 * @flags: action and flags
 *
 * Used on the pageout path to remove every page table entry that maps
 * @page.  The caller must hold the page lock.
 *
 * Return values:
 *
 * SWAP_SUCCESS	- all mappings were removed
 * SWAP_AGAIN	- a mapping was missed, try again later
 * SWAP_FAIL	- the page is unswappable
 * SWAP_MLOCK	- the page is mlocked
 */
int try_to_unmap(struct page *page, enum ttu_flags flags)
{
	int result;

	BUG_ON(!PageLocked(page));
	VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));

	/* Dispatch on the kind of page: KSM, anonymous or file backed. */
	if (unlikely(PageKsm(page))) {
		result = try_to_unmap_ksm(page, flags);
	} else {
		result = PageAnon(page) ? try_to_unmap_anon(page, flags)
					: try_to_unmap_file(page, flags);
	}

	/* An mlocked page is reported as such whatever its mapcount is. */
	if (result == SWAP_MLOCK)
		return result;

	/* With no mapping left the attempt succeeded, whatever was returned. */
	return page_mapped(page) ? result : SWAP_SUCCESS;
}

/**
 * try_to_munlock - try to munlock a page
 * @page: the page to be munlocked
 *
 * Called from the munlock code.  Looks at all VMAs mapping the page to make
 * sure that no other one has it mlocked.  If none does, the page is
 * returned with PG_mlocked cleared.
 *
 * Return values:
 *
 * SWAP_AGAIN	- no vma holds the page mlocked, or the page is mapped in an
 *		  mlocked vma whose mmap sem could not be acquired
 * SWAP_FAIL	- the page cannot be located at present
 * SWAP_MLOCK	- the page is now mlocked
 */
int try_to_munlock(struct page *page)
{
	VM_BUG_ON(!PageLocked(page) || PageLRU(page));

	/* Same walkers as try_to_unmap, always with the TTU_MUNLOCK action. */
	if (unlikely(PageKsm(page)))
		return try_to_unmap_ksm(page, TTU_MUNLOCK);
	if (!PageAnon(page))
		return try_to_unmap_file(page, TTU_MUNLOCK);
	return try_to_unmap_anon(page, TTU_MUNLOCK);
}

#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
/*
 * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
 * if necessary.  Be careful to do all the tests under the lock.  Once
 * we know we are the last user, nobody else can get a reference and we
 * can do the freeing without the lock.
 *
 * @anon_vma: the anon_vma whose external_refcount is dropped; the count
 *            must be positive on entry.
 */
void drop_anon_vma(struct anon_vma *anon_vma)
{
	BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
	/* Takes the root's lock only if this was the last reference. */
	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
		struct anon_vma *root = anon_vma->root;
		/* Sample the list state while the lock is still held. */
		int empty = list_empty(&anon_vma->head);
		int last_root_user = 0;
		int root_empty = 0;

		/*
		 * The refcount on a non-root anon_vma got dropped.  Drop
		 * the refcount on the root and check if we need to free it.
		 */
		if (empty && anon_vma != root) {
			BUG_ON(atomic_read(&root->external_refcount) <= 0);
			last_root_user = atomic_dec_and_test(&root->external_refcount);
			root_empty = list_empty(&root->head);
		}
		anon_vma_unlock(anon_vma);

		/* Free outside the lock, using only the values sampled above. */
		if (empty) {
			anon_vma_free(anon_vma);
			/*
			 * last_root_user and root_empty can only be set when
			 * anon_vma is not its own root, so this frees a
			 * different object.
			 */
			if (root_empty && last_root_user)
				anon_vma_free(root);
		}
	}
}
#endif

#ifdef CONFIG_MIGRATION
/*
 * rmap_walk() with its helpers rmap_walk_anon() and rmap_walk_file():
 * migrate.c calls these to remove migration ptes; other uses may follow.
 *
 * rmap_walk_anon() calls @rmap_one for every vma on the page's anon_vma that
 * has an address for the page, stopping at the first result other than
 * SWAP_AGAIN, which is then returned.
 */
static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	/*
	 * Note: page_lock_anon_vma() is no use to remove_migration_ptes()
	 * since it depends on page_mapped().  Not every caller holds
	 * mmap_sem either; those that do not must hold a reference count so
	 * that the anon_vma cannot go away.
	 */
	struct anon_vma *walked_av = page_anon_vma(page);
	struct anon_vma_chain *link;
	int ret = SWAP_AGAIN;

	if (!walked_av)
		return ret;

	anon_vma_lock(walked_av);
	list_for_each_entry(link, &walked_av->head, same_anon_vma) {
		struct vm_area_struct *candidate = link->vma;
		unsigned long address = vma_address(page, candidate);

		if (address != -EFAULT) {
			ret = rmap_one(page, candidate, address, arg);
			if (ret != SWAP_AGAIN)
				break;
		}
	}
	anon_vma_unlock(walked_av);

	return ret;
}

/*
 * Call @rmap_one for every vma in the mapping's prio tree that has an
 * address for @page, under i_mmap_lock.  Stops at the first result other
 * than SWAP_AGAIN and returns the last result obtained.
 */
static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct address_space *file_mapping = page->mapping;
	pgoff_t file_pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *candidate;
	struct prio_tree_iter iter;
	int ret = SWAP_AGAIN;

	if (!file_mapping)
		return ret;

	spin_lock(&file_mapping->i_mmap_lock);
	vma_prio_tree_foreach(candidate, &iter, &file_mapping->i_mmap,
			      file_pgoff, file_pgoff) {
		unsigned long address = vma_address(page, candidate);

		if (address != -EFAULT) {
			ret = rmap_one(page, candidate, address, arg);
			if (ret != SWAP_AGAIN)
				break;
		}
	}
	/*
	 * Nonlinear vmas are not walked: they are always shared and so never
	 * hold migration ptes.  This restriction to linear vmas has to be
	 * revisited once rmap_walk() is needed on nonlinear ones.
	 */
	spin_unlock(&file_mapping->i_mmap_lock);

	return ret;
}

/*
 * Run @rmap_one over the mappings of a locked @page, using the walker that
 * matches the kind of page (KSM, anonymous or file backed).
 */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);
	if (!PageAnon(page))
		return rmap_walk_file(page, rmap_one, arg);
	return rmap_walk_anon(page, rmap_one, arg);
}
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_HUGETLB_PAGE
/*
 * The following three functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
/*
 * Point an anonymous hugepage at its anon_vma and record its index in @vma.
 * Nothing is changed if the page is already PageAnon.  A non-exclusive page
 * is attached to the root anon_vma rather than to the vma's own.
 */
static void __hugepage_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *owner = vma->anon_vma;

	BUG_ON(!owner);

	if (PageAnon(page))
		return;

	owner = exclusive ? owner : owner->root;

	/* PAGE_MAPPING_ANON is added to the pointer stored in ->mapping. */
	page->mapping = (struct address_space *)
				((void *) owner + PAGE_MAPPING_ANON);
	page->index = linear_page_index(vma, address);
}

/*
 * Add a mapping of a locked anonymous hugepage at @address in @vma.  The
 * rmap fields are only set up when this is the page's first mapping.
 */
void hugepage_add_anon_rmap(struct page *page,
			    struct vm_area_struct *vma, unsigned long address)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(!vma->anon_vma);
	BUG_ON(address < vma->vm_start || address >= vma->vm_end);

	/* True only when this increment brought _mapcount to zero. */
	if (atomic_inc_and_test(&page->_mapcount))
		__hugepage_set_anon_rmap(page, vma, address, 0);
}

/*
 * Add the mapping of a new anonymous hugepage at @address in @vma: the
 * mapcount is stored directly and the page is set up as exclusive.
 */
void hugepage_add_new_anon_rmap(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	const int exclusive = 1;

	BUG_ON(address < vma->vm_start || address >= vma->vm_end);

	atomic_set(&page->_mapcount, 0);
	__hugepage_set_anon_rmap(page, vma, address, exclusive);
}
#endif /* CONFIG_HUGETLB_PAGE */
t">, 0, 0, 0, 0, 0, 0, 0 }; //MIC padding of message #endif // HCF_TYPE_WPA #if defined MSF_COMPONENT_ID CFG_IDENTITY_STRCT BASED cfg_drv_identity = { sizeof(cfg_drv_identity)/sizeof(hcf_16) - 1, //length of RID CFG_DRV_IDENTITY, // (0x0826) MSF_COMPONENT_ID, MSF_COMPONENT_VAR, MSF_COMPONENT_MAJOR_VER, MSF_COMPONENT_MINOR_VER } ; CFG_RANGES_STRCT BASED cfg_drv_sup_range = { sizeof(cfg_drv_sup_range)/sizeof(hcf_16) - 1, //length of RID CFG_DRV_SUP_RANGE, // (0x0827) COMP_ROLE_SUPL, COMP_ID_DUI, {{ DUI_COMPAT_VAR, DUI_COMPAT_BOT, DUI_COMPAT_TOP }} } ; struct CFG_RANGE3_STRCT BASED cfg_drv_act_ranges_pri = { sizeof(cfg_drv_act_ranges_pri)/sizeof(hcf_16) - 1, //length of RID CFG_DRV_ACT_RANGES_PRI, // (0x0828) COMP_ROLE_ACT, COMP_ID_PRI, { { 0, 0, 0 }, // HCF_PRI_VAR_1 not supported by HCF 7 { 0, 0, 0 }, // HCF_PRI_VAR_2 not supported by HCF 7 { 3, //var_rec[2] - Variant number CFG_DRV_ACT_RANGES_PRI_3_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_PRI_3_TOP // - Top Compatibility } } } ; struct CFG_RANGE4_STRCT BASED cfg_drv_act_ranges_sta = { sizeof(cfg_drv_act_ranges_sta)/sizeof(hcf_16) - 1, //length of RID CFG_DRV_ACT_RANGES_STA, // (0x0829) COMP_ROLE_ACT, COMP_ID_STA, { #if defined HCF_STA_VAR_1 { 1, //var_rec[1] - Variant number CFG_DRV_ACT_RANGES_STA_1_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_STA_1_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_STA_VAR_1 #if defined HCF_STA_VAR_2 { 2, //var_rec[1] - Variant number CFG_DRV_ACT_RANGES_STA_2_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_STA_2_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_STA_VAR_2 // For Native_USB (Not used!) 
#if defined HCF_STA_VAR_3 { 3, //var_rec[1] - Variant number CFG_DRV_ACT_RANGES_STA_3_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_STA_3_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_STA_VAR_3 // Warp #if defined HCF_STA_VAR_4 { 4, //var_rec[1] - Variant number CFG_DRV_ACT_RANGES_STA_4_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_STA_4_TOP // - Top Compatibility } #else { 0, 0, 0 } #endif // HCF_STA_VAR_4 } } ; struct CFG_RANGE6_STRCT BASED cfg_drv_act_ranges_hsi = { sizeof(cfg_drv_act_ranges_hsi)/sizeof(hcf_16) - 1, //length of RID CFG_DRV_ACT_RANGES_HSI, // (0x082A) COMP_ROLE_ACT, COMP_ID_HSI, { #if defined HCF_HSI_VAR_0 // Controlled deployment { 0, // var_rec[1] - Variant number CFG_DRV_ACT_RANGES_HSI_0_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_HSI_0_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_HSI_VAR_0 { 0, 0, 0 }, // HCF_HSI_VAR_1 not supported by HCF 7 { 0, 0, 0 }, // HCF_HSI_VAR_2 not supported by HCF 7 { 0, 0, 0 }, // HCF_HSI_VAR_3 not supported by HCF 7 #if defined HCF_HSI_VAR_4 // Hermes-II all types { 4, // var_rec[1] - Variant number CFG_DRV_ACT_RANGES_HSI_4_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_HSI_4_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_HSI_VAR_4 #if defined HCF_HSI_VAR_5 // WARP Hermes-2.5 { 5, // var_rec[1] - Variant number CFG_DRV_ACT_RANGES_HSI_5_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_HSI_5_TOP // - Top Compatibility } #else { 0, 0, 0 } #endif // HCF_HSI_VAR_5 } } ; CFG_RANGE4_STRCT BASED cfg_drv_act_ranges_apf = { sizeof(cfg_drv_act_ranges_apf)/sizeof(hcf_16) - 1, //length of RID CFG_DRV_ACT_RANGES_APF, // (0x082B) COMP_ROLE_ACT, COMP_ID_APF, { #if defined HCF_APF_VAR_1 //(Fake) Hermes-I { 1, //var_rec[1] - Variant number CFG_DRV_ACT_RANGES_APF_1_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_APF_1_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_APF_VAR_1 #if defined HCF_APF_VAR_2 //Hermes-II { 2, // 
var_rec[1] - Variant number CFG_DRV_ACT_RANGES_APF_2_BOTTOM, // - Bottom Compatibility CFG_DRV_ACT_RANGES_APF_2_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_APF_VAR_2 #if defined HCF_APF_VAR_3 // Native_USB { 3, // var_rec[1] - Variant number CFG_DRV_ACT_RANGES_APF_3_BOTTOM, // - Bottom Compatibility !!!!!see note below!!!!!!! CFG_DRV_ACT_RANGES_APF_3_TOP // - Top Compatibility }, #else { 0, 0, 0 }, #endif // HCF_APF_VAR_3 #if defined HCF_APF_VAR_4 // WARP Hermes 2.5 { 4, // var_rec[1] - Variant number CFG_DRV_ACT_RANGES_APF_4_BOTTOM, // - Bottom Compatibility !!!!!see note below!!!!!!! CFG_DRV_ACT_RANGES_APF_4_TOP // - Top Compatibility } #else { 0, 0, 0 } #endif // HCF_APF_VAR_4 } } ; #define HCF_VERSION TEXT( "HCF$Revision: 1.10 $" ) static struct /*CFG_HCF_OPT_STRCT*/ { hcf_16 len; //length of cfg_hcf_opt struct hcf_16 typ; //type 0x082C hcf_16 v0; //offset HCF_VERSION hcf_16 v1; // MSF_COMPONENT_ID hcf_16 v2; // HCF_ALIGN hcf_16 v3; // HCF_ASSERT hcf_16 v4; // HCF_BIG_ENDIAN hcf_16 v5; // /* HCF_DLV | HCF_DLNV */ hcf_16 v6; // HCF_DMA hcf_16 v7; // HCF_ENCAP hcf_16 v8; // HCF_EXT hcf_16 v9; // HCF_INT_ON hcf_16 v10; // HCF_IO hcf_16 v11; // HCF_LEGACY hcf_16 v12; // HCF_MAX_LTV hcf_16 v13; // HCF_PROT_TIME hcf_16 v14; // HCF_SLEEP hcf_16 v15; // HCF_TALLIES hcf_16 v16; // HCF_TYPE hcf_16 v17; // HCF_NIC_TAL_CNT hcf_16 v18; // HCF_HCF_TAL_CNT hcf_16 v19; // offset tallies TCHAR val[sizeof(HCF_VERSION)]; } BASED cfg_hcf_opt = { sizeof(cfg_hcf_opt)/sizeof(hcf_16) -1, CFG_HCF_OPT, // (0x082C) ( sizeof(cfg_hcf_opt) - sizeof(HCF_VERSION) - 4 )/sizeof(hcf_16), #if defined MSF_COMPONENT_ID MSF_COMPONENT_ID, #else 0, #endif // MSF_COMPONENT_ID HCF_ALIGN, HCF_ASSERT, HCF_BIG_ENDIAN, 0, // /* HCF_DLV | HCF_DLNV*/, HCF_DMA, HCF_ENCAP, HCF_EXT, HCF_INT_ON, HCF_IO, HCF_LEGACY, HCF_MAX_LTV, HCF_PROT_TIME, HCF_SLEEP, HCF_TALLIES, HCF_TYPE, #if (HCF_TALLIES) & ( HCF_TALLIES_NIC | HCF_TALLIES_HCF ) HCF_NIC_TAL_CNT, HCF_HCF_TAL_CNT, offsetof(IFB_STRCT, 
IFB_TallyLen ), #else 0, 0, 0, #endif // HCF_TALLIES_NIC / HCF_TALLIES_HCF HCF_VERSION }; // cfg_hcf_opt #endif // MSF_COMPONENT_ID #if defined HCF_TALLIES_EXTRA replaced by HCF_EXT_TALLIES_FW ; #endif // HCF_TALLIES_EXTRA #if defined MSF_COMPONENT_ID || (HCF_EXT) & HCF_EXT_MB #if (HCF_EXT) & HCF_EXT_MB HCF_STATIC LTV_STRCT BASED cfg_null = { 1, CFG_NULL, {0} }; #endif // HCF_EXT_MB HCF_STATIC hcf_16* BASED xxxx[ ] = { #if (HCF_EXT) & HCF_EXT_MB &cfg_null.len, //CFG_NULL 0x0820 #endif // HCF_EXT_MB #if defined MSF_COMPONENT_ID &cfg_drv_identity.len, //CFG_DRV_IDENTITY 0x0826 &cfg_drv_sup_range.len, //CFG_DRV_SUP_RANGE 0x0827 &cfg_drv_act_ranges_pri.len, //CFG_DRV_ACT_RANGES_PRI 0x0828 &cfg_drv_act_ranges_sta.len, //CFG_DRV_ACT_RANGES_STA 0x0829 &cfg_drv_act_ranges_hsi.len, //CFG_DRV_ACT_RANGES_HSI 0x082A &cfg_drv_act_ranges_apf.len, //CFG_DRV_ACT_RANGES_APF 0x082B &cfg_hcf_opt.len, //CFG_HCF_OPT 0x082C NULL, //IFB_PRIIdentity placeholder 0xFD02 NULL, //IFB_PRISup placeholder 0xFD03 #endif // MSF_COMPONENT_ID NULL //endsentinel }; #define xxxx_PRI_IDENTITY_OFFSET (sizeof(xxxx)/sizeof(xxxx[0]) - 3) #endif // MSF_COMPONENT_ID / HCF_EXT_MB /************************************************************************************************************ ************************** T O P L E V E L H C F R O U T I N E S ************************************** ************************************************************************************************************/ #if (HCF_DL_ONLY) == 0 /************************************************************************************************************ * *.MODULE int hcf_action( IFBP ifbp, hcf_16 action ) *.PURPOSE Changes the run-time Card behavior. * Performs Miscellanuous actions. 
* *.ARGUMENTS * ifbp address of the Interface Block * action number identifying the type of change * - HCF_ACT_CCX_OFF disable CKIP * - HCF_ACT_CCX_ON enable CKIP * - HCF_ACT_INT_FORCE_ON enable interrupt generation by WaveLAN NIC * - HCF_ACT_INT_OFF disable interrupt generation by WaveLAN NIC * - HCF_ACT_INT_ON compensate 1 HCF_ACT_INT_OFF, enable interrupt generation if balance reached * - HCF_ACT_PRS_SCAN Hermes Probe Respons Scan (F102) command * - HCF_ACT_RX_ACK acknowledge non-DMA receiver to Hermes * - HCF_ACT_SCAN Hermes Inquire Scan (F101) command (non-WARP only) * - HCF_ACT_SLEEP DDS Sleep request * - HCF_ACT_TALLIES Hermes Inquire Tallies (F100) command * *.RETURNS * HCF_SUCCESS all (including invalid) * HCF_INT_PENDING HCF_ACT_INT_OFF, interrupt pending * HCF_ERR_NO_NIC HCF_ACT_INT_OFF, NIC presence check fails * *.CONDITIONS * Except for hcf_action with HCF_ACT_INT_FORCE_ON or HCF_ACT_INT_OFF as parameter or hcf_connect with an I/O * address (i.e. not HCF_DISCONNECT), all hcf-function calls MUST be preceeded by a call of hcf_action with * HCF_ACT_INT_OFF as parameter. * Note that hcf_connect defaults to NIC interrupt disabled mode, i.e. as if hcf_action( HCF_ACT_INT_OFF ) * was called. * *.DESCRIPTION * hcf_action supports the following mode changing action-code pairs that are antonyms * - HCF_ACT_CCX_OFF / HCF_ACT_CCX_ON * - HCF_ACT_INT_[FORCE_]ON / HCF_ACT_INT_OFF * * Additionally hcf_action can start the following actions in the NIC: * - HCF_ACT_PRS_SCAN * - HCF_ACT_RX_ACK * - HCF_ACT_SCAN * - HCF_ACT_SLEEP * - HCF_ACT_TALLIES * * o HCF_ACT_INT_OFF: Sets NIC Interrupts mode Disabled. * This command, and the associated [Force] Enable NIC interrupts command, are only available if the HCF_INT_ON * compile time option is not set at 0x0000. * * o HCF_ACT_INT_ON: Sets NIC Interrupts mode Enabled. * Enable NIC Interrupts, depending on the number of preceding Disable NIC Interrupt calls. * * o HCF_ACT_INT_FORCE_ON: Force NIC Interrupts mode Enabled. 
* Sets NIC Interrupts mode Enabled, regardless off the number of preceding Disable NIC Interrupt calls. * * The disabling and enabling of interrupts are antonyms. * These actions must be balanced. * For each "disable interrupts" there must be a matching "enable interrupts". * The disable interrupts may be executed multiple times in a row without intervening enable interrupts, in * other words, the disable interrupts may be nested. * The interrupt generation mechanism is disabled at the first call with HCF_ACT_INT_OFF. * The interrupt generation mechanism is re-enabled when the number of calls with HCF_ACT_INT_ON matches the * number of calls with INT_OFF. * * It is not allowed to have more Enable NIC Interrupts calls than Disable NIC Interrupts calls. * The interrupt generation mechanism is initially (i.e. after hcf_connect) disabled. * An MSF based on a interrupt strategy must call hcf_action with INT_ON in its initialization logic. * *! The INT_OFF/INT_ON housekeeping is initialized at 0x0000 by hcf_connect, causing the interrupt generation * mechanism to be disabled at first. This suits MSF implementation based on a polling strategy. * * o HCF_ACT_CCX_OFF / HCF_ACT_CCX_ON *!! This can use some more explanation;? * Disables and Enables support in the HCF runtime code for the CCX feature. Each time one of these action * codes is used, the effects of the preceding use cease. * * o HCF_ACT_SLEEP: Initiates the Disconnected DeepSleep process * This command is only available if the HCF_DDS compile time option is set. It triggers the F/W to start the * sleep handshaking. Regardless whether the Host initiates a Disconnected DeepSleep (DDS) or the F/W initiates * a Connected DeepSleep (CDS), the Host-F/W sleep handshaking is completed when the NIC Interrupts mode is * enabled (by means of the balancing HCF_ACT_INT_ON), i.e. at that moment the F/W really goes into sleep mode. * The F/W is wokenup by the HCF when the NIC Interrupts mode are disabled, i.e. 
at the first HCF_ACT_INT_OFF
* after going into sleep.
*
* The following Miscellaneous actions are defined:
*
* o HCF_ACT_RX_ACK: Receiver Acknowledgement (non-DMA, non-USB mode only)
* Acking the receiver frees the NIC memory used to hold the Rx frame and allows the F/W to
* report the existence of the next Rx frame.
* If the MSF does not need access (any longer) to the current frame, e.g. because it is rejected based on the
* look ahead or copied to another buffer, the receiver may be acked. Acking earlier is assumed to have the
* potential of improving the performance.
* If the MSF does not explicitly ack the receiver, the acking is done implicitly:
* - if the received frame fits in the look ahead buffer, by the hcf_service_nic call that reported the Rx frame
* - if not in the above step, by hcf_rcv_msg (assuming hcf_rcv_msg is called)
* - if neither of the above implicit acks nor an explicit ack by the MSF, by the first hcf_service_nic after
*   the hcf_service_nic that reported the Rx frame.
* Note: If an Rx frame is already acked, an explicit ACK by the MSF acts as a NoOperation.
*
* o HCF_ACT_TALLIES: Inquire Tallies command
* This command is only operational if the F/W is enabled.
* The Inquire Tallies command requests the F/W to provide its current set of tallies.
* See also hcf_get_info with CFG_TALLIES as parameter.
*
* o HCF_ACT_PRS_SCAN: Inquire Probe Response Scan command
* This command is only operational if the F/W is enabled.
* The Probe Response Scan command starts a scan sequence.
* The HCF puts the result of this action in an MSF defined buffer (see CFG_RID_LOG_STRCT).
*
* o HCF_ACT_SCAN: Inquire Scan command
* This command is only supported for HII F/W (i.e. pre-WARP) and it is operational if the F/W is enabled.
* The Inquire Scan command starts a scan sequence.
* The HCF puts the result of this action in an MSF defined buffer (see CFG_RID_LOG_STRCT).
*
* Assert fails if
* - ifbp has a recognizable out-of-range value.
* - NIC interrupts are not disabled while required by parameter action. * - an invalid code is specified in parameter action. * - HCF_ACT_INT_ON commands outnumber the HCF_ACT_INT_OFF commands. * - reentrancy, may be caused by calling hcf_functions without adequate protection against NIC interrupts or * multi-threading * * - Since the HCF does not maintain status information relative to the F/W enabled state, it is not asserted * whether HCF_ACT_SCAN, HCF_ACT_PRS_SCAN or HCF_ACT_TALLIES are only used while F/W is enabled. * *.DIAGRAM * 0: The assert embedded in HCFLOGENTRY checks against re-entrancy. Re-entrancy could be caused by a MSF logic * at task-level calling hcf_functions without shielding with HCF_ACT_ON/_OFF. However the HCF_ACT_INT_OFF * action itself can per definition not be protected this way. Based on code inspection, it can be concluded, * that there is no re-entrancy PROBLEM in this particular flow. It does not seem worth the trouble to * explicitly check for this condition (although there was a report of an MSF which ran into this assert. * 2:IFB_IntOffCnt is used to balance the INT_OFF and INT_ON calls. Disabling of the interrupts is achieved by * writing a zero to the Hermes IntEn register. In a shared interrupt environment (e.g. the mini-PCI NDIS * driver) it is considered more correct to return the status HCF_INT_PENDING if and only if, the current * invocation of hcf_service_nic is (apparently) called in the ISR when the ISR was activated as result of a * change in HREG_EV_STAT matching a bit in HREG_INT_EN, i.e. not if invoked as result of another device * generating an interrupt on the shared interrupt line. * Note 1: it has been observed that under certain adverse conditions on certain platforms the writing of * HREG_INT_EN can apparently fail, therefor it is paramount that HREG_INT_EN is written again with 0 for * each and every call to HCF_ACT_INT_OFF. 
* Note 2: it has been observed that under certain H/W & S/W architectures this logic is called when there is * no NIC at all. To cater for this, the value of HREG_INT_EN is validated. If the unused bit 0x0100 is set, * it is assumed there is no NIC. * Note 3: During the download process, some versions of the F/W reset HREG_SW_0, hence checking this * register for HCF_MAGIC (the classical NIC presence test) when HCF_ACT_INT_OFF is called due to another * card interrupting via a shared IRQ during a download, fails. *4: The construction "if ( ifbp->IFB_IntOffCnt-- == 0 )" is optimal (in the sense of shortest/quickest * path in error free flows) but NOT fail safe in case of too many INT_ON invocations compared to INT_OFF). * Enabling of the interrupts is achieved by writing the Hermes IntEn register. * - If the HCF is in Defunct mode, the interrupts stay disabled. * - Under "normal" conditions, the HCF is only interested in Info Events, Rx Events and Notify Events. * - When the HCF is out of Tx/Notify resources, the HCF is also interested in Alloc Events. * - via HCF_EXT, the MSF programmer can also request HREG_EV_TICK and/or HREG_EV_TX_EXC interrupts. * For DMA operation, the DMA hardware handles the alloc events. The DMA engine will generate a 'TxDmaDone' * event as soon as it has pumped a frame from host ram into NIC-RAM (note that the frame does not have to be * transmitted then), and a 'RxDmaDone' event as soon as a received frame has been pumped from NIC-RAM into * host ram. Note that the 'alloc' event has been removed from the event-mask, because the DMA engine will * react to and acknowledge this event. *6: ack the "old" Rx-event. See "Rx Buffer free strategy" in hcf_service_nic above for more explanation. * IFB_RxFID and IFB_RxLen must be cleared to bring both the internal HCF house keeping and the information * supplied to the MSF in the state "no frame received". 
*8: The HCF_ACT_SCAN, HCF_ACT_PRS_SCAN and HCF_ACT_TALLIES activity are merged by "clever" algebraic * manipulations of the RID-values and action codes, so foregoing robustness against migration problems for * ease of implementation. The assumptions about numerical relationships between CFG_TALLIES etc and * HCF_ACT_TALLIES etc are checked by the "#if" statements just prior to the body of this routine, resulting * in: err "maintenance" during compilation if the assumptions are no longer met. The writing of HREG_PARAM_1 * with 0x3FFF in case of an PRS scan, is a kludge to get around lack of specification, hence different * implementation in F/W and Host. * When there is no NIC RAM available, some versions of the Hermes F/W do report 0x7F00 as error in the * Result field of the Status register and some F/W versions don't. To mask this difference to the MSF all * return codes of the Hermes are ignored ("best" and "most simple" solution to these types of analomies with * an acceptable loss due to ignoring all error situations as well). * The "No inquire space" is reported via the Hermes tallies. *30: do not HCFASSERT( rc, rc ) since rc == HCF_INT_PENDING is no error * *.ENDDOC END DOCUMENTATION * ************************************************************************************************************/ #if ( (HCF_TYPE) & HCF_TYPE_HII5 ) == 0 #if CFG_SCAN != CFG_TALLIES - HCF_ACT_TALLIES + HCF_ACT_SCAN err: "maintenance" apparently inviolated the underlying assumption about the numerical values of these macros #endif #endif // HCF_TYPE_HII5 #if CFG_PRS_SCAN != CFG_TALLIES - HCF_ACT_TALLIES + HCF_ACT_PRS_SCAN err: "maintenance" apparently inviolated the underlying assumption about the numerical values of these macros #endif int hcf_action( IFBP ifbp, hcf_16 action ) { int rc = HCF_SUCCESS; HCFASSERT( ifbp->IFB_Magic == HCF_MAGIC, ifbp->IFB_Magic ) #if HCF_INT_ON HCFLOGENTRY( action == HCF_ACT_INT_FORCE_ON ? 
HCF_TRACE_ACTION_KLUDGE : HCF_TRACE_ACTION, action ) /* 0 */ #if (HCF_SLEEP) HCFASSERT( ifbp->IFB_IntOffCnt != 0xFFFE || action == HCF_ACT_INT_OFF, MERGE_2( action, ifbp->IFB_IntOffCnt ) ) #else HCFASSERT( ifbp->IFB_IntOffCnt != 0xFFFE, action ) #endif // HCF_SLEEP HCFASSERT( ifbp->IFB_IntOffCnt != 0xFFFF || action == HCF_ACT_INT_OFF || action == HCF_ACT_INT_FORCE_ON, action ) HCFASSERT( ifbp->IFB_IntOffCnt <= 16 || ifbp->IFB_IntOffCnt >= 0xFFFE, MERGE_2( action, ifbp->IFB_IntOffCnt ) ) //nesting more than 16 deep seems unreasonable #endif // HCF_INT_ON switch (action) { #if HCF_INT_ON hcf_16 i; case HCF_ACT_INT_OFF: // Disable Interrupt generation #if HCF_SLEEP if ( ifbp->IFB_IntOffCnt == 0xFFFE ) { // WakeUp test ;?tie this to the "new" super-LinkStat ifbp->IFB_IntOffCnt++; // restore conventional I/F OPW(HREG_IO, HREG_IO_WAKEUP_ASYNC ); // set wakeup bit OPW(HREG_IO, HREG_IO_WAKEUP_ASYNC ); // set wakeup bit to counteract the clearing by F/W // 800 us latency before FW switches to high power MSF_WAIT(800); // MSF-defined function to wait n microseconds. //OOR if ( ifbp->IFB_DSLinkStat & CFG_LINK_STAT_DS_OOR ) { // OutOfRange // printk( "<5>ACT_INT_OFF: Deepsleep phase terminated, enable and go to AwaitConnection\n" ); //;?remove me 1 day // hcf_cntl( ifbp, HCF_CNTL_ENABLE ); // } // ifbp->IFB_DSLinkStat &= ~( CFG_LINK_STAT_DS_IR | CFG_LINK_STAT_DS_OOR); //clear IR/OOR state } #endif // HCF_SLEEP /*2*/ ifbp->IFB_IntOffCnt++; //! 
rc = 0; i = IPW( HREG_INT_EN ); OPW( HREG_INT_EN, 0 ); if ( i & 0x1000 ) { rc = HCF_ERR_NO_NIC; } else { if ( i & IPW( HREG_EV_STAT ) ) { rc = HCF_INT_PENDING; } } break; case HCF_ACT_INT_FORCE_ON: // Enforce Enable Interrupt generation ifbp->IFB_IntOffCnt = 0; //Fall through in HCF_ACT_INT_ON case HCF_ACT_INT_ON: // Enable Interrupt generation /*4*/ if ( ifbp->IFB_IntOffCnt-- == 0 && ifbp->IFB_CardStat == 0 ) { //determine Interrupt Event mask #if HCF_DMA if ( ifbp->IFB_CntlOpt & USE_DMA ) { i = HREG_EV_INFO | HREG_EV_RDMAD | HREG_EV_TDMAD | HREG_EV_TX_EXT; //mask when DMA active } else #endif // HCF_DMA { i = HREG_EV_INFO | HREG_EV_RX | HREG_EV_TX_EXT; //mask when DMA not active if ( ifbp->IFB_RscInd == 0 ) { i |= HREG_EV_ALLOC; //mask when no TxFID available } } #if HCF_SLEEP if ( ( IPW(HREG_EV_STAT) & ( i | HREG_EV_SLEEP_REQ ) ) == HREG_EV_SLEEP_REQ ) { // firmware indicates it would like to go into sleep modus // only acknowledge this request if no other events that can cause an interrupt are pending ifbp->IFB_IntOffCnt--; //becomes 0xFFFE OPW( HREG_INT_EN, i | HREG_EV_TICK ); OPW( HREG_EV_ACK, HREG_EV_SLEEP_REQ | HREG_EV_TICK | HREG_EV_ACK_REG_READY ); } else #endif // HCF_SLEEP { OPW( HREG_INT_EN, i | HREG_EV_SLEEP_REQ ); } } break; #endif // HCF_INT_ON #if (HCF_SLEEP) & HCF_DDS case HCF_ACT_SLEEP: // DDS Sleep request hcf_cntl( ifbp, HCF_CNTL_DISABLE ); cmd_exe( ifbp, HCMD_SLEEP, 0 ); break; // case HCF_ACT_WAKEUP: // DDS Wakeup request // HCFASSERT( ifbp->IFB_IntOffCnt == 0xFFFE, ifbp->IFB_IntOffCnt ) // ifbp->IFB_IntOffCnt++; // restore conventional I/F // OPW( HREG_IO, HREG_IO_WAKEUP_ASYNC ); // MSF_WAIT(800); // MSF-defined function to wait n microseconds. // rc = hcf_action( ifbp, HCF_ACT_INT_OFF ); /*bogus, IFB_IntOffCnt == 0xFFFF, so if you carefully look // *at the #if HCF_DDS statements, HCF_ACT_INT_OFF is empty // *for DDS. 
"Much" better would be to merge the flows for // *DDS and DEEP_SLEEP // */ // break; #endif // HCF_DDS #if (HCF_TYPE) & HCF_TYPE_CCX case HCF_ACT_CCX_ON: // enable CKIP case HCF_ACT_CCX_OFF: // disable CKIP ifbp->IFB_CKIPStat = action; break; #endif // HCF_TYPE_CCX case HCF_ACT_RX_ACK: //Receiver ACK /*6*/ if ( ifbp->IFB_RxFID ) { DAWA_ACK( HREG_EV_RX ); } ifbp->IFB_RxFID = ifbp->IFB_RxLen = 0; break; /*8*/ case HCF_ACT_PRS_SCAN: // Hermes PRS Scan (F102) OPW( HREG_PARAM_1, 0x3FFF ); //Fall through in HCF_ACT_TALLIES case HCF_ACT_TALLIES: // Hermes Inquire Tallies (F100) #if ( (HCF_TYPE) & HCF_TYPE_HII5 ) == 0 case HCF_ACT_SCAN: // Hermes Inquire Scan (F101) #endif // HCF_TYPE_HII5 /*!! the assumptions about numerical relationships between CFG_TALLIES etc and HCF_ACT_TALLIES etc * are checked by #if statements just prior to this routine resulting in: err "maintenance" */ cmd_exe( ifbp, HCMD_INQUIRE, action - HCF_ACT_TALLIES + CFG_TALLIES ); break; default: HCFASSERT( DO_ASSERT, action ) break; } //! do not HCFASSERT( rc == HCF_SUCCESS, rc ) /* 30*/ HCFLOGEXIT( HCF_TRACE_ACTION ) return rc; } // hcf_action #endif // HCF_DL_ONLY /************************************************************************************************************ * *.MODULE int hcf_cntl( IFBP ifbp, hcf_16 cmd ) *.PURPOSE Connect or disconnect a specific port to a specific network. *!! ;???????????????? continue needs more explanation * recovers by means of "continue" when the connect proces in CCX mode fails * Enables or disables data transmission and reception for the NIC. * Activates static NIC configuration for a specific port at connect. * Activates static configuration for all ports at enable. 
* *.ARGUMENTS * ifbp address of the Interface Block * cmd 0x001F: Hermes command (disable, enable, connect, disconnect, continue) * HCF_CNTL_ENABLE Enable * HCF_CNTL_DISABLE Disable * HCF_CNTL_CONTINUE Continue * HCF_CNTL_CONNECT Connect * HCF_CNTL_DISCONNECT Disconnect * 0x0100: command qualifier (continue) * HCMD_RETRY retry flag * 0x0700: port number (connect/disconnect) * HCF_PORT_0 MAC Port 0 * HCF_PORT_1 MAC Port 1 * HCF_PORT_2 MAC Port 2 * HCF_PORT_3 MAC Port 3 * HCF_PORT_4 MAC Port 4 * HCF_PORT_5 MAC Port 5 * HCF_PORT_6 MAC Port 6 * *.RETURNS * HCF_SUCCESS *!! via cmd_exe * HCF_ERR_NO_NIC * HCF_ERR_DEFUNCT_... * HCF_ERR_TIME_OUT * *.DESCRIPTION * The parameter cmd contains a number of subfields. * The actual value for cmd is created by logical or-ing the appropriate mnemonics for the subfields. * The field 0x001F contains the command code * - HCF_CNTL_ENABLE * - HCF_CNTL_DISABLE * - HCF_CNTL_CONNECT * - HCF_CNTL_DISCONNECT * - HCF_CNTL_CONTINUE * * For HCF_CNTL_CONTINUE, the field 0x0100 contains the retry flag HCMD_RETRY. * For HCF_CNTL_CONNECT and HCF_CNTL_DISCONNECT, the field 0x0700 contains the port number as HCF_PORT_#. * For Station as well as AccessPoint F/W, MAC Port 0 is the "normal" communication channel. * For AccessPoint F/W, MAC Port 1 through 6 control the WDS links. * * Note that despite the names HCF_CNTL_DISABLE and HCF_CNTL_ENABLE, hcf_cntl does not influence the NIC * Interrupts mode. * * The Connect is used by the MSF to bring a particular port in an inactive state as far as data transmission * and reception are concerned. * When a particular port is disconnected: * - the F/W disables the receiver for that port. * - the F/W ignores send commands for that port. * - all frames (Receive as well as pending Transmit) for that port on the NIC are discarded. * * When the NIC is disabled, above list applies to all ports, i.e. the result is like all ports are * disconnected. 
* * When a particular port is connected: * - the F/W effectuates the static configuration for that port. * - enables the receiver for that port. * - accepts send commands for that port. * * Enabling has the following effects: * - the F/W effectuates the static configuration for all ports. * The F/W only updates its static configuration at a transition from disabled to enabled or from * disconnected to connected. * In order to enforce the static configuration, the MSF must assure that such a transition takes place. * Due to such a disable/enable or disconnect/connect sequence, Rx/Tx frames may be lost, in other words, * configuration may impact communication. * - The DMA Engine (if applicable) is enabled. * Note that the Enable Function by itself only enables data transmission and reception, it * does not enable the Interrupt Generation mechanism. This is done by hcf_action. * * Disabling has the following effects: *!! ;?????is the following statement really true * - it acts as a disconnect on all ports. * - The DMA Engine (if applicable) is disabled. * * For impact of the disable command on the behavior of hcf_dma_tx/rx_get see the appropriate sections. * * Although the Enable/Disable and Connect/Disconnect are antonyms, there is no restriction on their sequencing, * in other words, they may be called multiple times in arbitrary sequence without being paired or balanced. * Each time one of these functions is called, the effects of the preceding calls cease. * * Assert fails if * - ifbp has a recognizable out-of-range value. * - NIC interrupts are not disabled. * - A command other than Continue, Enable, Disable, Connect or Disconnect is given. * - An invalid combination of the subfields is given or a bit outside the subfields is given. * - any return code besides HCF_SUCCESS. 
* - reentrancy, may be caused by calling a hcf_function without adequate protection against NIC interrupts or * multi-threading * *.DIAGRAM * hcf_cntl takes successively the following actions: *2: If the HCF is in Defunct mode or incompatible with the Primary or Station Supplier in the Hermes, * hcf_cntl() returns immediately with HCF_ERR_NO_NIC;? as status. *8: when the port is disabled, the DMA engine needs to be de-activated, so the host can safely reclaim tx * packets from the tx descriptor chain. * *.ENDDOC END DOCUMENTATION * ************************************************************************************************************/ int hcf_cntl( IFBP ifbp, hcf_16 cmd ) { int rc = HCF_ERR_INCOMP_FW; #if HCF_ASSERT { int x = cmd & HCMD_CMD_CODE; if ( x == HCF_CNTL_CONTINUE ) x &= ~HCMD_RETRY; else if ( (x == HCMD_DISABLE || x == HCMD_ENABLE) && ifbp->IFB_FWIdentity.comp_id == COMP_ID_FW_AP ) { x &= ~HFS_TX_CNTL_PORT; } HCFASSERT( x==HCF_CNTL_ENABLE || x==HCF_CNTL_DISABLE || HCF_CNTL_CONTINUE || x==HCF_CNTL_CONNECT || x==HCF_CNTL_DISCONNECT, cmd ) } #endif // HCF_ASSERT // #if (HCF_SLEEP) & HCF_DDS // HCFASSERT( ifbp->IFB_IntOffCnt != 0xFFFE, cmd ) // #endif // HCF_DDS HCFLOGENTRY( HCF_TRACE_CNTL, cmd ) if ( ifbp->IFB_CardStat == 0 ) { /*2*/ /*6*/ rc = cmd_exe( ifbp, cmd, 0 ); #if (HCF_SLEEP) & HCF_DDS ifbp->IFB_TickCnt = 0; //start 2 second period (with 1 tick uncertanty) #endif // HCF_DDS } #if HCF_DMA //!rlav : note that this piece of code is always executed, regardless of the DEFUNCT bit in IFB_CardStat. // The reason behind this is that the MSF should be able to get all its DMA resources back from the HCF, // even if the hardware is disfunctional. Practical example under Windows : surprise removal. 
if ( ifbp->IFB_CntlOpt & USE_DMA ) { hcf_io io_port = ifbp->IFB_IOBase; DESC_STRCT *p; if ( cmd == HCF_CNTL_DISABLE || cmd == HCF_CNTL_ENABLE ) { OUT_PORT_DWORD( (io_port + HREG_DMA_CTRL), DMA_CTRLSTAT_RESET); /*8*/ ifbp->IFB_CntlOpt &= ~DMA_ENABLED; } if ( cmd == HCF_CNTL_ENABLE ) { OUT_PORT_DWORD( (io_port + HREG_DMA_CTRL), DMA_CTRLSTAT_GO); /* ;? by rewriting hcf_dma_rx_put you can probably just call hcf_dma_rx_put( ifbp->IFB_FirstDesc[DMA_RX] ) * as additional beneficiary side effect, the SOP and EOP bits will also be cleared */ ifbp->IFB_CntlOpt |= DMA_ENABLED; HCFASSERT( NT_ASSERT, NEVER_TESTED ) // make the entire rx descriptor chain DMA-owned, so the DMA engine can (re-)use it. if ( ( p = ifbp->IFB_FirstDesc[DMA_RX] ) != NULL ) { //;? Think this over again in the light of the new chaining strategy if ( 1 ) { //begin alternative HCFASSERT( NT_ASSERT, NEVER_TESTED ) put_frame_lst( ifbp, ifbp->IFB_FirstDesc[DMA_RX], DMA_RX ); if ( ifbp->IFB_FirstDesc[DMA_RX] ) { put_frame_lst( ifbp, ifbp->IFB_FirstDesc[DMA_RX]->next_desc_addr, DMA_RX ); } } else { while ( p ) { //p->buf_cntl.cntl_stat |= DESC_DMA_OWNED; p->BUF_CNT |= DESC_DMA_OWNED; p = p->next_desc_addr; } // a rx chain is available so hand it over to the DMA engine p = ifbp->IFB_FirstDesc[DMA_RX]; OUT_PORT_DWORD( (io_port + HREG_RXDMA_PTR32), p->desc_phys_addr); } //end alternative } } } #endif // HCF_DMA HCFASSERT( rc == HCF_SUCCESS, rc ) HCFLOGEXIT( HCF_TRACE_CNTL ) return rc; } // hcf_cntl /************************************************************************************************************ * *.MODULE int hcf_connect( IFBP ifbp, hcf_io io_base ) *.PURPOSE Grants access right for the HCF to the IFB. * Initializes Card and HCF housekeeping. 
* *.ARGUMENTS * ifbp (near) address of the Interface Block * io_base non-USB: I/O Base address of the NIC (connect) * non-USB: HCF_DISCONNECT * USB: HCF_CONNECT, HCF_DISCONNECT * *.RETURNS * HCF_SUCCESS * HCF_ERR_INCOMP_PRI * HCF_ERR_INCOMP_FW * HCF_ERR_DEFUNCT_CMD_SEQ *!! HCF_ERR_NO_NIC really returned ;? * HCF_ERR_NO_NIC * HCF_ERR_TIME_OUT * * MSF-accessible fields of Result Block: * IFB_IOBase entry parameter io_base * IFB_IORange HREG_IO_RANGE (0x40/0x80) * IFB_Version version of the IFB layout * IFB_FWIdentity CFG_FW_IDENTITY_STRCT, specifies the identity of the * "running" F/W, i.e. tertiary F/W under normal conditions * IFB_FWSup CFG_SUP_RANGE_STRCT, specifies the supplier range of * the "running" F/W, i.e. tertiary F/W under normal conditions * IFB_HSISup CFG_SUP_RANGE_STRCT, specifies the HW/SW I/F range of the NIC * IFB_PRIIdentity CFG_PRI_IDENTITY_STRCT, specifies the Identity of the Primary F/W * IFB_PRISup CFG_SUP_RANGE_STRCT, specifies the supplier range of the Primary F/W * all other all MSF accessible fields, which are not specified above, are zero-filled * *.CONDITIONS * It is the responsibility of the MSF to assure the correctness of the I/O Base address. * * Note: hcf_connect defaults to NIC interrupt disabled mode, i.e. as if hcf_action( HCF_ACT_INT_OFF ) * was called. * *.DESCRIPTION * hcf_connect passes the MSF-defined location of the IFB to the HCF and grants or revokes access right for the * HCF to the IFB. Revoking is done by specifying HCF_DISCONNECT rather than an I/O address for the parameter * io_base. Every call of hcf_connect in "connect" mode, must eventually be followed by a call of hcf_connect * in "disconnect" mode. Clalling hcf_connect in "connect"/"disconnect" mode can not be nested. * The IFB address must be used as a handle with all subsequent HCF-function calls and the HCF uses the IFB * address as a handle when it performs a call(back) of an MSF-function (i.e. msf_assert). 
* * Note that not only the MSF accessible fields are cleared, but also all internal housekeeping * information is re-initialized. * This implies that all settings which are done via hcf_action and hcf_put_info (e.g. CFG_MB_ASSERT, CFG_REG_MB, * CFG_REG_INFO_LOG) must be done again. The only field which is not cleared, is IFB_MSFSup. * * If HCF_INT_ON is selected as compile option, NIC interrupts are disabled. * * Assert fails if * - ifbp is not properly aligned ( ref chapter HCF_ALIGN in 4.1.1) * - I/O Base Address is not a multiple of 0x40 (note: 0x0000 is explicitly allowed). * *.DIAGRAM * *0: Throughout hcf_connect you need to distinguish the connect from the disconnect case, which requires * some attention about what to use as "I/O" address when for which purpose. *2: *2a: Reset H-II by toggling reset bit in IO-register on and off. * The HCF_TYPE_PRELOADED caters for the DOS environment where H-II is loaded by a separate program to * overcome the 64k size limit posed on DOS drivers. * The macro OPW is not yet useable because the IFB_IOBase field is not set. * Note 1: hopefully the clearing and initializing of the IFB (see below) acts as a delay which meets the * specification for S/W reset * Note 2: it turns out that on some H/W constellations, the clock to access the EEProm is not lowered * to an appropriate frequency by HREG_IO_SRESET. By giving an HCMD_INI first, this problem is worked around. *2b: Experimentally it is determined over a wide range of F/W versions that waiting for the for Cmd bit in * Ev register gives a workable strategy. The available documentation does not give much clues. *4: clear and initialize the IFB * The HCF house keeping info is designed such that zero is the appropriate initial value for as much as * feasible IFB-items. * The readable fields mentioned in the description section and some HCF specific fields are given their * actual value. 
* IFB_TickIni is initialized at best guess before calibration * Hcf_connect defaults to "no interrupt generation" (implicitly achieved by the zero-filling). *6: Register compile-time linked MSF Routine and set default filter level * cast needed to get around the "near" problem in DOS COM model * er C2446: no conversion from void (__near __cdecl *)(unsigned char __far *,unsigned int,unsigned short,int) * to void (__far __cdecl *)(unsigned char __far *,unsigned int,unsigned short,int) *8: If a command is apparently still active (as indicated by the Busy bit in Cmd register) this may indicate a * blocked cmd pipe line. To unblock the following actions are done: * - Ack everything * - Wait for Busy bit drop in Cmd register * - Wait for Cmd bit raise in Ev register * The two waits are combined in a single HCF_WAIT_WHILE to optimize memory size. If either of these waits * fails (prot_cnt becomes 0), then something is seriously wrong. Rather than panic, the assumption is that the * next cmd_exe will fail, causing the HCF to go into DEFUNCT mode *10: Ack everything to unblock a (possibly blocked) cmd pipe line * Note 1: it is very likely that an Alloc event is pending and very well possible that a (Send) Cmd event is * pending on non-initial calls * Note 2: it is assumed that this strategy takes away the need to ack every conceivable event after a * Hermes Initialize *12: Only H-II NEEDS the Hermes Initialize command. Due to the different semantics for H-I and H-II * Initialize command, init() does not (and can not, since it is called e.g. after a download) execute the * Hermes Initialize command. Executing the Hermes Initialize command for H-I would not harm but not do * anything useful either, so it is skipped. * The return status of cmd_exe is ignored.
It is assumed that if cmd_exe fails, init fails too *14: use io_base as a flag to merge hcf_connect and hcf_disconnect into 1 routine * the call to init and its subsequent call of cmd_exe will return HCF_ERR_NO_NIC if appropriate. This status * is (badly) needed by some legacy combination of NT4 and card services which do not yield an I/O address in * time. * *.NOTICE * On platforms where the NULL-pointer is not a bit-pattern of all zeros, the zero-filling of the IFB results * in an incorrect initialization of pointers. * The implementation of the MailBox manipulation in put_mb_info protects against the absence of a MailBox * based on IFB_MBSize, IFB_MBWp and ifbp->IFB_MBRp. This has ramifications on the initialization of the * MailBox via hcf_put_info with the CFG_REG_MB type, but it prevents dependency on the "NULL-"ness of * IFB_MBp. * *.NOTICE * There are a number of problems when asserting and logging hcf_connect, e.g. * - Asserting on re-entrancy of hcf_connect by means of * "HCFASSERT( (ifbp->IFB_AssertTrace & HCF_ASSERT_CONNECT) == 0, 0 )" is not useful because IFB contents * are undefined * - Asserting before the IFB is cleared will cause mdd_assert() to interpret the garbage in IFB_AssertRtn * as a routine address * Therefore HCFTRACE nor HCFLOGENTRY is called by hcf_connect. *.ENDDOC END DOCUMENTATION * ************************************************************************************************************/ int hcf_connect( IFBP ifbp, hcf_io io_base ) { int rc = HCF_SUCCESS; hcf_io io_addr; hcf_32 prot_cnt; hcf_8 *q; LTV_STRCT x; #if HCF_ASSERT hcf_16 xa = ifbp->IFB_FWIdentity.typ; /* is assumed to cause an assert later on if hcf_connect is called without intervening hcf_disconnect. 
* xa == CFG_FW_IDENTITY in subsequent calls without preceding hcf_disconnect, * xa == 0 in subsequent calls with preceding hcf_disconnect, * xa == "garbage" (any value except CFG_FW_IDENTITY is acceptable) in the initial call */ #endif // HCF_ASSERT if ( io_base == HCF_DISCONNECT ) { //disconnect io_addr = ifbp->IFB_IOBase; OPW( HREG_INT_EN, 0 ); //;?workaround against dying F/W on subsequent hcf_connect calls } else { //connect /* 0 */ io_addr = io_base; } #if 0 //;? if a subsequent hcf_connect is preceeded by an hcf_disconnect the wakeup is not needed !! #if HCF_SLEEP OUT_PORT_WORD( .....+HREG_IO, HREG_IO_WAKEUP_ASYNC ); //OPW not yet useable MSF_WAIT(800); // MSF-defined function to wait n microseconds. note that MSF_WAIT uses not yet defined!!!! IFB_IOBase and IFB_TickIni (via PROT_CNT_INI) so be carefull if this code is restored #endif // HCF_SLEEP #endif // 0 #if ( (HCF_TYPE) & HCF_TYPE_PRELOADED ) == 0 //switch clock back for SEEPROM access !!! OUT_PORT_WORD( io_addr + HREG_CMD, HCMD_INI ); //OPW not yet useable prot_cnt = INI_TICK_INI; HCF_WAIT_WHILE( (IN_PORT_WORD( io_addr + HREG_EV_STAT) & HREG_EV_CMD) == 0 ); OUT_PORT_WORD( (io_addr + HREG_IO), HREG_IO_SRESET ); //OPW not yet useable /* 2a*/ #endif // HCF_TYPE_PRELOADED for ( q = (hcf_8*)(&ifbp->IFB_Magic); q > (hcf_8*)ifbp; *--q = 0 ) /*NOP*/; /* 4 */ ifbp->IFB_Magic = HCF_MAGIC; ifbp->IFB_Version = IFB_VERSION; #if defined MSF_COMPONENT_ID //a new IFB demonstrates how dirty the solution is xxxx[xxxx_PRI_IDENTITY_OFFSET] = NULL; //IFB_PRIIdentity placeholder 0xFD02 xxxx[xxxx_PRI_IDENTITY_OFFSET+1] = NULL; //IFB_PRISup placeholder 0xFD03 #endif // MSF_COMPONENT_ID #if (HCF_TALLIES) & ( HCF_TALLIES_NIC | HCF_TALLIES_HCF ) ifbp->IFB_TallyLen = 1 + 2 * (HCF_NIC_TAL_CNT + HCF_HCF_TAL_CNT); //convert # of Tallies to L value for LTV ifbp->IFB_TallyTyp = CFG_TALLIES; //IFB_TallyTyp: set T value #endif // HCF_TALLIES_NIC / HCF_TALLIES_HCF ifbp->IFB_IOBase = io_addr; //set IO_Base asap, so asserts via HREG_SW_2 
don't harm ifbp->IFB_IORange = HREG_IO_RANGE; ifbp->IFB_CntlOpt = USE_16BIT; #if HCF_ASSERT assert_ifbp = ifbp; ifbp->IFB_AssertLvl = 1; #if (HCF_ASSERT) & HCF_ASSERT_LNK_MSF_RTN if ( io_base != HCF_DISCONNECT ) { ifbp->IFB_AssertRtn = (MSF_ASSERT_RTNP)msf_assert; /* 6 */ } #endif // HCF_ASSERT_LNK_MSF_RTN #if (HCF_ASSERT) & HCF_ASSERT_MB //build the structure to pass the assert info to hcf_put_info ifbp->IFB_AssertStrct.len = sizeof(ifbp->IFB_AssertStrct)/sizeof(hcf_16) - 1; ifbp->IFB_AssertStrct.typ = CFG_MB_INFO; ifbp->IFB_AssertStrct.base_typ = CFG_MB_ASSERT; ifbp->IFB_AssertStrct.frag_cnt = 1; ifbp->IFB_AssertStrct.frag_buf[0].frag_len = ( offsetof(IFB_STRCT, IFB_AssertLvl) - offsetof(IFB_STRCT, IFB_AssertLine) ) / sizeof(hcf_16); ifbp->IFB_AssertStrct.frag_buf[0].frag_addr = &ifbp->IFB_AssertLine; #endif // HCF_ASSERT_MB #endif // HCF_ASSERT IF_PROT_TIME( prot_cnt = ifbp->IFB_TickIni = INI_TICK_INI; ) #if ( (HCF_TYPE) & HCF_TYPE_PRELOADED ) == 0 //!! No asserts before Reset-bit in HREG_IO is cleared OPW( HREG_IO, 0x0000 ); //OPW useable /* 2b*/ HCF_WAIT_WHILE( (IPW( HREG_EV_STAT) & HREG_EV_CMD) == 0 ); IF_PROT_TIME( HCFASSERT( prot_cnt, IPW( HREG_EV_STAT) ) ) IF_PROT_TIME( if ( prot_cnt ) prot_cnt = ifbp->IFB_TickIni; ) #endif // HCF_TYPE_PRELOADED //!! No asserts before Reset-bit in HREG_IO is cleared HCFASSERT( DO_ASSERT, MERGE_2( HCF_ASSERT, 0xCAF0 ) ) //just to proof that the complete assert machinery is working HCFASSERT( xa != CFG_FW_IDENTITY, 0 ) // assert if hcf_connect is called without intervening hcf_disconnect. HCFASSERT( ((hcf_32)(void*)ifbp & (HCF_ALIGN-1) ) == 0, (hcf_32)(void*)ifbp ) HCFASSERT( (io_addr & 0x003F) == 0, io_addr ) //if Busy bit in Cmd register if (IPW( HREG_CMD ) & HCMD_BUSY ) { /* 8 */ //. Ack all to unblock a (possibly) blocked cmd pipe line OPW( HREG_EV_ACK, ~HREG_EV_SLEEP_REQ ); //. Wait for Busy bit drop in Cmd register //. 
Wait for Cmd bit raise in Ev register HCF_WAIT_WHILE( ( IPW( HREG_CMD ) & HCMD_BUSY ) && (IPW( HREG_EV_STAT) & HREG_EV_CMD) == 0 ); IF_PROT_TIME( HCFASSERT( prot_cnt, IPW( HREG_EV_STAT) ) ) /* if prot_cnt == 0, cmd_exe will fail, causing DEFUNCT */ } OPW( HREG_EV_ACK, ~HREG_EV_SLEEP_REQ ); #if ( (HCF_TYPE) & HCF_TYPE_PRELOADED ) == 0 /*12*/ (void)cmd_exe( ifbp, HCMD_INI, 0 ); #endif // HCF_TYPE_PRELOADED if ( io_base != HCF_DISCONNECT ) { rc = init( ifbp ); /*14*/ if ( rc == HCF_SUCCESS ) { x.len = 2; x.typ = CFG_NIC_BUS_TYPE; (void)hcf_get_info( ifbp, &x ); ifbp->IFB_BusType = x.val[0]; //CFG_NIC_BUS_TYPE not supported -> default 32 bits/DMA, MSF has to overrule via CFG_CNTL_OPT if ( x.len == 0 || x.val[0] == 0x0002 || x.val[0] == 0x0003 ) { #if (HCF_IO) & HCF_IO_32BITS ifbp->IFB_CntlOpt &= ~USE_16BIT; //reset USE_16BIT #endif // HCF_IO_32BITS #if HCF_DMA ifbp->IFB_CntlOpt |= USE_DMA; //SET DMA #else ifbp->IFB_IORange = 0x40 /*i.s.o. HREG_IO_RANGE*/; #endif // HCF_DMA } } } else HCFASSERT( ( ifbp->IFB_Magic ^= HCF_MAGIC ) == 0, ifbp->IFB_Magic ) /*NOP*/; /* of above HCFASSERT only the side effect is needed, NOP in case HCFASSERT is dummy */ ifbp->IFB_IOBase = io_base; /* 0*/ return rc; } // hcf_connect #if HCF_DMA /************************************************************************************************************ * Function get_frame_lst * - resolve the "last host-owned descriptor" problems when a descriptor list is reclaimed by the MSF. * * The FrameList to be reclaimed as well as the DescriptorList always start in IFB_FirstDesc[tx_rx_flag] * and this is always the "current" DELWA Descriptor. 
* * If a FrameList is available, the last descriptor of the FrameList to turned into a new DELWA Descriptor: * - a copy is made from the information in the last descriptor of the FrameList into the current * DELWA Descriptor * - the remainder of the DescriptorList is detached from the copy by setting the next_desc_addr at NULL * - the DMA control bits of the copy are cleared to do not confuse the MSF * - the copy of the last descriptor (i.e. the "old" DELWA Descriptor) is chained to the prev Descriptor * of the FrameList, thus replacing the original last Descriptor of the FrameList. * - IFB_FirstDesc is changed to the address of that replaced (original) last descriptor of the FrameList, * i.e. the "new" DELWA Descriptor. * * This function makes a copy of that last host-owned descriptor, so the MSF will get a copy of the descriptor. * On top of that, it adjusts DMA related fields in the IFB structure. // perform a copying-scheme to circumvent the 'last host owned descriptor cannot be reclaimed' limitation imposed by H2.5's DMA hardware design // a 'reclaim descriptor' should be available in the HCF: * * Returns: address of the first descriptor of the FrameList * 8: Be careful once you start re-ordering the steps in the copy process, that it still works for cases * of FrameLists of 1, 2 and more than 2 descriptors * * Input parameters: * tx_rx_flag : specifies 'transmit' or 'receive' descriptor. * ************************************************************************************************************/ HCF_STATIC DESC_STRCT* get_frame_lst( IFBP ifbp, int tx_rx_flag ) { DESC_STRCT *head = ifbp->IFB_FirstDesc[tx_rx_flag]; DESC_STRCT *copy, *p, *prev; HCFASSERT( tx_rx_flag == DMA_RX || tx_rx_flag == DMA_TX, tx_rx_flag ) //if FrameList if ( head ) { //. 
search for last descriptor of first FrameList p = prev = head; while ( ( p->BUF_SIZE & DESC_EOP ) == 0 && p->next_desc_addr ) { if ( ( ifbp->IFB_CntlOpt & DMA_ENABLED ) == 0 ) { //clear control bits when disabled p->BUF_CNT &= DESC_CNT_MASK; } prev = p; p = p->next_desc_addr; } //. if DMA enabled if ( ifbp->IFB_CntlOpt & DMA_ENABLED ) { //. . if last descriptor of FrameList is DMA owned //. . or if FrameList is single (DELWA) Descriptor if ( p->BUF_CNT & DESC_DMA_OWNED || head->next_desc_addr == NULL ) { //. . . refuse to return FrameList to caller head = NULL; } } } //if returnable FrameList found if ( head ) { //. if FrameList is single (DELWA) Descriptor (implies DMA disabled) if ( head->next_desc_addr == NULL ) { //. . clear DescriptorList /*;?ifbp->IFB_LastDesc[tx_rx_flag] =*/ ifbp->IFB_FirstDesc[tx_rx_flag] = NULL; //. else } else { //. . strip hardware-related bits from last descriptor //. . remove DELWA Descriptor from head of DescriptorList copy = head; head = head->next_desc_addr; //. . exchange first (Confined) and last (possibly imprisoned) Descriptor copy->buf_phys_addr = p->buf_phys_addr; copy->buf_addr = p->buf_addr; copy->BUF_SIZE = p->BUF_SIZE &= DESC_CNT_MASK; //get rid of DESC_EOP and possibly DESC_SOP copy->BUF_CNT = p->BUF_CNT &= DESC_CNT_MASK; //get rid of DESC_DMA_OWNED #if (HCF_EXT) & HCF_DESC_STRCT_EXT copy->DESC_MSFSup = p->DESC_MSFSup; #endif // HCF_DESC_STRCT_EXT //. . turn into a DELWA Descriptor p->buf_addr = NULL; //. . chain copy to prev /* 8*/ prev->next_desc_addr = copy; //. . detach remainder of the DescriptorList from FrameList copy->next_desc_addr = NULL; copy->next_desc_phys_addr = 0xDEAD0000; //! just to be nice, not really needed //. . save the new start (i.e. DELWA Descriptor) in IFB_FirstDesc ifbp->IFB_FirstDesc[tx_rx_flag] = p; } //. strip DESC_SOP from first descriptor head->BUF_SIZE &= DESC_CNT_MASK; //head->BUF_CNT &= DESC_CNT_MASK; get rid of DESC_DMA_OWNED head->next_desc_phys_addr = 0xDEAD0000; //! 
just to be nice, not really needed } //return the just detached FrameList (if any) return head; } // get_frame_lst /************************************************************************************************************ * Function put_frame_lst * * This function * * Returns: address of the first descriptor of the FrameList * * Input parameters: * tx_rx_flag : specifies 'transmit' or 'receive' descriptor. * * The following list should be kept in sync with hcf_dma_tx/rx_put, in order to get them in the WCI-spec !!!! * Assert fails if * - DMA is not enabled * - descriptor list is NULL * - a descriptor in the descriptor list is not double word aligned * - a count of size field of a descriptor contains control bits, i.e. bits in the high order nibble. * - the DELWA descriptor is not a "singleton" DescriptorList. * - the DELWA descriptor is not the first Descriptor supplied * - a non_DMA descriptor is supplied before the DELWA Descriptor is supplied * - Possibly more checks could be added !!!!!!!!!!!!! *.NOTICE * The asserts marked with *sc* are really sanity checks for the HCF, they can (supposedly) not be influenced * by incorrect MSF behavior // The MSF is required to supply the HCF with a single descriptor for MSF tx reclaim purposes. // This 'reclaim descriptor' can be recognized by the fact that its buf_addr field is zero. ********************************************************************************************* * Although not required from a hardware perspective: * - make each descriptor in this rx-chain DMA-owned. * - Also set the count to zero. EOP and SOP bits are also cleared. 
*********************************************************************************************/ HCF_STATIC void put_frame_lst( IFBP ifbp, DESC_STRCT *descp, int tx_rx_flag ) { DESC_STRCT *p = descp; hcf_16 port; HCFASSERT( ifbp->IFB_CntlOpt & USE_DMA, ifbp->IFB_CntlOpt) //only hcf_dma_tx_put must also be DMA_ENABLED HCFASSERT( tx_rx_flag == DMA_RX || tx_rx_flag == DMA_TX, tx_rx_flag ) HCFASSERT( p , 0 ) while ( p ) { HCFASSERT( ((hcf_32)p & 3 ) == 0, (hcf_32)p ) HCFASSERT( (p->BUF_CNT & ~DESC_CNT_MASK) == 0, p->BUF_CNT ) HCFASSERT( (p->BUF_SIZE & ~DESC_CNT_MASK) == 0, p->BUF_SIZE ) p->BUF_SIZE &= DESC_CNT_MASK; //!!this SHOULD be superfluous in case of correct MSF p->BUF_CNT &= tx_rx_flag == DMA_RX ? 0 : DESC_CNT_MASK; //!!this SHOULD be superfluous in case of correct MSF p->BUF_CNT |= DESC_DMA_OWNED; if ( p->next_desc_addr ) { // HCFASSERT( p->buf_addr && p->buf_phys_addr && p->BUF_SIZE && +/- p->BUF_SIZE, ... ) HCFASSERT( p->next_desc_addr->desc_phys_addr, (hcf_32)p->next_desc_addr ) p->next_desc_phys_addr = p->next_desc_addr->desc_phys_addr; } else { // p->next_desc_phys_addr = 0; if ( p->buf_addr == NULL ) { // DELWA Descriptor HCFASSERT( descp == p, (hcf_32)descp ) //singleton DescriptorList HCFASSERT( ifbp->IFB_FirstDesc[tx_rx_flag] == NULL, (hcf_32)ifbp->IFB_FirstDesc[tx_rx_flag]) HCFASSERT( ifbp->IFB_LastDesc[tx_rx_flag] == NULL, (hcf_32)ifbp->IFB_LastDesc[tx_rx_flag]) descp->BUF_CNT = 0; //&= ~DESC_DMA_OWNED; ifbp->IFB_FirstDesc[tx_rx_flag] = descp; // part of alternative ifbp->IFB_LastDesc[tx_rx_flag] = ifbp->IFB_FirstDesc[tx_rx_flag] = descp; // if "recycling" a FrameList // (e.g. called from hcf_cntl( HCF_CNTL_ENABLE ) // . 
prepare for activation DMA controller // part of alternative descp = descp->next_desc_addr; } else { //a "real" FrameList, hand it over to the DMA engine HCFASSERT( ifbp->IFB_FirstDesc[tx_rx_flag], (hcf_32)descp ) HCFASSERT( ifbp->IFB_LastDesc[tx_rx_flag], (hcf_32)descp ) HCFASSERT( ifbp->IFB_LastDesc[tx_rx_flag]->next_desc_addr == NULL, (hcf_32)ifbp->IFB_LastDesc[tx_rx_flag]->next_desc_addr) // p->buf_cntl.cntl_stat |= DESC_DMA_OWNED; ifbp->IFB_LastDesc[tx_rx_flag]->next_desc_addr = descp; ifbp->IFB_LastDesc[tx_rx_flag]->next_desc_phys_addr = descp->desc_phys_addr; port = HREG_RXDMA_PTR32; if ( tx_rx_flag ) { p->BUF_SIZE |= DESC_EOP; // p points at the last descriptor in the caller-supplied descriptor chain descp->BUF_SIZE |= DESC_SOP; port = HREG_TXDMA_PTR32; } OUT_PORT_DWORD( (ifbp->IFB_IOBase + port), descp->desc_phys_addr ); } ifbp->IFB_LastDesc[tx_rx_flag] = p; } p = p->next_desc_addr; } } // put_frame_lst /************************************************************************************************************ * *.MODULE DESC_STRCT* hcf_dma_rx_get( IFBP ifbp ) *.PURPOSE decapsulate a message and provides that message to the MSF. * reclaim all descriptors in the rx descriptor chain. * *.ARGUMENTS * ifbp address of the Interface Block * *.RETURNS * pointer to a FrameList * *.DESCRIPTION * hcf_dma_rx_get is intended to return a received frame when such a frame is deposited in Host memory by the * DMA engine. In addition hcf_dma_rx_get can be used to reclaim all descriptors in the rx descriptor chain * when the DMA Engine is disabled, e.g. as part of a driver unloading strategy. * hcf_dma_rx_get must be called repeatedly by the MSF when hcf_service_nic signals availability of a rx frame * through the HREG_EV_RDMAD flag of IFB_DmaPackets. The calling must stop when a NULL pointer is returned, at * which time the HREG_EV_RDMAD flag is also cleared by the HCF to arm the mechanism for the next frame * reception. 
* Regardless whether the DMA Engine is currently enabled (as controlled via hcf_cntl), if the DMA controller * deposited an Rx-frame in the Rx-DescriptorList, this frame is detached from the Rx-DescriptorList, * transformed into a FrameList (i.e. updating the housekeeping fields in the descriptors) and returned to the * caller. * If no such Rx-frame is available in the Rx-DescriptorList, the behavior of hcf_dma_rx_get depends on the * status of the DMA Engine. * If the DMA Engine is enabled, a NULL pointer is returned. * If the DMA Engine is disabled, the following strategy is used: * - the complete Rx-DescriptorList is returned. The DELWA Descriptor is not part of the Rx-DescriptorList. * - If there is no Rx-DescriptorList, the DELWA Descriptor is returned. * - If there is no DELWA Descriptor, a NULL pointer is returned. * * If the MSF performs an disable/enable sequence without exhausting the Rx-DescriptorList as described above, * the enable command will reset all house keeping information, i.e. already received but not yet by the MSF * retrieved frames are lost and the next frame will be received starting with the oldest descriptor. * * The HCF can be used in 2 fashions: with and without decapsulation for data transfer. * This is controlled at compile time by the HCF_ENC bit of the HCF_ENCAP system constant. * If appropriate, decapsulation is done by moving some data inside the buffers and updating the descriptors * accordingly. *!! ;?????where did I describe why a simple manipulation with the count values does not suffice? 
* *.DIAGRAM * *.ENDDOC END DOCUMENTATION * ************************************************************************************************************/ DESC_STRCT* hcf_dma_rx_get (IFBP ifbp) { DESC_STRCT *descp; // pointer to start of FrameList descp = get_frame_lst( ifbp, DMA_RX ); if ( descp && descp->buf_addr ) //!be aware of the missing curly bracket //skip decapsulation at confined descriptor #if (HCF_ENCAP) == HCF_ENC #if (HCF_TYPE) & HCF_TYPE_CCX if ( ifbp->IFB_CKIPStat == HCF_ACT_CCX_OFF ) #endif // HCF_TYPE_CCX { int i; DESC_STRCT *p = descp->next_desc_addr; //pointer to 2nd descriptor of frame HCFASSERT(p, 0) // The 2nd descriptor contains (maybe) a SNAP header plus part or whole of the payload. //determine decapsulation sub-flag in RxFS i = *(wci_recordp)&descp->buf_addr[HFS_STAT] & ( HFS_STAT_MSG_TYPE | HFS_STAT_ERR ); if ( i == HFS_STAT_TUNNEL || ( i == HFS_STAT_1042 && hcf_encap( (wci_bufp)&p->buf_addr[HCF_DASA_SIZE] ) != ENC_TUNNEL )) { // The 2nd descriptor contains a SNAP header plus part or whole of the payload. HCFASSERT( p->BUF_CNT == (p->buf_addr[5] + (p->buf_addr[4]<<8) + 2*6 + 2 - 8), p->BUF_CNT ) // perform decapsulation HCFASSERT(p->BUF_SIZE >=8, p->BUF_SIZE) // move SA[2:5] in the second buffer to replace part of the SNAP header for ( i=3; i >= 0; i--) p->buf_addr[i+8] = p->buf_addr[i]; // copy DA[0:5], SA[0:1] from first buffer to second buffer for ( i=0; i<8; i++) p->buf_addr[i] = descp->buf_addr[HFS_ADDR_DEST + i]; // make first buffer shorter in count descp->BUF_CNT = HFS_ADDR_DEST; } } #endif // HCF_ENC if ( descp == NULL ) ifbp->IFB_DmaPackets &= (hcf_16)~HREG_EV_RDMAD; //;?could be integrated into get_frame_lst HCFLOGEXIT( HCF_TRACE_DMA_RX_GET ) return descp; } // hcf_dma_rx_get /************************************************************************************************************ * *.MODULE void hcf_dma_rx_put( IFBP ifbp, DESC_STRCT *descp ) *.PURPOSE supply buffers for receive purposes. * supply the Rx-DELWA descriptor. 
* *.ARGUMENTS * ifbp address of the Interface Block * descp address of a DescriptorList * *.RETURNS N.A. * *.DESCRIPTION * This function is called by the MSF to supply the HCF with new/more buffers for receive purposes. * The HCF can be used in 2 fashions: with and without encapsulation for data transfer. * This is controlled at compile time by the HCF_ENC bit of the HCF_ENCAP system constant. * As a consequence, some additional constraints apply to the number of descriptors and the buffers associated * with the first 2 descriptors. Independent of the encapsulation feature, the COUNT fields are ignored. * A special case is the supplying of the DELWA descriptor, which must be supplied as the first descriptor. * * Assert fails if * - ifbp has a recognizable out-of-range value. * - NIC interrupts are not disabled while required by parameter action. * - in case decapsulation by the HCF is selected: * - The first databuffer does not have the exact size corresponding with the RxFS up to the 802.3 DestAddr * field (== 29 words). * - The FrameList does not consist of at least 2 Descriptors. * - The second databuffer does not have the minimum size of 12 bytes. *!! The 2nd part of the list of asserts should be kept in sync with put_frame_lst, in order to get *!! them in the WCI-spec !!!! * - DMA is not enabled * - descriptor list is NULL * - a descriptor in the descriptor list is not double word aligned * - a count or size field of a descriptor contains control bits, i.e. bits in the high order nibble. * - the DELWA descriptor is not a "singleton" DescriptorList. * - the DELWA descriptor is not the first Descriptor supplied * - a non_DMA descriptor is supplied before the DELWA Descriptor is supplied *!! - Possibly more checks could be added !!!!!!!!!!!!!
* *.DIAGRAM * * *.ENDDOC END DOCUMENTATION * ************************************************************************************************************/ void hcf_dma_rx_put( IFBP ifbp, DESC_STRCT *descp ) { HCFLOGENTRY( HCF_TRACE_DMA_RX_PUT, 0xDA01 ) HCFASSERT( ifbp->IFB_Magic == HCF_MAGIC, ifbp->IFB_Magic ) HCFASSERT_INT put_frame_lst( ifbp, descp, DMA_RX ); #if HCF_ASSERT && (HCF_ENCAP) == HCF_ENC if ( descp->buf_addr ) { HCFASSERT( descp->BUF_SIZE == HCF_DMA_RX_BUF1_SIZE, descp->BUF_SIZE ) HCFASSERT( descp->next_desc_addr, 0 ) // first descriptor should be followed by another descriptor // The second DB is for SNAP and payload purposes. It should be a minimum of 12 bytes in size. HCFASSERT( descp->next_desc_addr->BUF_SIZE >= 12, descp->next_desc_addr->BUF_SIZE ) } #endif // HCFASSERT / HCF_ENC HCFLOGEXIT( HCF_TRACE_DMA_RX_PUT ) } // hcf_dma_rx_put /************************************************************************************************************ * *.MODULE DESC_STRCT* hcf_dma_tx_get( IFBP ifbp ) *.PURPOSE DMA mode: reclaims and decapsulates packets in the tx descriptor chain if: * - A Tx packet has been copied from host-RAM into NIC-RAM by the DMA engine * - The Hermes/DMAengine have been disabled * *.ARGUMENTS * ifbp address of the Interface Block * *.RETURNS * pointer to a reclaimed Tx packet. * *.DESCRIPTION * impact of the disable command: * When a non-empty pool of Tx descriptors exists (created by means of hcf_dma_put_tx), the MSF * is supposed to empty that pool by means of hcf_dma_tx_get calls after the disable in an * disable/enable sequence. * *.DIAGRAM * *.NOTICE * *.ENDDOC END DOCUMENTATION * ************************************************************************************************************/ DESC_STRCT* hcf_dma_tx_get( IFBP ifbp ) { DESC_STRCT *descp; // pointer to start of FrameList