/*
 *  linux/drivers/char/vt.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * Hopefully this will be a rather complete VT102 implementation.
 *
 * Beeping thanks to John T Kohl.
 *
 * Virtual Consoles, Screen Blanking, Screen Dumping, Color, Graphics
 *   Chars, and VT100 enhancements by Peter MacDonald.
 *
 * Copy and paste function by Andrew Haylett,
 *   some enhancements by Alessandro Rubini.
 *
 * Code to check for different video-cards mostly by Galen Hunt,
 * <g-hunt@ee.utah.edu>
 *
 * Rudimentary ISO 10646/Unicode/UTF-8 character set support by
 * Markus Kuhn, <mskuhn@immd4.informatik.uni-erlangen.de>.
 *
 * Dynamic allocation of consoles, aeb@cwi.nl, May 1994
 * Resizing of consoles, aeb, 940926
 *
 * Code for xterm like mouse click reporting by Peter Orbaek 20-Jul-94
 * <poe@daimi.aau.dk>
 *
 * User-defined bell sound, new setterm control sequences and printk
 * redirection by Martin Mares <mj@k332.feld.cvut.cz> 19-Nov-95
 *
 * APM screenblank bug fixed Takashi Manabe <manabe@roy.dsl.tutics.tut.jp>
 *
 * Merge with the abstract console driver by Geert Uytterhoeven
 * <geert@linux-m68k.org>, Jan 1997.
 *
 *   Original m68k console driver modifications by
 *
 *     - Arno Griffioen <arno@usn.nl>
 *     - David Carter <carter@cs.bris.ac.uk>
 * 
 *   The abstract console driver provides a generic interface for a text
 *   console. It supports VGA text mode, frame buffer based graphical consoles
 *   and special graphics processors that are only accessible through some
 *   registers (e.g. a TMS340x0 GSP).
 *
 *   The interface to the hardware is specified using a special structure
 *   (struct consw) which contains function pointers to console operations
 *   (see <linux/console.h> for more information).
 *
 * Support for changeable cursor shape
 * by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>, August 1997
 *
 * Ported to i386 and con_scrolldelta fixed
 * by Emmanuel Marty <core@ggi-project.org>, April 1998
 *
 * Resurrected character buffers in videoram plus lots of other trickery
 * by Martin Mares <mj@atrey.karlin.mff.cuni.cz>, July 1998
 *
 * Removed old-style timers, introduced console_timer, made timer
 * deletion SMP-safe.  17Jun00, Andrew Morton <andrewm@uow.edu.au>
 *
 * Removed console_lock, enabled interrupts across all console operations
 * 13 March 2001, Andrew Morton
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kd.h>
#include <linux/slab.h>
#include <linux/major.h>
#include <linux/mm.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/vt_kern.h>
#include <linux/selection.h>
#include <linux/tiocl.h>
#include <linux/kbd_kern.h>
#include <linux/consolemap.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/config.h>
#include <linux/workqueue.h>
#include <linux/bootmem.h>
#include <linux/pm.h>
#include <linux/font.h>
#include <linux/bitops.h>

#include <asm/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>


const struct consw *conswitchp;

/* A bitmap for codes <32. A bit of 1 indicates that the code
 * corresponding to that bit number invokes some special action
 * (such as cursor movement) and should not be displayed as a
 * glyph unless the disp_ctrl mode is explicitly enabled.
 */
#define CTRL_ACTION 0x0d00ff81
#define CTRL_ALWAYS 0x0800f501	/* Cannot be overridden by disp_ctrl */
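/*
 * For example, bit 7 of CTRL_ACTION (0x0d00ff81) is set, so BEL (code 7)
 * triggers an action instead of printing a glyph; bits 8..15 cover BS..SI
 * and bits 24, 26 and 27 cover CAN, SUB and ESC.  A sketch of the test
 * applied to an incoming code (the variable name is illustrative):
 *
 *	int takes_action = (c < 32) && ((CTRL_ACTION >> c) & 1);
 */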

/*
 * Here are the default bell parameters: 750 Hz, 1/8th of a second
 */
#define DEFAULT_BELL_PITCH	750
#define DEFAULT_BELL_DURATION	(HZ/8)

extern void vcs_make_devfs(struct tty_struct *tty);
extern void vcs_remove_devfs(struct tty_struct *tty);

extern void console_map_init(void);
#ifdef CONFIG_PROM_CONSOLE
extern void prom_con_init(void);
#endif
#ifdef CONFIG_MDA_CONSOLE
extern int mda_console_init(void);
#endif

struct vc vc_cons [MAX_NR_CONSOLES];

#ifndef VT_SINGLE_DRIVER
static const struct consw *con_driver_map[MAX_NR_CONSOLES];
#endif

static int con_open(struct tty_struct *, struct file *);
static void vc_init(struct vc_data *vc, unsigned int rows,
		    unsigned int cols, int do_clear);
static void gotoxy(struct vc_data *vc, int new_x, int new_y);
static void save_cur(struct vc_data *vc);
static void reset_terminal(struct vc_data *vc, int do_clear);
static void con_flush_chars(struct tty_struct *tty);
static void set_vesa_blanking(char __user *p);
static void set_cursor(struct vc_data *vc);
static void hide_cursor(struct vc_data *vc);
static void console_callback(void *ignored);
static void blank_screen_t(unsigned long dummy);
static void set_palette(struct vc_data *vc);

static int printable;		/* Is console ready for printing? */

/*
 * ignore_poke: don't unblank the screen when things are typed.  This is
 * mainly for the privacy of braille terminal users.
 */
static int ignore_poke;

int do_poke_blanked_console;
int console_blanked;

static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */
static int blankinterval = 10*60*HZ;
static int vesa_off_interval;

static DECLARE_WORK(console_work, console_callback, NULL);

/*
 * fg_console is the current virtual console,
 * last_console is the last used one,
 * want_console is the console we want to switch to,
 * kmsg_redirect is the console for kernel messages,
 */
int fg_console;
int last_console;
int want_console = -1;
int kmsg_redirect;

/*
 * For each existing display, we have a pointer to the console currently visible
 * on that display, allowing consoles other than fg_console to be refreshed
 * appropriately. Unless the low-level driver supplies its own display_fg
 * variable, we use this one for the "master display".
 */
static struct vc_data *master_display_fg;

/*
 * Unfortunately, we need to delay tty echo when we're currently writing to the
 * console since the code is (and always was) not re-entrant, so we schedule
 * all flip requests to process context with schedule_console_callback() and run it from
 * console_callback().
 */

/*
 * For the same reason, we defer scrollback to the console callback.
 */
static int scrollback_delta;

/*
 * Hook so that the power management routines can (un)blank
 * the console on our behalf.
 */
int (*console_blank_hook)(int);

static struct timer_list console_timer;
static int blank_state;
static int blank_timer_expired;
enum {
	blank_off = 0,
	blank_normal_wait,
	blank_vesa_wait,
};

/*
 *	Low-Level Functions
 */

#define IS_FG(vc)	((vc)->vc_num == fg_console)

#ifdef VT_BUF_VRAM_ONLY
#define DO_UPDATE(vc)	0
#else
#define DO_UPDATE(vc)	CON_IS_VISIBLE(vc)
#endif
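
/*
 * DO_UPDATE(vc) is non-zero when changes to the screen buffer must also be
 * painted on the hardware, i.e. when vc is the console currently visible.
 * With VT_BUF_VRAM_ONLY the buffer itself lives in video RAM, so no
 * separate update is ever needed.
 */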

static inline unsigned short *screenpos(struct vc_data *vc, int offset, int viewed)
{
	unsigned short *p;
	
	if (!viewed)
		p = (unsigned short *)(vc->vc_origin + offset);
	else if (!vc->vc_sw->con_screen_pos)
		p = (unsigned short *)(vc->vc_visible_origin + offset);
	else
		p = vc->vc_sw->con_screen_pos(vc, offset);
	return p;
}

static inline void scrolldelta(int lines)
{
	scrollback_delta += lines;
	schedule_console_callback();
}

void schedule_console_callback(void)
{
	schedule_work(&console_work);
}

static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
{
	unsigned short *d, *s;

	if (t+nr >= b)
		nr = b - t - 1;
	if (b > vc->vc_rows || t >= b || nr < 1)
		return;
	if (CON_IS_VISIBLE(vc) && vc->vc_sw->con_scroll(vc, t, b, SM_UP, nr))
		return;
	d = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t);
	s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * (t + nr));
	scr_memmovew(d, s, (b - t - nr) * vc->vc_size_row);
	scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_video_erase_char,
		    vc->vc_size_row * nr);
}

static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
{
	unsigned short *s;
	unsigned int step;

	if (t+nr >= b)
		nr = b - t - 1;
	if (b > vc->vc_rows || t >= b || nr < 1)
		return;
	if (CON_IS_VISIBLE(vc) && vc->vc_sw->con_scroll(vc, t, b, SM_DOWN, nr))
		return;
	s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t);
	step = vc->vc_cols * nr;
	scr_memmovew(s + step, s, (b - t - nr) * vc->vc_size_row);
	scr_memsetw(s, vc->vc_video_erase_char, 2 * step);
}

static void do_update_region(struct vc_data *vc, unsigned long start, int count)
{
#ifndef VT_BUF_VRAM_ONLY
	unsigned int xx, yy, offset;
	u16 *p;

	p = (u16 *) start;
	if (!vc->vc_sw->con_getxy) {
		offset = (start - vc->vc_origin) / 2;
		xx = offset % vc->vc_cols;
		yy = offset / vc->vc_cols;
	} else {
		int nxx, nyy;
		start = vc->vc_sw->con_getxy(vc, start, &nxx, &nyy);
		xx = nxx; yy = nyy;
	}
	for(;;) {
		u16 attrib = scr_readw(p) & 0xff00;
		int startx = xx;
		u16 *q = p;
		while (xx < vc->vc_cols && count) {
			if (attrib != (scr_readw(p) & 0xff00)) {
				if (p > q)
					vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
				startx = xx;
				q = p;
				attrib = scr_readw(p) & 0xff00;
			}
			p++;
			xx++;
			count--;
		}
		if (p > q)
			vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
		if (!count)
			break;
		xx = 0;
		yy++;
		if (vc->vc_sw->con_getxy) {
			p = (u16 *)start;
			start = vc->vc_sw->con_getxy(vc, start, NULL, NULL);
		}
	}
#endif
}

void update_region(struct vc_data *vc, unsigned long start, int count)
{
	WARN_CONSOLE_UNLOCKED();

	if (DO_UPDATE(vc)) {
		hide_cursor(vc);
		do_update_region(vc, start, count);
		set_cursor(vc);
	}
}

/* Structure of attributes is hardware-dependent */

static u8 build_attr(struct vc_data *vc, u8 _color, u8 _intensity, u8 _blink, u8 _underline, u8 _reverse)
{
	if (vc->vc_sw->con_build_attr)
		return vc->vc_sw->con_build_attr(vc, _color, _intensity, _blink, _underline, _reverse);

#ifndef VT_BUF_VRAM_ONLY
/*
 * ++roman: I completely changed the attribute format for monochrome
 * mode (!can_do_color). The formerly used MDA (monochrome display
 * adapter) format didn't allow the combination of certain effects.
 * Now the attribute is just a bit vector:
 *  Bit 0..1: intensity (0..2)
 *  Bit 2   : underline
 *  Bit 3   : reverse
 *  Bit 7   : blink
 */
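/*
 * e.g. bright underlined text on a mono console: _intensity == 2 and
 * _underline == 1 yield 2 | 4 == 0x06.
 */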
	{
	u8 a = vc->vc_color;
	if (!vc->vc_can_do_color)
		return _intensity |
		       (_underline ? 4 : 0) |
		       (_reverse ? 8 : 0) |
		       (_blink ? 0x80 : 0);
	if (_underline)
		a = (a & 0xf0) | vc->vc_ulcolor;
	else if (_intensity == 0)
		a = (a & 0xf0) | vc->vc_ulcolor;
	if (_reverse)
		a = ((a) & 0x88) | ((((a) >> 4) | ((a) << 4)) & 0x77);
	if (_blink)
		a ^= 0x80;
	if (_intensity == 2)
		a ^= 0x08;
	if (vc->vc_hi_font_mask == 0x100)
		a <<= 1;
	return a;
	}
#else
	return 0;
#endif
}

static void update_attr(struct vc_data *vc)
{
	vc->vc_attr = build_attr(vc, vc->vc_color, vc->vc_intensity, vc->vc_blink, vc->vc_underline, vc->vc_reverse ^ vc->vc_decscnm);
	vc->vc_video_erase_char = (build_attr(vc, vc->vc_color, 1, vc->vc_blink, 0, vc->vc_decscnm) << 8) | ' ';
}

/* Note: inverting the screen twice should revert to the original state */
void invert_screen(struct vc_data *vc, int offset, int count, int viewed)
{
	unsigned short *p;

	WARN_CONSOLE_UNLOCKED();

	count /= 2;
	p = screenpos(vc, offset, viewed);
	if (vc->vc_sw->con_invert_region)
		vc->vc_sw->con_invert_region(vc, p, count);
#ifndef VT_BUF_VRAM_ONLY
	else {
		u16 *q = p;
		int cnt = count;
		u16 a;

		if (!vc->vc_can_do_color) {
			while (cnt--) {
			    a = scr_readw(q);
			    a ^= 0x0800;
			    scr_writew(a, q);
			    q++;
			}
		} else if (vc->vc_hi_font_mask == 0x100) {
			while (cnt--) {
				a = scr_readw(q);
				a = ((a) & 0x11ff) | (((a) & 0xe000) >> 4) | (((a) & 0x0e00) << 4);
				scr_writew(a, q);
				q++;
			}
		} else {
			while (cnt--) {
				a = scr_readw(q);
				a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | (((a) & 0x0700) << 4);
				scr_writew(a, q);
				q++;
			}
		}
	}
#endif
	if (DO_UPDATE(vc))
		do_update_region(vc, (unsigned long) p, count);
}

/* used by selection: complement pointer position */
void complement_pos(struct vc_data *vc, int offset)
{
	static unsigned short *p;
	static unsigned short old;
	static unsigned short oldx, oldy;

	WARN_CONSOLE_UNLOCKED();

	if (p) {
		scr_writew(old, p);
		if (DO_UPDATE(vc))
			vc->vc_sw->con_putc(vc, old, oldy, oldx);
	}
	if (offset == -1)
		p = NULL;
	else {
		unsigned short new;
		p = screenpos(vc, offset, 1);
		old = scr_readw(p);
		new = old ^ vc->vc_complement_mask;
		scr_writew(new, p);
		if (DO_UPDATE(vc)) {
			oldx = (offset >> 1) % vc->vc_cols;
			oldy = (offset >> 1) / vc->vc_cols;
			vc->vc_sw->con_putc(vc, new, oldy, oldx);
		}
	}
}

static void insert_char(struct vc_data *vc, unsigned int nr)
{
	unsigned short *p, *q = (unsigned short *)vc->vc_pos;

	p = q + vc->vc_cols - nr - vc->vc_x;
	while (--p >= q)
		scr_writew(scr_readw(p), p + nr);
	scr_memsetw(q, vc->vc_video_erase_char, nr * 2);
	vc->vc_need_wrap = 0;
	if (DO_UPDATE(vc)) {
		unsigned short oldattr = vc->vc_attr;
		vc->vc_sw->con_bmove(vc, vc->vc_y, vc->vc_x, vc->vc_y, vc->vc_x + nr, 1,
				     vc->vc_cols - vc->vc_x - nr);
		vc->vc_attr = vc->vc_video_erase_char >> 8;
		while (nr--)
			vc->vc_sw->con_putc(vc, vc->vc_video_erase_char, vc->vc_y, vc->vc_x + nr);
		vc->vc_attr = oldattr;
	}
}

static void delete_char(struct vc_data *vc, unsigned int nr)
{
	unsigned int i = vc->vc_x;
	unsigned short *p = (unsigned short *)vc->vc_pos;

	while (++i <= vc->vc_cols - nr) {
		scr_writew(scr_readw(p+nr), p);
		p++;
	}
	scr_memsetw(p, vc->vc_video_erase_char, nr * 2);
	vc->vc_need_wrap = 0;
	if (DO_UPDATE(vc)) {
		unsigned short oldattr = vc->vc_attr;
		vc->vc_sw->con_bmove(vc, vc->vc_y, vc->vc_x + nr, vc->vc_y, vc->vc_x, 1,
				     vc->vc_cols - vc->vc_x - nr);
		vc->vc_attr = vc->vc_video_erase_char >> 8;
		while (nr--)
			vc->vc_sw->con_putc(vc, vc->vc_video_erase_char, vc->vc_y,
				     vc->vc_cols - 1 - nr);
		vc->vc_attr = oldattr;
	}
}

static int softcursor_original;

static void add_softcursor(struct vc_data *vc)
{
	int i = scr_readw((u16 *) vc->vc_pos);
	u32 type = vc->vc_cursor_type;

	if (! (type & 0x10)) return;
	if (softcursor_original != -1) return;
	softcursor_original = i;
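	/* bits 16..23 of the cursor type are ORed into the attribute byte
	 * of the cell, bits 8..15 are then toggled (XORed) */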
	i |= ((type >> 8) & 0xff00 );
	i ^= ((type) & 0xff00 );
	if ((type & 0x20) && ((softcursor_original & 0x7000) == (i & 0x7000))) i ^= 0x7000;
	if ((type & 0x40) && ((i & 0x700) == ((i & 0x7000) >> 4))) i ^= 0x0700;
	scr_writew(i, (u16 *) vc->vc_pos);
	if (DO_UPDATE(vc))
		vc->vc_sw->con_putc(vc, i, vc->vc_y, vc->vc_x);
}

static void hide_softcursor(struct vc_data *vc)
{
	if (softcursor_original != -1) {
		scr_writew(softcursor_original, (u16 *)vc->vc_pos);
		if (DO_UPDATE(vc))
			vc->vc_sw->con_putc(vc, softcursor_original,
					vc->vc_y, vc->vc_x);
		softcursor_original = -1;
	}
}

static void hide_cursor(struct vc_data *vc)
{
	if (vc == sel_cons)
		clear_selection();
	vc->vc_sw->con_cursor(vc, CM_ERASE);
	hide_softcursor(vc);
}

static void set_cursor(struct vc_data *vc)
{
	if (!IS_FG(vc) || console_blanked ||
	    vc->vc_mode == KD_GRAPHICS)
		return;
	if (vc->vc_deccm) {
		if (vc == sel_cons)
			clear_selection();
		add_softcursor(vc);
		if ((vc->vc_cursor_type & 0x0f) != 1)
			vc->vc_sw->con_cursor(vc, CM_DRAW);
	} else
		hide_cursor(vc);
}

static void set_origin(struct vc_data *vc)
{
	WARN_CONSOLE_UNLOCKED();

	if (!CON_IS_VISIBLE(vc) ||
	    !vc->vc_sw->con_set_origin ||
	    !vc->vc_sw->con_set_origin(vc))
		vc->vc_origin = (unsigned long)vc->vc_screenbuf;
	vc->vc_visible_origin = vc->vc_origin;
	vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size;
	vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->vc_y + 2 * vc->vc_x;
}

static inline void save_screen(struct vc_data *vc)
{
	WARN_CONSOLE_UNLOCKED();

	if (vc->vc_sw->con_save_screen)
		vc->vc_sw->con_save_screen(vc);
}

/*
 *	Redrawing of screen
 */

static void clear_buffer_attributes(struct vc_data *vc)
{
	unsigned short *p = (unsigned short *)vc->vc_origin;
	int count = vc->vc_screenbuf_size / 2;
	int mask = vc->vc_hi_font_mask | 0xff;

	for (; count > 0; count--, p++) {
		scr_writew((scr_readw(p)&mask) | (vc->vc_video_erase_char & ~mask), p);
	}
}

void redraw_screen(struct vc_data *vc, int is_switch)
{
	int redraw = 0;

	WARN_CONSOLE_UNLOCKED();

	if (!vc) {
		/* strange ... */
		/* printk("redraw_screen: tty %d not allocated ??\n", new_console+1); */
		return;
	}

	if (is_switch) {
		struct vc_data *old_vc = vc_cons[fg_console].d;
		if (old_vc == vc)
			return;
		if (!CON_IS_VISIBLE(vc))
			redraw = 1;
		*vc->vc_display_fg = vc;
		fg_console = vc->vc_num;
		hide_cursor(old_vc);
		if (!CON_IS_VISIBLE(old_vc)) {
			save_screen(old_vc);
			set_origin(old_vc);
		}
	} else {
		hide_cursor(vc);
		redraw = 1;
	}

	if (redraw) {
		int update;
		int old_was_color = vc->vc_can_do_color;

		set_origin(vc);
		update = vc->vc_sw->con_switch(vc);
		set_palette(vc);
		/*
		 * If console changed from mono<->color, the best we can do
		 * is to clear the buffer attributes. As it currently stands,
		 * rebuilding new attributes from the old buffer is not doable
		 * without overly complex code.
		 */
		if (old_was_color != vc->vc_can_do_color) {
			update_attr(vc);
			clear_buffer_attributes(vc);
		}
		if (update && vc->vc_mode != KD_GRAPHICS)
			do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
	}
	set_cursor(vc);
	if (is_switch) {
		set_leds();
		compute_shiftstate();
	}
}

/*
 *	Allocation, freeing and resizing of VTs.
 */

int vc_cons_allocated(unsigned int i)
{
	return (i < MAX_NR_CONSOLES && vc_cons[i].d);
}

static void visual_init(struct vc_data *vc, int num, int init)
{
	/* ++Geert: vc->vc_sw->con_init determines console size */
	if (vc->vc_sw)
		module_put(vc->vc_sw->owner);
	vc->vc_sw = conswitchp;
#ifndef VT_SINGLE_DRIVER
	if (con_driver_map[num])
		vc->vc_sw = con_driver_map[num];
#endif
	__module_get(vc->vc_sw->owner);
	vc->vc_num = num;
	vc->vc_display_fg = &master_display_fg;
	vc->vc_uni_pagedir_loc = &vc->vc_uni_pagedir;
	vc->vc_uni_pagedir = 0;
	vc->vc_hi_font_mask = 0;
	vc->vc_complement_mask = 0;
	vc->vc_can_do_color = 0;
	vc->vc_sw->con_init(vc, init);
	if (!vc->vc_complement_mask)
		vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
	vc->vc_s_complement_mask = vc->vc_complement_mask;
	vc->vc_size_row = vc->vc_cols << 1;
	vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row;
}

int vc_allocate(unsigned int currcons)	/* return 0 on success */
{
	WARN_CONSOLE_UNLOCKED();

	if (currcons >= MAX_NR_CONSOLES)
		return -ENXIO;
	if (!vc_cons[currcons].d) {
	    struct vc_data *vc;

	    /* prevent users from taking too much memory */
	    if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
	      return -EPERM;

	    /* due to the granularity of kmalloc, we waste some memory here */
	    /* the alloc is done in two steps, to optimize the common situation
	       of a 25x80 console (structsize=216, screenbuf_size=4000) */
	    /* although the numbers above are not valid since long ago, the
	       point is still up-to-date and the comment still has its value
	       even if only as a historical artifact.  --mj, July 1998 */
	    vc = kmalloc(sizeof(struct vc_data), GFP_KERNEL);
	    if (!vc)
		return -ENOMEM;
	    memset(vc, 0, sizeof(*vc));
	    vc_cons[currcons].d = vc;
	    visual_init(vc, currcons, 1);
	    if (!*vc->vc_uni_pagedir_loc)
		con_set_default_unimap(vc);
	    vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
	    if (!vc->vc_screenbuf) {
		kfree(vc);
		vc_cons[currcons].d = NULL;
		return -ENOMEM;
	    }
	    vc->vc_kmalloced = 1;
	    vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
	}
	return 0;
}

static inline int resize_screen(struct vc_data *vc, int width, int height)
{
	/* Resizes the resolution of the display adapter */
	int err = 0;

	if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
		err = vc->vc_sw->con_resize(vc, width, height);
	return err;
}

/*
 * Change # of rows and columns (0 means unchanged/the size of fg_console)
 * [this is to be used together with some user program
 * like resize that changes the hardware videomode]
 */
#define VC_RESIZE_MAXCOL (32767)
#define VC_RESIZE_MAXROW (32767)
int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
{
	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
	unsigned int old_cols, old_rows, old_row_size, old_screen_size;
	unsigned int new_cols, new_rows, new_row_size, new_screen_size;
	unsigned short *newscreen;

	WARN_CONSOLE_UNLOCKED();

	if (!vc)
		return -ENXIO;

	if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW)
		return -EINVAL;

	new_cols = (cols ? cols : vc->vc_cols);
	new_rows = (lines ? lines : vc->vc_rows);
	new_row_size = new_cols << 1;
	new_screen_size = new_row_size * new_rows;

	if (new_cols == vc->vc_cols && new_rows == vc->vc_rows)
		return 0;

	newscreen = (unsigned short *) kmalloc(new_screen_size, GFP_USER);
	if (!newscreen)
		return -ENOMEM;

	old_rows = vc->vc_rows;
	old_cols = vc->vc_cols;
	old_row_size = vc->vc_size_row;
	old_screen_size = vc->vc_screenbuf_size;

	err = resize_screen(vc, new_cols, new_rows);
	if (err) {
		kfree(newscreen);
		return err;
	}

	vc->vc_rows = new_rows;
	vc->vc_cols = new_cols;
	vc->vc_size_row = new_row_size;
	vc->vc_screenbuf_size = new_screen_size;

	rlth = min(old_row_size, new_row_size);
	rrem = new_row_size - rlth;
	old_origin = vc->vc_origin;
	new_origin = (long) newscreen;
	new_scr_end = new_origin + new_screen_size;
	if (new_rows < old_rows)
		old_origin += (old_rows - new_rows) * old_row_size;

	update_attr(vc);

	while (old_origin < vc->vc_scr_end) {
		scr_memcpyw((unsigned short *) new_origin, (unsigned short *) old_origin, rlth);
		if (rrem)
			scr_memsetw((void *)(new_origin + rlth), vc->vc_video_erase_char, rrem);
		old_origin += old_row_size;
		new_origin += new_row_size;
	}
	if (new_scr_end > new_origin)
		scr_memsetw((void *)new_origin, vc->vc_video_erase_char, new_scr_end - new_origin);
	if (vc->vc_kmalloced)
		kfree(vc->vc_screenbuf);
	vc->vc_screenbuf = newscreen;
	vc->vc_kmalloced = 1;
	vc->vc_screenbuf_size = new_screen_size;
	set_origin(vc);

	/* do part of a reset_terminal() */
	vc->vc_top = 0;
	vc->vc_bottom = vc->vc_rows;
	gotoxy(vc, vc->vc_x, vc->vc_y);
	save_cur(vc);

	if (vc->vc_tty) {
		struct winsize ws, *cws = &vc->vc_tty->winsize;

		memset(&ws, 0, sizeof(ws));
		ws.ws_row = vc->vc_rows;
		ws.ws_col = vc->vc_cols;
		ws.ws_ypixel = vc->vc_scan_lines;
		if ((ws.ws_row != cws->ws_row || ws.ws_col != cws->ws_col) &&
		    vc->vc_tty->pgrp > 0)
			kill_pg(vc->vc_tty->pgrp, SIGWINCH, 1);
		*cws = ws;
	}

	if (CON_IS_VISIBLE(vc))
		update_screen(vc);
	return err;
}


void vc_disallocate(unsigned int currcons)
{
	WARN_CONSOLE_UNLOCKED();

	if (vc_cons_allocated(currcons)) {
		struct vc_data *vc = vc_cons[currcons].d;
		vc->vc_sw->con_deinit(vc);
		if (vc->vc_kmalloced)
			kfree(vc->vc_screenbuf);
		if (currcons >= MIN_NR_CONSOLES)
			kfree(vc);
		vc_cons[currcons].d = NULL;
	}
}

/*
 *	VT102 emulator
 */

#define set_kbd(vc, x)	set_vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
#define clr_kbd(vc, x)	clr_vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
#define is_kbd(vc, x)	vc_kbd_mode(kbd_table + (vc)->vc_num, (x))

#define decarm		VC_REPEAT
#define decckm		VC_CKMODE
#define kbdapplic	VC_APPLIC
#define lnm		VC_CRLF

/*
 * This is what the terminal answers to an ESC-Z or csi0c query.
 */
#define VT100ID "\033[?1;2c"
#define VT102ID "\033[?6c"

unsigned char color_table[] = { 0, 4, 2, 6, 1, 5, 3, 7,
				       8,12,10,14, 9,13,11,15 };

/* the default colour table, for VGA+ colour systems */
int default_red[] = {0x00,0xaa,0x00,0xaa,0x00,0xaa,0x00,0xaa,
    0x55,0xff,0x55,0xff,0x55,0xff,0x55,0xff};
int default_grn[] = {0x00,0x00,0xaa,0x55,0x00,0x00,0xaa,0xaa,
    0x55,0x55,0xff,0xff,0x55,0x55,0xff,0xff};
int default_blu[] = {0x00,0x00,0x00,0x00,0xaa,0xaa,0xaa,0xaa,
    0x55,0x55,0x55,0x55,0xff,0xff,0xff,0xff};
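
/* indexed by ANSI colour number: e.g. colour 1 (red) is (0xaa, 0x00, 0x00) */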

/*
 * gotoxy() must verify all boundaries, because the arguments
 * might also be negative. If the given position is out of
 * bounds, the cursor is placed at the nearest margin.
 */
static void gotoxy(struct vc_data *vc, int new_x, int new_y)
{
	int min_y, max_y;

	if (new_x < 0)
		vc->vc_x = 0;
	else {
		if (new_x >= vc->vc_cols)
			vc->vc_x = vc->vc_cols - 1;
		else
			vc->vc_x = new_x;
	}

 	if (vc->vc_decom) {
		min_y = vc->vc_top;
		max_y = vc->vc_bottom;
	} else {
		min_y = 0;
		max_y = vc->vc_rows;
	}
	if (new_y < min_y)
		vc->vc_y = min_y;
	else if (new_y >= max_y)
		vc->vc_y = max_y - 1;
	else
		vc->vc_y = new_y;
	vc->vc_pos = vc->vc_origin + vc->vc_y * vc->vc_size_row + (vc->vc_x<<1);
	vc->vc_need_wrap = 0;
}

/* for absolute user moves, when decom is set */
static void gotoxay(struct vc_data *vc, int new_x, int new_y)
{
	gotoxy(vc, new_x, vc->vc_decom ? (vc->vc_top + new_y) : new_y);
}

void scrollback(struct vc_data *vc, int lines)
{
	if (!lines)
		lines = vc->vc_rows / 2;
	scrolldelta(-lines);
}

void scrollfront(struct vc_data *vc, int lines)
{
	if (!lines)
		lines = vc->vc_rows / 2;
	scrolldelta(lines);
}

static void lf(struct vc_data *vc)
{
    	/* scroll only when the cursor is on the last line of the
	 * scrolling region; don't scroll above or below it
	 */
    	if (vc->vc_y + 1 == vc->vc_bottom)
		scrup(vc, vc->vc_top, vc->vc_bottom, 1);
	else if (vc->vc_y < vc->vc_rows - 1) {
	    	vc->vc_y++;
		vc->vc_pos += vc->vc_size_row;
	}
	vc->vc_need_wrap = 0;
}

static void ri(struct vc_data *vc)
{
    	/* scroll only when the cursor is on the first line of the
	 * scrolling region; don't scroll above or below it
	 */
	if (vc->vc_y == vc->vc_top)
		scrdown(vc, vc->vc_top, vc->vc_bottom, 1);
	else if (vc->vc_y > 0) {
		vc->vc_y--;
		vc->vc_pos -= vc->vc_size_row;
	}
	vc->vc_need_wrap = 0;
}

static inline void cr(struct vc_data *vc)
{
	vc->vc_pos -= vc->vc_x << 1;
	vc->vc_need_wrap = vc->vc_x = 0;
}

static inline void bs(struct vc_data *vc)
{
	if (vc->vc_x) {
		vc->vc_pos -= 2;
		vc->vc_x--;
		vc->vc_need_wrap = 0;
	}
}

static inline void del(struct vc_data *vc)
{
	/* ignored */
}

static void csi_J(struct vc_data *vc, int vpar)
{
	unsigned int count;
	unsigned short * start;

	switch (vpar) {
		case 0:	/* erase from cursor to end of display */
			count = (vc->vc_scr_end - vc->vc_pos) >> 1;
			start = (unsigned short *)vc->vc_pos;
			if (DO_UPDATE(vc)) {
				/* do in two stages */
				vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1,
					      vc->vc_cols - vc->vc_x);
				vc->vc_sw->con_clear(vc, vc->vc_y + 1, 0,
					      vc->vc_rows - vc->vc_y - 1,
					      vc->vc_cols);
			}
			break;
		case 1:	/* erase from start to cursor */
			count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1;
			start = (unsigned short *)vc->vc_origin;
			if (DO_UPDATE(vc)) {
				/* do in two stages */
				vc->vc_sw->con_clear(vc, 0, 0, vc->vc_y,
					      vc->vc_cols);
				vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
					      vc->vc_x + 1);
			}
			break;
		case 2: /* erase whole display */
			count = vc->vc_cols * vc->vc_rows;
			start = (unsigned short *)vc->vc_origin;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, 0, 0,
					      vc->vc_rows,
					      vc->vc_cols);
			break;
		default:
			return;
	}
	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
	vc->vc_need_wrap = 0;
}

static void csi_K(struct vc_data *vc, int vpar)
{
	unsigned int count;
	unsigned short * start;

	switch (vpar) {
		case 0:	/* erase from cursor to end of line */
			count = vc->vc_cols - vc->vc_x;
			start = (unsigned short *)vc->vc_pos;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1,
						     vc->vc_cols - vc->vc_x);
			break;
		case 1:	/* erase from start of line to cursor */
			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
			count = vc->vc_x + 1;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
						     vc->vc_x + 1);
			break;
		case 2: /* erase whole line */
			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
			count = vc->vc_cols;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
					      vc->vc_cols);
			break;
		default:
			return;
	}
	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
	vc->vc_need_wrap = 0;
}

static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */
{					  /* not vt100? */
	int count;

	if (!vpar)
		vpar++;
	count = (vpar > vc->vc_cols - vc->vc_x) ? (vc->vc_cols - vc->vc_x) : vpar;

	scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count);
	if (DO_UPDATE(vc))
		vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1, count);
	vc->vc_need_wrap = 0;
}

static void default_attr(struct vc_data *vc)
{
	vc->vc_intensity = 1;
	vc->vc_underline = 0;
	vc->vc_reverse = 0;
	vc->vc_blink = 0;
	vc->vc_color = vc->vc_def_color;
}

/* console_sem is held */
static void csi_m(struct vc_data *vc)
{
	int i;

	for (i = 0; i <= vc->vc_npar; i++)
		switch (vc->vc_par[i]) {
			case 0:	/* all attributes off */
				default_attr(vc);
				break;
			case 1:
				vc->vc_intensity = 2;
				break;
			case 2:
				vc->vc_intensity = 0;
				break;
			case 4:
				vc->vc_underline = 1;
				break;
			case 5:
				vc->vc_blink = 1;
				break;
			case 7:
				vc->vc_reverse = 1;
				break;
			case 10: /* ANSI X3.64-1979 (SCO-ish?)
				  * Select primary font, don't display
				  * control chars if defined, don't set
				  * bit 8 on output.
				  */
				vc->vc_translate = set_translate(vc->vc_charset == 0
						? vc->vc_G0_charset
						: vc->vc_G1_charset, vc);
				vc->vc_disp_ctrl = 0;
				vc->vc_toggle_meta = 0;
				break;
			case 11: /* ANSI X3.64-1979 (SCO-ish?)
				  * Select first alternate font, lets
				  * chars < 32 be displayed as ROM chars.
				  */
				vc->vc_translate = set_translate(IBMPC_MAP, vc);
				vc->vc_disp_ctrl = 1;
				vc->vc_toggle_meta = 0;
				break;
			case 12: /* ANSI X3.64-1979 (SCO-ish?)
				  * Select second alternate font, toggle
				  * high bit before displaying as ROM char.
				  */
				vc->vc_translate = set_translate(IBMPC_MAP, vc);
				vc->vc_disp_ctrl = 1;
				vc->vc_toggle_meta = 1;
				break;
			case 21:
			case 22:
				vc->vc_intensity = 1;
				break;
			case 24:
				vc->vc_underline = 0;
				break;
			case 25:
				vc->vc_blink = 0;
				break;
			case 27:
				vc->vc_reverse = 0;
				break;
			case 38: /* ANSI X3.64-1979 (SCO-ish?)
				  * Enables underscore, white foreground
				  * with white underscore (Linux - use
				  * default foreground).
				  */
				vc->vc_color = (vc->vc_def_color & 0x0f) | (vc->vc_color & 0xf0);
				vc->vc_underline = 1;
				break;
			case 39: /* ANSI X3.64-1979 (SCO-ish?)
				  * Disable underline option.
				  * Reset colour to default? It did this
				  * before...
				  */
				vc->vc_color = (vc->vc_def_color & 0x0f) | (vc->vc_color & 0xf0);
				vc->vc_underline = 0;
				break;
			case 49:
				vc->vc_color = (vc->vc_def_color & 0xf0) | (vc->vc_color & 0x0f);
				break;
			default:
				if (vc->vc_par[i] >= 30 && vc->vc_par[i] <= 37)
					vc->vc_color = color_table[vc->vc_par[i] - 30]
						| (vc->vc_color & 0xf0);
				else if (vc->vc_par[i] >= 40 && vc->vc_par[i] <= 47)
					vc->vc_color = (color_table[vc->vc_par[i] - 40] << 4)
						| (vc->vc_color & 0x0f);
				break;
		}
	update_attr(vc);
}

static void respond_string(const char *p, struct tty_struct *tty)
{
	while (*p) {
		tty_insert_flip_char(tty, *p, 0);
		p++;
	}
	con_schedule_flip(tty);
}

static void cursor_report(struct vc_data *vc, struct tty_struct *tty)
{
	char buf[40];

	sprintf(buf, "\033[%d;%dR", vc->vc_y + (vc->vc_decom ? vc->vc_top + 1 : 1), vc->vc_x + 1);
	respond_string(buf, tty);
}

static inline void status_report(struct tty_struct *tty)
{
	respond_string("\033[0n", tty);	/* Terminal ok */
}

static inline void respond_ID(struct tty_struct * tty)
{
	respond_string(VT102ID, tty);
}

void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry)
{
	char buf[8];

	sprintf(buf, "\033[M%c%c%c", (char)(' ' + butt), (char)('!' + mrx),
		(char)('!' + mry));
	respond_string(buf, tty);
}

/* invoked via ioctl(TIOCLINUX) and through set_selection */
int mouse_reporting(void)
{
	return vc_cons[fg_console].d->vc_report_mouse;
}

/* console_sem is held */
static void set_mode(struct vc_data *vc, int on_off)
{
	int i;

	for (i = 0; i <= vc->vc_npar; i++)
		if (vc->vc_ques) {
			switch(vc->vc_par[i]) {	/* DEC private modes set/reset */
			case 1:			/* Cursor keys send ^[Ox/^[[x */
				if (on_off)
					set_kbd(vc, decckm);
				else
					clr_kbd(vc, decckm);
				break;
			case 3:	/* 80/132 mode switch unimplemented */
				vc->vc_deccolm = on_off;
#if 0
				vc_resize(deccolm ? 132 : 80, vc->vc_rows);
				/* this alone does not suffice; some user mode
				   utility has to change the hardware regs */
#endif
				break;
			case 5:			/* Inverted screen on/off */
				if (vc->vc_decscnm != on_off) {
					vc->vc_decscnm = on_off;
					invert_screen(vc, 0, vc->vc_screenbuf_size, 0);
					update_attr(vc);
				}
				break;
			case 6:			/* Origin relative/absolute */
				vc->vc_decom = on_off;
				gotoxay(vc, 0, 0);
				break;
			case 7:			/* Autowrap on/off */
				vc->vc_decawm = on_off;
				break;
			case 8:			/* Autorepeat on/off */
				if (on_off)
					set_kbd(vc, decarm);
				else
					clr_kbd(vc, decarm);
				break;
			case 9:
				vc->vc_report_mouse = on_off ? 1 : 0;
				break;
			case 25:		/* Cursor on/off */
				vc->vc_deccm = on_off;
				break;
			case 1000:
				vc->vc_report_mouse = on_off ? 2 : 0;
				break;
			}
		} else {
			switch(vc->vc_par[i]) {	/* ANSI modes set/reset */
			case 3:			/* Monitor (display ctrls) */
				vc->vc_disp_ctrl = on_off;
				break;
			case 4:			/* Insert Mode on/off */
				vc->vc_decim = on_off;
				break;
			case 20:		/* Lf, Enter == CrLf/Lf */
				if (on_off)
					set_kbd(vc, lnm);
				else
					clr_kbd(vc, lnm);
				break;
			}
		}
}

/* console_sem is held */
static void setterm_command(struct vc_data *vc)
{
	switch(vc->vc_par[0]) {
		case 1:	/* set color for underline mode */
			if (vc->vc_can_do_color &&
					vc->vc_par[1] < 16) {
				vc->vc_ulcolor = color_table[vc->vc_par[1]];
				if (vc->vc_underline)
					update_attr(vc);
			}
			break;
		case 2:	/* set color for half intensity mode */
			if (vc->vc_can_do_color &&
					vc->vc_par[1] < 16) {
				vc->vc_halfcolor = color_table[vc->vc_par[1]];
				if (vc->vc_intensity == 0)
					update_attr(vc);
			}
			break;
		case 8:	/* store colors as defaults */
			vc->vc_def_color = vc->vc_attr;
			if (vc->vc_hi_font_mask == 0x100)
				vc->vc_def_color >>= 1;
			default_attr(vc);
			update_attr(vc);
			break;
		case 9:	/* set blanking interval */
			blankinterval = ((vc->vc_par[1] < 60) ? vc->vc_par[1] : 60) * 60 * HZ;
			poke_blanked_console();
			break;
		case 10: /* set bell frequency in Hz */
			if (vc->vc_npar >= 1)
				vc->vc_bell_pitch = vc->vc_par[1];
			else
				vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
			break;
		case 11: /* set bell duration in msec */
			if (vc->vc_npar >= 1)
				vc->vc_bell_duration = (vc->vc_par[1] < 2000) ?
					vc->vc_par[1] * HZ / 1000 : 0;
			else
				vc->vc_bell_duration = DEFAULT_BELL_DURATION;
			break;
		case 12: /* bring specified console to the front */
			if (vc->vc_par[1] >= 1 && vc_cons_allocated(vc->vc_par[1] - 1))
				set_console(vc->vc_par[1] - 1);
			break;
		case 13: /* unblank the screen */
			poke_blanked_console();
			break;
		case 14: /* set vesa powerdown interval */
			vesa_off_interval = ((vc->vc_par[1] < 60) ? vc->vc_par[1] : 60) * 60 * HZ;
			break;
		case 15: /* activate the previous console */
			set_console(last_console);
			break;
	}
}

/* console_sem is held */
static void csi_at(struct vc_data *vc, unsigned int nr)
{
	if (nr > vc->vc_cols - vc->vc_x)
		nr = vc->vc_cols - vc->vc_x;
	else if (!nr)
		nr = 1;
	insert_char(vc, nr);
}

/* console_sem is held */
static void csi_L(struct vc_data *vc, unsigned int nr)
{
	if (nr > vc->vc_rows - vc->vc_y)
		nr = vc->vc_rows - vc->vc_y;
	else if (!nr)
		nr = 1;
	scrdown(vc, vc->vc_y, vc->vc_bottom, nr);
	vc->vc_need_wrap = 0;
}

/* console_sem is held */
static void csi_P(struct vc_data *vc, unsigned int nr)
{
	if (nr > vc->vc_cols - vc->vc_x)
		nr = vc->vc_cols - vc->vc_x;
	else if (!nr)
		nr = 1;
	delete_char(vc, nr);
}

/* console_sem is held */
static void csi_M(struct vc_data *vc, unsigned int nr)
{
	if (nr > vc->vc_rows - vc->vc_y)
		nr = vc->vc_rows - vc->vc_y;
	else if (!nr)
		nr=1;
	scrup(vc, vc->vc_y, vc->vc_bottom, nr);
	vc->vc_need_wrap = 0;
}

/* console_sem is held (except via vc_init->reset_terminal()) */
static void save_cur(struct vc_data *vc)
{
	vc->vc_saved_x		= vc->vc_x;
	vc->vc_saved_y		= vc->vc_y;
	vc->vc_s_intensity	= vc->vc_intensity;
	vc->vc_s_underline	= vc->vc_underline;
	vc->vc_s_blink		= vc->vc_blink;
	vc->vc_s_reverse	= vc->vc_reverse;
	vc->vc_s_charset	= vc->vc_charset;
	vc->vc_s_color		= vc->vc_color;
	vc->vc_saved_G0		= vc->vc_G0_charset;
	vc->vc_saved_G1		= vc->vc_G1_charset;
}

/* console_sem is held */
static void restore_cur(struct vc_data *vc)
{
	gotoxy(vc, vc->vc_saved_x, vc->vc_saved_y);
	vc->vc_intensity	= vc->vc_s_intensity;
	vc->vc_underline	= vc->vc_s_underline;
	vc->vc_blink		= vc->vc_s_blink;
	vc->vc_reverse		= vc->vc_s_reverse;
	vc->vc_charset		= vc->vc_s_charset;
	vc->vc_color		= vc->vc_s_color;
	vc->vc_G0_charset	= vc->vc_saved_G0;
	vc->vc_G1_charset	= vc->vc_saved_G1;
	vc->vc_translate	= set_translate(vc->vc_charset ? vc->vc_G1_charset : vc->vc_G0_charset, vc);
	update_attr(vc);
	vc->vc_need_wrap = 0;
}

enum { ESnormal, ESesc, ESsquare, ESgetpars, ESgotpars, ESfunckey,
	EShash, ESsetG0, ESsetG1, ESpercent, ESignore, ESnonstd,
	ESpalette };

/* console_sem is held (except via vc_init()) */
static void reset_terminal(struct vc_data *vc, int do_clear)
{
	vc->vc_top		= 0;
	vc->vc_bottom		= vc->vc_rows;
	vc->vc_state		= ESnormal;
	vc->vc_ques		= 0;
	vc->vc_translate	= set_translate(LAT1_MAP, vc);
	vc->vc_G0_charset	= LAT1_MAP;
	vc->vc_G1_charset	= GRAF_MAP;
	vc->vc_charset		= 0;
	vc->vc_need_wrap	= 0;
	vc->vc_report_mouse	= 0;
	vc->vc_utf		= 0;
	vc->vc_utf_count	= 0;

	vc->vc_disp_ctrl	= 0;
	vc->vc_toggle_meta	= 0;

	vc->vc_decscnm		= 0;
	vc->vc_decom		= 0;
	vc->vc_decawm		= 1;
	vc->vc_deccm		= 1;
	vc->vc_decim		= 0;

	set_kbd(vc, decarm);
	clr_kbd(vc, decckm);
	clr_kbd(vc, kbdapplic);
	clr_kbd(vc, lnm);
	kbd_table[vc->vc_num].lockstate = 0;
	kbd_table[vc->vc_num].slockstate = 0;
	kbd_table[vc->vc_num].ledmode = LED_SHOW_FLAGS;
	kbd_table[vc->vc_num].ledflagstate = kbd_table[vc->vc_num].default_ledflagstate;
	/* do not do set_leds here because this causes an endless tasklet loop
	   when the keyboard hasn't been initialized yet */

	vc->vc_cursor_type = CUR_DEFAULT;
	vc->vc_complement_mask = vc->vc_s_complement_mask;

	default_attr(vc);
	update_attr(vc);

	vc->vc_tab_stop[0]	= 0x01010100;
	vc->vc_tab_stop[1]	=
	vc->vc_tab_stop[2]	=
	vc->vc_tab_stop[3]	=
	vc->vc_tab_stop[4]	= 0x01010101;
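	/* One bit per column across five 32-bit words (160 columns); the
	 * values above set bits 8, 16, 24, ... so there is a tab stop
	 * every eight columns, column 0 excluded. */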

	vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
	vc->vc_bell_duration = DEFAULT_BELL_DURATION;

	gotoxy(vc, 0, 0);
	save_cur(vc);
	if (do_clear)
	    csi_J(vc, 2);
}

/* console_sem is held */
static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
{
	/*
	 *  Control characters can be used in the _middle_
	 *  of an escape sequence.
	 */
	switch (c) {
	case 0:
		return;
	case 7:
		if (vc->vc_bell_duration)
			kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration);
		return;
	case 8:
		bs(vc);
		return;
	case 9:
		vc->vc_pos -= (vc->vc_x << 1);
		while (vc->vc_x < vc->vc_cols - 1) {
			vc->vc_x++;
			if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31)))
				break;
		}
		vc->vc_pos += (vc->vc_x << 1);
		return;
	case 10: case 11: case 12:
		lf(vc);
		if (!is_kbd(vc, lnm))
			return;
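		/* fall through: in linefeed/newline mode (lnm), LF, VT and
		 * FF also perform a carriage return */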
	case 13:
		cr(vc);
		return;
	case 14:
		vc->vc_charset = 1;
		vc->vc_translate = set_translate(vc->vc_G1_charset, vc);
		vc->vc_disp_ctrl = 1;
		return;
	case 15:
		vc->vc_charset = 0;
		vc->vc_translate = set_translate(vc->vc_G0_charset, vc);
		vc->vc_disp_ctrl = 0;
		return;
	case 24: case 26:
		vc->vc_state = ESnormal;
		return;
	case 27:
		vc->vc_state = ESesc;
		return;
	case 127:
		del(vc);
		return;
	case 128+27:
		vc->vc_state = ESsquare;
		return;
	}
	switch(vc->vc_state) {
	case ESesc:
		vc->vc_state = ESnormal;
		switch (c) {
		case '[':
			vc->vc_state = ESsquare;
			return;
		case ']':
			vc->vc_state = ESnonstd;
			return;
		case '%':
			vc->vc_state = ESpercent;
			return;
		case 'E':
			cr(vc);
			lf(vc);
			return;
		case 'M':
			ri(vc);
			return;
		case 'D':
			lf(vc);
			return;
		case 'H':
			vc->vc_tab_stop[vc->vc_x >> 5] |= (1 << (vc->vc_x & 31));
			return;
		case 'Z':
			respond_ID(tty);
			return;
		case '7':
			save_cur(vc);
			return;
		case '8':
			restore_cur(vc);
			return;
		case '(':
			vc->vc_state = ESsetG0;
			return;
		case ')':
			vc->vc_state = ESsetG1;
			return;
		case '#':
			vc->vc_state = EShash;
			return;
		case 'c':
			reset_terminal(vc, 1);
			return;
		case '>':  /* Numeric keypad */
			clr_kbd(vc, kbdapplic);
			return;
		case '=':  /* Appl. keypad */
			set_kbd(vc, kbdapplic);
			return;
		}
		return;
	case ESnonstd:
		if (c == 'P') {   /* palette escape sequence */
			for (vc->vc_npar = 0; vc->vc_npar < NPAR; vc->vc_npar++)
				vc->vc_par[vc->vc_npar] = 0;
			vc->vc_npar = 0;
			vc->vc_state = ESpalette;
			return;
		} else if (c == 'R') {   /* reset palette */
			reset_palette(vc);
			vc->vc_state = ESnormal;
		} else
			vc->vc_state = ESnormal;
		return;
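	/*
	 * Palette sequence: ESC ] P nrrggbb -- seven hex digits, where the
	 * first selects the palette index and the remaining six give the
	 * 8-bit red, green and blue components, two digits each.
	 */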
	case ESpalette:
		if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) {
			vc->vc_par[vc->vc_npar++] = (c > '9' ? (c & 0xDF) - 'A' + 10 : c - '0');
			if (vc->vc_npar == 7) {
				int i = vc->vc_par[0] * 3, j = 1;
				vc->vc_palette[i] = 16 * vc->vc_par[j++];
				vc->vc_palette[i++] += vc->vc_par[j++];
				vc->vc_palette[i] = 16 * vc->vc_par[j++];
				vc->vc_palette[i++] += vc->vc_par[j++];
				vc->vc_palette[i] = 16 * vc->vc_par[j++];
				vc->vc_palette[i] += vc->vc_par[j];
				set_palette(vc);
				vc->vc_state = ESnormal;
			}
		} else
			vc->vc_state = ESnormal;
		return;
	case ESsquare:
		for (vc->vc_npar = 0; vc->vc_npar < NPAR; vc->vc_npar++)
			vc->vc_par[vc->vc_npar] = 0;
		vc->vc_npar = 0;
		vc->vc_state = ESgetpars;
		if (c == '[') { /* Function key */
			vc->vc_state = ESfunckey;
			return;
		}
		vc->vc_ques = (c == '?');
		if (vc->vc_ques)
			return;
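		/* fall through */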
	case ESgetpars:
		if (c == ';' && vc->vc_npar < NPAR - 1) {
			vc->vc_npar++;
			return;
		} else if (c >= '0' && c <= '9') {
			vc->vc_par[vc->vc_npar] *= 10;
			vc->vc_par[vc->vc_npar] += c - '0';
			return;
		} else
			vc->vc_state = ESgotpars;
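		/* fall through */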
	case ESgotpars:
		vc->vc_state = ESnormal;
		switch(c) {
		case 'h':
			set_mode(vc, 1);
			return;
		case 'l':
			set_mode(vc, 0);
			return;
		case 'c':
			if (vc->vc_ques) {
				if (vc->vc_par[0])
					vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16);
				else
					vc->vc_cursor_type = CUR_DEFAULT;
				return;
			}
			break;
		case 'm':
			if (vc->vc_ques) {
				clear_selection();
				if (vc->vc_par[0])
					vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1];
				else
					vc->vc_complement_mask = vc->vc_s_complement_mask;
				return;
			}
			break;
		case 'n':
			if (!vc->vc_ques) {
				if (vc->vc_par[0] == 5)
					status_report(tty);
				else if (vc->vc_par[0] == 6)
					cursor_report(vc, tty);
			}
			return;
		}
		if (vc->vc_ques) {
			vc->vc_ques = 0;
			return;
		}
		switch(c) {
		case 'G': case '`':
			if (vc->vc_par[0])
				vc->vc_par[0]--;
			gotoxy(vc, vc->vc_par[0], vc->vc_y);
			return;
		case 'A':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x, vc->vc_y - vc->vc_par[0]);
			return;
		case 'B': case 'e':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x, vc->vc_y + vc->vc_par[0]);
			return;
		case 'C': case 'a':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x + vc->vc_par[0], vc->vc_y);
			return;
		case 'D':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x - vc->vc_par[0], vc->vc_y);
			return;
		case 'E':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, 0, vc->vc_y + vc->vc_par[0]);
			return;
		case 'F':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, 0, vc->vc_y - vc->vc_par[0]);
			return;
		case 'd':
			if (vc->vc_par[0])
				vc->vc_par[0]--;
			gotoxay(vc, vc->vc_x, vc->vc_par[0]);
			return;
		case 'H': case 'f':
			if (vc->vc_par[0])
				vc->vc_par[0]--;
			if (vc->vc_par[1])
				vc->vc_par[1]--;
			gotoxay(vc, vc->vc_par[1], vc->vc_par[0]);
			return;
		case 'J':
			csi_J(vc, vc->vc_par[0]);
			return;
		case 'K':
			csi_K(vc, vc->vc_par[0]);
			return;
		case 'L':
			csi_L(vc, vc->vc_par[0]);
			return;
		case 'M':
			csi_M(vc, vc->vc_par[0]);
			return;
		case 'P':
			csi_P(vc, vc->vc_par[0]);
			return;
		case 'c':
			if (!vc->vc_par[0])
				respond_ID(tty);
			return;
		case 'g':
			if (!vc->vc_par[0])
				vc->vc_tab_stop[vc->vc_x >> 5] &= ~(1 << (vc->vc_x & 31));
			else if (vc->vc_par[0] == 3) {
				vc->vc_tab_stop[0] =
					vc->vc_tab_stop[1] =
					vc->vc_tab_stop[2] =
					vc->vc_tab_stop[3] =
					vc->vc_tab_stop[4] = 0;
			}
			return;
		case 'm':
			csi_m(vc);
			return;
		case 'q': /* DECLL - but only 3 leds */
			/* map 0,1,2,3 to 0,1,2,4 */
			if (vc->vc_par[0] < 4)
				setledstate(kbd_table + vc->vc_num,
					    (vc->vc_par[0] < 3) ? vc->vc_par[0] : 4);
			return;
		case 'r':
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			if (!vc->vc_par[1])
				vc->vc_par[1] = vc->vc_rows;
			/* Minimum allowed region is 2 lines */
			if (vc->vc_par[0] < vc->vc_par[1] &&
			    vc->vc_par[1] <= vc->vc_rows) {
				vc->vc_top = vc->vc_par[0] - 1;
				vc->vc_bottom = vc->vc_par[1];
				gotoxay(vc, 0, 0);
			}
			return;
		case 's':
			save_cur(vc);
			return;
		case 'u':
			restore_cur(vc);
			return;
		case 'X':
			csi_X(vc, vc->vc_par[0]);
			return;
		case '@':
			csi_at(vc, vc->vc_par[0]);
			return;
		case ']': /* setterm functions */
			setterm_command(vc);
			return;
		}
		return;
	case ESpercent:
		vc->vc_state = ESnormal;
		switch (c) {
		case '@':  /* defined in ISO 2022 */
			vc->vc_utf = 0;
			return;
		case 'G':  /* prelim official escape code */
		case '8':  /* retained for compatibility */
			vc->vc_utf = 1;
			return;
		}
		return;
	case ESfunckey:
		vc->vc_state = ESnormal;
		return;
	case EShash:
		vc->vc_state = ESnormal;
		if (c == '8') {
			/* DEC screen alignment test. kludge :-) */
			vc->vc_video_erase_char =
				(vc->vc_video_erase_char & 0xff00) | 'E';
			csi_J(vc, 2);
			vc->vc_video_erase_char =
				(vc->vc_video_erase_char & 0xff00) | ' ';
			do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
		}
		return;
	case ESsetG0:
		if (c == '0')
			vc->vc_G0_charset = GRAF_MAP;
		else if (c == 'B')
			vc->vc_G0_charset = LAT1_MAP;
		else if (c == 'U')
			vc->vc_G0_charset = IBMPC_MAP;
		else if (c == 'K')
			vc->vc_G0_charset = USER_MAP;
		if (vc->vc_charset == 0)
			vc->vc_translate = set_translate(vc->vc_G0_charset, vc);
		vc->vc_state = ESnormal;
		return;
	case ESsetG1:
		if (c == '0')
			vc->vc_G1_charset = GRAF_MAP;
		else if (c == 'B')
			vc->vc_G1_charset = LAT1_MAP;
		else if (c == 'U')
			vc->vc_G1_charset = IBMPC_MAP;
		else if (c == 'K')
			vc->vc_G1_charset = USER_MAP;
		if (vc->vc_charset == 1)
			vc->vc_translate = set_translate(vc->vc_G1_charset, vc);
		vc->vc_state = ESnormal;
		return;
	default:
		vc->vc_state = ESnormal;
	}
}

/* This is a temporary buffer used to prepare a tty console write
 * so that we can easily avoid touching user space while holding the
 * console spinlock.  It is allocated in con_init and is shared by
 * this code and the vc_screen read/write tty calls.
 *
 * We have to allocate this statically in the kernel data section
 * since console_init (and thus con_init) are called before any
 * kernel memory allocation is available.
 */
char con_buf[CON_BUF_SIZE];
DECLARE_MUTEX(con_buf_sem);

/* acquires console_sem */
static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
{
#ifdef VT_BUF_VRAM_ONLY
#define FLUSH do { } while(0);
#else
#define FLUSH if (draw_x >= 0) { \
	vc->vc_sw->con_putcs(vc, (u16 *)draw_from, (u16 *)draw_to - (u16 *)draw_from, vc->vc_y, draw_x); \
	draw_x = -1; \
	}
#endif
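
	/* FLUSH pushes the cells batched since the last flush (the span
	 * draw_from..draw_to on row vc_y, starting at column draw_x) out to
	 * the driver in a single con_putcs() call; draw_x < 0 means nothing
	 * is pending. */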

	int c, tc, ok, n = 0, draw_x = -1;
	unsigned int currcons;
	unsigned long draw_from = 0, draw_to = 0;
	struct vc_data *vc;
	u16 himask, charmask;
	const unsigned char *orig_buf = NULL;
	int orig_count;

	if (in_interrupt())
		return count;

	might_sleep();

	acquire_console_sem();
	vc = tty->driver_data;
	if (vc == NULL) {
		printk(KERN_ERR "vt: argh, driver_data is NULL !\n");
		release_console_sem();
		return 0;
	}

	currcons = vc->vc_num;
	if (!vc_cons_allocated(currcons)) {
	    /* could this happen? */
	    static int error = 0;
	    if (!error) {
		error = 1;
		printk("con_write: tty %d not allocated\n", currcons+1);
	    }
	    release_console_sem();
	    return 0;
	}
	release_console_sem();

	orig_buf = buf;
	orig_count = count;

	/* At this point 'buf' is guaranteed to be a kernel buffer
	 * and therefore no access to userspace (and therefore sleeping)
	 * will be needed.  The con_buf_sem serializes all tty based
	 * console rendering and vcs write/read operations.  We hold
	 * the console spinlock during the entire write.
	 */

	acquire_console_sem();

	vc = tty->driver_data;
	if (vc == NULL) {
		printk(KERN_ERR "vt: argh, driver_data _became_ NULL !\n");
		release_console_sem();
		goto out;
	}

	himask = vc->vc_hi_font_mask;
	charmask = himask ? 0x1ff : 0xff;
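	/* charmask above bounds glyph codes to the font size: 0x1ff with a
	 * 512-glyph font (himask set), 0xff otherwise. */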

	/* undraw cursor first */
	if (IS_FG(vc))
		hide_cursor(vc);

	while (!tty->stopped && count) {
		int orig = *buf;
		c = orig;
		buf++;
		n++;
		count--;

		/* Do no translation at all in control states */
		if (vc->vc_state != ESnormal) {
			tc = c;
		} else if (vc->vc_utf) {
		    /* Combine UTF-8 into Unicode */
		    /* Incomplete characters silently ignored */
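		    /* Lead byte 110xxxxx starts a 2-byte sequence,
		       1110xxxx a 3-byte one, and so on up to 1111110x for
		       6 bytes; each 10xxxxxx continuation byte contributes
		       six payload bits. */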
		    if(c > 0x7f) {
			if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) {
				vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
				vc->vc_utf_count--;
				if (vc->vc_utf_count == 0)
				    tc = c = vc->vc_utf_char;
				else continue;
			} else {
				if ((c & 0xe0) == 0xc0) {
				    vc->vc_utf_count = 1;
				    vc->vc_utf_char = (c & 0x1f);
				} else if ((c & 0xf0) == 0xe0) {
				    vc->vc_utf_count = 2;
				    vc->vc_utf_char = (c & 0x0f);
				} else if ((c & 0xf8) == 0xf0) {
				    vc->vc_utf_count = 3;
				    vc->vc_utf_char = (c & 0x07);
				} else if ((c & 0xfc) == 0xf8) {
				    vc->vc_utf_count = 4;
				    vc->vc_utf_char = (c & 0x03);
				} else if ((c & 0xfe) == 0xfc) {
				    vc->vc_utf_count = 5;
				    vc->vc_utf_char = (c & 0x01);
				} else
				    vc->vc_utf_count = 0;
				continue;
			      }
		    } else {
		      tc = c;
		      vc->vc_utf_count = 0;
		    }
		} else {	/* no utf */
		  tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
		}

                /* If the original code was a control character we
                 * only allow a glyph to be displayed if the code is
                 * not normally used (such as for cursor movement) or
                 * if the disp_ctrl mode has been explicitly enabled.
                 * Certain characters (as given by the CTRL_ALWAYS
                 * bitmap) are always displayed as control characters,
                 * as the console would be pretty useless without
                 * them; to display an arbitrary font position use the
                 * direct-to-font zone in UTF-8 mode.
                 */
                ok = tc && (c >= 32 ||
			    (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS
						: CTRL_ACTION) >> c) & 1)))
			&& (c != 127 || vc->vc_disp_ctrl)
			&& (c != 128+27);

		if (vc->vc_state == ESnormal && ok) {
			/* Now try to find out how to display it */
			tc = conv_uni_to_pc(vc, tc);
			if (tc == -4) {
                                /* If we got -4 (not found) then see if we have
                                   defined a replacement character (U+FFFD) */
                                tc = conv_uni_to_pc(vc, 0xfffd);

				/* One reason for the -4 can be that we just
				   did a clear_unimap();
				   try at least to show something. */
				if (tc == -4)
				     tc = c;
                        } else if (tc == -3) {
                                /* Bad hash table -- hope for the best */
                                tc = c;
                        }
			if (tc & ~charmask)
                                continue; /* Conversion failed */

			if (vc->vc_need_wrap || vc->vc_decim)
				FLUSH
			if (vc->vc_need_wrap) {
				cr(vc);
				lf(vc);
			}
			if (vc->vc_decim)
				insert_char(vc, 1);
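			/* With a 512-glyph font, himask marks the attribute
			 * bit that stores bit 8 of the glyph; otherwise the
			 * cell is simply attribute byte plus 8-bit glyph. */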
			scr_writew(himask ?
				     ((vc->vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
				     (vc->vc_attr << 8) + tc,
				   (u16 *) vc->vc_pos);
			if (DO_UPDATE(vc) && draw_x < 0) {
				draw_x = vc->vc_x;
				draw_from = vc->vc_pos;
			}
			if (vc->vc_x == vc->vc_cols - 1) {
				vc->vc_need_wrap = vc->vc_decawm;
				draw_to = vc->vc_pos + 2;
			} else {
				vc->vc_x++;
				draw_to = (vc->vc_pos += 2);
			}
			continue;
		}
		FLUSH
		do_con_trol(tty, vc, orig);
	}
	FLUSH
	console_conditional_schedule();
	release_console_sem();

out:
	return n;
#undef FLUSH
}

/*
 * This is the console switching callback.
 *
 * Doing console switching in a process context allows
 * us to do the switches asynchronously (needed when we want
 * to switch due to a keyboard interrupt).  Synchronization
 * with other console code and prevention of re-entrancy is
 * ensured with console_sem.
 */
static void console_callback(void *ignored)
{
	acquire_console_sem();

	if (want_console >= 0) {
		if (want_console != fg_console &&
		    vc_cons_allocated(want_console)) {
			hide_cursor(vc_cons[fg_console].d);
			change_console(vc_cons[want_console].d);
			/* we only switch when the console has already
			   been allocated - a new console is not created
			   from an interrupt routine */
		}
		want_console = -1;
	}
	if (do_poke_blanked_console) { /* do not unblank for a LED change */
		do_poke_blanked_console = 0;
		poke_blanked_console();
	}
	if (scrollback_delta) {
		struct vc_data *vc = vc_cons[fg_console].d;
		clear_selection();
		if (vc->vc_mode == KD_TEXT)
			vc->vc_sw->con_scrolldelta(vc, scrollback_delta);
		scrollback_delta = 0;
	}
	if (blank_timer_expired) {
		do_blank_screen(0);
		blank_timer_expired = 0;
	}

	release_console_sem();
}

void set_console(int nr)
{
	want_console = nr;
	schedule_console_callback();
}

struct tty_driver *console_driver;

#ifdef CONFIG_VT_CONSOLE

/*
 *	Console on virtual terminal
 *
 * The console must be locked when we get here.
 */

static void vt_console_print(struct console *co, const char *b, unsigned count)
{
	struct vc_data *vc = vc_cons[fg_console].d;
	unsigned char c;
	static unsigned long printing;
	const ushort *start;
	ushort cnt = 0;
	ushort myx;

	/* console busy or not yet initialized */
	if (!printable || test_and_set_bit(0, &printing))
		return;

	if (kmsg_redirect && vc_cons_allocated(kmsg_redirect - 1))
		vc = vc_cons[kmsg_redirect - 1].d;

	/* read `x' only after setting currcons properly (otherwise
	   the `x' macro will read the x of the foreground console). */
	myx = vc->vc_x;

	if (!vc_cons_allocated(fg_console)) {
		/* impossible */
		/* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */
		goto quit;
	}

	if (vc->vc_mode != KD_TEXT)
		goto quit;

	/* undraw cursor first */
	if (IS_FG(vc))
		hide_cursor(vc);

	start = (ushort *)vc->vc_pos;

	/* Contrived structure to try to emulate the original need_wrap
	 * behaviour. Problems are caused when need_wrap is set on a '\n'
	 * character. */
	while (count--) {
		c = *b++;
		if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) {
			if (cnt > 0) {
				if (CON_IS_VISIBLE(vc))
					vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
				vc->vc_x += cnt;
				if (vc->vc_need_wrap)
					vc->vc_x--;
				cnt = 0;
			}
			if (c == 8) {		/* backspace */
				bs(vc);
				start = (ushort *)vc->vc_pos;
				myx = vc->vc_x;
				continue;
			}
			if (c != 13)
				lf(vc);
			cr(vc);
			start = (ushort *)vc->vc_pos;
			myx = vc->vc_x;
			if (c == 10 || c == 13)
				continue;
		}
		scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
		cnt++;
		if (myx == vc->vc_cols - 1) {
			vc->vc_need_wrap = 1;
			continue;
		}
		vc->vc_pos += 2;
		myx++;
	}
	if (cnt > 0) {
		if (CON_IS_VISIBLE(vc))
			vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
		vc->vc_x += cnt;
		if (vc->vc_x == vc->vc_cols) {
			vc->vc_x--;
			vc->vc_need_wrap = 1;
		}
	}
	set_cursor(vc);

quit:
	clear_bit(0, &printing);
}

static struct tty_driver *vt_console_device(struct console *c, int *index)
{
	*index = c->index ? c->index-1 : fg_console;
	return console_driver;
}

static struct console vt_console_driver = {
	.name		= "tty",
	.write		= vt_console_print,
	.device		= vt_console_device,
	.unblank	= unblank_screen,
	.flags		= CON_PRINTBUFFER,
	.index		= -1,
};
#endif

/*
 *	Handling of Linux-specific VC ioctls
 */

/*
 * Generally a bit racy with respect to console_sem().
 *
 * There are some functions which don't need it.
 *
 * There are some functions which can sleep for arbitrary periods
 * (paste_selection) but we don't need the lock there anyway.
 *
 * set_selection has locking, and definitely needs it
 */

int tioclinux(struct tty_struct *tty, unsigned long arg)
{
	char type, data;
	char __user *p = (char __user *)arg;
	int lines;
	int ret;

	if (tty->driver->type != TTY_DRIVER_TYPE_CONSOLE)
		return -EINVAL;
	if (current->signal->tty != tty && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (get_user(type, p))
		return -EFAULT;
	ret = 0;
	switch (type)
	{
		case TIOCL_SETSEL:
			acquire_console_sem();
			ret = set_selection((struct tiocl_selection __user *)(p+1), tty);
			release_console_sem();
			break;
		case TIOCL_PASTESEL:
			ret = paste_selection(tty);
			break;
		case TIOCL_UNBLANKSCREEN:
			unblank_screen();
			break;
		case TIOCL_SELLOADLUT:
			ret = sel_loadlut(p);
			break;
		case TIOCL_GETSHIFTSTATE:
			/*
			 * Make it possible to react to Shift+Mousebutton.
			 * Note that 'shift_state' is an undocumented
			 * kernel-internal variable; programs not closely
			 * related to the kernel should not use this.
			 */
			data = shift_state;
			ret = __put_user(data, p);
			break;
		case TIOCL_GETMOUSEREPORTING:
			data = mouse_reporting();
			ret = __put_user(data, p);
			break;
		case TIOCL_SETVESABLANK:
			set_vesa_blanking(p);
			break;
		case TIOCL_SETKMSGREDIRECT:
			if (!capable(CAP_SYS_ADMIN)) {
				ret = -EPERM;
			} else {
				if (get_user(data, p+1))
					ret = -EFAULT;
				else
					kmsg_redirect = data;
			}
			break;
		case TIOCL_GETFGCONSOLE:
			ret = fg_console;
			break;
		case TIOCL_SCROLLCONSOLE:
			if (get_user(lines, (s32 __user *)(p+4))) {
				ret = -EFAULT;
			} else {
				scrollfront(vc_cons[fg_console].d, lines);
				ret = 0;
			}
			break;
		case TIOCL_BLANKSCREEN:	/* until explicitly unblanked, not only poked */
			ignore_poke = 1;
			do_blank_screen(0);
			break;
		case TIOCL_BLANKEDSCREEN:
			ret = console_blanked;
			break;
		default:
			ret = -EINVAL;
			break;
	}
	return ret;
}

/*
 * /dev/ttyN handling
 */

static int con_write(struct tty_struct *tty, const unsigned char *buf, int count)
{
	int	retval;

	retval = do_con_write(tty, buf, count);
	con_flush_chars(tty);

	return retval;
}

static void con_put_char(struct tty_struct *tty, unsigned char ch)
{
	if (in_interrupt())
		return;	/* n_r3964 calls put_char() from interrupt context */
	do_con_write(tty, &ch, 1);
}

static int con_write_room(struct tty_struct *tty)
{
	if (tty->stopped)
		return 0;
	return 4096;		/* No limit, really; we're not buffering */
}

static int con_chars_in_buffer(struct tty_struct *tty)
{
	return 0;		/* we're not buffering */
}

/*
 * con_throttle and con_unthrottle are only used for
 * paste_selection(), which has to stuff in a large number of
 * characters...
 */
static void con_throttle(struct tty_struct *tty)
{
}

static void con_unthrottle(struct tty_struct *tty)
{
	struct vc_data *vc = tty->driver_data;

	wake_up_interruptible(&vc->paste_wait);
}

/*
 * Turn the Scroll-Lock LED on when the tty is stopped
 */
static void con_stop(struct tty_struct *tty)
{
	int console_num;
	if (!tty)
		return;
	console_num = tty->index;
	if (!vc_cons_allocated(console_num))
		return;
	set_vc_kbd_led(kbd_table + console_num, VC_SCROLLOCK);
	set_leds();
}

/*
 * Turn the Scroll-Lock LED off when the console is started
 */
static void con_start(struct tty_struct *tty)
{
	int console_num;
	if (!tty)
		return;
	console_num = tty->index;
	if (!vc_cons_allocated(console_num))
		return;
	clr_vc_kbd_led(kbd_table + console_num, VC_SCROLLOCK);
	set_leds();
}

static void con_flush_chars(struct tty_struct *tty)
{
	struct vc_data *vc;

	if (in_interrupt())	/* from flush_to_ldisc */
		return;

	/* if we race with con_close(), vt may be null */
	acquire_console_sem();
	vc = tty->driver_data;
	if (vc)
		set_cursor(vc);
	release_console_sem();
}

/*
 * Allocate the console screen memory.
 */
static int con_open(struct tty_struct *tty, struct file *filp)
{
	unsigned int currcons = tty->index;
	int ret = 0;

	acquire_console_sem();
	if (tty->count == 1) {
		ret = vc_allocate(currcons);
		if (ret == 0) {
			struct vc_data *vc = vc_cons[currcons].d;
			tty->driver_data = vc;
			vc->vc_tty = tty;

			if (!tty->winsize.ws_row && !tty->winsize.ws_col) {
				tty->winsize.ws_row = vc_cons[currcons].d->vc_rows;
				tty->winsize.ws_col = vc_cons[currcons].d->vc_cols;
			}
			release_console_sem();
			vcs_make_devfs(tty);
			return ret;
		}
	}
	release_console_sem();
	return ret;
}

/*
 * We take tty_sem in here to prevent another thread from coming in via init_dev
 * and taking a ref against the tty while we're in the process of forgetting
 * about it and cleaning things up.
 *
 * This is because vcs_remove_devfs() can sleep and will drop the BKL.
 */
static void con_close(struct tty_struct *tty, struct file *filp)
{
	down(&tty_sem);
	acquire_console_sem();
	if (tty && tty->count == 1) {
		struct vc_data *vc = tty->driver_data;

		if (vc)
			vc->vc_tty = NULL;
		tty->driver_data = NULL;
		release_console_sem();
		vcs_remove_devfs(tty);
		up(&tty_sem);
		/*
		 * tty_sem is released, but we still hold BKL, so there is
		 * still exclusion against init_dev()
		 */
		return;
	}
	release_console_sem();
	up(&tty_sem);
}

static void vc_init(struct vc_data *vc, unsigned int rows,
		    unsigned int cols, int do_clear)
{
	int j, k;

	vc->vc_cols = cols;
	vc->vc_rows = rows;
	vc->vc_size_row = cols << 1;
	vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row;

	set_origin(vc);
	vc->vc_pos = vc->vc_origin;
	reset_vc(vc);
	for (j = k = 0; j < 16; j++) {
		vc->vc_palette[k++] = default_red[j];
		vc->vc_palette[k++] = default_grn[j];
		vc->vc_palette[k++] = default_blu[j];
	}
	vc->vc_def_color       = 0x07;   /* white */
	vc->vc_ulcolor		= 0x0f;   /* bold white */
	vc->vc_halfcolor       = 0x08;   /* grey */
	init_waitqueue_head(&vc->paste_wait);
	reset_terminal(vc, do_clear);
}

/*
 * This routine initializes console interrupts, and does nothing
 * else. If you want the screen to clear, call tty_write with
 * the appropriate escape-sequence.
 */

static int __init con_init(void)
{
	const char *display_desc = NULL;
	struct vc_data *vc;
	unsigned int currcons = 0;

	acquire_console_sem();

	if (conswitchp)
		display_desc = conswitchp->con_startup();
	if (!display_desc) {
		fg_console = 0;
		release_console_sem();
		return 0;
	}

	init_timer(&console_timer);
	console_timer.function = blank_screen_t;
	if (blankinterval) {
		blank_state = blank_normal_wait;
		mod_timer(&console_timer, jiffies + blankinterval);
	}

	/*
	 * kmalloc is not running yet - we use the bootmem allocator.
	 */
	for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
		vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data));
		visual_init(vc, currcons, 1);
		vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size);
		vc->vc_kmalloced = 0;
		vc_init(vc, vc->vc_rows, vc->vc_cols,
			currcons || !vc->vc_sw->con_save_screen);
	}
	currcons = fg_console = 0;
	master_display_fg = vc = vc_cons[currcons].d;
	set_origin(vc);
	save_screen(vc);
	gotoxy(vc, vc->vc_x, vc->vc_y);
	csi_J(vc, 0);
	update_screen(vc);
	printk("Console: %s %s %dx%d",
		vc->vc_can_do_color ? "colour" : "mono",
		display_desc, vc->vc_cols, vc->vc_rows);
	printable = 1;
	printk("\n");

	release_console_sem();

#ifdef CONFIG_VT_CONSOLE
	register_console(&vt_console_driver);
#endif
	return 0;
}
console_initcall(con_init);

static struct tty_operations con_ops = {
	.open = con_open,
	.close = con_close,
	.write = con_write,
	.write_room = con_write_room,
	.put_char = con_put_char,
	.flush_chars = con_flush_chars,
	.chars_in_buffer = con_chars_in_buffer,
	.ioctl = vt_ioctl,
	.stop = con_stop,
	.start = con_start,
	.throttle = con_throttle,
	.unthrottle = con_unthrottle,
};

int __init vty_init(void)
{
	vcs_init();

	console_driver = alloc_tty_driver(MAX_NR_CONSOLES);
	if (!console_driver)
		panic("Couldn't allocate console driver\n");
	console_driver->owner = THIS_MODULE;
	console_driver->devfs_name = "vc/";
	console_driver->name = "tty";
	console_driver->name_base = 1;
	console_driver->major = TTY_MAJOR;
	console_driver->minor_start = 1;
	console_driver->type = TTY_DRIVER_TYPE_CONSOLE;
	console_driver->init_termios = tty_std_termios;
	console_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS;
	tty_set_operations(console_driver, &con_ops);
	if (tty_register_driver(console_driver))
		panic("Couldn't register console driver\n");

	kbd_init();
	console_map_init();
#ifdef CONFIG_PROM_CONSOLE
	prom_con_init();
#endif
#ifdef CONFIG_MDA_CONSOLE
	mda_console_init();
#endif
	return 0;
}

#ifndef VT_SINGLE_DRIVER

/*
 *	If we support more console drivers, this function is used
 *	when a driver wants to take over some existing consoles
 *	and become default driver for newly opened ones.
 */

int take_over_console(const struct consw *csw, int first, int last, int deflt)
{
	int i, j = -1;
	const char *desc;
	struct module *owner;

	owner = csw->owner;
	if (!try_module_get(owner))
		return -ENODEV;

	acquire_console_sem();

	desc = csw->con_startup();
	if (!desc) {
		release_console_sem();
		module_put(owner);
		return -ENODEV;
	}
	if (deflt) {
		if (conswitchp)
			module_put(conswitchp->owner);
		__module_get(owner);
		conswitchp = csw;
	}

	for (i = first; i <= last; i++) {
		int old_was_color;
		struct vc_data *vc = vc_cons[i].d;

		if (con_driver_map[i])
			module_put(con_driver_map[i]->owner);
		__module_get(owner);
		con_driver_map[i] = csw;

		if (!vc || !vc->vc_sw)
			continue;

		j = i;
		if (CON_IS_VISIBLE(vc))
			save_screen(vc);
		old_was_color = vc->vc_can_do_color;
		vc->vc_sw->con_deinit(vc);
		vc->vc_origin = (unsigned long)vc->vc_screenbuf;
		vc->vc_visible_origin = vc->vc_origin;
		vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size;
		vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->vc_y + 2 * vc->vc_x;
		visual_init(vc, i, 0);
		update_attr(vc);

		/* If the console changed between mono <-> color, then
		 * the attributes in the screenbuf will be wrong.  The
		 * following resets all attributes to something sane.
		 */
		if (old_was_color != vc->vc_can_do_color)
			clear_buffer_attributes(vc);

		if (CON_IS_VISIBLE(vc))
			update_screen(vc);
	}
	printk("Console: switching ");
	if (!deflt)
		printk("consoles %d-%d ", first+1, last+1);
	if (j >= 0)
		printk("to %s %s %dx%d\n",
		       vc_cons[j].d->vc_can_do_color ? "colour" : "mono",
		       desc, vc_cons[j].d->vc_cols, vc_cons[j].d->vc_rows);
	else
		printk("to %s\n", desc);

	release_console_sem();

	module_put(owner);
	return 0;
}

void give_up_console(const struct consw *csw)
{
	int i;

	for(i = 0; i < MAX_NR_CONSOLES; i++)
		if (con_driver_map[i] == csw) {
			module_put(csw->owner);
			con_driver_map[i] = NULL;
		}
}

#endif

/*
 *	Screen blanking
 */

static void set_vesa_blanking(char __user *p)
{
    unsigned int mode;
    get_user(mode, p + 1);
    vesa_blank_mode = (mode < 4) ? mode : 0;
}

/*
 * This is called by a timer handler
 */
static void vesa_powerdown(void)
{
    struct vc_data *c = vc_cons[fg_console].d;
    /*
     *  Power down if currently suspended (1 or 2),
     *  suspend if currently blanked (0),
     *  else do nothing (i.e. already powered down (3)).
     *  Called only if powerdown features are allowed.
     */
    switch (vesa_blank_mode) {
    case VESA_NO_BLANKING:
	    c->vc_sw->con_blank(c, VESA_VSYNC_SUSPEND+1, 0);
	    break;
    case VESA_VSYNC_SUSPEND:
    case VESA_HSYNC_SUSPEND:
	    c->vc_sw->con_blank(c, VESA_POWERDOWN+1, 0);
	    break;
    }
}

void do_blank_screen(int entering_gfx)
{
	struct vc_data *vc = vc_cons[fg_console].d;
	int i;

	WARN_CONSOLE_UNLOCKED();

	if (console_blanked) {
		if (blank_state == blank_vesa_wait) {
			blank_state = blank_off;
			vesa_powerdown();

		}
		return;
	}
	if (blank_state != blank_normal_wait)
		return;
	blank_state = blank_off;

	/* entering graphics mode? */
	if (entering_gfx) {
		hide_cursor(vc);
		save_screen(vc);
		vc->vc_sw->con_blank(vc, -1, 1);
		console_blanked = fg_console + 1;
		set_origin(vc);
		return;
	}

	/* don't blank graphics */
	if (vc->vc_mode != KD_TEXT) {
		console_blanked = fg_console + 1;
		return;
	}

	hide_cursor(vc);
	del_timer_sync(&console_timer);
	blank_timer_expired = 0;

	save_screen(vc);
	/* In case we need to reset origin, blanking hook returns 1 */
	i = vc->vc_sw->con_blank(vc, 1, 0);
	console_blanked = fg_console + 1;
	if (i)
		set_origin(vc);

	if (console_blank_hook && console_blank_hook(1))
		return;

	if (vesa_off_interval) {
		blank_state = blank_vesa_wait;
		mod_timer(&console_timer, jiffies + vesa_off_interval);
	}

    	if (vesa_blank_mode)
		vc->vc_sw->con_blank(vc, vesa_blank_mode + 1, 0);
}
EXPORT_SYMBOL(do_blank_screen);

/*
 * Called by timer as well as from vt_console_driver
 */
void do_unblank_screen(int leaving_gfx)
{
	struct vc_data *vc;

	/* This should now always be called from a "sane" (read: can schedule)
	 * context for the sake of the low level drivers, except in the special
	 * case of oops_in_progress
	 */
	if (!oops_in_progress)
		might_sleep();

	WARN_CONSOLE_UNLOCKED();

	ignore_poke = 0;
	if (!console_blanked)
		return;
	if (!vc_cons_allocated(fg_console)) {
		/* impossible */
		printk("unblank_screen: tty %d not allocated ??\n", fg_console+1);
		return;
	}
	vc = vc_cons[fg_console].d;
	if (vc->vc_mode != KD_TEXT)
		return; /* but leave console_blanked != 0 */

	if (blankinterval) {
		mod_timer(&console_timer, jiffies + blankinterval);
		blank_state = blank_normal_wait;
	}

	console_blanked = 0;
	if (vc->vc_sw->con_blank(vc, 0, leaving_gfx))
		/* Low-level driver cannot restore -> do it ourselves */
		update_screen(vc);
	if (console_blank_hook)
		console_blank_hook(0);
	set_palette(vc);
	set_cursor(vc);
}
EXPORT_SYMBOL(do_unblank_screen);

/*
 * This is called by the outside world to cause a forced unblank, mostly for
 * oopses. Currently, I just call do_unblank_screen(0), but we could eventually
 * call it with 1 as an argument and so force a mode restore... that may kill
 * X or at least garbage the screen but would also make the Oops visible...
 */
void unblank_screen(void)
{
	do_unblank_screen(0);
}

/*
 * We defer the timer blanking to a work queue so it can take the console
 * semaphore (console operations can still happen at irq time, but only from
 * printk, which holds the console semaphore). Not perfect yet, but better
 * than no locking.
 */
static void blank_screen_t(unsigned long dummy)
{
	blank_timer_expired = 1;
	schedule_work(&console_work);
}

void poke_blanked_console(void)
{
	WARN_CONSOLE_UNLOCKED();

	/* Add this so we quickly catch whoever might call us in a non-safe
	 * context. Nowadays, unblank_screen() isn't to be called in
	 * atomic contexts and is allowed to schedule (with the special case
	 * of oops_in_progress, but that isn't of any concern for this
	 * function). --BenH.
	 */
	might_sleep();

	/* This isn't perfectly race free, but a race here would be mostly
	 * harmless; at worst, we'll do a spurious blank, and that is
	 * unlikely anyway.
	 */
	del_timer(&console_timer);
	blank_timer_expired = 0;

	if (ignore_poke || !vc_cons[fg_console].d || vc_cons[fg_console].d->vc_mode == KD_GRAPHICS)
		return;
	if (console_blanked)
		unblank_screen();
	else if (blankinterval) {
		mod_timer(&console_timer, jiffies + blankinterval);
		blank_state = blank_normal_wait;
	}
}

/*
 *	Palettes
 */

static void set_palette(struct vc_data *vc)
{
	WARN_CONSOLE_UNLOCKED();

	if (vc->vc_mode != KD_GRAPHICS)
		vc->vc_sw->con_set_palette(vc, color_table);
}

static int set_get_cmap(unsigned char __user *arg, int set)
{
    int i, j, k;

    WARN_CONSOLE_UNLOCKED();

    for (i = 0; i < 16; i++)
	if (set) {
	    get_user(default_red[i], arg++);
	    get_user(default_grn[i], arg++);
	    get_user(default_blu[i], arg++);
	} else {
	    put_user(default_red[i], arg++);
	    put_user(default_grn[i], arg++);
	    put_user(default_blu[i], arg++);
	}
    if (set) {
	for (i = 0; i < MAX_NR_CONSOLES; i++)
	    if (vc_cons_allocated(i)) {
		for (j = k = 0; j < 16; j++) {
		    vc_cons[i].d->vc_palette[k++] = default_red[j];
		    vc_cons[i].d->vc_palette[k++] = default_grn[j];
		    vc_cons[i].d->vc_palette[k++] = default_blu[j];
		}
		set_palette(vc_cons[i].d);
	    }
    }
    return 0;
}

/*
 * Load palette into the DAC registers. arg points to a colour
 * map, 3 bytes per colour, 16 colours, range from 0 to 255.
 */

int con_set_cmap(unsigned char __user *arg)
{
	int rc;

	acquire_console_sem();
	rc = set_get_cmap(arg, 1);
	release_console_sem();

	return rc;
}

int con_get_cmap(unsigned char __user *arg)
{
	int rc;

	acquire_console_sem();
	rc = set_get_cmap(arg, 0);
	release_console_sem();

	return rc;
}

void reset_palette(struct vc_data *vc)
{
	int j, k;
	for (j = k = 0; j < 16; j++) {
		vc->vc_palette[k++] = default_red[j];
		vc->vc_palette[k++] = default_grn[j];
		vc->vc_palette[k++] = default_blu[j];
	}
	set_palette(vc);
}

/*
 *  Font switching
 *
 *  Currently we only support fonts up to 32 pixels wide, at a maximum height
 *  of 32 pixels. Userspace fontdata is stored with 32 bytes (shorts/ints, 
 *  depending on width) reserved for each character which is kinda wasty, but 
 *  this is done in order to maintain compatibility with the EGA/VGA fonts. It 
 *  is upto the actual low-level console-driver convert data into its favorite
 *  format (maybe we should add a `fontoffset' field to the `display'
 *  structure so we won't have to convert the fontdata all the time.
 *  /Jes
 */

#define max_font_size 65536
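/* 4 bytes per row (32-pixel maximum width) * 32 rows * 512 glyphs = 65536 */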

static int con_font_get(struct vc_data *vc, struct console_font_op *op)
{
	struct console_font font;
	int rc = -EINVAL;
	int c;

	if (vc->vc_mode != KD_TEXT)
		return -EINVAL;

	if (op->data) {
		font.data = kmalloc(max_font_size, GFP_KERNEL);
		if (!font.data)
			return -ENOMEM;
	} else
		font.data = NULL;

	acquire_console_sem();
	if (vc->vc_sw->con_font_get)
		rc = vc->vc_sw->con_font_get(vc, &font);
	else
		rc = -ENOSYS;
	release_console_sem();

	if (rc)
		goto out;

	c = (font.width+7)/8 * 32 * font.charcount;
	
	if (op->data && font.charcount > op->charcount)
		rc = -ENOSPC;
	if (!(op->flags & KD_FONT_FLAG_OLD)) {
		if (font.width > op->width || font.height > op->height) 
			rc = -ENOSPC;
	} else {
		if (font.width != 8)
			rc = -EIO;
		else if ((op->height && font.height > op->height) ||
			 font.height > 32)
			rc = -ENOSPC;
	}
	if (rc)
		goto out;

	op->height = font.height;
	op->width = font.width;
	op->charcount = font.charcount;

	if (op->data && copy_to_user(op->data, font.data, c))
		rc = -EFAULT;

out:
	kfree(font.data);
	return rc;
}

static int con_font_set(struct vc_data *vc, struct console_font_op *op)
{
	struct console_font font;
	int rc = -EINVAL;
	int size;

	if (vc->vc_mode != KD_TEXT)
		return -EINVAL;
	if (!op->data)
		return -EINVAL;
	if (op->charcount > 512)
		return -EINVAL;
	if (!op->height) {		/* Need to guess font height [compat] */
		int h, i;
		u8 __user *charmap = op->data;
		u8 tmp;
		
		/* If from KDFONTOP ioctl, don't allow things which can be done in userland,
		   so that we can get rid of this soon */
		if (!(op->flags & KD_FONT_FLAG_OLD))
			return -EINVAL;
		for (h = 32; h > 0; h--)
			for (i = 0; i < op->charcount; i++) {
				if (get_user(tmp, &charmap[32*i+h-1]))
					return -EFAULT;
				if (tmp)
					goto nonzero;
			}
		return -EINVAL;
	nonzero:
		op->height = h;
	}
	if (op->width <= 0 || op->width > 32 || op->height > 32)
		return -EINVAL;
	size = (op->width+7)/8 * 32 * op->charcount;
	if (size > max_font_size)
		return -ENOSPC;
	font.charcount = op->charcount;
	font.height = op->height;
	font.width = op->width;
	font.data = kmalloc(size, GFP_KERNEL);
	if (!font.data)
		return -ENOMEM;
	if (copy_from_user(font.data, op->data, size)) {
		kfree(font.data);
		return -EFAULT;
	}
	acquire_console_sem();
	if (vc->vc_sw->con_font_set)
		rc = vc->vc_sw->con_font_set(vc, &font, op->flags);
	else
		rc = -ENOSYS;
	release_console_sem();
	kfree(font.data);
	return rc;
}

static int con_font_default(struct vc_data *vc, struct console_font_op *op)
{
	struct console_font font = {.width = op->width, .height = op->height};
	char name[MAX_FONT_NAME];
	char *s = name;
	int rc;

	if (vc->vc_mode != KD_TEXT)
		return -EINVAL;

	if (!op->data)
		s = NULL;
	else if (strncpy_from_user(name, op->data, MAX_FONT_NAME - 1) < 0)
		return -EFAULT;
	else
		name[MAX_FONT_NAME - 1] = 0;

	acquire_console_sem();
	if (vc->vc_sw->con_font_default)
		rc = vc->vc_sw->con_font_default(vc, &font, s);
	else
		rc = -ENOSYS;
	release_console_sem();
	if (!rc) {
		op->width = font.width;
		op->height = font.height;
	}
	return rc;
}

static int con_font_copy(struct vc_data *vc, struct console_font_op *op)
{
	int con = op->height;
	int rc;

	if (vc->vc_mode != KD_TEXT)
		return -EINVAL;

	acquire_console_sem();
	if (!vc->vc_sw->con_font_copy)
		rc = -ENOSYS;
	else if (con < 0 || !vc_cons_allocated(con))
		rc = -ENOTTY;
	else if (con == vc->vc_num)	/* nothing to do */
		rc = 0;
	else
		rc = vc->vc_sw->con_font_copy(vc, con);
	release_console_sem();
	return rc;
}

int con_font_op(struct vc_data *vc, struct console_font_op *op)
{
	switch (op->op) {
	case KD_FONT_OP_SET:
		return con_font_set(vc, op);
	case KD_FONT_OP_GET:
		return con_font_get(vc, op);
	case KD_FONT_OP_SET_DEFAULT:
		return con_font_default(vc, op);
	case KD_FONT_OP_COPY:
		return con_font_copy(vc, op);
	}
	return -ENOSYS;
}

/*
 *	Interface exported to selection and vcs.
 */

/* used by selection */
u16 screen_glyph(struct vc_data *vc, int offset)
{
	u16 w = scr_readw(screenpos(vc, offset, 1));
	u16 c = w & 0xff;

	if (w & vc->vc_hi_font_mask)
		c |= 0x100;
	return c;
}

/* used by vcs - note the word offset */
unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
{
	return screenpos(vc, 2 * w_offset, viewed);
}

void getconsxy(struct vc_data *vc, unsigned char *p)
{
	p[0] = vc->vc_x;
	p[1] = vc->vc_y;
}

void putconsxy(struct vc_data *vc, unsigned char *p)
{
	gotoxy(vc, p[0], p[1]);
	set_cursor(vc);
}

u16 vcs_scr_readw(struct vc_data *vc, const u16 *org)
{
	if ((unsigned long)org == vc->vc_pos && softcursor_original != -1)
		return softcursor_original;
	return scr_readw(org);
}

void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org)
{
	scr_writew(val, org);
	if ((unsigned long)org == vc->vc_pos) {
		softcursor_original = -1;
		add_softcursor(vc);
	}
}

/*
 *	Visible symbols for modules
 */

EXPORT_SYMBOL(color_table);
EXPORT_SYMBOL(default_red);
EXPORT_SYMBOL(default_grn);
EXPORT_SYMBOL(default_blu);
EXPORT_SYMBOL(update_region);
EXPORT_SYMBOL(redraw_screen);
EXPORT_SYMBOL(vc_resize);
EXPORT_SYMBOL(fg_console);
EXPORT_SYMBOL(console_blank_hook);
EXPORT_SYMBOL(console_blanked);
EXPORT_SYMBOL(vc_cons);
#ifndef VT_SINGLE_DRIVER
EXPORT_SYMBOL(take_over_console);
EXPORT_SYMBOL(give_up_console);
#endif
l opt">; } #undef ISVALID } STATIC int xfs_bmap_rtalloc( xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { xfs_alloctype_t atype = 0; /* type for allocation routines */ int error; /* error return value */ xfs_mount_t *mp; /* mount point structure */ xfs_extlen_t prod = 0; /* product factor for allocators */ xfs_extlen_t ralen = 0; /* realtime allocation length */ xfs_extlen_t align; /* minimum allocation alignment */ xfs_rtblock_t rtb; mp = ap->ip->i_mount; align = xfs_get_extsz_hint(ap->ip); prod = align / mp->m_sb.sb_rextsize; error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, align, 1, ap->eof, 0, ap->conv, &ap->off, &ap->alen); if (error) return error; ASSERT(ap->alen); ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); /* * If the offset & length are not perfectly aligned * then kill prod, it will just get us in trouble. */ if (do_mod(ap->off, align) || ap->alen % align) prod = 1; /* * Set ralen to be the actual requested length in rtextents. */ ralen = ap->alen / mp->m_sb.sb_rextsize; /* * If the old value was close enough to MAXEXTLEN that * we rounded up to it, cut it back so it's valid again. * Note that if it's a really large request (bigger than * MAXEXTLEN), we don't hear about that number, and can't * adjust the starting point to match it. */ if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; /* * If it's an allocation to an empty file at offset 0, * pick an extent that will space things out in the rt area. */ if (ap->eof && ap->off == 0) { xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); if (error) return error; ap->rval = rtx * mp->m_sb.sb_rextsize; } else { ap->rval = 0; } xfs_bmap_adjacent(ap); /* * Realtime allocation, done through xfs_rtallocate_extent. */ atype = ap->rval == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; do_div(ap->rval, mp->m_sb.sb_rextsize); rtb = ap->rval; ap->alen = ralen; if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen, &ralen, atype, ap->wasdel, prod, &rtb))) return error; if (rtb == NULLFSBLOCK && prod > 1 && (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen, &ralen, atype, ap->wasdel, 1, &rtb))) return error; ap->rval = rtb; if (ap->rval != NULLFSBLOCK) { ap->rval *= mp->m_sb.sb_rextsize; ralen *= mp->m_sb.sb_rextsize; ap->alen = ralen; ap->ip->i_d.di_nblocks += ralen; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) ap->ip->i_delayed_blks -= ralen; /* * Adjust the disk quota also. This was reserved * earlier. */ xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); } else { ap->alen = 0; } return 0; } STATIC int xfs_bmap_btalloc_nullfb( struct xfs_bmalloca *ap, struct xfs_alloc_arg *args, xfs_extlen_t *blen) { struct xfs_mount *mp = ap->ip->i_mount; struct xfs_perag *pag; xfs_agnumber_t ag, startag; int notinit = 0; int error; if (ap->userdata && xfs_inode_is_filestream(ap->ip)) args->type = XFS_ALLOCTYPE_NEAR_BNO; else args->type = XFS_ALLOCTYPE_START_BNO; args->total = ap->total; /* * Search for an allocation group with a single extent large enough * for the request. If one isn't found, then adjust the minimum * allocation size to the largest space found. 
*/ startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); if (startag == NULLAGNUMBER) startag = ag = 0; pag = xfs_perag_get(mp, ag); while (*blen < ap->alen) { if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, args->tp, ag, XFS_ALLOC_FLAG_TRYLOCK); if (error) { xfs_perag_put(pag); return error; } } /* * See xfs_alloc_fix_freelist... */ if (pag->pagf_init) { xfs_extlen_t longest; longest = xfs_alloc_longest_free_extent(mp, pag); if (*blen < longest) *blen = longest; } else notinit = 1; if (xfs_inode_is_filestream(ap->ip)) { if (*blen >= ap->alen) break; if (ap->userdata) { /* * If startag is an invalid AG, we've * come here once before and * xfs_filestream_new_ag picked the * best currently available. * * Don't continue looping, since we * could loop forever. */ if (startag == NULLAGNUMBER) break; error = xfs_filestream_new_ag(ap, &ag); xfs_perag_put(pag); if (error) return error; /* loop again to set 'blen'*/ startag = NULLAGNUMBER; pag = xfs_perag_get(mp, ag); continue; } } if (++ag == mp->m_sb.sb_agcount) ag = 0; if (ag == startag) break; xfs_perag_put(pag); pag = xfs_perag_get(mp, ag); } xfs_perag_put(pag); /* * Since the above loop did a BUF_TRYLOCK, it is * possible that there is space for this request. */ if (notinit || *blen < ap->minlen) args->minlen = ap->minlen; /* * If the best seen length is less than the request * length, use the best as the minimum. */ else if (*blen < ap->alen) args->minlen = *blen; /* * Otherwise we've seen an extent as big as alen, * use that as the minimum. */ else args->minlen = ap->alen; /* * set the failure fallback case to look in the selected * AG as the stream may have moved. */ if (xfs_inode_is_filestream(ap->ip)) ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); return 0; } STATIC int xfs_bmap_btalloc( xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { xfs_mount_t *mp; /* mount point structure */ xfs_alloctype_t atype = 0; /* type for allocation routines */ xfs_extlen_t align; /* minimum allocation alignment */ xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_agnumber_t ag; xfs_alloc_arg_t args; xfs_extlen_t blen; xfs_extlen_t nextminlen = 0; int nullfb; /* true if ap->firstblock isn't set */ int isaligned; int tryagain; int error; mp = ap->ip->i_mount; align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; if (unlikely(align)) { error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, align, 0, ap->eof, 0, ap->conv, &ap->off, &ap->alen); ASSERT(!error); ASSERT(ap->alen); } nullfb = ap->firstblock == NULLFSBLOCK; fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); if (nullfb) { if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { ag = xfs_filestream_lookup_ag(ap->ip); ag = (ag != NULLAGNUMBER) ? ag : 0; ap->rval = XFS_AGB_TO_FSB(mp, ag, 0); } else { ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); } } else ap->rval = ap->firstblock; xfs_bmap_adjacent(ap); /* * If allowed, use ap->rval; otherwise must use firstblock since * it's in the right allocation group. */ if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno) ; else ap->rval = ap->firstblock; /* * Normal allocation, done through xfs_alloc_vextent. 
*/ tryagain = isaligned = 0; args.tp = ap->tp; args.mp = mp; args.fsbno = ap->rval; args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); args.firstblock = ap->firstblock; blen = 0; if (nullfb) { error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); if (error) return error; } else if (ap->low) { if (xfs_inode_is_filestream(ap->ip)) args.type = XFS_ALLOCTYPE_FIRST_AG; else args.type = XFS_ALLOCTYPE_START_BNO; args.total = args.minlen = ap->minlen; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; args.total = ap->total; args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ if (unlikely(align)) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { args.prod = 1; args.mod = 0; } else { args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } /* * If we are not low on available data blocks, and the * underlying logical volume manager is a stripe, and * the file offset is zero then try to allocate data * blocks on stripe unit boundary. * NOTE: ap->aeof is only set if the allocation length * is >= the stripe unit and the allocation offset is * at the end of file. */ if (!ap->low && ap->aeof) { if (!ap->off) { args.alignment = mp->m_dalign; atype = args.type; isaligned = 1; /* * Adjust for alignment */ if (blen > args.alignment && blen <= ap->alen) args.minlen = blen - args.alignment; args.minalignslop = 0; } else { /* * First try an exact bno allocation. * If it fails then do a near or start bno * allocation with alignment turned on. */ atype = args.type; tryagain = 1; args.type = XFS_ALLOCTYPE_THIS_BNO; args.alignment = 1; /* * Compute the minlen+alignment for the * next case. Set slop so that the value * of minlen+alignment+slop doesn't go up * between the calls. */ if (blen > mp->m_dalign && blen <= ap->alen) nextminlen = blen - mp->m_dalign; else nextminlen = args.minlen; if (nextminlen + mp->m_dalign > args.minlen + 1) args.minalignslop = nextminlen + mp->m_dalign - args.minlen - 1; else args.minalignslop = 0; } } else { args.alignment = 1; args.minalignslop = 0; } args.minleft = ap->minleft; args.wasdel = ap->wasdel; args.isfl = 0; args.userdata = ap->userdata; if ((error = xfs_alloc_vextent(&args))) return error; if (tryagain && args.fsbno == NULLFSBLOCK) { /* * Exact allocation failed. Now try with alignment * turned on. */ args.type = atype; args.fsbno = ap->rval; args.alignment = mp->m_dalign; args.minlen = nextminlen; args.minalignslop = 0; isaligned = 1; if ((error = xfs_alloc_vextent(&args))) return error; } if (isaligned && args.fsbno == NULLFSBLOCK) { /* * allocation failed, so turn off alignment and * try again. 
*/ args.type = atype; args.fsbno = ap->rval; args.alignment = 0; if ((error = xfs_alloc_vextent(&args))) return error; } if (args.fsbno == NULLFSBLOCK && nullfb && args.minlen > ap->minlen) { args.minlen = ap->minlen; args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = ap->rval; if ((error = xfs_alloc_vextent(&args))) return error; } if (args.fsbno == NULLFSBLOCK && nullfb) { args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; args.total = ap->minlen; args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; ap->low = 1; } if (args.fsbno != NULLFSBLOCK) { ap->firstblock = ap->rval = args.fsbno; ASSERT(nullfb || fb_agno == args.agno || (ap->low && fb_agno < args.agno)); ap->alen = args.len; ap->ip->i_d.di_nblocks += args.len; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) ap->ip->i_delayed_blks -= args.len; /* * Adjust the disk quota also. This was reserved * earlier. */ xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, (long) args.len); } else { ap->rval = NULLFSBLOCK; ap->alen = 0; } return 0; } /* * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. * It figures out where to ask the underlying allocator to put the new extent. */ STATIC int xfs_bmap_alloc( xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) return xfs_bmap_rtalloc(ap); return xfs_bmap_btalloc(ap); } /* * Transform a btree format file with only one leaf node, where the * extents list will fit in the inode, into an extents format file. * Since the file extents are already in-core, all we have to do is * give up the space for the btree root and pitch the leaf block. */ STATIC int /* error */ xfs_bmap_btree_to_extents( xfs_trans_t *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_btree_cur_t *cur, /* btree cursor */ int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { /* REFERENCED */ struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ xfs_buf_t *cbp; /* child block's buffer */ int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork data */ xfs_mount_t *mp; /* mount point structure */ __be64 *pp; /* ptr to block address */ struct xfs_btree_block *rblock;/* root btree block */ mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); rblock = ifp->if_broot; ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); *logflagsp = 0; #ifdef DEBUG if ((error = xfs_btree_check_lptr(cur, cbno, 1))) return error; #endif if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF))) return error; cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); if (cur->bc_bufs[0] == cbp) cur->bc_bufs[0] = NULL; xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); return 0; } /* * Called by xfs_bmapi 
to update file extent records and the btree * after removing space (or undoing a delayed allocation). */ STATIC int /* error */ xfs_bmap_del_extent( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ xfs_extnum_t idx, /* extent number to update/delete */ xfs_bmap_free_t *flist, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ int whichfork, /* data or attr fork */ int rsvd) /* OK to allocate reserved blocks */ { xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ xfs_fsblock_t del_endblock=0; /* first block past del */ xfs_fileoff_t del_endoff; /* first offset past del */ int delay; /* current block is delayed allocated */ int do_fx; /* free extent at end of routine */ xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ int error; /* error return value */ int flags; /* inode logging flags */ xfs_bmbt_irec_t got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t nblks; /* quota/sb block count */ xfs_bmbt_irec_t new; /* new record to be inserted */ /* REFERENCED */ uint qfield; /* quota field to update */ xfs_filblks_t temp; /* for indirect length calculations */ xfs_filblks_t temp2; /* for indirect length calculations */ int state = 0; XFS_STATS_INC(xs_del_exlist); if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT((idx >= 0) && (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(del->br_blockcount > 0); ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(ep, &got); ASSERT(got.br_startoff <= del->br_startoff); del_endoff = del->br_startoff + del->br_blockcount; got_endoff = got.br_startoff + got.br_blockcount; ASSERT(got_endoff >= del_endoff); delay = isnullstartblock(got.br_startblock); ASSERT(isnullstartblock(del->br_startblock) == delay); flags = 0; qfield = 0; error = 0; /* * If deleting a real allocation, must free up the disk space. */ if (!delay) { flags = XFS_ILOG_CORE; /* * Realtime allocation. Free it and record di_nblocks update. */ if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { xfs_fsblock_t bno; xfs_filblks_t len; ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0); ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0); bno = del->br_startblock; len = del->br_blockcount; do_div(bno, mp->m_sb.sb_rextsize); do_div(len, mp->m_sb.sb_rextsize); if ((error = xfs_rtfree_extent(ip->i_transp, bno, (xfs_extlen_t)len))) goto done; do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; } /* * Ordinary allocation. */ else { do_fx = 1; nblks = del->br_blockcount; qfield = XFS_TRANS_DQ_BCOUNT; } /* * Set up del_endblock and cur for later. */ del_endblock = del->br_startblock + del->br_blockcount; if (cur) { if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, got.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } da_old = da_new = 0; } else { da_old = startblockval(got.br_startblock); da_new = 0; nblks = 0; do_fx = 0; } /* * Set flag value to use in switch statement. * Left-contig is 2, right-contig is 1. 
	 */
	switch (((got.br_startoff == del->br_startoff) << 1) |
		(got_endoff == del_endoff)) {

	case 3:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, idx, 1,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
		ifp->if_lastex = idx;
		if (delay)
			break;
		XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
		flags |= XFS_ILOG_CORE;
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_btree_delete(cur, &i)))
			goto done;
		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		break;

	case 2:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, del_endoff);
		temp = got.br_blockcount - del->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		ifp->if_lastex = idx;
		if (delay) {
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		xfs_bmbt_set_startblock(ep, del_endblock);
		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 1:
		/*
		 * Deleting the last part of the extent.
		 */
		temp = got.br_blockcount - del->br_blockcount;
		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		ifp->if_lastex = idx;
		if (delay) {
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 0:
		/*
		 * Deleting the middle of the extent.
		 */
		temp = del->br_startoff - got.br_startoff;
		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		new.br_startoff = del_endoff;
		temp2 = got_endoff - del_endoff;
		new.br_blockcount = temp2;
		new.br_state = got.br_state;
		if (!delay) {
			new.br_startblock = del_endblock;
			flags |= XFS_ILOG_CORE;
			if (cur) {
				if ((error = xfs_bmbt_update(cur,
						got.br_startoff,
						got.br_startblock, temp,
						got.br_state)))
					goto done;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto done;
				cur->bc_rec.b = new;
				error = xfs_btree_insert(cur, &i);
				if (error && error != ENOSPC)
					goto done;
				/*
				 * If get no-space back from btree insert,
				 * it tried a split, and we have a zero
				 * block reservation.
				 * Fix up our state and return the error.
				 */
				if (error == ENOSPC) {
					/*
					 * Reset the cursor, don't trust
					 * it after any insert operation.
					 */
					if ((error = xfs_bmbt_lookup_eq(cur,
							got.br_startoff,
							got.br_startblock,
							temp, &i)))
						goto done;
					XFS_WANT_CORRUPTED_GOTO(i == 1, done);
					/*
					 * Update the btree record back
					 * to the original value.
					 */
					if ((error = xfs_bmbt_update(cur,
							got.br_startoff,
							got.br_startblock,
							got.br_blockcount,
							got.br_state)))
						goto done;
					/*
					 * Reset the extent record back
					 * to the original value.
					 */
					xfs_bmbt_set_blockcount(ep,
						got.br_blockcount);
					flags = 0;
					error = XFS_ERROR(ENOSPC);
					goto done;
				}
				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
			} else
				flags |= xfs_ilog_fext(whichfork);
			XFS_IFORK_NEXT_SET(ip, whichfork,
				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
		} else {
			ASSERT(whichfork == XFS_DATA_FORK);
			temp = xfs_bmap_worst_indlen(ip, temp);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			temp2 = xfs_bmap_worst_indlen(ip, temp2);
			new.br_startblock = nullstartblock((int)temp2);
			da_new = temp + temp2;
			while (da_new > da_old) {
				if (temp) {
					temp--;
					da_new--;
					xfs_bmbt_set_startblock(ep,
						nullstartblock((int)temp));
				}
				if (da_new == da_old)
					break;
				if (temp2) {
					temp2--;
					da_new--;
					new.br_startblock =
						nullstartblock((int)temp2);
				}
			}
		}
		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
		xfs_iext_insert(ip, idx + 1, 1, &new, state);
		ifp->if_lastex = idx + 1;
		break;
	}
	/*
	 * If we need to, add to list of extents to delete.
	 */
	if (do_fx)
		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
			mp);
	/*
	 * Adjust inode # blocks in the file.
	 */
	if (nblks)
		ip->i_d.di_nblocks -= nblks;
	/*
	 * Adjust quota data.
	 */
	if (qfield)
		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

	/*
	 * Account for change in delayed indirect blocks.
	 * Nothing to do for disk quota accounting here.
	 */
	ASSERT(da_old >= da_new);
	if (da_old > da_new) {
		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
			(int64_t)(da_old - da_new), rsvd);
	}
done:
	*logflagsp = flags;
	return error;
}

/*
 * Remove the entry "free" from the free item list.  Prev points to the
 * previous entry, unless "free" is the head of the list.
 */
STATIC void
xfs_bmap_del_free(
	xfs_bmap_free_t		*flist,	/* free item list header */
	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
	xfs_bmap_free_item_t	*free)	/* list item to be freed */
{
	if (prev)
		prev->xbfi_next = free->xbfi_next;
	else
		flist->xbf_first = free->xbfi_next;
	flist->xbf_count--;
	kmem_zone_free(xfs_bmap_free_item_zone, free);
}

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	xfs_buf_t		*abp;		/* buffer for ablock */
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_rec_t		*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
	int			error;		/* error return value */
	xfs_extnum_t		i, cnt;		/* extent record index */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_bmbt_key_t		*kp;		/* root block key pointer */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* number of file extents */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	block->bb_level = cpu_to_be16(1);
	block->bb_numrecs = cpu_to_be16(1);
	block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
	block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);

	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	mp = ip->i_mount;
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.firstblock = *firstblock;
	cur->bc_private.b.flist = flist;
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	args.tp = tp;
	args.mp = mp;
	args.firstblock = *firstblock;
	if (*firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (flist->xbf_low) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = *firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = *firstblock;
	}
	args.minlen = args.maxlen = args.prod = 1;
	args.total = args.minleft = args.alignment = args.mod = args.isfl =
		args.minalignslop = 0;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		xfs_iroot_realloc(ip, -1, whichfork);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(*firstblock == NULLFSBLOCK ||
	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
	       (flist->xbf_low &&
		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.
	 */
	ablock = XFS_BUF_TO_BLOCK(abp);
	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	ablock->bb_level = 0;
	ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
	ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (cnt = i = 0; i < nextents; i++) {
		ep = xfs_iext_get_ext(ifp, i);
		if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
			arp->l0 = cpu_to_be64(ep->l0);
			arp->l1 = cpu_to_be64(ep->l1);
			arp++; cnt++;
		}
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);
	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}
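/*
 * The result of the conversion above is the minimal bmap btree: a root
 * block embedded in the inode fork holding a single key/pointer pair,
 * and one freshly allocated leaf block carrying every real
 * (non-delayed) extent record:
 *
 *	inode fork root (level 1, numrecs 1)
 *		|
 *	leaf block (level 0, cnt records)
 */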
/*
 * Calculate the default attribute fork offset for newly created inodes.
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint			offset;

	if (mp->m_sb.sb_inodesize == 256) {
		offset = XFS_LITINO(mp) -
				XFS_BMDR_SPACE_CALC(MINABTPTRS);
	} else {
		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
	}

	ASSERT(offset < XFS_LITINO(mp));
	return offset;
}
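/*
 * In other words: the smallest (256 byte) inodes place the attr fork as
 * late as possible, leaving just enough literal area past the offset
 * for a minimal attr btree root, while larger inodes reserve a fixed
 * six-pointer root's worth of space from the front.  The exact byte
 * values depend on XFS_LITINO() for the mount, so none are given here.
 */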
/*
 * Helper routine to reset inode di_forkoff field when switching
 * attribute fork from local to extent format - we reset it where
 * possible to make space available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
	    ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_d.di_forkoff) {
			ip->i_d.di_forkoff = dfl_forkoff;
			ip->i_df.if_ext_max =
				XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
			ip->i_afp->if_ext_max =
				XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
		}
	}
}

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork)	/* data or attr fork */
{
	int		error;		/* error return value */
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
		 whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	flags = 0;
	error = 0;
	if (ifp->if_bytes) {
		xfs_alloc_arg_t	args;	/* allocation arguments */
		xfs_buf_t	*bp;	/* buffer for extent block */
		xfs_bmbt_rec_host_t *ep;/* extent record pointer */

		args.tp = tp;
		args.mp = ip->i_mount;
		args.firstblock = *firstblock;
		ASSERT((ifp->if_flags &
			(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
		       XFS_IFINLINE);
		/*
		 * Allocate a block.  We know we need only one, since the
		 * file currently fits in an inode.
		 */
		if (*firstblock == NULLFSBLOCK) {
			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
			args.type = XFS_ALLOCTYPE_START_BNO;
		} else {
			args.fsbno = *firstblock;
			args.type = XFS_ALLOCTYPE_NEAR_BNO;
		}
		args.total = total;
		args.mod = args.minleft = args.alignment = args.wasdel =
			args.isfl = args.minalignslop = 0;
		args.minlen = args.maxlen = args.prod = 1;
		if ((error = xfs_alloc_vextent(&args)))
			goto done;
		/*
		 * Can't fail, the space was reserved.
		 */
		ASSERT(args.fsbno != NULLFSBLOCK);
		ASSERT(args.len == 1);
		*firstblock = args.fsbno;
		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
		memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
			ifp->if_bytes);
		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
		xfs_iext_add(ifp, 0, 1);
		ep = xfs_iext_get_ext(ifp, 0);
		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
		trace_xfs_bmap_post_update(ip, 0,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
				_THIS_IP_);
		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
		ip->i_d.di_nblocks = 1;
		xfs_trans_mod_dquot_byino(tp, ip,
			XFS_TRANS_DQ_BCOUNT, 1L);
		flags |= xfs_ilog_fext(whichfork);
	} else {
		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
		xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
	}
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	flags |= XFS_ILOG_CORE;
done:
	*logflagsp = flags;
	return error;
}

/*
 * Search the extent records for the entry containing block bno.
 * If bno lies in a hole, point to the next entry.  If bno lies
 * past eof, *eofp will be set, and *prevp will contain the last
 * entry (null if none).  Else, *lastxp will be set to the index
 * of the found entry; *gotp will contain the entry.
 */
STATIC xfs_bmbt_rec_host_t *		/* pointer to found extent entry */
xfs_bmap_search_multi_extents(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number searched for */
	int		*eofp,		/* out: end of file found */
	xfs_extnum_t	*lastxp,	/* out: last extent index */
	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
{
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	xfs_extnum_t	lastx;		/* last extent index */

	/*
	 * Initialize the extent entry structure to catch access to
	 * uninitialized br_startblock field.
	 */
	gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
	gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
	gotp->br_state = XFS_EXT_INVALID;
#if XFS_BIG_BLKNOS
	gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
#else
	gotp->br_startblock = 0xffffa5a5;
#endif
	prevp->br_startoff = NULLFILEOFF;

	ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
	if (lastx > 0) {
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
	}
	if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
		xfs_bmbt_get_all(ep, gotp);
		*eofp = 0;
	} else {
		if (lastx > 0) {
			*gotp = *prevp;
		}
		*eofp = 1;
		ep = NULL;
	}
	*lastxp = lastx;
	return ep;
}
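/*
 * Example of the search semantics above: with extents covering file
 * blocks [0,5) and [8,12), bno = 6 falls in the hole, so the routine
 * returns the entry for [8,12) in *gotp with [0,5) in *prevp; bno = 20
 * is past eof, so *eofp is set and *gotp/*prevp both describe [8,12).
 */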
/*
 * Search the extents list for the inode, for the extent containing bno.
 * If bno lies in a hole, point to the next entry.  If bno lies past eof,
 * *eofp will be set, and *prevp will contain the last entry (null if none).
 * Else, *lastxp will be set to the index of the found
 * entry; *gotp will contain the entry.
 */
STATIC xfs_bmbt_rec_host_t *		/* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fileoff_t	bno,		/* block number searched for */
	int		fork,		/* data or attr fork */
	int		*eofp,		/* out: end of file found */
	xfs_extnum_t	*lastxp,	/* out: last extent index */
	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */

	XFS_STATS_INC(xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x\n",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval +
				XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}
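/*
 * Worked example with illustrative (not real) geometry: suppose
 * m_bmap_dmxr[0] = m_bmap_dmxr[1] = 10 and XFS_BM_MAXLEVELS() = 3.
 * For len = 100, level 0 contributes ceil(100/10) = 10 leaf blocks
 * (rval = 10); level 1 contributes ceil(10/10) = 1 (rval = 11), and
 * since len has reached 1 the early return adds one more block for the
 * remaining level, giving a worst case of 12 indirect blocks.
 */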
/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			committed;	/* xaction was committed */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
		goto error0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
		return error;
	}
	if (XFS_IFORK_Q(ip))
		goto error1;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = XFS_ERROR(EINVAL);
		goto error1;
	}
	ip->i_df.if_ext_max =
		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_ext_max =
		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto error2;
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		__int64_t sbfields = 0;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			sbfields |= XFS_SB_VERSIONNUM;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
		}
		if (sbfields) {
			spin_unlock(&mp->m_sb_lock);
			xfs_mod_sb(tp, sbfields);
		} else
			spin_unlock(&mp->m_sb_lock);
	}
	if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
		goto error2;
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
	return error;
error2:
	xfs_bmap_cancel(&flist);
error1:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
error0:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
	return error;
}
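/*
 * Note the superblock upgrade path above: the first attr fork ever
 * added to the filesystem turns on the ATTR feature bit, and an
 * ATTR2-style fork offset additionally sets the ATTR2 bit in
 * sb_features2; both updates are made under m_sb_lock and then logged
 * via xfs_mod_sb().
 */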
/*
 * Add the extent to the list of extents to be free at transaction end.
 * The list is maintained sorted (by block number).
 */
/* ARGSUSED */
void
xfs_bmap_add_free(
	xfs_fsblock_t		bno,		/* fs block number of extent */
	xfs_filblks_t		len,		/* length of extent */
	xfs_bmap_free_t		*flist,		/* list of extents */
	xfs_mount_t		*mp)		/* mount point structure */
{
	xfs_bmap_free_item_t	*cur;		/* current (next) element */
	xfs_bmap_free_item_t	*new;		/* new element */
	xfs_bmap_free_item_t	*prev;		/* previous element */
#ifdef DEBUG
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);
	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xbfi_startblock = bno;
	new->xbfi_blockcount = (xfs_extlen_t)len;
	for (prev = NULL, cur = flist->xbf_first;
	     cur != NULL;
	     prev = cur, cur = cur->xbfi_next) {
		if (cur->xbfi_startblock >= bno)
			break;
	}
	if (prev)
		prev->xbfi_next = new;
	else
		flist->xbf_first = new;
	new->xbfi_next = cur;
	flist->xbf_count++;
}

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	int		level;		/* btree level */
	uint		maxblocks;	/* max blocks at this level */
	uint		maxleafents;	/* max leaf entries possible */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a file, hence the maximum
	 * number of leaf entries, is controlled by the type of di_nextents
	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
	 * (a signed 16-bit number, xfs_aextnum_t).
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that
	 * the fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
	 * with ATTR2 and then mounted back with ATTR1, keeping the
	 * di_forkoff's fixed but probably at various positions.  Therefore,
	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
	 * of a minimum size available.
	 */
	if (whichfork == XFS_DATA_FORK) {
		maxleafents = MAXEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
	} else {
		maxleafents = MAXAEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
	}
	maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	}
	mp->m_bm_maxlevels[whichfork] = level;
}
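/*
 * Worked example with illustrative (not real) numbers: maxleafents =
 * 1000, minleafrecs = minnoderecs = 4, maxrootrecs = 3.  The leaf level
 * needs at most ceil(1000/4) = 250 blocks; the loop then shrinks
 * 250 -> 63 -> 16 -> 4 -> 1, adding a level each step, so
 * m_bm_maxlevels ends up as 5.
 */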
/*
 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
 * caller.  Frees all the extents that need freeing, which must be done
 * last due to locking considerations.  We never free any extents in
 * the first transaction.  This is to allow the caller to make the first
 * transaction a synchronous one so that the pointers to the data being
 * broken in this transaction will be permanent before the data is actually
 * freed.  This is necessary to prevent blocks from being reallocated
 * and written to before the free and reallocation are actually permanent.
 * We do not just make the first transaction synchronous here, because
 * there are more efficient ways to gain the same protection in some cases
 * (see the file truncation code).
 *
 * Return 1 in the committed parameter if the given transaction was
 * committed and a new one started, and 0 otherwise.
 */
/*ARGSUSED*/
int						/* error */
xfs_bmap_finish(
	xfs_trans_t		**tp,		/* transaction pointer addr */
	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
	int			*committed)	/* xact committed or not */
{
	xfs_efd_log_item_t	*efd;		/* extent free data */
	xfs_efi_log_item_t	*efi;		/* extent free intention */
	int			error;		/* error return value */
	xfs_bmap_free_item_t	*free;		/* free extent item */
	unsigned int		logres;		/* new log reservation */
	unsigned int		logcount;	/* new log count */
	xfs_mount_t		*mp;		/* filesystem mount structure */
	xfs_bmap_free_item_t	*next;		/* next item on free list */
	xfs_trans_t		*ntp;		/* new transaction pointer */

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
	if (flist->xbf_count == 0) {
		*committed = 0;
		return 0;
	}
	ntp = *tp;
	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
	for (free = flist->xbf_first; free; free = free->xbfi_next)
		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
			free->xbfi_blockcount);
	logres = ntp->t_log_res;
	logcount = ntp->t_log_count;
	ntp = xfs_trans_dup(*tp);
	error = xfs_trans_commit(*tp, 0);
	*tp = ntp;
	*committed = 1;
	/*
	 * We have a new transaction, so we should return committed=1,
	 * even though we're returning an error.
	 */
	if (error)
		return error;

	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(ntp->t_ticket);

	if ((error = xfs_trans_reserve(ntp, 0, logres, 0,
			XFS_TRANS_PERM_LOG_RES, logcount)))
		return error;
	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
	for (free = flist->xbf_first; free != NULL; free = next) {
		next = free->xbfi_next;
		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
				free->xbfi_blockcount))) {
			/*
			 * The bmap free list will be cleaned up at a
			 * higher level.  The EFI will be canceled when
			 * this transaction is aborted.
			 * Need to force shutdown here to make sure it
			 * happens, since this transaction may not be
			 * dirty yet.
			 */
			mp = ntp->t_mountp;
			if (!XFS_FORCED_SHUTDOWN(mp))
				xfs_force_shutdown(mp,
						   (error == EFSCORRUPTED) ?
						   SHUTDOWN_CORRUPT_INCORE :
						   SHUTDOWN_META_IO_ERROR);
			return error;
		}
		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
			free->xbfi_blockcount);
		xfs_bmap_del_free(flist, NULL, free);
	}
	return 0;
}

/*
 * Free up any items left in the list.
 */
void
xfs_bmap_cancel(
	xfs_bmap_free_t		*flist)	/* list of bmap_free_items */
{
	xfs_bmap_free_item_t	*free;	/* free list item */
	xfs_bmap_free_item_t	*next;

	if (flist->xbf_count == 0)
		return;
	ASSERT(flist->xbf_first != NULL);
	for (free = flist->xbf_first; free; free = next) {
		next = free->xbfi_next;
		xfs_bmap_del_free(flist, NULL, free);
	}
	ASSERT(flist->xbf_count == 0);
}
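/*
 * The intent/done pairing above is what makes the two-transaction free
 * safe across a crash: the first transaction logs an EFI covering every
 * queued extent, the transaction is duplicated and committed, and only
 * the follow-up transaction actually frees the extents and logs the
 * matching EFD items, so log recovery can finish or cancel the frees as
 * appropriate.
 */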
/*
 * Returns the file-relative block number of the first unused block(s)
 * in the file with at least "len" logically contiguous blocks free.
 * This is the lowest-address hole if the file has holes, else the first block
 * past the end of file.
 * Return 0 if the file is currently local (in-inode).
 */
int						/* error */
xfs_bmap_first_unused(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_extlen_t	len,			/* size of hole to find */
	xfs_fileoff_t	*first_unused,		/* unused block */
	int		whichfork)		/* data or attr fork */
{
	int		error;			/* error return value */
	int		idx;			/* extent record index */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_fileoff_t	lastaddr;		/* last block number seen */
	xfs_fileoff_t	lowest;			/* lowest useful block */
	xfs_fileoff_t	max;			/* starting useful block */
	xfs_fileoff_t	off;			/* offset for this block */
	xfs_extnum_t	nextents;		/* number of extent entries */

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	lowest = *first_unused;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
		off = xfs_bmbt_get_startoff(ep);
		/*
		 * See if the hole before this extent will work.
		 */
		if (off >= lowest + len && off - max >= len) {
			*first_unused = max;
			return 0;
		}
		lastaddr = off + xfs_bmbt_get_blockcount(ep);
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}
	*first_unused = max;
	return 0;
}
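/*
 * Example of the hole search above: with extents covering file blocks
 * [0,5) and [8,20) and *first_unused seeded with 0, a request for
 * len = 2 fits in the hole at block 5, so 5 is returned; a request for
 * len = 4 does not fit there, so the loop falls through and returns 20,
 * the first block past the last extent.
 */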
/*
 * Returns the file-relative block number of the last block + 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_fileoff_t	*last_block,		/* last block */
	int		whichfork)		/* data or attr fork */
{
	xfs_fileoff_t	bno;			/* input file offset */
	int		eof;			/* hit end of file */
	xfs_bmbt_rec_host_t *ep;		/* pointer to last extent */
	int		error;			/* error return value */
	xfs_bmbt_irec_t	got;			/* current extent value */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	lastx;			/* last extent used */
	xfs_bmbt_irec_t	prev;			/* previous extent value */

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
		return XFS_ERROR(EIO);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*last_block = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	bno = *last_block - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);
	if (eof || xfs_bmbt_get_startoff(ep) > bno) {
		if (prev.br_startoff == NULLFILEOFF)
			*last_block = 0;
		else
			*last_block = prev.br_startoff + prev.br_blockcount;
	}
	/*
	 * Otherwise *last_block is already the right answer.
	 */
	return 0;
}

/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_offset(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_fileoff_t	*last_block,		/* last block */
	int		whichfork)		/* data or attr fork */
{
	xfs_bmbt_rec_host_t *ep;		/* pointer to last extent */
	int		error;			/* error return value */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	nextents;		/* number of extent entries */

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
		return XFS_ERROR(EIO);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*last_block = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (!nextents) {
		*last_block = 0;
		return 0;
	}
	ep = xfs_iext_get_ext(ifp, nextents - 1);
	*last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep);
	return 0;
}

/*
 * Returns whether the selected fork of the inode has exactly one
 * block or not.  For the data fork we check this matches di_size,
 * implying the file's range is 0..bsize-1.
 */
int					/* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(
	xfs_inode_t	*ip,		/* incore inode */
	int		whichfork)	/* data or attr fork */
{
	xfs_bmbt_rec_host_t *ep;	/* ptr to fork's extent */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	int		rval;		/* return value */
	xfs_bmbt_irec_t	s;		/* internal version of extent */

#ifndef DEBUG
	if (whichfork == XFS_DATA_FORK) {
		return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
			(ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
			(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
	}
#endif	/* !DEBUG */
	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
		return 0;
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ep = xfs_iext_get_ext(ifp, 0);
	xfs_bmbt_get_all(ep, &s);
	rval = s.br_startoff == 0 && s.br_blockcount == 1;
	if (rval && whichfork == XFS_DATA_FORK)
		ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize);
	return rval;
}

STATIC int
xfs_bmap_sanity_check(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	int			level)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
	    be16_to_cpu(block->bb_level) != level ||
	    be16_to_cpu(block->bb_numrecs) == 0 ||
	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
		return 0;

	return 1;
}
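/*
 * Note the DEBUG split in xfs_bmap_one_block(): the non-DEBUG data-fork
 * fast path only compares the file size against the block size, while
 * the DEBUG build also walks the extent record, cross-checking that the
 * single extent really is file block 0 with length 1.
 */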
/*
 * Read in the extents to if_extents.
 * All inode fields are set up by caller, we just traverse the btree
 * and copy the records in.  If the file system cannot contain unwritten
 * extents, the records are checked for no "state" flags.
 */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF)))
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
					 XFS_ERRLEVEL_LOW, ip->i_mount);
			goto error0;
		}
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, 0),
			error0);
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF)))
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return XFS_ERROR(EFSCORRUPTED);
}

#ifdef DEBUG
/*
 * Add bmap trace insert entries for all the contents of the extent records.
 */
void
xfs_bmap_trace_exlist(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extnum_t	cnt,		/* count of entries in the list */
	int		whichfork,	/* data or attr fork */
	unsigned long	caller_ip)
{
	xfs_extnum_t	idx;		/* extent record index */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	int		state = 0;

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	for (idx = 0; idx < cnt; idx++)
		trace_xfs_extlist(ip, idx, whichfork, caller_ip);
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;	/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		if ((flags & XFS_BMAPI_WRITE) && !(flags & XFS_BMAPI_DELAY))
			ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
			       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}
#endif /* DEBUG */

/*
 * Map file blocks to filesystem blocks.
 * File range is given by the bno/len pair.
 * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
 * into a hole or past eof.
 * Only allocates blocks from a single allocation group,
 * to avoid locking problems.
 * The returned value in "firstblock" from the first call in a transaction
 * must be remembered and presented to subsequent calls in "firstblock".
 * An upper bound for the number of blocks to be allocated is supplied to
 * the first call in "total"; if no allocation group has that many free
 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
 */
int					/* error */
xfs_bmapi(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode */
	xfs_fileoff_t	bno,		/* starting file offs. mapped */
	xfs_filblks_t	len,		/* length to map in file */
	int		flags,		/* XFS_BMAPI_... */
	xfs_fsblock_t	*firstblock,	/* first allocated block
					   controls a.g. for allocs */
	xfs_extlen_t	total,		/* total blocks needed */
	xfs_bmbt_irec_t	*mval,		/* output: map values */
	int		*nmap,		/* i/o: mval size/count */
	xfs_bmap_free_t	*flist)		/* i/o: list extents to free */
{
	xfs_fsblock_t	abno;		/* allocated block number */
	xfs_extlen_t	alen;		/* allocated extent length */
	xfs_fileoff_t	aoff;		/* allocated file offset */
	xfs_bmalloca_t	bma = { 0 };	/* args for xfs_bmap_alloc */
	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
	xfs_fileoff_t	end;		/* end of mapped file region */
	int		eof;		/* we've hit the end of extents */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	int		error;		/* error return */
	xfs_bmbt_irec_t	got;		/* current file extent record */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_extlen_t	indlen;		/* indirect blocks length */
	xfs_extnum_t	lastx;		/* last useful extent number */
	int		logflags;	/* flags for transaction logging */
	xfs_extlen_t	minleft;	/* min blocks left after allocation */
	xfs_extlen_t	minlen;		/* min allocation size */
	xfs_mount_t	*mp;		/* xfs mount structure */
	int		n;		/* current extent index */
	int		nallocs;	/* number of extents alloc'd */
	xfs_extnum_t	nextents;	/* number of extents in file */
	xfs_fileoff_t	obno;		/* old block number (offset) */
	xfs_bmbt_irec_t	prev;		/* previous file extent record */
	int		tmp_logflags;	/* temp flags holder */
	int		whichfork;	/* data or attr fork */
	char		inhole;		/* current location is hole in file */
	char		wasdelay;	/* old extent was delayed */
	char		wr;		/* this is a write request */
	char		rt;		/* this is a realtime file */
#ifdef DEBUG
	xfs_fileoff_t	orig_bno;	/* original block number value */
	int		orig_flags;	/* original flags arg value */
	xfs_filblks_t	orig_len;	/* original value of len arg */
	xfs_bmbt_irec_t	*orig_mval;	/* original value of mval */
	int		orig_nmap;	/* original value of *nmap */

	orig_bno = bno;
	orig_len = len;
	orig_flags = flags;
	orig_mval = mval;
	orig_nmap = *nmap;
#endif
	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE));
	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;
	mp = ip->i_mount;
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi", XFS_ERRLEVEL_LOW, mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);
	rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	if ((wr = (flags & XFS_BMAPI_WRITE)) != 0)
		XFS_STATS_INC(xs_blk_mapw);
	else
		XFS_STATS_INC(xs_blk_mapr);
	/*
	 * IGSTATE flag is used to combine extents which
	 * differ only due to the state of the extents.
	 * This technique is used from xfs_getbmap()
	 * when the caller does not wish to see the
	 * separation (which is the default).
	 *
	 * This technique is also used when writing a
	 * buffer which has been partially written,
	 * (usually by being flushed during a chunkread),
	 * to ensure one write takes place.  This also
	 * prevents a change in the xfs inode extents at
	 * this time, intentionally.  This change occurs
	 * on completion of the write operation, in
	 * xfs_strat_comp(), where the xfs_bmapi() call
	 * is transactioned, and the extents combined.
	 */
	if ((flags & XFS_BMAPI_IGSTATE) && wr)	/* if writing unwritten space */
		wr = 0;				/* no allocations are allowed */
	ASSERT(wr || !(flags & XFS_BMAPI_DELAY));
	logflags = 0;
	nallocs = 0;
	cur = NULL;
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		ASSERT(wr && tp);
		if ((error = xfs_bmap_local_to_extents(tp, ip,
				firstblock, total, &logflags, whichfork)))
			goto error0;
	}
	if (wr && *firstblock == NULLFSBLOCK) {
		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
			minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
		else
			minleft = 1;
	} else
		minleft = 0;
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		goto error0;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	n = 0;
	end = bno + len;
	obno = bno;
	bma.ip = NULL;

	while (bno < end && n < *nmap) {
		/*
		 * Reading past eof, act as though there's a hole
		 * up to end.
		 */
		if (eof && !wr)
			got.br_startoff = end;
		inhole = eof || got.br_startoff > bno;
		wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) &&
			isnullstartblock(got.br_startblock);
		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (wr && (inhole || wasdelay)) {
			/*
			 * For the wasdelay case, we could also just
			 * allocate the stuff asked for in this bmap call
			 * but that wouldn't be as good.
			 */
			if (wasdelay) {
				alen = (xfs_extlen_t)got.br_blockcount;
				aoff = got.br_startoff;
				if (lastx != NULLEXTNUM && lastx) {
					ep = xfs_iext_get_ext(ifp, lastx - 1);
					xfs_bmbt_get_all(ep, &prev);
				}
			} else {
				alen = (xfs_extlen_t)
					XFS_FILBLKS_MIN(len, MAXEXTLEN);
				if (!eof)
					alen = (xfs_extlen_t)
						XFS_FILBLKS_MIN(alen,
							got.br_startoff - bno);
				aoff = bno;
			}
			minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1;
			if (flags & XFS_BMAPI_DELAY) {
				xfs_extlen_t	extsz;

				/* Figure out the extent size, adjust alen */
				extsz = xfs_get_extsz_hint(ip);
				if (extsz) {
					error = xfs_bmap_extsize_align(mp,
							&got, &prev, extsz,
							rt, eof,
							flags&XFS_BMAPI_DELAY,
							flags&XFS_BMAPI_CONVERT,
							&aoff, &alen);
					ASSERT(!error);
				}

				if (rt)
					extsz = alen / mp->m_sb.sb_rextsize;

				/*
				 * Make a transaction-less quota reservation for
				 * delayed allocation blocks.  This number gets
				 * adjusted later.  We return if we haven't
				 * allocated blocks already inside this loop.
				 */
				error = xfs_trans_reserve_quota_nblks(
						NULL, ip, (long)alen, 0,
						rt ? XFS_QMOPT_RES_RTBLKS :
						     XFS_QMOPT_RES_REGBLKS);
				if (error) {
					if (n == 0) {
						*nmap = 0;
						ASSERT(cur == NULL);
						return error;
					}
					break;
				}

				/*
				 * Split changing sb for alen and indlen since
				 * they could be coming from different places.
				 */
				indlen = (xfs_extlen_t)
					xfs_bmap_worst_indlen(ip, alen);
				ASSERT(indlen > 0);

				if (rt) {
					error = xfs_mod_incore_sb(mp,
							XFS_SBS_FREXTENTS,
							-((int64_t)extsz),
							(flags &
							 XFS_BMAPI_RSVBLOCKS));
				} else {
					error = xfs_icsb_modify_counters(mp,
							XFS_SBS_FDBLOCKS,
							-((int64_t)alen),
							(flags &
							 XFS_BMAPI_RSVBLOCKS));
				}
				if (!error) {
					error = xfs_icsb_modify_counters(mp,
							XFS_SBS_FDBLOCKS,
							-((int64_t)indlen),
							(flags &
							 XFS_BMAPI_RSVBLOCKS));
					if (error && rt)
						xfs_mod_incore_sb(mp,
							XFS_SBS_FREXTENTS,
							(int64_t)extsz,
							(flags &
							 XFS_BMAPI_RSVBLOCKS));
					else if (error)
						xfs_icsb_modify_counters(mp,
							XFS_SBS_FDBLOCKS,
							(int64_t)alen,
							(flags &
							 XFS_BMAPI_RSVBLOCKS));
				}

				if (error) {
					if (XFS_IS_QUOTA_ON(mp))
						/* unreserve the blocks now */
						(void)
						xfs_trans_unreserve_quota_nblks(
							NULL, ip,
							(long)alen, 0, rt ?
							XFS_QMOPT_RES_RTBLKS :
							XFS_QMOPT_RES_REGBLKS);
					break;
				}
				ip->i_delayed_blks += alen;
				abno = nullstartblock(indlen);
			} else {
				/*
				 * If first time, allocate and fill in
				 * once-only bma fields.
				 */
				if (bma.ip == NULL) {
					bma.tp = tp;
					bma.ip = ip;
					bma.prevp = &prev;
					bma.gotp = &got;
					bma.total = total;
					bma.userdata = 0;
				}
				/* Indicate if this is the first user data
				 * in the file, or just any user data.
				 */
				if (!(flags & XFS_BMAPI_METADATA)) {
					bma.userdata = (aoff == 0) ?
						XFS_ALLOC_INITIAL_USER_DATA :
						XFS_ALLOC_USERDATA;
				}
				/*
				 * Fill in changeable bma fields.
				 */
				bma.eof = eof;
				bma.firstblock = *firstblock;
				bma.alen = alen;
				bma.off = aoff;
				bma.conv = !!(flags & XFS_BMAPI_CONVERT);
				bma.wasdel = wasdelay;
				bma.minlen = minlen;
				bma.low = flist->xbf_low;
				bma.minleft = minleft;
				/*
				 * Only want to do the alignment at the
				 * eof if it is userdata and allocation length
				 * is larger than a stripe unit.
				 */
				if (mp->m_dalign && alen >= mp->m_dalign &&
				    (!(flags & XFS_BMAPI_METADATA)) &&
				    (whichfork == XFS_DATA_FORK)) {
					if ((error = xfs_bmap_isaeof(ip, aoff,
							whichfork, &bma.aeof)))
						goto error0;
				} else
					bma.aeof = 0;
				/*
				 * Call allocator.
				 */
				if ((error = xfs_bmap_alloc(&bma)))
					goto error0;
				/*
				 * Copy out result fields.
				 */
				abno = bma.rval;
				if ((flist->xbf_low = bma.low))
					minleft = 0;
				alen = bma.alen;
				aoff = bma.off;
				ASSERT(*firstblock == NULLFSBLOCK ||
				       XFS_FSB_TO_AGNO(mp, *firstblock) ==
				       XFS_FSB_TO_AGNO(mp, bma.firstblock) ||
				       (flist->xbf_low &&
					XFS_FSB_TO_AGNO(mp, *firstblock) <
					XFS_FSB_TO_AGNO(mp, bma.firstblock)));
				*firstblock = bma.firstblock;
				if (cur)
					cur->bc_private.b.firstblock =
						*firstblock;
				if (abno == NULLFSBLOCK)
					break;
				if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
					cur = xfs_bmbt_init_cursor(mp, tp,
						ip, whichfork);
					cur->bc_private.b.firstblock =
						*firstblock;
					cur->bc_private.b.flist = flist;
				}
				/*
				 * Bump the number of extents we've allocated
				 * in this call.
				 */
				nallocs++;
			}
			if (cur)
				cur->bc_private.b.flags =
					wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0;
			got.br_startoff = aoff;
			got.br_startblock = abno;
			got.br_blockcount = alen;
			got.br_state = XFS_EXT_NORM;	/* assume normal */
			/*
			 * Determine state of extent, and the filesystem.
			 * A wasdelay extent has been initialized, so
			 * shouldn't be flagged as unwritten.
			 */
			if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
					got.br_state = XFS_EXT_UNWRITTEN;
			}
			error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
				firstblock, flist, &tmp_logflags,
				whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
			logflags |= tmp_logflags;
			if (error)
				goto error0;
			lastx = ifp->if_lastex;
			ep = xfs_iext_get_ext(ifp, lastx);
			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			xfs_bmbt_get_all(ep, &got);
			ASSERT(got.br_startoff <= aoff);
			ASSERT(got.br_startoff + got.br_blockcount >=
				aoff + alen);
#ifdef DEBUG
			if (flags & XFS_BMAPI_DELAY) {
				ASSERT(isnullstartblock(got.br_startblock));
				ASSERT(startblockval(got.br_startblock) > 0);
			}
			ASSERT(got.br_state == XFS_EXT_NORM ||
			       got.br_state == XFS_EXT_UNWRITTEN);
#endif
			/*
			 * Fall down into the found allocated space case.
			 */
		} else if (inhole) {
			/*
			 * Reading in a hole.
			 */
			mval->br_startoff = bno;
			mval->br_startblock = HOLESTARTBLOCK;
			mval->br_blockcount =
				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
			mval->br_state = XFS_EXT_NORM;
			bno += mval->br_blockcount;
			len -= mval->br_blockcount;
			mval++;
			n++;
			continue;
		}
		/*
		 * Then deal with the allocated space we found.
		 */
		ASSERT(ep != NULL);
		if (!(flags & XFS_BMAPI_ENTIRE) &&
		    (got.br_startoff + got.br_blockcount > obno)) {
			if (obno > bno)
				bno = obno;
			ASSERT((bno >= obno) || (n == 0));
			ASSERT(bno < end);
			mval->br_startoff = bno;
			if (isnullstartblock(got.br_startblock)) {
				ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
				mval->br_startblock = DELAYSTARTBLOCK;
			} else
				mval->br_startblock =
					got.br_startblock +
					(bno - got.br_startoff);
			/*
			 * Return the minimum of what we got and what we
			 * asked for, for the length.  We can use the len
			 * variable here because it is modified below
			 * and we could have been there before coming
			 * here if the first part of the allocation
			 * didn't overlap what was asked for.
			 */
			mval->br_blockcount =
				XFS_FILBLKS_MIN(end - bno,
					got.br_blockcount -
					(bno - got.br_startoff));
			mval->br_state = got.br_state;
			ASSERT(mval->br_blockcount <= len);
		} else {
			*mval = got;
			if (isnullstartblock(mval->br_startblock)) {
				ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
				mval->br_startblock = DELAYSTARTBLOCK;
			}
		}

		/*
		 * Check if writing previously allocated but
		 * unwritten extents.
		 */
		if (wr &&
		    ((mval->br_state == XFS_EXT_UNWRITTEN &&
		      ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
		     (mval->br_state == XFS_EXT_NORM &&
		      ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
		       (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
			/*
			 * Modify (by adding) the state flag, if writing.
			 */
			ASSERT(mval->br_blockcount <= len);
			if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
				cur = xfs_bmbt_init_cursor(mp,
					tp, ip, whichfork);
				cur->bc_private.b.firstblock =
					*firstblock;
				cur->bc_private.b.flist = flist;
			}
			mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
						? XFS_EXT_NORM
						: XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
				firstblock, flist, &tmp_logflags,
				whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
			logflags |= tmp_logflags;
			if (error)
				goto error0;
			lastx = ifp->if_lastex;
			ep = xfs_iext_get_ext(ifp, lastx);
			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			xfs_bmbt_get_all(ep, &got);
			/*
			 * We may have combined previously unwritten
			 * space with written space, so generate
			 * another request.
			 */
			if (mval->br_blockcount < len)
				continue;
		}

		ASSERT((flags & XFS_BMAPI_ENTIRE) ||
		       ((mval->br_startoff + mval->br_blockcount) <= end));
		ASSERT((flags & XFS_BMAPI_ENTIRE) ||
		       (mval->br_blockcount <= len) ||
		       (mval->br_startoff < obno));
		bno = mval->br_startoff + mval->br_blockcount;
		len = end - bno;
		if (n > 0 && mval->br_startoff == mval[-1].br_startoff) {
			ASSERT(mval->br_startblock == mval[-1].br_startblock);
			ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
			ASSERT(mval->br_state == mval[-1].br_state);
			mval[-1].br_blockcount = mval->br_blockcount;
			mval[-1].br_state = mval->br_state;
		} else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
			   mval[-1].br_startblock != DELAYSTARTBLOCK &&
			   mval[-1].br_startblock != HOLESTARTBLOCK &&
			   mval->br_startblock ==
			   mval[-1].br_startblock + mval[-1].br_blockcount &&
			   ((flags & XFS_BMAPI_IGSTATE) ||
				mval[-1].br_state == mval->br_state)) {
			ASSERT(mval->br_startoff ==
			       mval[-1].br_startoff + mval[-1].br_blockcount);
			mval[-1].br_blockcount += mval->br_blockcount;
		} else if (n > 0 &&
			   mval->br_startblock == DELAYSTARTBLOCK &&
			   mval[-1].br_startblock == DELAYSTARTBLOCK &&
			   mval->br_startoff ==
			   mval[-1].br_startoff + mval[-1].br_blockcount) {
			mval[-1].br_blockcount += mval->br_blockcount;
			mval[-1].br_state = mval->br_state;
		} else if (!((n == 0) &&
			     ((mval->br_startoff + mval->br_blockcount) <=
			      obno))) {
			mval++;
			n++;
		}
		/*
		 * If we're done, stop now.
		 * Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end || n >= *nmap || nallocs >= *nmap)
			break;
		/*
		 * Else go on to the next record.
		 */
		ep = xfs_iext_get_ext(ifp, ++lastx);
		prev = got;
		if (lastx >= nextents)
			eof = 1;
		else
			xfs_bmbt_get_all(ep, &got);
	}
	ifp->if_lastex = lastx;
	*nmap = n;
	/*
	 * Transform from btree to extents, give it cur.
	 */
	if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
		ASSERT(wr && cur);
		error = xfs_bmap_btree_to_extents(tp, ip, cur,
			&tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~xfs_ilog_fext(whichfork);
	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log whatever the flags say, even if error.  Otherwise we might miss
	 * detecting a case where the data is changed, there's an error,
	 * and it's not logged so we don't shutdown when we should.
	 */
	if (logflags) {
		ASSERT(tp && wr);
		xfs_trans_log_inode(tp, ip, logflags);
	}
	if (cur) {
		if (!error) {
			ASSERT(*firstblock == NULLFSBLOCK ||
			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
			       XFS_FSB_TO_AGNO(mp,
				       cur->bc_private.b.firstblock) ||
			       (flist->xbf_low &&
				XFS_FSB_TO_AGNO(mp, *firstblock) <
				XFS_FSB_TO_AGNO(mp,
					cur->bc_private.b.firstblock)));
			*firstblock = cur->bc_private.b.firstblock;
		}
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	if (!error)
		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
			orig_nmap, *nmap);
	return error;
}
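/*
 * Illustrative read-only use of xfs_bmapi() (a sketch, not a call site
 * from this file): to look up the mapping for one file block without
 * allocating, a caller might do
 *
 *	int		nmap = 1;
 *	xfs_bmbt_irec_t	imap;
 *
 *	error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0,
 *			  &imap, &nmap, NULL);
 *
 * where flags == 0 means no XFS_BMAPI_WRITE, so no blocks are added and
 * *nmap returns how many mappings were filled in.
 */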

/*
 * Unmap (remove) blocks from a file.
 * If nexts is nonzero then the number of extents to remove is limited to
 * that value.  *done is set once the whole block range has been unmapped;
 * if the extent limit stops us short, *done is left clear.
 */
int						/* error */
xfs_bunmapi(
	xfs_trans_t	*tp,		/* transaction pointer */
	struct xfs_inode *ip,		/* incore inode */
	xfs_fileoff_t	bno,		/* starting offset to unmap */
	xfs_filblks_t	len,		/* length to unmap in file */
	int		flags,		/* misc flags */
	xfs_extnum_t	nexts,		/* number of extents max */
	xfs_fsblock_t	*firstblock,	/* first allocated block
					   controls a.g. for allocs */
	xfs_bmap_free_t	*flist,		/* i/o: list extents to free */
	int		*done)		/* out: set when fully unmapped */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_irec_t		del;		/* extent being deleted */
	int			eof;		/* is deleting at eof */
	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
	int			error;		/* error return value */
	xfs_extnum_t		extno;		/* extent number in list */
	xfs_bmbt_irec_t		got;		/* current extent record */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	int			isrt;		/* freeing in rt area */
	xfs_extnum_t		lastx;		/* last extent index used */
	int			logflags;	/* transaction logging flags */
	xfs_extlen_t		mod;		/* rt extent offset */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* number of file extents */
	xfs_bmbt_irec_t		prev;		/* previous extent record */
	xfs_fileoff_t		start;		/* first file offset deleted */
	int			tmp_logflags;	/* partial logging flags */
	int			wasdel;		/* was a delayed alloc extent */
	int			whichfork;	/* data or attribute fork */
	int			rsvd;		/* OK to allocate reserved blocks */
	xfs_fsblock_t		sum;

	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);

	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (unlikely(
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
				 ip->i_mount);
		return XFS_ERROR(EFSCORRUPTED);
	}
	mp = ip->i_mount;
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);
	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
	ASSERT(len > 0);
	ASSERT(nexts >= 0);
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*done = 1;
		return 0;
	}
	XFS_STATS_INC(xs_blk_unmap);
	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	start = bno;
	bno = start + len - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);

	/*
	 * Check to see if the given block number is past the end of the
	 * file, back up to the last block if so...
	 */
	if (eof) {
		ep = xfs_iext_get_ext(ifp, --lastx);
		xfs_bmbt_get_all(ep, &got);
		bno = got.br_startoff + got.br_blockcount - 1;
	}
	logflags = 0;
	if (ifp->if_flags & XFS_IFBROOT) {
		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.flags = 0;
	} else
		cur = NULL;
	extno = 0;
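	/*
	 * Walk the extent list backwards from the end of the unmap range,
	 * removing (or, for realtime files, converting) one extent per
	 * iteration until we pass the start of the range, run out of
	 * extents, or hit the nexts limit.
	 */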
	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
	       (nexts == 0 || extno < nexts)) {
		/*
		 * Is the found extent after a hole in which bno lives?
		 * Just back up to the previous extent, if so.
		 */
		if (got.br_startoff > bno) {
			if (--lastx < 0)
				break;
			ep = xfs_iext_get_ext(ifp, lastx);
			xfs_bmbt_get_all(ep, &got);
		}
		/*
		 * Is the last block of this extent before the range
		 * we're supposed to delete?  If so, we're done.
		 */
		bno = XFS_FILEOFF_MIN(bno,
			got.br_startoff + got.br_blockcount - 1);
		if (bno < start)
			break;
		/*
		 * Then deal with the (possibly delayed) allocated space
		 * we found.
		 */
		ASSERT(ep != NULL);
		del = got;
		wasdel = isnullstartblock(del.br_startblock);
		if (got.br_startoff < start) {
			del.br_startoff = start;
			del.br_blockcount -= start - got.br_startoff;
			if (!wasdel)
				del.br_startblock += start - got.br_startoff;
		}
		if (del.br_startoff + del.br_blockcount > bno + 1)
			del.br_blockcount = bno + 1 - del.br_startoff;
		sum = del.br_startblock + del.br_blockcount;
		if (isrt &&
		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent not lined up at the end.
			 * The extent could have been split into written
			 * and unwritten pieces, or we could just be
			 * unmapping part of it.  But we can't really
			 * get rid of part of a realtime extent.
			 */
			if (del.br_state == XFS_EXT_UNWRITTEN ||
			    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * This piece is unwritten, or we're not
				 * using unwritten extents.  Skip over it.
				 */
				ASSERT(bno >= mod);
				bno -= mod > del.br_blockcount ?
					del.br_blockcount : mod;
				if (bno < got.br_startoff) {
					if (--lastx >= 0)
						xfs_bmbt_get_all(xfs_iext_get_ext(
							ifp, lastx), &got);
				}
				continue;
			}
			/*
			 * It's written, turn it unwritten.
			 * This is better than zeroing it.
			 */
			ASSERT(del.br_state == XFS_EXT_NORM);
			ASSERT(xfs_trans_get_block_res(tp) > 0);
			/*
			 * If this spans a realtime extent boundary,
			 * chop it back to the start of the one we end at.
			 */
			if (del.br_blockcount > mod) {
				del.br_startoff += del.br_blockcount - mod;
				del.br_startblock += del.br_blockcount - mod;
				del.br_blockcount = mod;
			}
			del.br_state = XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
				firstblock, flist, &logflags,
				XFS_DATA_FORK, 0);
			if (error)
				goto error0;
			goto nodelete;
		}
		if (isrt && (mod = do_mod(del.br_startblock,
					  mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent is lined up at the end but not
			 * at the front.  We'll get rid of full extents if
			 * we can.
			 */
			mod = mp->m_sb.sb_rextsize - mod;
			if (del.br_blockcount > mod) {
				del.br_blockcount -= mod;
				del.br_startoff += mod;
				del.br_startblock += mod;
			} else if ((del.br_startoff == start &&
				    (del.br_state == XFS_EXT_UNWRITTEN ||
				     xfs_trans_get_block_res(tp) == 0)) ||
				   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * Can't make it unwritten.  There isn't
				 * a full extent here so just skip it.
				 */
				ASSERT(bno >= del.br_blockcount);
				bno -= del.br_blockcount;
				if (bno < got.br_startoff) {
					if (--lastx >= 0)
						xfs_bmbt_get_all(--ep, &got);
				}
				continue;
			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
				/*
				 * This one is already unwritten.
				 * It must have a written left neighbor.
				 * Unwrite the killed part of that one and
				 * try again.
				 */
				ASSERT(lastx > 0);
				xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
						lastx - 1), &prev);
				ASSERT(prev.br_state == XFS_EXT_NORM);
				ASSERT(!isnullstartblock(prev.br_startblock));
				ASSERT(del.br_startblock ==
				       prev.br_startblock + prev.br_blockcount);
				if (prev.br_startoff < start) {
					mod = start - prev.br_startoff;
					prev.br_blockcount -= mod;
					prev.br_startblock += mod;
					prev.br_startoff = start;
				}
				prev.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent(ip, lastx - 1,
					&cur, &prev, firstblock, flist,
					&logflags, XFS_DATA_FORK, 0);
				if (error)
					goto error0;
				goto nodelete;
			} else {
				ASSERT(del.br_state == XFS_EXT_NORM);
				del.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent(ip, lastx, &cur,
					&del, firstblock, flist, &logflags,
					XFS_DATA_FORK, 0);
				if (error)
					goto error0;
				goto nodelete;
			}
		}
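		/*
		 * A delayed allocation never consumed real disk blocks, so
		 * unmapping it only has to give back the reservation:
		 * return the blocks to the in-core free-space counters and
		 * unreserve the quota instead of freeing anything on disk.
		 */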
		if (wasdel) {
			ASSERT(startblockval(del.br_startblock) > 0);
			/* Update realtime/data freespace, unreserve quota */
			if (isrt) {
				xfs_filblks_t rtexts;

				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
				do_div(rtexts, mp->m_sb.sb_rextsize);
				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
						(int64_t)rtexts, rsvd);
				(void)xfs_trans_reserve_quota_nblks(NULL,
					ip, -((long)del.br_blockcount), 0,
					XFS_QMOPT_RES_RTBLKS);
			} else {
				xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
						(int64_t)del.br_blockcount,
						rsvd);
				(void)xfs_trans_reserve_quota_nblks(NULL,
					ip, -((long)del.br_blockcount), 0,
					XFS_QMOPT_RES_REGBLKS);
			}
			ip->i_delayed_blks -= del.br_blockcount;
			if (cur)
				cur->bc_private.b.flags |=
					XFS_BTCUR_BPRV_WASDEL;
		} else if (cur)
			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
		/*
		 * If it's the case where the directory code is running
		 * with no block reservation, and the deleted block is in
		 * the middle of its extent, and the resulting insert
		 * of an extent would cause transformation to btree format,
		 * then reject it.  The calling code will then swap
		 * blocks around instead.
		 * We have to do this now, rather than waiting for the
		 * conversion to btree format, since the transaction
		 * will be dirty.
		 */
		if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		    XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
		    del.br_startoff > got.br_startoff &&
		    del.br_startoff + del.br_blockcount <
		    got.br_startoff + got.br_blockcount) {
			error = XFS_ERROR(ENOSPC);
			goto error0;
		}
		error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
				&tmp_logflags, whichfork, rsvd);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
		bno = del.br_startoff - 1;
nodelete:
		lastx = ifp->if_lastex;
		/*
		 * If not done go on to the next (previous) record.
		 * Reset ep in case the extents array was re-alloced.
		 */
		ep = xfs_iext_get_ext(ifp, lastx);
		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
			if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
			    xfs_bmbt_get_startoff(ep) > bno) {
				if (--lastx >= 0)
					ep = xfs_iext_get_ext(ifp, lastx);
			}
			if (lastx >= 0)
				xfs_bmbt_get_all(ep, &got);
			extno++;
		}
	}
	ifp->if_lastex = lastx;
	*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	/*
	 * Convert to a btree if necessary.
	 */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
			&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * Transform from btree to extents, give it cur.
	 */
	else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
		 XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
		ASSERT(cur != NULL);
		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
			whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * Transform from extents to local?
	 */
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~xfs_ilog_fext(whichfork);
	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log inode even in the error case, if the transaction
	 * is dirty we'll need to shut down the filesystem.
	 */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (cur) {
		if (!error) {
			*firstblock = cur->bc_private.b.firstblock;
			cur->bc_private.b.allocated = 0;
		}
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}
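
/*
 * Example, for illustration only (not an original call site): unmap "len"
 * file blocks starting at "bno", allowing at most two extents to go per
 * call; a caller would then complete or cancel the freed-extent list with
 * xfs_bmap_finish()/xfs_bmap_cancel() as usual.
 *
 *	int	done = 0;
 *
 *	xfs_bmap_init(&flist, &firstblock);
 *	error = xfs_bunmapi(tp, ip, bno, len, 0, 2,
 *			    &firstblock, &flist, &done);
 */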

/*
 * Returns 1 for success, 0 if we failed to map the extent.
 */
STATIC int
xfs_getbmapx_fix_eof_hole(
	xfs_inode_t		*ip,		/* xfs incore inode pointer */
	struct getbmapx		*out,		/* output structure */
	int			prealloced,	/* this is a file with
						 * preallocated data space */
	__int64_t		end,		/* last block requested */
	xfs_fsblock_t		startblock)
{
	__int64_t		fixlen;
	xfs_mount_t		*mp;		/* file system mount point */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_extnum_t		lastx;		/* last extent pointer */
	xfs_fileoff_t		fileblock;

	if (startblock == HOLESTARTBLOCK) {
		mp = ip->i_mount;
		out->bmv_block = -1;
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, ip->i_size));
		fixlen -= out->bmv_offset;
		if (prealloced && out->bmv_offset + out->bmv_length == end) {
			/* Came to hole at EOF.  Trim it. */
			if (fixlen <= 0)
				return 0;
			out->bmv_length = fixlen;
		}
	} else {
		if (startblock == DELAYSTARTBLOCK)
			out->bmv_block = -2;
		else
			out->bmv_block = xfs_fsb_to_db(ip, startblock);
		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
			out->bmv_oflags |= BMV_OF_LAST;
	}

	return 1;
}
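
/*
 * Note on the bmv_block values set above: -1 marks a hole and -2 marks a
 * delalloc extent that has no disk block yet; everything else is the
 * daddr obtained from xfs_fsb_to_db().
 */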

/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 */
int						/* error code */
xfs_getbmap(
	xfs_inode_t		*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	xfs_bmap_format_t	formatter,	/* format to user */
	void			*arg)		/* formatter arg */
{
	__int64_t		bmvend;		/* last block requested */
	int			error = 0;	/* return value */
	__int64_t		fixlen;		/* length for -1 case */
	int			i;		/* extent number */
	int			lock;		/* lock state */
	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
	xfs_mount_t		*mp;		/* file system mount point */
	int			nex;		/* # of user extents can do */
	int			nexleft;	/* # of user extents left */
	int			subnex;		/* # of bmapi's can do */
	int			nmap;		/* number of map entries */
	struct getbmapx		*out;		/* output structure */
	int			whichfork;	/* data or attr fork */
	int			prealloced;	/* this is a file with
						 * preallocated data space */
	int			iflags;		/* interface flags */
	int			bmapi_flags;	/* flags for xfs_bmapi */
	int			cur_ext = 0;

	mp = ip->i_mount;
	iflags = bmv->bmv_iflags;
	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;

	if (whichfork == XFS_ATTR_FORK) {
		if (XFS_IFORK_Q(ip)) {
			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
				return XFS_ERROR(EINVAL);
		} else if (unlikely(
			   ip->i_d.di_aformat != 0 &&
			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return XFS_ERROR(EFSCORRUPTED);
		}

		prealloced = 0;
		fixlen = 1LL << 32;
	} else {
		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
			return XFS_ERROR(EINVAL);

		if (xfs_get_extsz_hint(ip) ||
		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) {
			prealloced = 1;
			fixlen = XFS_MAXIOFFSET(mp);
		} else {
			prealloced = 0;
			fixlen = ip->i_size;
		}
	}

	if (bmv->bmv_length == -1) {
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
		bmv->bmv_length =
			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
	} else if (bmv->bmv_length == 0) {
		bmv->bmv_entries = 0;
		return 0;
	} else if (bmv->bmv_length < 0) {
		return XFS_ERROR(EINVAL);
	}

	nex = bmv->bmv_count - 1;
	if (nex <= 0)
		return XFS_ERROR(EINVAL);
	bmvend = bmv->bmv_offset + bmv->bmv_length;

	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
		return XFS_ERROR(ENOMEM);
	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
	if (!out)
		return XFS_ERROR(ENOMEM);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
		if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
			error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
			if (error)
				goto out_unlock_iolock;
		}
		/*
		 * Even after flushing the inode, there can still be delalloc
		 * blocks on the inode beyond EOF due to speculative
		 * preallocation.  These are not removed until the release
		 * function is called or the inode is inactivated.  Hence we
		 * cannot assert here that ip->i_delayed_blks == 0.
		 */
	}

	lock = xfs_ilock_map_shared(ip);

	/*
	 * Don't let nex be bigger than the number of extents
	 * we can have assuming alternating holes and real extents.
	 */
	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;

	bmapi_flags = xfs_bmapi_aflag(whichfork);
	if (!(iflags & BMV_IF_PREALLOC))
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	/*
	 * Allocate enough space to handle "subnex" maps at a time.
	 */
	error = ENOMEM;
	subnex = 16;
	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
	if (!map)
		goto out_unlock_ilock;

	bmv->bmv_entries = 0;

	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
		error = 0;
		goto out_free_map;
	}

	nexleft = nex;
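	/*
	 * Map the range in chunks: ask xfs_bmapi() for up to subnex extents
	 * at a time, translate them into getbmapx records, and advance
	 * bmv_offset/bmv_length until the range or the caller's slots are
	 * exhausted.
	 */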
	do {
		nmap = (nexleft > subnex) ? subnex : nexleft;
		error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
				  XFS_BB_TO_FSB(mp, bmv->bmv_length),
				  bmapi_flags, NULL, 0, map, &nmap, NULL);
		if (error)
			goto out_free_map;
		ASSERT(nmap <= subnex);

		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
			out[cur_ext].bmv_oflags = 0;
			if (map[i].br_state == XFS_EXT_UNWRITTEN)
				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
			else if (map[i].br_startblock == DELAYSTARTBLOCK)
				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
			out[cur_ext].bmv_offset =
				XFS_FSB_TO_BB(mp, map[i].br_startoff);
			out[cur_ext].bmv_length =
				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
			out[cur_ext].bmv_unused1 = 0;
			out[cur_ext].bmv_unused2 = 0;
			ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
			      (map[i].br_startblock != DELAYSTARTBLOCK));

			if (map[i].br_startblock == HOLESTARTBLOCK &&
			    whichfork == XFS_ATTR_FORK) {
				/* came to the end of attribute fork */
				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
				goto out_free_map;
			}

			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
					prealloced, bmvend,
					map[i].br_startblock))
				goto out_free_map;

			bmv->bmv_offset =
				out[cur_ext].bmv_offset +
				out[cur_ext].bmv_length;
			bmv->bmv_length =
				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);

			/*
			 * In case we don't want to return the hole,
			 * don't increase cur_ext so that we can reuse
			 * it in the next loop.
			 */
			if ((iflags & BMV_IF_NO_HOLES) &&
			    map[i].br_startblock == HOLESTARTBLOCK) {
				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
				continue;
			}

			nexleft--;
			bmv->bmv_entries++;
			cur_ext++;
		}
	} while (nmap && nexleft && bmv->bmv_length);

 out_free_map:
	kmem_free(map);
 out_unlock_ilock:
	xfs_iunlock_map_shared(ip, lock);
 out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	for (i = 0; i < cur_ext; i++) {
		int full = 0;	/* user array is full */

		/* format results & advance arg */
		error = formatter(&arg, &out[i], &full);
		if (error || full)
			break;
	}

	kmem_free(out);
	return error;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file.  When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 */
STATIC int				/* error */
xfs_bmap_isaeof(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fileoff_t	off,		/* file offset in fsblocks */
	int		whichfork,	/* data or attribute fork */
	char		*aeof)		/* return value */
{
	int		error;		/* error return value */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t *lastrec;	/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_bmbt_irec_t	s;		/* expanded extent record */

	ASSERT(whichfork == XFS_DATA_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(NULL, ip, whichfork)))
		return error;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*aeof = 1;
		return 0;
	}
	/*
	 * Go to the last extent.
	 */
	lastrec = xfs_iext_get_ext(ifp, nextents - 1);
	xfs_bmbt_get_all(lastrec, &s);
	/*
	 * Check we are allocating in the last extent (for delayed allocations)
	 * or past the last extent for non-delayed allocations.
	 */
	*aeof = (off >= s.br_startoff &&
		 off < s.br_startoff + s.br_blockcount &&
		 isnullstartblock(s.br_startblock)) ||
		off >= s.br_startoff + s.br_blockcount;
	return 0;
}

/*
 * Check if the endoff is outside the last extent.  If so the caller will
 * grow the allocation to a stripe unit boundary.
 */
int					/* error */
xfs_bmap_eof(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fileoff_t	endoff,		/* file offset in fsblocks */
	int		whichfork,	/* data or attribute fork */
	int		*eof)		/* result value */
{
	xfs_fsblock_t	blockcount;	/* extent block count */
	int		error;		/* error return value */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t *lastrec;	/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_fileoff_t	startoff;	/* extent starting file offset */

	ASSERT(whichfork == XFS_DATA_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(NULL, ip, whichfork)))
		return error;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*eof = 1;
		return 0;
	}
	/*
	 * Go to the last extent.
	 */
	lastrec = xfs_iext_get_ext(ifp, nextents - 1);
	startoff = xfs_bmbt_get_startoff(lastrec);
	blockcount = xfs_bmbt_get_blockcount(lastrec);
	*eof = endoff >= startoff + blockcount;
	return 0;
}

#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item_desc *lidp;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
		struct xfs_buf_log_item	*bip;

		bip = (struct xfs_buf_log_item *)lidp->lid_item;
		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block,
					j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i = 0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		return;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (bp)
			bp_release = 0;
		else
			bp_release = 1;
		if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
				XFS_BMAP_BTREE_REF)))
			goto error_norelse;
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */
		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the
	 * right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */
		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (bp)
			bp_release = 0;
		else
			bp_release = 1;
		if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
				XFS_BMAP_BTREE_REF)))
			goto error_norelse;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	if (bp_release) {
		bp_release = 0;
		xfs_trans_brelse(NULL, bp);
	}
	return;

error0:
	cmn_err(CE_WARN, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}
#endif

/*
 * Count fsblocks of the given fork.
 */
int						/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS) {
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}

	return 0;
}
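
/*
 * Example, for illustration only (not an original call site): count the
 * blocks backing an inode's data fork.
 *
 *	int	count = 0;
 *
 *	error = xfs_bmap_count_blocks(tp, ip, XFS_DATA_FORK, &count);
 */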

/*
 * Recursively walks each level of a btree
 * to count total fsblocks in use.
 */
STATIC int					/* error */
xfs_bmap_count_tree(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fsblock_t	blockno,	/* file system block number */
	int		levelin,	/* level in btree */
	int		*count)		/* Count of blocks */
{
	int			error;
	xfs_buf_t		*bp, *nbp;
	int			level = levelin;
	__be64			*pp;
	xfs_fsblock_t		bno = blockno;
	xfs_fsblock_t		nextbno;
	struct xfs_btree_block	*block, *nextblock;
	int			numrecs;

	if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
			XFS_BMAP_BTREE_REF)))
		return error;
	*count += 1;
	block = XFS_BUF_TO_BLOCK(bp);

	if (--level) {
		/* Not at node above leaves, count this level of nodes */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		while (nextbno != NULLFSBLOCK) {
			if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
					0, &nbp, XFS_BMAP_BTREE_REF)))
				return error;
			*count += 1;
			nextblock = XFS_BUF_TO_BLOCK(nbp);
			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			xfs_trans_brelse(tp, nbp);
		}

		/* Dive to the next level */
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (unlikely((error =
			xfs_bmap_count_tree(mp, tp, ifp, bno, level,
				count)) < 0)) {
			xfs_trans_brelse(tp, bp);
			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
					 XFS_ERRLEVEL_LOW, mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		xfs_trans_brelse(tp, bp);
	} else {
		/* count all level 1 nodes and their leaves */
		for (;;) {
			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			numrecs = be16_to_cpu(block->bb_numrecs);
			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			xfs_trans_brelse(tp, bp);
			if (nextbno == NULLFSBLOCK)
				break;
			bno = nextbno;
			if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
					XFS_BMAP_BTREE_REF)))
				return error;
			*count += 1;
			block = XFS_BUF_TO_BLOCK(bp);
		}
	}
	return 0;
}

/*
 * Count leaf blocks given a range of extent records.
 */
STATIC void
xfs_bmap_count_leaves(
	xfs_ifork_t		*ifp,
	xfs_extnum_t		idx,
	int			numrecs,
	int			*count)
{
	int		b;

	for (b = 0; b < numrecs; b++) {
		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);

		*count += xfs_bmbt_get_blockcount(frp);
	}
}

/*
 * Count leaf blocks given a range of extent records originally
 * in btree format.
 */
STATIC void
xfs_bmap_disk_count_leaves(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*block,
	int			numrecs,
	int			*count)
{
	int		b;
	xfs_bmbt_rec_t	*frp;

	for (b = 1; b <= numrecs; b++) {
		frp = XFS_BMBT_REC_ADDR(mp, block, b);
		*count += xfs_bmbt_disk_get_blockcount(frp);
	}
}

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  Walks a block at a time so will be slow, but is only executed
 * in rare error cases so the overhead is not critical.  This will always
 * punch out both the start and end blocks, even if the ranges only
 * partially overlap them, so it is up to the caller to ensure that partial
 * blocks are not passed in.
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		length)
{
	xfs_fileoff_t		remaining = length;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	do {
		int		done;
		xfs_bmbt_irec_t	imap;
		int		nimaps = 1;
		xfs_fsblock_t	firstblock;
		xfs_bmap_free_t flist;

		/*
		 * Map the range first and check that it is a delalloc extent
		 * before trying to unmap the range.  Otherwise we will be
		 * trying to remove a real extent (which requires a
		 * transaction) or a hole, which is probably a bad idea...
		 */
		error = xfs_bmapi(NULL, ip, start_fsb, 1,
				XFS_BMAPI_ENTIRE, NULL, 0, &imap,
				&nimaps, NULL);
		if (error) {
			/* something screwed, just bail */
			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
			"Failed delalloc mapping lookup ino %lld fsb %lld.",
						ip->i_ino, start_fsb);
			}
			break;
		}
		if (!nimaps) {
			/* nothing there */
			goto next_block;
		}
		if (imap.br_startblock != DELAYSTARTBLOCK) {
			/* been converted, ignore */
			goto next_block;
		}
		WARN_ON(imap.br_blockcount == 0);

		/*
		 * Note: while we initialise the firstblock/flist pair, they
		 * should never be used because blocks should never be
		 * allocated or freed for a delalloc extent and hence we
		 * don't need to cancel or finish them after the
		 * xfs_bunmapi() call.
		 */
		xfs_bmap_init(&flist, &firstblock);
		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
					&flist, &done);
		if (error)
			break;

		ASSERT(!flist.xbf_count && !flist.xbf_first);
next_block:
		start_fsb++;
		remaining--;
	} while (remaining > 0);

	return error;
}