path: root/fs/nfs/write.c
blob: 7f40ea30554339718e36e36ae22550ccb158e2e9
/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;

struct nfs_write_data *nfs_commitdata_alloc(void)
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
	}
	return p;
}

void nfs_commit_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}

struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_wdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_writedata_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}

static void nfs_writedata_release(struct nfs_write_data *wdata)
{
	put_nfs_open_context(wdata->args.context);
	nfs_writedata_free(wdata);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

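/*
 * Map the VM's writeback hints onto NFS flush flags: reclaim-driven
 * writeback is sent as a high-priority stable write, while kupdate
 * and background writeback run at low priority.
 */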
static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate || wbc->for_background)
		return FLUSH_LOWPRI;
	return 0;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

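/*
 * Congestion thresholds, in pages (nfs_congestion_kb is in kilobytes).
 * The bdi is marked congested once the number of pages under writeback
 * exceeds the "on" threshold; the "off" threshold sits at 3/4 of that
 * value, so the congested state is cleared with a little hysteresis
 * rather than toggling around a single limit.
 */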
#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH) {
			set_bdi_congested(&nfss->backing_dev_info,
						BLK_RW_ASYNC);
		}
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}

static struct nfs_page *nfs_find_and_lock_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_set_page_tag_locked(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_set_page_tag_locked() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_find_and_lock_request(page);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = nfs_set_page_writeback(page);
	BUG_ON(ret != 0);
	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));

	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		ret = pgio->pg_error;
	}
out:
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	return nfs_page_async_flush(pgio, page);
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int error;

	error = radix_tree_preload(GFP_NOFS);
	if (error != 0)
		goto out;

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
	BUG_ON(error);
	if (!nfsi->npages) {
		igrab(inode);
		if (nfs_have_delegation(inode, FMODE_WRITE))
			nfsi->change_attr++;
	}
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
				NFS_PAGE_TAG_LOCKED);
	spin_unlock(&inode->i_lock);
	radix_tree_preload_end();
out:
	return error;
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON(!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		spin_unlock(&inode->i_lock);
		iput(inode);
	} else
		spin_unlock(&inode->i_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
	__mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&inode->i_lock);
	set_bit(PG_CLEAN, &(req)->wb_flags);
	radix_tree_tag_set(&nfsi->nfs_page_tree,
			req->wb_index,
			NFS_PAGE_TAG_COMMIT);
	nfsi->ncommit++;
	spin_unlock(&inode->i_lock);
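	/* Account the page as unstable/reclaimable so the VM and the
	 * per-bdi dirty accounting know that a COMMIT is still due. */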
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}

static int
nfs_clear_request_commit(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
		dec_zone_page_state(page, NR_UNSTABLE_NFS);
		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
		return 1;
	}
	return 0;
}

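/*
 * A WRITE reply is only final when the server reports NFS_FILE_SYNC;
 * any weaker commitment level means the data must still be committed
 * (or resent) before the request can be freed.
 */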
static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return data->verf.committed != NFS_FILE_SYNC;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
		nfs_mark_request_commit(req);
		return 1;
	}
	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
		nfs_mark_request_dirty(req);
		return 1;
	}
	return 0;
}
#else
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
}

static inline int
nfs_clear_request_commit(struct nfs_page *req)
{
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	return 0;
}
#endif

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
{
	return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int ret;

	if (!nfs_need_commit(nfsi))
		return 0;

	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
	if (ret > 0)
		nfsi->ncommit -= ret;
	if (nfs_need_commit(NFS_I(inode)))
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_set_page_tag_locked(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	if (nfs_clear_request_commit(req) &&
			radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
		NFS_I(inode)->ncommit--;

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page	*req;
	int error;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	error = nfs_inode_add_request(inode, req);
	if (error != 0) {
		nfs_release_request(req);
		req = ERR_PTR(error);
	}
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page	*req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	nfs_mark_request_dirty(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_clear_page_tag_locked(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page	*req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	return PageUptodate(page) &&
		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode	*inode = page->mapping->host;
	int		status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_DSYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);

	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static void nfs_writepage_release(struct nfs_page *req)
{
	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
		nfs_end_page_writeback(req->wb_page);
		nfs_inode_remove_request(req);
	} else
		nfs_end_page_writeback(req->wb_page);
	nfs_clear_page_tag_locked(req);
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = req->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = NFS_CLIENT(inode),
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = flags,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->path.dentry->d_inode;
	data->cred = msg.rpc_cred;

	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.stable  = NFS_UNSTABLE;
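	/* For a stable flush: if other requests are already queued for a
	 * COMMIT, downgrade to NFS_DATA_SYNC since a commit will follow
	 * anyway; otherwise ask for NFS_FILE_SYNC outright. */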
	if (how & FLUSH_STABLE) {
		data->args.stable = NFS_DATA_SYNC;
		if (!nfs_need_commit(NFS_I(inode)))
			data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct.  */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/* If an nfs_flush_* function fails, it should remove the requests from
 * @head and call this on each one, which marks them to be retried via
 * the normal writeback path.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	nfs_mark_request_dirty(req);
	nfs_end_page_writeback(req->wb_page);
	nfs_clear_page_tag_locked(req);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		int ret2;

		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < wsize)
			wsize = nbytes;
		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
				   wsize, offset, how);
		if (ret == 0)
			ret = ret2;
		offset += wsize;
		nbytes -= wsize;
	} while (nbytes != 0);

	return ret;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del(&data->pages);
		nfs_writedata_release(data);
	}
	nfs_redirty_request(req);
	return -ENOMEM;
}

/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_write_data	*data;

	data = nfs_writedata_alloc(npages);
	if (!data)
		goto out_bad;

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	/* Set up the argument struct */
	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
	return -ENOMEM;
}

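/*
 * When the server's wsize is smaller than a page, a single dirty page
 * must be split across several WRITE calls (nfs_flush_multi); otherwise
 * whole pages can be coalesced into one request (nfs_flush_one).
 */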
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	size_t wsize = NFS_SERVER(inode)->wsize;

	if (wsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
	else
		nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
}

/*
 * Handle a write reply that flushed part of a page.
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;

	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
		task->tk_pid,
		data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
		(long long)
		  NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
		data->req->wb_bytes, (long long)req_offset(data->req));

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_partial(void *calldata)
{
	struct nfs_write_data	*data = calldata;
	struct nfs_page		*req = data->req;
	struct page		*page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0) {
		nfs_set_pageerror(page);
		nfs_context_set_write_error(req->wb_context, status);
		dprintk(", error = %d\n", status);
		goto out;
	}

	if (nfs_write_need_commit(data)) {
		struct inode *inode = page->mapping->host;

		spin_lock(&inode->i_lock);
		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
			/* Do nothing; we need to resend the writes */
		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			dprintk(" defer commit\n");
		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
			set_bit(PG_NEED_RESCHED, &req->wb_flags);
			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
			dprintk(" server reboot detected\n");
		}
		spin_unlock(&inode->i_lock);
	} else
		dprintk(" OK\n");

out:
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req);
	nfs_writedata_release(calldata);
}

#if defined(CONFIG_NFS_V4_1)
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;

	if (nfs4_setup_sequence(clp, &data->args.seq_args,
				&data->res.seq_res, 1, task))
		return;
	rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

static const struct rpc_call_ops nfs_write_partial_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writeback_release_partial,
};

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_full(void *calldata)
{
	struct nfs_write_data	*data = calldata;
	int status = data->task.tk_status;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;

		nfs_list_remove_request(req);

		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
			data->task.tk_pid,
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			nfs_set_pageerror(page);
			nfs_context_set_write_error(req->wb_context, status);
			dprintk(", error = %d\n", status);
			goto remove_request;
		}

		if (nfs_write_need_commit(data)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			nfs_mark_request_commit(req);
			nfs_end_page_writeback(page);
			dprintk(" marked for commit\n");
			goto next;
		}
		dprintk(" OK\n");
remove_request:
		nfs_end_page_writeback(page);
		nfs_inode_remove_request(req);
	next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_writedata_release(calldata);
}

static const struct rpc_call_ops nfs_write_full_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writeback_release_full,
};


/*
 * This function is called when the WRITE call is complete.
 */
int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs	*argp = &data->args;
	struct nfs_writeres	*resp = &data->res;
	struct nfs_server	*server = NFS_SERVER(data->inode);
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients.  A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return status;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long    complain;

		if (time_before(complain, jiffies)) {
			dprintk("NFS:       faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				server->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long    complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			nfs_restart_rpc(task, server->nfs_client);
			return -EAGAIN;
		}
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote zero bytes, expected %u.\n",
					argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	return 0;
}


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static void nfs_commitdata_release(void *data)
{
	struct nfs_write_data *wdata = data;

	put_nfs_open_context(wdata->args.context);
	nfs_commit_free(wdata);
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_commit_rpcsetup(struct list_head *head,
		struct nfs_write_data *data,
		int how)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->path.dentry->d_inode;
	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = first->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = &nfs_commit_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = flags,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode	  = inode;
	data->cred	  = msg.rpc_cred;

	data->args.fh     = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count  = 0;
	data->args.context = get_nfs_open_context(first->wb_context);
	data->res.count   = 0;
	data->res.fattr   = &data->fattr;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct.  */
	NFS_PROTO(inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data	*data;
	struct nfs_page         *req;

	data = nfs_commitdata_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	return nfs_commit_rpcsetup(head, data, how);
 out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
				BDI_RECLAIMABLE);
		nfs_clear_page_tag_locked(req);
	}
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
			task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
		return;
}

static void nfs_commit_release(void *calldata)
{
	struct nfs_write_data	*data = calldata;
	struct nfs_page		*req;
	int status = data->task.tk_status;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_request_commit(req);

		dprintk("NFS:       commit (%s/%lld %d@%lld)",
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

static int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int res;

	spin_lock(&inode->i_lock);
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&inode->i_lock);
	if (res) {
		int error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
	}
	return res;
}

static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	/* Don't commit yet if this is a non-blocking flush and there are
	 * lots of outstanding writes for this mapping.
	 */
	if (wbc->sync_mode == WB_SYNC_NONE &&
	    nfsi->ncommit <= (nfsi->npages >> 1))
		goto out_mark_dirty;

	if (wbc->nonblocking || wbc->for_background)
		flags = 0;
	ret = nfs_commit_inode(inode, flags);
	if (ret >= 0) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (ret < wbc->nr_to_write)
				wbc->nr_to_write -= ret;
			else
				wbc->nr_to_write = 0;
		}
		return 0;
	}
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static int nfs_commit_inode(struct inode *inode, int how)
{
	return 0;
}

static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
#endif

int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	return nfs_commit_unstable_pages(inode, wbc);
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return sync_inode(inode, &wbc);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		req = nfs_page_find_request(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req)) {
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret < 0)
			break;
	}
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	struct nfs_page *req;
	int need_commit;
	int ret;

	while (PagePrivate(page)) {
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
		}
		req = nfs_find_and_lock_request(page);
		if (!req)
			break;
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			goto out_error;
		}
		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
		nfs_clear_page_tag_locked(req);
		if (need_commit) {
			ret = nfs_commit_inode(inode, FLUSH_SYNC);
			if (ret < 0)
				goto out_error;
		}
	}
	return 0;
out_error:
	return ret;
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page)
{
	struct nfs_page *req;
	int ret;

	nfs_fscache_release_page(page, GFP_KERNEL);

	req = nfs_find_and_lock_request(page);
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = migrate_page(mapping, newpage, page);
	if (!req)
		goto out;
	if (ret)
		goto out_unlock;
	page_cache_get(newpage);
	spin_lock(&mapping->host->i_lock);
	req->wb_page = newpage;
	SetPagePrivate(newpage);
	set_page_private(newpage, (unsigned long)req);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	spin_unlock(&mapping->host->i_lock);
	page_cache_release(page);
out_unlock:
	nfs_clear_page_tag_locked(req);
out:
	return ret;
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}

disc open function */ tty->receive_room = 0; /* * Problem: What do we do if this blocks ? */ tty_wait_until_sent(tty, 0); if (tty->ldisc.num == ldisc) { tty_ldisc_put(ldisc); return 0; } o_ldisc = tty->ldisc; o_tty = tty->link; /* * Make sure we don't change while someone holds a * reference to the line discipline. The TTY_LDISC bit * prevents anyone taking a reference once it is clear. * We need the lock to avoid racing reference takers. */ spin_lock_irqsave(&tty_ldisc_lock, flags); if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { if(tty->ldisc.refcount) { /* Free the new ldisc we grabbed. Must drop the lock first. */ spin_unlock_irqrestore(&tty_ldisc_lock, flags); tty_ldisc_put(ldisc); /* * There are several reasons we may be busy, including * random momentary I/O traffic. We must therefore * retry. We could distinguish between blocking ops * and retries if we made tty_ldisc_wait() smarter. That * is up for discussion. */ if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) return -ERESTARTSYS; goto restart; } if(o_tty && o_tty->ldisc.refcount) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); tty_ldisc_put(ldisc); if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0) return -ERESTARTSYS; goto restart; } } /* if the TTY_LDISC bit is set, then we are racing against another ldisc change */ if (!test_bit(TTY_LDISC, &tty->flags)) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); tty_ldisc_put(ldisc); ld = tty_ldisc_ref_wait(tty); tty_ldisc_deref(ld); goto restart; } clear_bit(TTY_LDISC, &tty->flags); if (o_tty) clear_bit(TTY_LDISC, &o_tty->flags); spin_unlock_irqrestore(&tty_ldisc_lock, flags); /* * From this point on we know nobody has an ldisc * usage reference, nor can they obtain one until * we say so later on. */ work = cancel_delayed_work(&tty->buf.work); /* * Wait for ->hangup_work and ->buf.work handlers to terminate */ flush_scheduled_work(); /* Shutdown the current discipline. */ if (tty->ldisc.close) (tty->ldisc.close)(tty); /* Now set up the new line discipline. */ tty_ldisc_assign(tty, ld); tty_set_termios_ldisc(tty, ldisc); if (tty->ldisc.open) retval = (tty->ldisc.open)(tty); if (retval < 0) { tty_ldisc_put(ldisc); /* There is an outstanding reference here so this is safe */ tty_ldisc_assign(tty, tty_ldisc_get(o_ldisc.num)); tty_set_termios_ldisc(tty, tty->ldisc.num); if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) { tty_ldisc_put(o_ldisc.num); /* This driver is always present */ tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); tty_set_termios_ldisc(tty, N_TTY); if (tty->ldisc.open) { int r = tty->ldisc.open(tty); if (r < 0) panic("Couldn't open N_TTY ldisc for " "%s --- error %d.", tty_name(tty, buf), r); } } } /* At this point we hold a reference to the new ldisc and a a reference to the old ldisc. If we ended up flipping back to the existing ldisc we have two references to it */ if (tty->ldisc.num != o_ldisc.num && tty->driver->set_ldisc) tty->driver->set_ldisc(tty); tty_ldisc_put(o_ldisc.num); /* * Allow ldisc referencing to occur as soon as the driver * ldisc callback completes. */ tty_ldisc_enable(tty); if (o_tty) tty_ldisc_enable(o_tty); /* Restart it in case no characters kick it off. Safe if already running */ if (work) schedule_delayed_work(&tty->buf.work, 1); return retval; } /** * get_tty_driver - find device of a tty * @dev_t: device identifier * @index: returns the index of the tty * * This routine returns a tty driver structure, given a device number * and also passes back the index number. 
* * Locking: caller must hold tty_mutex */ static struct tty_driver *get_tty_driver(dev_t device, int *index) { struct tty_driver *p; list_for_each_entry(p, &tty_drivers, tty_drivers) { dev_t base = MKDEV(p->major, p->minor_start); if (device < base || device >= base + p->num) continue; *index = device - base; return p; } return NULL; } /** * tty_check_change - check for POSIX terminal changes * @tty: tty to check * * If we try to write to, or set the state of, a terminal and we're * not in the foreground, send a SIGTTOU. If the signal is blocked or * ignored, go ahead and perform the operation. (POSIX 7.2) * * Locking: none */ int tty_check_change(struct tty_struct * tty) { if (current->signal->tty != tty) return 0; if (tty->pgrp <= 0) { printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n"); return 0; } if (process_group(current) == tty->pgrp) return 0; if (is_ignored(SIGTTOU)) return 0; if (is_orphaned_pgrp(process_group(current))) return -EIO; (void) kill_pg(process_group(current), SIGTTOU, 1); return -ERESTARTSYS; } EXPORT_SYMBOL(tty_check_change); static ssize_t hung_up_tty_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { return 0; } static ssize_t hung_up_tty_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { return -EIO; } /* No kernel lock held - none needed ;) */ static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait) { return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM; } static int hung_up_tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } static const struct file_operations tty_fops = { .llseek = no_llseek, .read = tty_read, .write = tty_write, .poll = tty_poll, .ioctl = tty_ioctl, .open = tty_open, .release = tty_release, .fasync = tty_fasync, }; #ifdef CONFIG_UNIX98_PTYS static const struct file_operations ptmx_fops = { .llseek = no_llseek, .read = tty_read, .write = tty_write, .poll = tty_poll, .ioctl = tty_ioctl, .open = ptmx_open, .release = tty_release, .fasync = tty_fasync, }; #endif static const struct file_operations console_fops = { .llseek = no_llseek, .read = tty_read, .write = redirected_tty_write, .poll = tty_poll, .ioctl = tty_ioctl, .open = tty_open, .release = tty_release, .fasync = tty_fasync, }; static const struct file_operations hung_up_tty_fops = { .llseek = no_llseek, .read = hung_up_tty_read, .write = hung_up_tty_write, .poll = hung_up_tty_poll, .ioctl = hung_up_tty_ioctl, .release = tty_release, }; static DEFINE_SPINLOCK(redirect_lock); static struct file *redirect; /** * tty_wakeup - request more data * @tty: terminal * * Internal and external helper for wakeups of tty. This function * informs the line discipline if present that the driver is ready * to receive more output data. */ void tty_wakeup(struct tty_struct *tty) { struct tty_ldisc *ld; if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) { ld = tty_ldisc_ref(tty); if(ld) { if(ld->write_wakeup) ld->write_wakeup(tty); tty_ldisc_deref(ld); } } wake_up_interruptible(&tty->write_wait); } EXPORT_SYMBOL_GPL(tty_wakeup); /** * tty_ldisc_flush - flush line discipline queue * @tty: tty * * Flush the line discipline queue (if any) for this tty. If there * is no line discipline active this is a no-op. 
*/ void tty_ldisc_flush(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_ref(tty); if(ld) { if(ld->flush_buffer) ld->flush_buffer(tty); tty_ldisc_deref(ld); } } EXPORT_SYMBOL_GPL(tty_ldisc_flush); /** * do_tty_hangup - actual handler for hangup events * @data: tty device * * This can be called by the "eventd" kernel thread. That is process * synchronous but doesn't hold any locks, so we need to make sure we * have the appropriate locks for what we're doing. * * The hangup event clears any pending redirections onto the hung up * device. It ensures future writes will error and it does the needed * line discipline hangup and signal delivery. The tty object itself * remains intact. * * Locking: * BKL * redirect lock for undoing redirection * file list lock for manipulating list of ttys * tty_ldisc_lock from called functions * termios_sem resetting termios data * tasklist_lock to walk task list for hangup event * */ static void do_tty_hangup(void *data) { struct tty_struct *tty = (struct tty_struct *) data; struct file * cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; struct tty_ldisc *ld; int closecount = 0, n; if (!tty) return; /* inuse_filps is protected by the single kernel lock */ lock_kernel(); spin_lock(&redirect_lock); if (redirect && redirect->private_data == tty) { f = redirect; redirect = NULL; } spin_unlock(&redirect_lock); check_tty_count(tty, "do_tty_hangup"); file_list_lock(); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) continue; closecount++; tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } file_list_unlock(); /* FIXME! What are the locking issues here? This may me overdoing things.. * this question is especially important now that we've removed the irqlock. */ ld = tty_ldisc_ref(tty); if(ld != NULL) /* We may have no line discipline at this point */ { if (ld->flush_buffer) ld->flush_buffer(tty); if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->write_wakeup) ld->write_wakeup(tty); if (ld->hangup) ld->hangup(tty); } /* FIXME: Once we trust the LDISC code better we can wait here for ldisc completion and fix the driver call race */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); /* * Shutdown the current line discipline, and reset it to * N_TTY. */ if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { down(&tty->termios_sem); *tty->termios = tty->driver->init_termios; up(&tty->termios_sem); } /* Defer ldisc switch */ /* tty_deferred_ldisc_switch(N_TTY); This should get done automatically when the port closes and tty_release is called */ read_lock(&tasklist_lock); if (tty->session > 0) { do_each_task_pid(tty->session, PIDTYPE_SID, p) { if (p->signal->tty == tty) p->signal->tty = NULL; if (!p->signal->leader) continue; group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); if (tty->pgrp > 0) p->signal->tty_old_pgrp = tty->pgrp; } while_each_task_pid(tty->session, PIDTYPE_SID, p); } read_unlock(&tasklist_lock); tty->flags = 0; tty->session = 0; tty->pgrp = -1; tty->ctrl_status = 0; /* * If one of the devices matches a console pointer, we * cannot just call hangup() because that will cause * tty->count and state->count to go out of sync. * So we just call close() the right number of times. 
*/ if (cons_filp) { if (tty->driver->close) for (n = 0; n < closecount; n++) tty->driver->close(tty, cons_filp); } else if (tty->driver->hangup) (tty->driver->hangup)(tty); /* We don't want to have driver/ldisc interactions beyond the ones we did here. The driver layer expects no calls after ->hangup() from the ldisc side. However we can't yet guarantee all that */ set_bit(TTY_HUPPED, &tty->flags); if (ld) { tty_ldisc_enable(tty); tty_ldisc_deref(ld); } unlock_kernel(); if (f) fput(f); } /** * tty_hangup - trigger a hangup event * @tty: tty to hangup * * A carrier loss (virtual or otherwise) has occurred on this like * schedule a hangup sequence to run after this event. */ void tty_hangup(struct tty_struct * tty) { #ifdef TTY_DEBUG_HANGUP char buf[64]; printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf)); #endif schedule_work(&tty->hangup_work); } EXPORT_SYMBOL(tty_hangup); /** * tty_vhangup - process vhangup * @tty: tty to hangup * * The user has asked via system call for the terminal to be hung up. * We do this synchronously so that when the syscall returns the process * is complete. That guarantee is neccessary for security reasons. */ void tty_vhangup(struct tty_struct * tty) { #ifdef TTY_DEBUG_HANGUP char buf[64]; printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf)); #endif do_tty_hangup((void *) tty); } EXPORT_SYMBOL(tty_vhangup); /** * tty_hung_up_p - was tty hung up * @filp: file pointer of tty * * Return true if the tty has been subject to a vhangup or a carrier * loss */ int tty_hung_up_p(struct file * filp) { return (filp->f_op == &hung_up_tty_fops); } EXPORT_SYMBOL(tty_hung_up_p); /** * disassociate_ctty - disconnect controlling tty * @on_exit: true if exiting so need to "hang up" the session * * This function is typically called only by the session leader, when * it wants to disassociate itself from its controlling tty. * * It performs the following functions: * (1) Sends a SIGHUP and SIGCONT to the foreground process group * (2) Clears the tty from being controlling the session * (3) Clears the controlling tty for all processes in the * session group. * * The argument on_exit is set to 1 if called when a process is * exiting; it is 0 if called by the ioctl TIOCNOTTY. * * Locking: tty_mutex is taken to protect current->signal->tty * BKL is taken for hysterical raisins * Tasklist lock is taken (under tty_mutex) to walk process * lists for the session. 
*/

void disassociate_ctty(int on_exit)
{
	struct tty_struct *tty;
	struct task_struct *p;
	int tty_pgrp = -1;

	lock_kernel();

	mutex_lock(&tty_mutex);
	tty = current->signal->tty;
	if (tty) {
		tty_pgrp = tty->pgrp;
		mutex_unlock(&tty_mutex);
		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
			tty_vhangup(tty);
	} else {
		if (current->signal->tty_old_pgrp) {
			kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
			kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
		}
		mutex_unlock(&tty_mutex);
		unlock_kernel();
		return;
	}
	if (tty_pgrp > 0) {
		kill_pg(tty_pgrp, SIGHUP, on_exit);
		if (!on_exit)
			kill_pg(tty_pgrp, SIGCONT, on_exit);
	}

	/* Must lock changes to tty_old_pgrp */
	mutex_lock(&tty_mutex);
	current->signal->tty_old_pgrp = 0;
	tty->session = 0;
	tty->pgrp = -1;

	/* Now clear signal->tty under the lock */
	read_lock(&tasklist_lock);
	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
		p->signal->tty = NULL;
	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
	read_unlock(&tasklist_lock);
	mutex_unlock(&tty_mutex);
	unlock_kernel();
}

/**
 *	stop_tty	-	propagate flow control
 *	@tty: tty to stop
 *
 *	Perform flow control to the driver. For PTY/TTY pairs we
 *	must also propagate the TIOCPKT status. May be called
 *	on an already stopped device and will not re-call the driver
 *	method.
 *
 *	This functionality is used by both the line disciplines for
 *	halting incoming flow and by the driver. It may therefore be
 *	called from any context, may be under the tty atomic_write_lock
 *	but not always.
 *
 *	Locking:
 *		Broken. Relies on BKL which is unsafe here.
 */

void stop_tty(struct tty_struct *tty)
{
	if (tty->stopped)
		return;
	tty->stopped = 1;
	if (tty->link && tty->link->packet) {
		tty->ctrl_status &= ~TIOCPKT_START;
		tty->ctrl_status |= TIOCPKT_STOP;
		wake_up_interruptible(&tty->link->read_wait);
	}
	if (tty->driver->stop)
		(tty->driver->stop)(tty);
}

EXPORT_SYMBOL(stop_tty);

/**
 *	start_tty	-	propagate flow control
 *	@tty: tty to start
 *
 *	Start a tty that has been stopped if at all possible. Perform
 *	any necessary wakeups and propagate the TIOCPKT status. If the
 *	tty was previously stopped and is now being started then the
 *	driver start method is invoked and the line discipline woken.
 *
 *	Locking:
 *		Broken. Relies on BKL which is unsafe here.
 */

void start_tty(struct tty_struct *tty)
{
	if (!tty->stopped || tty->flow_stopped)
		return;
	tty->stopped = 0;
	if (tty->link && tty->link->packet) {
		tty->ctrl_status &= ~TIOCPKT_STOP;
		tty->ctrl_status |= TIOCPKT_START;
		wake_up_interruptible(&tty->link->read_wait);
	}
	if (tty->driver->start)
		(tty->driver->start)(tty);

	/* If we have a running line discipline it may need kicking */
	tty_wakeup(tty);
	wake_up_interruptible(&tty->write_wait);
}

EXPORT_SYMBOL(start_tty);

/**
 *	tty_read	-	read method for tty device files
 *	@file: pointer to tty file
 *	@buf: user buffer
 *	@count: size of user buffer
 *	@ppos: unused
 *
 *	Perform the read system call function on this terminal device. Checks
 *	for hung up devices before calling the line discipline method.
 *
 *	Locking:
 *		Locks the line discipline internally while needed
 *		For historical reasons the line discipline read method is
 *	invoked under the BKL. This will go away in time so do not rely on it
 *	in new code. Multiple read calls may be outstanding in parallel.
*/ static ssize_t tty_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { int i; struct tty_struct * tty; struct inode *inode; struct tty_ldisc *ld; tty = (struct tty_struct *)file->private_data; inode = file->f_dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) return -EIO; /* We want to wait for the line discipline to sort out in this situation */ ld = tty_ldisc_ref_wait(tty); lock_kernel(); if (ld->read) i = (ld->read)(tty,file,buf,count); else i = -EIO; tty_ldisc_deref(ld); unlock_kernel(); if (i > 0) inode->i_atime = current_fs_time(inode->i_sb); return i; } /* * Split writes up in sane blocksizes to avoid * denial-of-service type attacks */ static inline ssize_t do_tty_write( ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t), struct tty_struct *tty, struct file *file, const char __user *buf, size_t count) { ssize_t ret = 0, written = 0; unsigned int chunk; /* FIXME: O_NDELAY ... */ if (mutex_lock_interruptible(&tty->atomic_write_lock)) { return -ERESTARTSYS; } /* * We chunk up writes into a temporary buffer. This * simplifies low-level drivers immensely, since they * don't have locking issues and user mode accesses. * * But if TTY_NO_WRITE_SPLIT is set, we should use a * big chunk-size.. * * The default chunk-size is 2kB, because the NTTY * layer has problems with bigger chunks. It will * claim to be able to handle more characters than * it actually does. * * FIXME: This can probably go away now except that 64K chunks * are too likely to fail unless switched to vmalloc... */ chunk = 2048; if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags)) chunk = 65536; if (count < chunk) chunk = count; /* write_buf/write_cnt is protected by the atomic_write_lock mutex */ if (tty->write_cnt < chunk) { unsigned char *buf; if (chunk < 1024) chunk = 1024; buf = kmalloc(chunk, GFP_KERNEL); if (!buf) { mutex_unlock(&tty->atomic_write_lock); return -ENOMEM; } kfree(tty->write_buf); tty->write_cnt = chunk; tty->write_buf = buf; } /* Do the write .. */ for (;;) { size_t size = count; if (size > chunk) size = chunk; ret = -EFAULT; if (copy_from_user(tty->write_buf, buf, size)) break; lock_kernel(); ret = write(tty, file, tty->write_buf, size); unlock_kernel(); if (ret <= 0) break; written += ret; buf += ret; count -= ret; if (!count) break; ret = -ERESTARTSYS; if (signal_pending(current)) break; cond_resched(); } if (written) { struct inode *inode = file->f_dentry->d_inode; inode->i_mtime = current_fs_time(inode->i_sb); ret = written; } mutex_unlock(&tty->atomic_write_lock); return ret; } /** * tty_write - write method for tty device file * @file: tty file pointer * @buf: user data to write * @count: bytes to write * @ppos: unused * * Write data to a tty device via the line discipline. * * Locking: * Locks the line discipline as required * Writes to the tty driver are serialized by the atomic_write_lock * and are then processed in chunks to the device. The line discipline * write method will not be involked in parallel for each device * The line discipline write method is called under the big * kernel lock for historical reasons. New code should not rely on this. 
*/ static ssize_t tty_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct tty_struct * tty; struct inode *inode = file->f_dentry->d_inode; ssize_t ret; struct tty_ldisc *ld; tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->driver->write || (test_bit(TTY_IO_ERROR, &tty->flags))) return -EIO; ld = tty_ldisc_ref_wait(tty); if (!ld->write) ret = -EIO; else ret = do_tty_write(ld->write, tty, file, buf, count); tty_ldisc_deref(ld); return ret; } ssize_t redirected_tty_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct file *p = NULL; spin_lock(&redirect_lock); if (redirect) { get_file(redirect); p = redirect; } spin_unlock(&redirect_lock); if (p) { ssize_t res; res = vfs_write(p, buf, count, &p->f_pos); fput(p); return res; } return tty_write(file, buf, count, ppos); } static char ptychar[] = "pqrstuvwxyzabcde"; /** * pty_line_name - generate name for a pty * @driver: the tty driver in use * @index: the minor number * @p: output buffer of at least 6 bytes * * Generate a name from a driver reference and write it to the output * buffer. * * Locking: None */ static void pty_line_name(struct tty_driver *driver, int index, char *p) { int i = index + driver->name_base; /* ->name is initialized to "ttyp", but "tty" is expected */ sprintf(p, "%s%c%x", driver->subtype == PTY_TYPE_SLAVE ? "tty" : driver->name, ptychar[i >> 4 & 0xf], i & 0xf); } /** * pty_line_name - generate name for a tty * @driver: the tty driver in use * @index: the minor number * @p: output buffer of at least 7 bytes * * Generate a name from a driver reference and write it to the output * buffer. * * Locking: None */ static void tty_line_name(struct tty_driver *driver, int index, char *p) { sprintf(p, "%s%d", driver->name, index + driver->name_base); } /** * init_dev - initialise a tty device * @driver: tty driver we are opening a device on * @idx: device index * @tty: returned tty structure * * Prepare a tty device. This may not be a "new" clean device but * could also be an active device. The pty drivers require special * handling because of this. * * Locking: * The function is called under the tty_mutex, which * protects us from the tty struct or driver itself going away. * * On exit the tty device has the line discipline attached and * a reference count of 1. If a pair was created for pty/tty use * and the other was a pty master then it too has a reference count of 1. * * WSH 06/09/97: Rewritten to remove races and properly clean up after a * failed open. The new code protects the open with a mutex, so it's * really quite straightforward. The mutex locking can probably be * relaxed for the (most common) case of reopening a tty. */ static int init_dev(struct tty_driver *driver, int idx, struct tty_struct **ret_tty) { struct tty_struct *tty, *o_tty; struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; int retval = 0; /* check whether we're reopening an existing tty */ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { tty = devpts_get_tty(idx); if (tty && driver->subtype == PTY_TYPE_MASTER) tty = tty->link; } else { tty = driver->ttys[idx]; } if (tty) goto fast_track; /* * First time open is complex, especially for PTY devices. * This code guarantees that either everything succeeds and the * TTY is ready for operation, or else the table slots are vacated * and the allocated memory released. 
(Except that the termios * and locked termios may be retained.) */ if (!try_module_get(driver->owner)) { retval = -ENODEV; goto end_init; } o_tty = NULL; tp = o_tp = NULL; ltp = o_ltp = NULL; tty = alloc_tty_struct(); if(!tty) goto fail_no_mem; initialize_tty_struct(tty); tty->driver = driver; tty->index = idx; tty_line_name(driver, idx, tty->name); if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { tp_loc = &tty->termios; ltp_loc = &tty->termios_locked; } else { tp_loc = &driver->termios[idx]; ltp_loc = &driver->termios_locked[idx]; } if (!*tp_loc) { tp = (struct termios *) kmalloc(sizeof(struct termios), GFP_KERNEL); if (!tp) goto free_mem_out; *tp = driver->init_termios; } if (!*ltp_loc) { ltp = (struct termios *) kmalloc(sizeof(struct termios), GFP_KERNEL); if (!ltp) goto free_mem_out; memset(ltp, 0, sizeof(struct termios)); } if (driver->type == TTY_DRIVER_TYPE_PTY) { o_tty = alloc_tty_struct(); if (!o_tty) goto free_mem_out; initialize_tty_struct(o_tty); o_tty->driver = driver->other; o_tty->index = idx; tty_line_name(driver->other, idx, o_tty->name); if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { o_tp_loc = &o_tty->termios; o_ltp_loc = &o_tty->termios_locked; } else { o_tp_loc = &driver->other->termios[idx]; o_ltp_loc = &driver->other->termios_locked[idx]; } if (!*o_tp_loc) { o_tp = (struct termios *) kmalloc(sizeof(struct termios), GFP_KERNEL); if (!o_tp) goto free_mem_out; *o_tp = driver->other->init_termios; } if (!*o_ltp_loc) { o_ltp = (struct termios *) kmalloc(sizeof(struct termios), GFP_KERNEL); if (!o_ltp) goto free_mem_out; memset(o_ltp, 0, sizeof(struct termios)); } /* * Everything allocated ... set up the o_tty structure. */ if (!(driver->other->flags & TTY_DRIVER_DEVPTS_MEM)) { driver->other->ttys[idx] = o_tty; } if (!*o_tp_loc) *o_tp_loc = o_tp; if (!*o_ltp_loc) *o_ltp_loc = o_ltp; o_tty->termios = *o_tp_loc; o_tty->termios_locked = *o_ltp_loc; driver->other->refcount++; if (driver->subtype == PTY_TYPE_MASTER) o_tty->count++; /* Establish the links in both directions */ tty->link = o_tty; o_tty->link = tty; } /* * All structures have been allocated, so now we install them. * Failures after this point use release_mem to clean up, so * there's no need to null out the local pointers. */ if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) { driver->ttys[idx] = tty; } if (!*tp_loc) *tp_loc = tp; if (!*ltp_loc) *ltp_loc = ltp; tty->termios = *tp_loc; tty->termios_locked = *ltp_loc; driver->refcount++; tty->count++; /* * Structures all installed ... call the ldisc open routines. * If we fail here just call release_mem to clean up. No need * to decrement the use counts, as release_mem doesn't care. */ if (tty->ldisc.open) { retval = (tty->ldisc.open)(tty); if (retval) goto release_mem_out; } if (o_tty && o_tty->ldisc.open) { retval = (o_tty->ldisc.open)(o_tty); if (retval) { if (tty->ldisc.close) (tty->ldisc.close)(tty); goto release_mem_out; } tty_ldisc_enable(o_tty); } tty_ldisc_enable(tty); goto success; /* * This fast open can be used if the tty is already open. * No memory is allocated, and the only failures are from * attempting to open a closing tty or attempting multiple * opens on a pty master. */ fast_track: if (test_bit(TTY_CLOSING, &tty->flags)) { retval = -EIO; goto end_init; } if (driver->type == TTY_DRIVER_TYPE_PTY && driver->subtype == PTY_TYPE_MASTER) { /* * special case for PTY masters: only one open permitted, * and the slave side open count is incremented as well. 
*/ if (tty->count) { retval = -EIO; goto end_init; } tty->link->count++; } tty->count++; tty->driver = driver; /* N.B. why do this every time?? */ /* FIXME */ if(!test_bit(TTY_LDISC, &tty->flags)) printk(KERN_ERR "init_dev but no ldisc\n"); success: *ret_tty = tty; /* All paths come through here to release the mutex */ end_init: return retval; /* Release locally allocated memory ... nothing placed in slots */ free_mem_out: kfree(o_tp); if (o_tty) free_tty_struct(o_tty); kfree(ltp); kfree(tp); free_tty_struct(tty); fail_no_mem: module_put(driver->owner); retval = -ENOMEM; goto end_init; /* call the tty release_mem routine to clean out this slot */ release_mem_out: printk(KERN_INFO "init_dev: ldisc open failed, " "clearing slot %d\n", idx); release_mem(tty, idx); goto end_init; } /** * release_mem - release tty structure memory * * Releases memory associated with a tty structure, and clears out the * driver table slots. This function is called when a device is no longer * in use. It also gets called when setup of a device fails. * * Locking: * tty_mutex - sometimes only * takes the file list lock internally when working on the list * of ttys that the driver keeps. * FIXME: should we require tty_mutex is held here ?? */ static void release_mem(struct tty_struct *tty, int idx) { struct tty_struct *o_tty; struct termios *tp; int devpts = tty->driver->flags & TTY_DRIVER_DEVPTS_MEM; if ((o_tty = tty->link) != NULL) { if (!devpts) o_tty->driver->ttys[idx] = NULL; if (o_tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { tp = o_tty->termios; if (!devpts) o_tty->driver->termios[idx] = NULL; kfree(tp); tp = o_tty->termios_locked; if (!devpts) o_tty->driver->termios_locked[idx] = NULL; kfree(tp); } o_tty->magic = 0; o_tty->driver->refcount--; file_list_lock(); list_del_init(&o_tty->tty_files); file_list_unlock(); free_tty_struct(o_tty); } if (!devpts) tty->driver->ttys[idx] = NULL; if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { tp = tty->termios; if (!devpts) tty->driver->termios[idx] = NULL; kfree(tp); tp = tty->termios_locked; if (!devpts) tty->driver->termios_locked[idx] = NULL; kfree(tp); } tty->magic = 0; tty->driver->refcount--; file_list_lock(); list_del_init(&tty->tty_files); file_list_unlock(); module_put(tty->driver->owner); free_tty_struct(tty); } /* * Even releasing the tty structures is a tricky business.. We have * to be very careful that the structures are all released at the * same time, as interrupts might otherwise get the wrong pointers. * * WSH 09/09/97: rewritten to avoid some nasty race conditions that could * lead to double frees or releasing memory still in use. 
*/ static void release_dev(struct file * filp) { struct tty_struct *tty, *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; unsigned long flags; tty = (struct tty_struct *)filp->private_data; if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev")) return; check_tty_count(tty, "release_dev"); tty_fasync(-1, filp, 0); idx = tty->index; pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER); devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0; o_tty = tty->link; #ifdef TTY_PARANOIA_CHECK if (idx < 0 || idx >= tty->driver->num) { printk(KERN_DEBUG "release_dev: bad idx when trying to " "free (%s)\n", tty->name); return; } if (!(tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)) { if (tty != tty->driver->ttys[idx]) { printk(KERN_DEBUG "release_dev: driver.table[%d] not tty " "for (%s)\n", idx, tty->name); return; } if (tty->termios != tty->driver->termios[idx]) { printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios " "for (%s)\n", idx, tty->name); return; } if (tty->termios_locked != tty->driver->termios_locked[idx]) { printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not " "termios_locked for (%s)\n", idx, tty->name); return; } } #endif #ifdef TTY_DEBUG_HANGUP printk(KERN_DEBUG "release_dev of %s (tty count=%d)...", tty_name(tty, buf), tty->count); #endif #ifdef TTY_PARANOIA_CHECK if (tty->driver->other && !(tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)) { if (o_tty != tty->driver->other->ttys[idx]) { printk(KERN_DEBUG "release_dev: other->table[%d] " "not o_tty for (%s)\n", idx, tty->name); return; } if (o_tty->termios != tty->driver->other->termios[idx]) { printk(KERN_DEBUG "release_dev: other->termios[%d] " "not o_termios for (%s)\n", idx, tty->name); return; } if (o_tty->termios_locked != tty->driver->other->termios_locked[idx]) { printk(KERN_DEBUG "release_dev: other->termios_locked[" "%d] not o_termios_locked for (%s)\n", idx, tty->name); return; } if (o_tty->link != tty) { printk(KERN_DEBUG "release_dev: bad pty pointers\n"); return; } } #endif if (tty->driver->close) tty->driver->close(tty, filp); /* * Sanity check: if tty->count is going to zero, there shouldn't be * any waiters on tty->read_wait or tty->write_wait. We test the * wait queues and kick everyone out _before_ actually starting to * close. This ensures that we won't block while releasing the tty * structure. * * The test for the o_tty closing is necessary, since the master and * slave sides may close in any order. If the slave side closes out * first, its count will be one, since the master side holds an open. * Thus this test wouldn't be triggered at the time the slave closes, * so we do it now. * * Note that it's possible for the tty to be opened again while we're * flushing out waiters. By recalculating the closing flags before * each iteration we avoid any problems. */ while (1) { /* Guard against races with tty->count changes elsewhere and opens on /dev/tty */ mutex_lock(&tty_mutex); tty_closing = tty->count <= 1; o_tty_closing = o_tty && (o_tty->count <= (pty_master ? 
1 : 0)); do_sleep = 0; if (tty_closing) { if (waitqueue_active(&tty->read_wait)) { wake_up(&tty->read_wait); do_sleep++; } if (waitqueue_active(&tty->write_wait)) { wake_up(&tty->write_wait); do_sleep++; } } if (o_tty_closing) { if (waitqueue_active(&o_tty->read_wait)) { wake_up(&o_tty->read_wait); do_sleep++; } if (waitqueue_active(&o_tty->write_wait)) { wake_up(&o_tty->write_wait); do_sleep++; } } if (!do_sleep) break; printk(KERN_WARNING "release_dev: %s: read/write wait queue " "active!\n", tty_name(tty, buf)); mutex_unlock(&tty_mutex); schedule(); } /* * The closing flags are now consistent with the open counts on * both sides, and we've completed the last operation that could * block, so it's safe to proceed with closing. */ if (pty_master) { if (--o_tty->count < 0) { printk(KERN_WARNING "release_dev: bad pty slave count " "(%d) for %s\n", o_tty->count, tty_name(o_tty, buf)); o_tty->count = 0; } } if (--tty->count < 0) { printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n", tty->count, tty_name(tty, buf)); tty->count = 0; } /* * We've decremented tty->count, so we need to remove this file * descriptor off the tty->tty_files list; this serves two * purposes: * - check_tty_count sees the correct number of file descriptors * associated with this tty. * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ file_kill(filp); filp->private_data = NULL; /* * Perform some housekeeping before deciding whether to return. * * Set the TTY_CLOSING flag if this was the last open. In the * case of a pty we may have to wait around for the other side * to close, and TTY_CLOSING makes sure we can't be reopened. */ if(tty_closing) set_bit(TTY_CLOSING, &tty->flags); if(o_tty_closing) set_bit(TTY_CLOSING, &o_tty->flags); /* * If _either_ side is closing, make sure there aren't any * processes that still think tty or o_tty is their controlling * tty. */ if (tty_closing || o_tty_closing) { struct task_struct *p; read_lock(&tasklist_lock); do_each_task_pid(tty->session, PIDTYPE_SID, p) { p->signal->tty = NULL; } while_each_task_pid(tty->session, PIDTYPE_SID, p); if (o_tty) do_each_task_pid(o_tty->session, PIDTYPE_SID, p) { p->signal->tty = NULL; } while_each_task_pid(o_tty->session, PIDTYPE_SID, p); read_unlock(&tasklist_lock); } mutex_unlock(&tty_mutex); /* check whether both sides are closing ... */ if (!tty_closing || (o_tty && !o_tty_closing)) return; #ifdef TTY_DEBUG_HANGUP printk(KERN_DEBUG "freeing tty structure..."); #endif /* * Prevent flush_to_ldisc() from rescheduling the work for later. Then * kill any delayed work. As this is the final close it does not * race with the set_ldisc code path. */ clear_bit(TTY_LDISC, &tty->flags); cancel_delayed_work(&tty->buf.work); /* * Wait for ->hangup_work and ->buf.work handlers to terminate */ flush_scheduled_work(); /* * Wait for any short term users (we know they are just driver * side waiters as the file is closing so user count on the file * side is zero. */ spin_lock_irqsave(&tty_ldisc_lock, flags); while(tty->ldisc.refcount) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); spin_lock_irqsave(&tty_ldisc_lock, flags); } spin_unlock_irqrestore(&tty_ldisc_lock, flags); /* * Shutdown the current line discipline, and reset it to N_TTY. * N.B. why reset ldisc when we're releasing the memory?? 
* * FIXME: this MUST get fixed for the new reflocking */ if (tty->ldisc.close) (tty->ldisc.close)(tty); tty_ldisc_put(tty->ldisc.num); /* * Switch the line discipline back */ tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); tty_set_termios_ldisc(tty,N_TTY); if (o_tty) { /* FIXME: could o_tty be in setldisc here ? */ clear_bit(TTY_LDISC, &o_tty->flags); if (o_tty->ldisc.close) (o_tty->ldisc.close)(o_tty); tty_ldisc_put(o_tty->ldisc.num); tty_ldisc_assign(o_tty, tty_ldisc_get(N_TTY)); tty_set_termios_ldisc(o_tty,N_TTY); } /* * The release_mem function takes care of the details of clearing * the slots and preserving the termios structure. */ release_mem(tty, idx); #ifdef CONFIG_UNIX98_PTYS /* Make this pty number available for reallocation */ if (devpts) { down(&allocated_ptys_lock); idr_remove(&allocated_ptys, idx); up(&allocated_ptys_lock); } #endif } /** * tty_open - open a tty device * @inode: inode of device file * @filp: file pointer to tty * * tty_open and tty_release keep up the tty count that contains the * number of opens done on a tty. We cannot use the inode-count, as * different inodes might point to the same tty. * * Open-counting is needed for pty masters, as well as for keeping * track of serial lines: DTR is dropped when the last close happens. * (This is not done solely through tty->count, now. - Ted 1/27/92) * * The termios state of a pty is reset on first open so that * settings don't persist across reuse. * * Locking: tty_mutex protects current->signal->tty, get_tty_driver and * init_dev work. tty->count should protect the rest. * task_lock is held to update task details for sessions */ static int tty_open(struct inode * inode, struct file * filp) { struct tty_struct *tty; int noctty, retval; struct tty_driver *driver; int index; dev_t device = inode->i_rdev; unsigned short saved_flags = filp->f_flags; nonseekable_open(inode, filp); retry_open: noctty = filp->f_flags & O_NOCTTY; index = -1; retval = 0; mutex_lock(&tty_mutex); if (device == MKDEV(TTYAUX_MAJOR,0)) { if (!current->signal->tty) { mutex_unlock(&tty_mutex); return -ENXIO; } driver = current->signal->tty->driver; index = current->signal->tty->index; filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ /* noctty = 1; */ goto got_driver; } #ifdef CONFIG_VT if (device == MKDEV(TTY_MAJOR,0)) { extern struct tty_driver *console_driver; driver = console_driver; index = fg_console; noctty = 1; goto got_driver; } #endif if (device == MKDEV(TTYAUX_MAJOR,1)) { driver = console_device(&index); if (driver) { /* Don't let /dev/console block */ filp->f_flags |= O_NONBLOCK; noctty = 1; goto got_driver; } mutex_unlock(&tty_mutex); return -ENODEV; } driver = get_tty_driver(device, &index); if (!driver) { mutex_unlock(&tty_mutex); return -ENODEV; } got_driver: retval = init_dev(driver, index, &tty); mutex_unlock(&tty_mutex); if (retval) return retval; filp->private_data = tty; file_move(filp, &tty->tty_files); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) noctty = 1; #ifdef TTY_DEBUG_HANGUP printk(KERN_DEBUG "opening %s...", tty->name); #endif if (!retval) { if (tty->driver->open) retval = tty->driver->open(tty, filp); else retval = -ENODEV; } filp->f_flags = saved_flags; if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN)) retval = -EBUSY; if (retval) { #ifdef TTY_DEBUG_HANGUP printk(KERN_DEBUG "error %d in opening %s...", retval, tty->name); #endif release_dev(filp); if (retval != -ERESTARTSYS) return retval; if 
(signal_pending(current)) return retval; schedule(); /* * Need to reset f_op in case a hangup happened. */ if (filp->f_op == &hung_up_tty_fops) filp->f_op = &tty_fops; goto retry_open; } if (!noctty && current->signal->leader && !current->signal->tty && tty->session == 0) { task_lock(current); current->signal->tty = tty; task_unlock(current); current->signal->tty_old_pgrp = 0; tty->session = current->signal->session; tty->pgrp = process_group(current); } return 0; } #ifdef CONFIG_UNIX98_PTYS /** * ptmx_open - open a unix 98 pty master * @inode: inode of device file * @filp: file pointer to tty * * Allocate a unix98 pty master device from the ptmx driver. * * Locking: tty_mutex protects theinit_dev work. tty->count should protect the rest. * allocated_ptys_lock handles the list of free pty numbers */ static int ptmx_open(struct inode * inode, struct file * filp) { struct tty_struct *tty; int retval; int index; int idr_ret; nonseekable_open(inode, filp); /* find a device that is not in use. */ down(&allocated_ptys_lock); if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { up(&allocated_ptys_lock); return -ENOMEM; } idr_ret = idr_get_new(&allocated_ptys, NULL, &index); if (idr_ret < 0) { up(&allocated_ptys_lock); if (idr_ret == -EAGAIN) return -ENOMEM; return -EIO; } if (index >= pty_limit) { idr_remove(&allocated_ptys, index); up(&allocated_ptys_lock); return -EIO; } up(&allocated_ptys_lock); mutex_lock(&tty_mutex); retval = init_dev(ptm_driver, index, &tty); mutex_unlock(&tty_mutex); if (retval) goto out; set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; file_move(filp, &tty->tty_files); retval = -ENOMEM; if (devpts_pty_new(tty->link)) goto out1; check_tty_count(tty, "tty_open"); retval = ptm_driver->open(tty, filp); if (!retval) return 0; out1: release_dev(filp); return retval; out: down(&allocated_ptys_lock); idr_remove(&allocated_ptys, index); up(&allocated_ptys_lock); return retval; } #endif /** * tty_release - vfs callback for close * @inode: inode of tty * @filp: file pointer for handle to tty * * Called the last time each file handle is closed that references * this tty. There may however be several such references. * * Locking: * Takes bkl. See release_dev */ static int tty_release(struct inode * inode, struct file * filp) { lock_kernel(); release_dev(filp); unlock_kernel(); return 0; } /** * tty_poll - check tty status * @filp: file being polled * @wait: poll wait structures to update * * Call the line discipline polling method to obtain the poll * status of the device. * * Locking: locks called line discipline but ldisc poll method * may be re-entered freely by other callers. */ static unsigned int tty_poll(struct file * filp, poll_table * wait) { struct tty_struct * tty; struct tty_ldisc *ld; int ret = 0; tty = (struct tty_struct *)filp->private_data; if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "tty_poll")) return 0; ld = tty_ldisc_ref_wait(tty); if (ld->poll) ret = (ld->poll)(tty, filp, wait); tty_ldisc_deref(ld); return ret; } static int tty_fasync(int fd, struct file * filp, int on) { struct tty_struct * tty; int retval; tty = (struct tty_struct *)filp->private_data; if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "tty_fasync")) return 0; retval = fasync_helper(fd, filp, on, &tty->fasync); if (retval <= 0) return retval; if (on) { if (!waitqueue_active(&tty->read_wait)) tty->minimum_to_wake = 1; retval = f_setown(filp, (-tty->pgrp) ? 
		: current->pid, 0);
		if (retval)
			return retval;
	} else {
		if (!tty->fasync && !waitqueue_active(&tty->read_wait))
			tty->minimum_to_wake = N_TTY_BUF_SIZE;
	}
	return 0;
}

/**
 *	tiocsti		-	fake input character
 *	@tty: tty to fake input into
 *	@p: pointer to character
 *
 *	Fake input to a tty device. Does the necessary locking and
 *	input management.
 *
 *	FIXME: does not honour flow control ??
 *
 *	Locking:
 *		Called functions take tty_ldisc_lock
 *		current->signal->tty check is safe without locks
 */

static int tiocsti(struct tty_struct *tty, char __user *p)
{
	char ch, mbz = 0;
	struct tty_ldisc *ld;

	if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (get_user(ch, p))
		return -EFAULT;
	ld = tty_ldisc_ref_wait(tty);
	ld->receive_buf(tty, &ch, &mbz, 1);
	tty_ldisc_deref(ld);
	return 0;
}

/**
 *	tiocgwinsz	-	implement window query ioctl
 *	@tty: tty
 *	@arg: user buffer for result
 *
 *	Copies the kernel idea of the window size into the user buffer. No
 *	locking is done.
 *
 *	FIXME: Returning random values racing a window size set is wrong
 *	should lock here against that
 */

static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
{
	if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
		return -EFAULT;
	return 0;
}

/**
 *	tiocswinsz	-	implement window size set ioctl
 *	@tty: tty
 *	@arg: user buffer for result
 *
 *	Copies the user idea of the window size to the kernel. Traditionally
 *	this is just advisory information but for the Linux console it
 *	actually has driver level meaning and triggers a VC resize.
 *
 *	Locking:
 *		The console_sem is used to ensure we do not try and resize
 *	the console twice at once.
 *	FIXME: Two racing size sets may leave the console and kernel
 *	parameters disagreeing. Is this exploitable ?
 *	FIXME: Random values racing a window size get is wrong
 *	should lock here against that
 */

static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
	struct winsize __user * arg)
{
	struct winsize tmp_ws;

	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
		return -EFAULT;
	if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
		return 0;
#ifdef CONFIG_VT
	if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
		int rc;

		acquire_console_sem();
		rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
		release_console_sem();
		if (rc)
			return -ENXIO;
	}
#endif
	if (tty->pgrp > 0)
		kill_pg(tty->pgrp, SIGWINCH, 1);
	if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
		kill_pg(real_tty->pgrp, SIGWINCH, 1);
	tty->winsize = tmp_ws;
	real_tty->winsize = tmp_ws;
	return 0;
}

/**
 *	tioccons	-	allow admin to move logical console
 *	@file: the file to become console
 *
 *	Allow the administrator to move the redirected console device
 *
 *	Locking: uses redirect_lock to guard the redirect information
 */

static int tioccons(struct file *file)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (file->f_op->write == redirected_tty_write) {
		struct file *f;
		spin_lock(&redirect_lock);
		f = redirect;
		redirect = NULL;
		spin_unlock(&redirect_lock);
		if (f)
			fput(f);
		return 0;
	}
	spin_lock(&redirect_lock);
	if (redirect) {
		spin_unlock(&redirect_lock);
		return -EBUSY;
	}
	get_file(file);
	redirect = file;
	spin_unlock(&redirect_lock);
	return 0;
}

/**
 *	fionbio		-	non blocking ioctl
 *	@file: file to set blocking value
 *	@p: user parameter
 *
 *	Historical tty interfaces had a blocking control ioctl before
 *	the generic functionality existed. This piece of history is preserved
 *	in the expected tty API of POSIX operating systems.
 *
 *	Locking: none, the open file handle ensures it won't go away.
*/ static int fionbio(struct file *file, int __user *p) { int nonblock; if (get_user(nonblock, p)) return -EFAULT; if (nonblock) file->f_flags |= O_NONBLOCK; else file->f_flags &= ~O_NONBLOCK; return 0; } /** * tiocsctty - set controlling tty * @tty: tty structure * @arg: user argument * * This ioctl is used to manage job control. It permits a session * leader to set this tty as the controlling tty for the session. * * Locking: * Takes tasklist lock internally to walk sessions * Takes task_lock() when updating signal->tty * * FIXME: tty_mutex is needed to protect signal->tty references. * FIXME: why task_lock on the signal->tty reference ?? * */ static int tiocsctty(struct tty_struct *tty, int arg) { struct task_struct *p; if (current->signal->leader && (current->signal->session == tty->session)) return 0; /* * The process must be a session leader and * not have a controlling tty already. */ if (!current->signal->leader || current->signal->tty) return -EPERM; if (tty->session > 0) { /* * This tty is already the controlling * tty for another session group! */ if ((arg == 1) && capable(CAP_SYS_ADMIN)) { /* * Steal it away */ read_lock(&tasklist_lock); do_each_task_pid(tty->session, PIDTYPE_SID, p) { p->signal->tty = NULL; } while_each_task_pid(tty->session, PIDTYPE_SID, p); read_unlock(&tasklist_lock); } else return -EPERM; } task_lock(current); current->signal->tty = tty; task_unlock(current); current->signal->tty_old_pgrp = 0; tty->session = current->signal->session; tty->pgrp = process_group(current); return 0; } /** * tiocgpgrp - get process group * @tty: tty passed by user * @real_tty: tty side of the tty pased by the user if a pty else the tty * @p: returned pid * * Obtain the process group of the tty. If there is no process group * return an error. * * Locking: none. Reference to ->signal->tty is safe. */ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; return put_user(real_tty->pgrp, p); } /** * tiocspgrp - attempt to set process group * @tty: tty passed by user * @real_tty: tty side device matching tty passed by user * @p: pid pointer * * Set the process group of the tty to the session passed. Only * permitted where the tty session is our session. * * Locking: None * * FIXME: current->signal->tty referencing is unsafe. */ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { pid_t pgrp; int retval = tty_check_change(real_tty); if (retval == -EIO) return -ENOTTY; if (retval) return retval; if (!current->signal->tty || (current->signal->tty != real_tty) || (real_tty->session != current->signal->session)) return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; if (pgrp < 0) return -EINVAL; if (session_of_pgrp(pgrp) != current->signal->session) return -EPERM; real_tty->pgrp = pgrp; return 0; } /** * tiocgsid - get session id * @tty: tty passed by user * @real_tty: tty side of the tty pased by the user if a pty else the tty * @p: pointer to returned session id * * Obtain the session id of the tty. If there is no session * return an error. * * Locking: none. Reference to ->signal->tty is safe. */ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. 
*/ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; if (real_tty->session <= 0) return -ENOTTY; return put_user(real_tty->session, p); } /** * tiocsetd - set line discipline * @tty: tty device * @p: pointer to user data * * Set the line discipline according to user request. * * Locking: see tty_set_ldisc, this function is just a helper */ static int tiocsetd(struct tty_struct *tty, int __user *p) { int ldisc; if (get_user(ldisc, p)) return -EFAULT; return tty_set_ldisc(tty, ldisc); } /** * send_break - performed time break * @tty: device to break on * @duration: timeout in mS * * Perform a timed break on hardware that lacks its own driver level * timed break functionality. * * Locking: * None * * FIXME: * What if two overlap */ static int send_break(struct tty_struct *tty, unsigned int duration) { tty->driver->break_ctl(tty, -1); if (!signal_pending(current)) { msleep_interruptible(duration); } tty->driver->break_ctl(tty, 0); if (signal_pending(current)) return -EINTR; return 0; } /** * tiocmget - get modem status * @tty: tty device * @file: user file pointer * @p: pointer to result * * Obtain the modem status bits from the tty driver if the feature * is supported. Return -EINVAL if it is not available. * * Locking: none (up to the driver) */ static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p) { int retval = -EINVAL; if (tty->driver->tiocmget) { retval = tty->driver->tiocmget(tty, file); if (retval >= 0) retval = put_user(retval, p); } return retval; } /** * tiocmset - set modem status * @tty: tty device * @file: user file pointer * @cmd: command - clear bits, set bits or set all * @p: pointer to desired bits * * Set the modem status bits from the tty driver if the feature * is supported. Return -EINVAL if it is not available. * * Locking: none (up to the driver) */ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned __user *p) { int retval = -EINVAL; if (tty->driver->tiocmset) { unsigned int set, clear, val; retval = get_user(val, p); if (retval) return retval; set = clear = 0; switch (cmd) { case TIOCMBIS: set = val; break; case TIOCMBIC: clear = val; break; case TIOCMSET: set = val; clear = ~val; break; } set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; retval = tty->driver->tiocmset(tty, file, set, clear); } return retval; } /* * Split this up, as gcc can choke on it otherwise.. */ int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct tty_struct *tty, *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; real_tty = tty; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) real_tty = tty->link; /* * Break handling by driver */ if (!tty->driver->break_ctl) { switch(cmd) { case TIOCSBRK: case TIOCCBRK: if (tty->driver->ioctl) return tty->driver->ioctl(tty, file, cmd, arg); return -EINVAL; /* These two ioctl's always return success; even if */ /* the driver doesn't support them. 
*/ case TCSBRK: case TCSBRKP: if (!tty->driver->ioctl) return 0; retval = tty->driver->ioctl(tty, file, cmd, arg); if (retval == -ENOIOCTLCMD) retval = 0; return retval; } } /* * Factor out some common prep work */ switch (cmd) { case TIOCSETD: case TIOCSBRK: case TIOCCBRK: case TCSBRK: case TCSBRKP: retval = tty_check_change(tty); if (retval) return retval; if (cmd != TIOCCBRK) { tty_wait_until_sent(tty, 0); if (signal_pending(current)) return -EINTR; } break; } switch (cmd) { case TIOCSTI: return tiocsti(tty, p); case TIOCGWINSZ: return tiocgwinsz(tty, p); case TIOCSWINSZ: return tiocswinsz(tty, real_tty, p); case TIOCCONS: return real_tty!=tty ? -EINVAL : tioccons(file); case FIONBIO: return fionbio(file, p); case TIOCEXCL: set_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCNXCL: clear_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCNOTTY: /* FIXME: taks lock or tty_mutex ? */ if (current->signal->tty != tty) return -ENOTTY; if (current->signal->leader) disassociate_ctty(0); task_lock(current); current->signal->tty = NULL; task_unlock(current); return 0; case TIOCSCTTY: return tiocsctty(tty, arg); case TIOCGPGRP: return tiocgpgrp(tty, real_tty, p); case TIOCSPGRP: return tiocspgrp(tty, real_tty, p); case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: /* FIXME: check this is ok */ return put_user(tty->ldisc.num, (int __user *)p); case TIOCSETD: return tiocsetd(tty, p); #ifdef CONFIG_VT case TIOCLINUX: return tioclinux(tty, arg); #endif /* * Break handling */ case TIOCSBRK: /* Turn break on, unconditionally */ tty->driver->break_ctl(tty, -1); return 0; case TIOCCBRK: /* Turn break off, unconditionally */ tty->driver->break_ctl(tty, 0); return 0; case TCSBRK: /* SVID version: non-zero arg --> no break */ /* non-zero arg means wait for all output data * to be sent (performed above) but don't send break. * This is used by the tcdrain() termios function. */ if (!arg) return send_break(tty, 250); return 0; case TCSBRKP: /* support for POSIX tcsendbreak() */ return send_break(tty, arg ? arg*100 : 250); case TIOCMGET: return tty_tiocmget(tty, file, p); case TIOCMSET: case TIOCMBIC: case TIOCMBIS: return tty_tiocmset(tty, file, cmd, p); } if (tty->driver->ioctl) { retval = (tty->driver->ioctl)(tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } ld = tty_ldisc_ref_wait(tty); retval = -EINVAL; if (ld->ioctl) { retval = ld->ioctl(tty, file, cmd, arg); if (retval == -ENOIOCTLCMD) retval = -EINVAL; } tty_ldisc_deref(ld); return retval; } /* * This implements the "Secure Attention Key" --- the idea is to * prevent trojan horses by killing all processes associated with this * tty when the user hits the "Secure Attention Key". Required for * super-paranoid applications --- see the Orange Book for more details. * * This code could be nicer; ideally it should send a HUP, wait a few * seconds, then send a INT, and then a KILL signal. But you then * have to coordinate with the init process, since all processes associated * with the current tty must be dead before the new getty is allowed * to spawn. * * Now, if it would be correct ;-/ The current code has a nasty hole - * it doesn't catch files in flight. We may send the descriptor to ourselves * via AF_UNIX socket, close it and later fetch from socket. FIXME. * * Nasty bug: do_SAK is being called in interrupt context. This can * deadlock. We punt it up to process context. 
AKPM - 16Mar2001 */ static void __do_SAK(void *arg) { #ifdef TTY_SOFT_SAK tty_hangup(tty); #else struct tty_struct *tty = arg; struct task_struct *g, *p; int session; int i; struct file *filp; struct tty_ldisc *disc; struct fdtable *fdt; if (!tty) return; session = tty->session; /* We don't want an ldisc switch during this */ disc = tty_ldisc_ref(tty); if (disc && disc->flush_buffer) disc->flush_buffer(tty); tty_ldisc_deref(disc); if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); read_lock(&tasklist_lock); /* Kill the entire session */ do_each_task_pid(session, PIDTYPE_SID, p) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): p->signal->session==tty->session\n", p->pid, p->comm); send_sig(SIGKILL, p, 1); } while_each_task_pid(session, PIDTYPE_SID, p); /* Now kill any processes that happen to have the * tty open. */ do_each_thread(g, p) { if (p->signal->tty == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): p->signal->session==tty->session\n", p->pid, p->comm); send_sig(SIGKILL, p, 1); continue; } task_lock(p); if (p->files) { /* * We don't take a ref to the file, so we must * hold ->file_lock instead. */ spin_lock(&p->files->file_lock); fdt = files_fdtable(p->files); for (i=0; i < fdt->max_fds; i++) { filp = fcheck_files(p->files, i); if (!filp) continue; if (filp->f_op->read == tty_read && filp->private_data == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", p->pid, p->comm, i); force_sig(SIGKILL, p); break; } } spin_unlock(&p->files->file_lock); } task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); #endif } /* * The tq handling here is a little racy - tty->SAK_work may already be queued. * Fortunately we don't need to worry, because if ->SAK_work is already queued, * the values which we write to it will be identical to the values which it * already has. --akpm */ void do_SAK(struct tty_struct *tty) { if (!tty) return; PREPARE_WORK(&tty->SAK_work, __do_SAK, tty); schedule_work(&tty->SAK_work); } EXPORT_SYMBOL(do_SAK); /** * flush_to_ldisc * @private_: tty structure passed from work queue. * * This routine is called out of the software interrupt to flush data * from the buffer chain to the line discipline. * * Locking: holds tty->buf.lock to guard buffer list. Drops the lock * while invoking the line discipline receive_buf method. The * receive_buf method is single threaded for each tty instance. 
*/ static void flush_to_ldisc(void *private_) { struct tty_struct *tty = (struct tty_struct *) private_; unsigned long flags; struct tty_ldisc *disc; struct tty_buffer *tbuf, *head; char *char_buf; unsigned char *flag_buf; disc = tty_ldisc_ref(tty); if (disc == NULL) /* !TTY_LDISC */ return; spin_lock_irqsave(&tty->buf.lock, flags); head = tty->buf.head; if (head != NULL) { tty->buf.head = NULL; for (;;) { int count = head->commit - head->read; if (!count) { if (head->next == NULL) break; tbuf = head; head = head->next; tty_buffer_free(tty, tbuf); continue; } if (!tty->receive_room) { schedule_delayed_work(&tty->buf.work, 1); break; } if (count > tty->receive_room) count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); disc->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); } tty->buf.head = head; } spin_unlock_irqrestore(&tty->buf.lock, flags); tty_ldisc_deref(disc); } /* * Routine which returns the baud rate of the tty * * Note that the baud_table needs to be kept in sync with the * include/asm/termbits.h file. */ static int baud_table[] = { 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, 9600, 19200, 38400, 57600, 115200, 230400, 460800, #ifdef __sparc__ 76800, 153600, 307200, 614400, 921600 #else 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000, 2500000, 3000000, 3500000, 4000000 #endif }; static int n_baud_table = ARRAY_SIZE(baud_table); /** * tty_termios_baud_rate * @termios: termios structure * * Convert termios baud rate data into a speed. This should be called * with the termios lock held if this termios is a terminal termios * structure. May change the termios data. * * Locking: none */ int tty_termios_baud_rate(struct termios *termios) { unsigned int cbaud; cbaud = termios->c_cflag & CBAUD; if (cbaud & CBAUDEX) { cbaud &= ~CBAUDEX; if (cbaud < 1 || cbaud + 15 > n_baud_table) termios->c_cflag &= ~CBAUDEX; else cbaud += 15; } return baud_table[cbaud]; } EXPORT_SYMBOL(tty_termios_baud_rate); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns the baud rate as an integer for this terminal. The * termios lock must be held by the caller and the terminal bit * flags may be updated. * * Locking: none */ int tty_get_baud_rate(struct tty_struct *tty) { int baud = tty_termios_baud_rate(tty->termios); if (baud == 38400 && tty->alt_speed) { if (!tty->warned) { printk(KERN_WARNING "Use of setserial/setrocket to " "set SPD_* flags is deprecated\n"); tty->warned = 1; } baud = tty->alt_speed; } return baud; } EXPORT_SYMBOL(tty_get_baud_rate); /** * tty_flip_buffer_push - terminal * @tty: tty to push * * Queue a push of the terminal flip buffers to the line discipline. This * function must not be called from IRQ context if tty->low_latency is set. * * In the event of the queue being busy for flipping the work will be * held off and retried later. * * Locking: tty buffer lock. Driver locks in low latency mode. 
*/ void tty_flip_buffer_push(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->buf.lock, flags); if (tty->buf.tail != NULL) tty->buf.tail->commit = tty->buf.tail->used; spin_unlock_irqrestore(&tty->buf.lock, flags); if (tty->low_latency) flush_to_ldisc((void *) tty); else schedule_delayed_work(&tty->buf.work, 1); } EXPORT_SYMBOL(tty_flip_buffer_push); /** * initialize_tty_struct * @tty: tty to initialize * * This subroutine initializes a tty structure that has been newly * allocated. * * Locking: none - tty in question must not be exposed at this point */ static void initialize_tty_struct(struct tty_struct *tty) { memset(tty, 0, sizeof(struct tty_struct)); tty->magic = TTY_MAGIC; tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); tty->pgrp = -1; tty->overrun_time = jiffies; tty->buf.head = tty->buf.tail = NULL; tty_buffer_init(tty); INIT_WORK(&tty->buf.work, flush_to_ldisc, tty); init_MUTEX(&tty->buf.pty_sem); init_MUTEX(&tty->termios_sem); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup, tty); mutex_init(&tty->atomic_read_lock); mutex_init(&tty->atomic_write_lock); spin_lock_init(&tty->read_lock); INIT_LIST_HEAD(&tty->tty_files); INIT_WORK(&tty->SAK_work, NULL, NULL); } /* * The default put_char routine if the driver did not define one. */ static void tty_default_put_char(struct tty_struct *tty, unsigned char ch) { tty->driver->write(tty, &ch, 1); } static struct class *tty_class; /** * tty_register_device - register a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * @device: a struct device that is associated with this tty device. * This field is optional, if there is no known struct device * for this tty device it can be set to NULL safely. * * Returns a pointer to the class device (or ERR_PTR(-EFOO) on error). * * This call is required to be made to register an individual tty device * if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set. If * that bit is not set, this function should not be called by a tty * driver. * * Locking: ?? */ struct class_device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *device) { char name[64]; dev_t dev = MKDEV(driver->major, driver->minor_start) + index; if (index >= driver->num) { printk(KERN_ERR "Attempt to register invalid tty line number " " (%d).\n", index); return ERR_PTR(-EINVAL); } if (driver->type == TTY_DRIVER_TYPE_PTY) pty_line_name(driver, index, name); else tty_line_name(driver, index, name); return class_device_create(tty_class, NULL, dev, device, "%s", name); } /** * tty_unregister_device - unregister a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * * If a tty device is registered with a call to tty_register_device() then * this function must be called when the tty device is gone. * * Locking: ?? 
*/ void tty_unregister_device(struct tty_driver *driver, unsigned index) { class_device_destroy(tty_class, MKDEV(driver->major, driver->minor_start) + index); } EXPORT_SYMBOL(tty_register_device); EXPORT_SYMBOL(tty_unregister_device); struct tty_driver *alloc_tty_driver(int lines) { struct tty_driver *driver; driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL); if (driver) { memset(driver, 0, sizeof(struct tty_driver)); driver->magic = TTY_DRIVER_MAGIC; driver->num = lines; /* later we'll move allocation of tables here */ } return driver; } void put_tty_driver(struct tty_driver *driver) { kfree(driver); } void tty_set_operations(struct tty_driver *driver, struct tty_operations *op) { driver->open = op->open; driver->close = op->close; driver->write = op->write; driver->put_char = op->put_char; driver->flush_chars = op->flush_chars; driver->write_room = op->write_room; driver->chars_in_buffer = op->chars_in_buffer; driver->ioctl = op->ioctl; driver->set_termios = op->set_termios; driver->throttle = op->throttle; driver->unthrottle = op->unthrottle; driver->stop = op->stop; driver->start = op->start; driver->hangup = op->hangup; driver->break_ctl = op->break_ctl; driver->flush_buffer = op->flush_buffer; driver->set_ldisc = op->set_ldisc; driver->wait_until_sent = op->wait_until_sent; driver->send_xchar = op->send_xchar; driver->read_proc = op->read_proc; driver->write_proc = op->write_proc; driver->tiocmget = op->tiocmget; driver->tiocmset = op->tiocmset; } EXPORT_SYMBOL(alloc_tty_driver); EXPORT_SYMBOL(put_tty_driver); EXPORT_SYMBOL(tty_set_operations); /* * Called by a tty driver to register itself. */ int tty_register_driver(struct tty_driver *driver) { int error; int i; dev_t dev; void **p = NULL; if (driver->flags & TTY_DRIVER_INSTALLED) return 0; if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) { p = kmalloc(driver->num * 3 * sizeof(void *), GFP_KERNEL); if (!p) return -ENOMEM; memset(p, 0, driver->num * 3 * sizeof(void *)); } if (!driver->major) { error = alloc_chrdev_region(&dev, driver->minor_start, driver->num, (char*)driver->name); if (!error) { driver->major = MAJOR(dev); driver->minor_start = MINOR(dev); } } else { dev = MKDEV(driver->major, driver->minor_start); error = register_chrdev_region(dev, driver->num, (char*)driver->name); } if (error < 0) { kfree(p); return error; } if (p) { driver->ttys = (struct tty_struct **)p; driver->termios = (struct termios **)(p + driver->num); driver->termios_locked = (struct termios **)(p + driver->num * 2); } else { driver->ttys = NULL; driver->termios = NULL; driver->termios_locked = NULL; } cdev_init(&driver->cdev, &tty_fops); driver->cdev.owner = driver->owner; error = cdev_add(&driver->cdev, dev, driver->num); if (error) { unregister_chrdev_region(dev, driver->num); driver->ttys = NULL; driver->termios = driver->termios_locked = NULL; kfree(p); return error; } if (!driver->put_char) driver->put_char = tty_default_put_char; list_add(&driver->tty_drivers, &tty_drivers); if ( !(driver->flags & TTY_DRIVER_DYNAMIC_DEV) ) { for(i = 0; i < driver->num; i++) tty_register_device(driver, i, NULL); } proc_tty_register_driver(driver); return 0; } EXPORT_SYMBOL(tty_register_driver); /* * Called by a tty driver to unregister itself. 
*/ int tty_unregister_driver(struct tty_driver *driver) { int i; struct termios *tp; void *p; if (driver->refcount) return -EBUSY; unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), driver->num); list_del(&driver->tty_drivers); /* * Free the termios and termios_locked structures because * we don't want to get memory leaks when modular tty * drivers are removed from the kernel. */ for (i = 0; i < driver->num; i++) { tp = driver->termios[i]; if (tp) { driver->termios[i] = NULL; kfree(tp); } tp = driver->termios_locked[i]; if (tp) { driver->termios_locked[i] = NULL; kfree(tp); } if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) tty_unregister_device(driver, i); } p = driver->ttys; proc_tty_unregister_driver(driver); driver->ttys = NULL; driver->termios = driver->termios_locked = NULL; kfree(p); cdev_del(&driver->cdev); return 0; } EXPORT_SYMBOL(tty_unregister_driver); /* * Initialize the console device. This is called *early*, so * we can't necessarily depend on lots of kernel help here. * Just do some early initializations, and do the complex setup * later. */ void __init console_init(void) { initcall_t *call; /* Setup the default TTY line discipline. */ (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY); /* * set up the console device so that later boot sequences can * inform about problems etc.. */ #ifdef CONFIG_EARLY_PRINTK disable_early_printk(); #endif call = __con_initcall_start; while (call < __con_initcall_end) { (*call)(); call++; } } #ifdef CONFIG_VT extern int vty_init(void); #endif static int __init tty_class_init(void) { tty_class = class_create(THIS_MODULE, "tty"); if (IS_ERR(tty_class)) return PTR_ERR(tty_class); return 0; } postcore_initcall(tty_class_init); /* 3/2004 jmc: why do these devices exist? */ static struct cdev tty_cdev, console_cdev; #ifdef CONFIG_UNIX98_PTYS static struct cdev ptmx_cdev; #endif #ifdef CONFIG_VT static struct cdev vc0_cdev; #endif /* * Ok, now we can initialize the rest of the tty devices and can count * on memory allocations, interrupts etc.. */ static int __init tty_init(void) { cdev_init(&tty_cdev, &tty_fops); if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0) panic("Couldn't register /dev/tty driver\n"); class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty"); cdev_init(&console_cdev, &console_fops); if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0) panic("Couldn't register /dev/console driver\n"); class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL, "console"); #ifdef CONFIG_UNIX98_PTYS cdev_init(&ptmx_cdev, &ptmx_fops); if (cdev_add(&ptmx_cdev, MKDEV(TTYAUX_MAJOR, 2), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0) panic("Couldn't register /dev/ptmx driver\n"); class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx"); #endif #ifdef CONFIG_VT cdev_init(&vc0_cdev, &console_fops); if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0) panic("Couldn't register /dev/tty0 driver\n"); class_device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0"); vty_init(); #endif return 0; } module_init(tty_init);
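/*
 * Illustrative sketch only (not part of the original driver set): how a
 * low-level serial-style driver might register itself using the helpers
 * defined above (alloc_tty_driver(), tty_set_operations(),
 * tty_register_driver(), put_tty_driver()). All "example_*" names, the
 * line count and the write_room figure are assumptions made for the
 * sketch; leaving major as 0 relies on tty_register_driver() allocating
 * a region via alloc_chrdev_region() as shown earlier in this file.
 */

static int example_open(struct tty_struct *tty, struct file *filp)
{
	return 0;
}

static void example_close(struct tty_struct *tty, struct file *filp)
{
}

static int example_write(struct tty_struct *tty,
			 const unsigned char *buf, int count)
{
	return count;		/* pretend all data was transmitted */
}

static int example_write_room(struct tty_struct *tty)
{
	return 4096;		/* claim plenty of buffer space */
}

static struct tty_operations example_ops = {
	.open		= example_open,
	.close		= example_close,
	.write		= example_write,
	.write_room	= example_write_room,
};

static struct tty_driver *example_driver;

static int __init example_tty_init(void)
{
	example_driver = alloc_tty_driver(4);		/* four lines */
	if (!example_driver)
		return -ENOMEM;
	example_driver->owner = THIS_MODULE;
	example_driver->driver_name = "example_serial";
	example_driver->name = "ttyEX";
	example_driver->major = 0;	/* 0: let the core pick a major */
	example_driver->type = TTY_DRIVER_TYPE_SERIAL;
	example_driver->subtype = SERIAL_TYPE_NORMAL;
	example_driver->init_termios = tty_std_termios;
	tty_set_operations(example_driver, &example_ops);
	if (tty_register_driver(example_driver)) {
		put_tty_driver(example_driver);
		return -ENODEV;
	}
	return 0;
}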
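/*
 * A second illustrative sketch: how a driver's receive path might hand
 * bytes to the line discipline through the flip buffers and
 * tty_flip_buffer_push() above. tty_insert_flip_string() is assumed to
 * be available from the tty buffer helpers elsewhere in the tree, and
 * example_rx_chars() is a hypothetical name.
 */

static void example_rx_chars(struct tty_struct *tty,
			     const unsigned char *data, int len)
{
	/* Queue the received bytes ... */
	tty_insert_flip_string(tty, data, len);
	/* ... then let flush_to_ldisc() feed them to the line discipline */
	tty_flip_buffer_push(tty);
}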