aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/kernel-parameters.txt3
-rw-r--r--Documentation/lguest/lguest.c178
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/net-modules.txt315
4 files changed, 108 insertions, 390 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a13d69b2217d..8ae5fac08dfa 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1444,7 +1444,8 @@ and is between 256 and 4096 characters. It is defined in the file
1444 Param: "schedule" - profile schedule points. 1444 Param: "schedule" - profile schedule points.
1445 Param: <number> - step/bucket size as a power of 2 for 1445 Param: <number> - step/bucket size as a power of 2 for
1446 statistical time based profiling. 1446 statistical time based profiling.
1447 Param: "sleep" - profile D-state sleeping (millisecs) 1447 Param: "sleep" - profile D-state sleeping (millisecs).
1448 Requires CONFIG_SCHEDSTATS
1448 Param: "kvm" - profile VM exits. 1449 Param: "kvm" - profile VM exits.
1449 1450
1450 processor.max_cstate= [HW,ACPI] 1451 processor.max_cstate= [HW,ACPI]
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 5bdc37f81842..f2668390e8f7 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -34,25 +34,24 @@
34#include <zlib.h> 34#include <zlib.h>
35#include <assert.h> 35#include <assert.h>
36#include <sched.h> 36#include <sched.h>
37/*L:110 We can ignore the 30 include files we need for this program, but I do
38 * want to draw attention to the use of kernel-style types.
39 *
40 * As Linus said, "C is a Spartan language, and so should your naming be." I
41 * like these abbreviations and the header we need uses them, so we define them
42 * here.
43 */
44typedef unsigned long long u64;
45typedef uint32_t u32;
46typedef uint16_t u16;
47typedef uint8_t u8;
48#include "linux/lguest_launcher.h" 37#include "linux/lguest_launcher.h"
49#include "linux/pci_ids.h"
50#include "linux/virtio_config.h" 38#include "linux/virtio_config.h"
51#include "linux/virtio_net.h" 39#include "linux/virtio_net.h"
52#include "linux/virtio_blk.h" 40#include "linux/virtio_blk.h"
53#include "linux/virtio_console.h" 41#include "linux/virtio_console.h"
54#include "linux/virtio_ring.h" 42#include "linux/virtio_ring.h"
55#include "asm-x86/bootparam.h" 43#include "asm-x86/bootparam.h"
44/*L:110 We can ignore the 38 include files we need for this program, but I do
45 * want to draw attention to the use of kernel-style types.
46 *
47 * As Linus said, "C is a Spartan language, and so should your naming be." I
48 * like these abbreviations, so we define them here. Note that u64 is always
49 * unsigned long long, which works on all Linux systems: this means that we can
50 * use %llu in printf for any u64. */
51typedef unsigned long long u64;
52typedef uint32_t u32;
53typedef uint16_t u16;
54typedef uint8_t u8;
56/*:*/ 55/*:*/
57 56
58#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ 57#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
@@ -361,8 +360,8 @@ static unsigned long load_bzimage(int fd)
361} 360}
362 361
363/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels 362/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels
364 * come wrapped up in the self-decompressing "bzImage" format. With some funky 363 * come wrapped up in the self-decompressing "bzImage" format. With a little
365 * coding, we can load those, too. */ 364 * work, we can load those, too. */
366static unsigned long load_kernel(int fd) 365static unsigned long load_kernel(int fd)
367{ 366{
368 Elf32_Ehdr hdr; 367 Elf32_Ehdr hdr;
@@ -465,6 +464,7 @@ static unsigned long setup_pagetables(unsigned long mem,
465 * to know where it is. */ 464 * to know where it is. */
466 return to_guest_phys(pgdir); 465 return to_guest_phys(pgdir);
467} 466}
467/*:*/
468 468
469/* Simple routine to roll all the commandline arguments together with spaces 469/* Simple routine to roll all the commandline arguments together with spaces
470 * between them. */ 470 * between them. */
@@ -481,9 +481,9 @@ static void concat(char *dst, char *args[])
481 dst[len] = '\0'; 481 dst[len] = '\0';
482} 482}
483 483
484/* This is where we actually tell the kernel to initialize the Guest. We saw 484/*L:185 This is where we actually tell the kernel to initialize the Guest. We
485 * the arguments it expects when we looked at initialize() in lguest_user.c: 485 * saw the arguments it expects when we looked at initialize() in lguest_user.c:
486 * the base of guest "physical" memory, the top physical page to allow, the 486 * the base of Guest "physical" memory, the top physical page to allow, the
487 * top level pagetable and the entry point for the Guest. */ 487 * top level pagetable and the entry point for the Guest. */
488static int tell_kernel(unsigned long pgdir, unsigned long start) 488static int tell_kernel(unsigned long pgdir, unsigned long start)
489{ 489{
@@ -513,13 +513,14 @@ static void add_device_fd(int fd)
513/*L:200 513/*L:200
514 * The Waker. 514 * The Waker.
515 * 515 *
516 * With a console and network devices, we can have lots of input which we need 516 * With console, block and network devices, we can have lots of input which we
517 * to process. We could try to tell the kernel what file descriptors to watch, 517 * need to process. We could try to tell the kernel what file descriptors to
518 * but handing a file descriptor mask through to the kernel is fairly icky. 518 * watch, but handing a file descriptor mask through to the kernel is fairly
519 * icky.
519 * 520 *
520 * Instead, we fork off a process which watches the file descriptors and writes 521 * Instead, we fork off a process which watches the file descriptors and writes
521 * the LHREQ_BREAK command to the /dev/lguest filedescriptor to tell the Host 522 * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
522 * loop to stop running the Guest. This causes it to return from the 523 * stop running the Guest. This causes the Launcher to return from the
523 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset 524 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
524 * the LHREQ_BREAK and wake us up again. 525 * the LHREQ_BREAK and wake us up again.
525 * 526 *
@@ -545,7 +546,9 @@ static void wake_parent(int pipefd, int lguest_fd)
545 if (read(pipefd, &fd, sizeof(fd)) == 0) 546 if (read(pipefd, &fd, sizeof(fd)) == 0)
546 exit(0); 547 exit(0);
547 /* Otherwise it's telling us to change what file 548 /* Otherwise it's telling us to change what file
548 * descriptors we're to listen to. */ 549 * descriptors we're to listen to. Positive means
550 * listen to a new one, negative means stop
551 * listening. */
549 if (fd >= 0) 552 if (fd >= 0)
550 FD_SET(fd, &devices.infds); 553 FD_SET(fd, &devices.infds);
551 else 554 else
@@ -560,7 +563,7 @@ static int setup_waker(int lguest_fd)
560{ 563{
561 int pipefd[2], child; 564 int pipefd[2], child;
562 565
563 /* We create a pipe to talk to the waker, and also so it knows when the 566 /* We create a pipe to talk to the Waker, and also so it knows when the
564 * Launcher dies (and closes pipe). */ 567 * Launcher dies (and closes pipe). */
565 pipe(pipefd); 568 pipe(pipefd);
566 child = fork(); 569 child = fork();
@@ -568,7 +571,8 @@ static int setup_waker(int lguest_fd)
568 err(1, "forking"); 571 err(1, "forking");
569 572
570 if (child == 0) { 573 if (child == 0) {
571 /* Close the "writing" end of our copy of the pipe */ 574 /* We are the Waker: close the "writing" end of our copy of the
575 * pipe and start waiting for input. */
572 close(pipefd[1]); 576 close(pipefd[1]);
573 wake_parent(pipefd[0], lguest_fd); 577 wake_parent(pipefd[0], lguest_fd);
574 } 578 }
@@ -579,12 +583,12 @@ static int setup_waker(int lguest_fd)
579 return pipefd[1]; 583 return pipefd[1];
580} 584}
581 585
582/*L:210 586/*
583 * Device Handling. 587 * Device Handling.
584 * 588 *
585 * When the Guest sends DMA to us, it sends us an array of addresses and sizes. 589 * When the Guest gives us a buffer, it sends an array of addresses and sizes.
586 * We need to make sure it's not trying to reach into the Launcher itself, so 590 * We need to make sure it's not trying to reach into the Launcher itself, so
587 * we have a convenient routine which check it and exits with an error message 591 * we have a convenient routine which checks it and exits with an error message
588 * if something funny is going on: 592 * if something funny is going on:
589 */ 593 */
590static void *_check_pointer(unsigned long addr, unsigned int size, 594static void *_check_pointer(unsigned long addr, unsigned int size,
@@ -601,7 +605,9 @@ static void *_check_pointer(unsigned long addr, unsigned int size,
601/* A macro which transparently hands the line number to the real function. */ 605/* A macro which transparently hands the line number to the real function. */
602#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) 606#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
603 607
604/* This function returns the next descriptor in the chain, or vq->vring.num. */ 608/* Each buffer in the virtqueues is actually a chain of descriptors. This
609 * function returns the next descriptor in the chain, or vq->vring.num if we're
610 * at the end. */
605static unsigned next_desc(struct virtqueue *vq, unsigned int i) 611static unsigned next_desc(struct virtqueue *vq, unsigned int i)
606{ 612{
607 unsigned int next; 613 unsigned int next;
@@ -680,13 +686,14 @@ static unsigned get_vq_desc(struct virtqueue *vq,
680 return head; 686 return head;
681} 687}
682 688
683/* Once we've used one of their buffers, we tell them about it. We'll then 689/* After we've used one of their buffers, we tell them about it. We'll then
684 * want to send them an interrupt, using trigger_irq(). */ 690 * want to send them an interrupt, using trigger_irq(). */
685static void add_used(struct virtqueue *vq, unsigned int head, int len) 691static void add_used(struct virtqueue *vq, unsigned int head, int len)
686{ 692{
687 struct vring_used_elem *used; 693 struct vring_used_elem *used;
688 694
689 /* Get a pointer to the next entry in the used ring. */ 695 /* The virtqueue contains a ring of used buffers. Get a pointer to the
696 * next entry in that used ring. */
690 used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; 697 used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
691 used->id = head; 698 used->id = head;
692 used->len = len; 699 used->len = len;
@@ -700,6 +707,7 @@ static void trigger_irq(int fd, struct virtqueue *vq)
700{ 707{
701 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; 708 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
702 709
710 /* If they don't want an interrupt, don't send one. */
703 if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) 711 if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
704 return; 712 return;
705 713
@@ -716,8 +724,11 @@ static void add_used_and_trigger(int fd, struct virtqueue *vq,
716 trigger_irq(fd, vq); 724 trigger_irq(fd, vq);
717} 725}
718 726
719/* Here is the input terminal setting we save, and the routine to restore them 727/*
720 * on exit so the user can see what they type next. */ 728 * The Console
729 *
730 * Here is the input terminal setting we save, and the routine to restore them
731 * on exit so the user gets their terminal back. */
721static struct termios orig_term; 732static struct termios orig_term;
722static void restore_term(void) 733static void restore_term(void)
723{ 734{
@@ -818,7 +829,10 @@ static void handle_console_output(int fd, struct virtqueue *vq)
818 } 829 }
819} 830}
820 831
821/* Handling output for network is also simple: we get all the output buffers 832/*
833 * The Network
834 *
835 * Handling output for network is also simple: we get all the output buffers
822 * and write them (ignoring the first element) to this device's file descriptor 836 * and write them (ignoring the first element) to this device's file descriptor
823 * (stdout). */ 837 * (stdout). */
824static void handle_net_output(int fd, struct virtqueue *vq) 838static void handle_net_output(int fd, struct virtqueue *vq)
@@ -831,8 +845,9 @@ static void handle_net_output(int fd, struct virtqueue *vq)
831 while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { 845 while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
832 if (in) 846 if (in)
833 errx(1, "Input buffers in output queue?"); 847 errx(1, "Input buffers in output queue?");
834 /* Check header, but otherwise ignore it (we said we supported 848 /* Check header, but otherwise ignore it (we told the Guest we
835 * no features). */ 849 * supported no features, so it shouldn't have anything
850 * interesting). */
836 (void)convert(&iov[0], struct virtio_net_hdr); 851 (void)convert(&iov[0], struct virtio_net_hdr);
837 len = writev(vq->dev->fd, iov+1, out-1); 852 len = writev(vq->dev->fd, iov+1, out-1);
838 add_used_and_trigger(fd, vq, head, len); 853 add_used_and_trigger(fd, vq, head, len);
@@ -883,7 +898,8 @@ static bool handle_tun_input(int fd, struct device *dev)
883 return true; 898 return true;
884} 899}
885 900
886/* This callback ensures we try again, in case we stopped console or net 901/*L:215 This is the callback attached to the network and console input
902 * virtqueues: it ensures we try again, in case we stopped console or net
887 * delivery because Guest didn't have any buffers. */ 903 * delivery because Guest didn't have any buffers. */
888static void enable_fd(int fd, struct virtqueue *vq) 904static void enable_fd(int fd, struct virtqueue *vq)
889{ 905{
@@ -919,7 +935,7 @@ static void handle_output(int fd, unsigned long addr)
919 strnlen(from_guest_phys(addr), guest_limit - addr)); 935 strnlen(from_guest_phys(addr), guest_limit - addr));
920} 936}
921 937
922/* This is called when the waker wakes us up: check for incoming file 938/* This is called when the Waker wakes us up: check for incoming file
923 * descriptors. */ 939 * descriptors. */
924static void handle_input(int fd) 940static void handle_input(int fd)
925{ 941{
@@ -986,8 +1002,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
986} 1002}
987 1003
988/* Each device descriptor is followed by some configuration information. 1004/* Each device descriptor is followed by some configuration information.
989 * The first byte is a "status" byte for the Guest to report what's happening. 1005 * Each configuration field looks like: u8 type, u8 len, [... len bytes...].
990 * After that are fields: u8 type, u8 len, [... len bytes...].
991 * 1006 *
992 * This routine adds a new field to an existing device's descriptor. It only 1007 * This routine adds a new field to an existing device's descriptor. It only
993 * works for the last device, but that's OK because that's how we use it. */ 1008 * works for the last device, but that's OK because that's how we use it. */
@@ -1044,14 +1059,17 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1044 /* Link virtqueue back to device. */ 1059 /* Link virtqueue back to device. */
1045 vq->dev = dev; 1060 vq->dev = dev;
1046 1061
1047 /* Set up handler. */ 1062 /* Set the routine to call when the Guest does something to this
1063 * virtqueue. */
1048 vq->handle_output = handle_output; 1064 vq->handle_output = handle_output;
1065
1066 /* Set the "Don't Notify Me" flag if we don't have a handler */
1049 if (!handle_output) 1067 if (!handle_output)
1050 vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; 1068 vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
1051} 1069}
1052 1070
1053/* This routine does all the creation and setup of a new device, including 1071/* This routine does all the creation and setup of a new device, including
1054 * caling new_dev_desc() to allocate the descriptor and device memory. */ 1072 * calling new_dev_desc() to allocate the descriptor and device memory. */
1055static struct device *new_device(const char *name, u16 type, int fd, 1073static struct device *new_device(const char *name, u16 type, int fd,
1056 bool (*handle_input)(int, struct device *)) 1074 bool (*handle_input)(int, struct device *))
1057{ 1075{
@@ -1060,7 +1078,7 @@ static struct device *new_device(const char *name, u16 type, int fd,
1060 /* Append to device list. Prepending to a single-linked list is 1078 /* Append to device list. Prepending to a single-linked list is
1061 * easier, but the user expects the devices to be arranged on the bus 1079 * easier, but the user expects the devices to be arranged on the bus
1062 * in command-line order. The first network device on the command line 1080 * in command-line order. The first network device on the command line
1063 * is eth0, the first block device /dev/lgba, etc. */ 1081 * is eth0, the first block device /dev/vda, etc. */
1064 *devices.lastdev = dev; 1082 *devices.lastdev = dev;
1065 dev->next = NULL; 1083 dev->next = NULL;
1066 devices.lastdev = &dev->next; 1084 devices.lastdev = &dev->next;
@@ -1104,7 +1122,7 @@ static void setup_console(void)
1104 /* The console needs two virtqueues: the input then the output. When 1122 /* The console needs two virtqueues: the input then the output. When
1105 * they put something the input queue, we make sure we're listening to 1123 * they put something the input queue, we make sure we're listening to
1106 * stdin. When they put something in the output queue, we write it to 1124 * stdin. When they put something in the output queue, we write it to
1107 * stdout. */ 1125 * stdout. */
1108 add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); 1126 add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
1109 add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); 1127 add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output);
1110 1128
@@ -1252,21 +1270,17 @@ static void setup_tun_net(const char *arg)
1252 verbose("attached to bridge: %s\n", br_name); 1270 verbose("attached to bridge: %s\n", br_name);
1253} 1271}
1254 1272
1255 1273/* Our block (disk) device should be really simple: the Guest asks for a block
1256/* 1274 * number and we read or write that position in the file. Unfortunately, that
1257 * Block device. 1275 * was amazingly slow: the Guest waits until the read is finished before
1276 * running anything else, even if it could have been doing useful work.
1258 * 1277 *
1259 * Serving a block device is really easy: the Guest asks for a block number and 1278 * We could use async I/O, except it's reputed to suck so hard that characters
1260 * we read or write that position in the file. 1279 * actually go missing from your code when you try to use it.
1261 *
1262 * Unfortunately, this is amazingly slow: the Guest waits until the read is
1263 * finished before running anything else, even if it could be doing useful
1264 * work. We could use async I/O, except it's reputed to suck so hard that
1265 * characters actually go missing from your code when you try to use it.
1266 * 1280 *
1267 * So we farm the I/O out to thread, and communicate with it via a pipe. */ 1281 * So we farm the I/O out to thread, and communicate with it via a pipe. */
1268 1282
1269/* This hangs off device->priv, with the data. */ 1283/* This hangs off device->priv. */
1270struct vblk_info 1284struct vblk_info
1271{ 1285{
1272 /* The size of the file. */ 1286 /* The size of the file. */
@@ -1282,8 +1296,14 @@ struct vblk_info
1282 * Launcher triggers interrupt to Guest. */ 1296 * Launcher triggers interrupt to Guest. */
1283 int done_fd; 1297 int done_fd;
1284}; 1298};
1299/*:*/
1285 1300
1286/* This is the core of the I/O thread. It returns true if it did something. */ 1301/*L:210
1302 * The Disk
1303 *
1304 * Remember that the block device is handled by a separate I/O thread. We head
1305 * straight into the core of that thread here:
1306 */
1287static bool service_io(struct device *dev) 1307static bool service_io(struct device *dev)
1288{ 1308{
1289 struct vblk_info *vblk = dev->priv; 1309 struct vblk_info *vblk = dev->priv;
@@ -1294,10 +1314,14 @@ static bool service_io(struct device *dev)
1294 struct iovec iov[dev->vq->vring.num]; 1314 struct iovec iov[dev->vq->vring.num];
1295 off64_t off; 1315 off64_t off;
1296 1316
1317 /* See if there's a request waiting. If not, nothing to do. */
1297 head = get_vq_desc(dev->vq, iov, &out_num, &in_num); 1318 head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
1298 if (head == dev->vq->vring.num) 1319 if (head == dev->vq->vring.num)
1299 return false; 1320 return false;
1300 1321
1322 /* Every block request should contain at least one output buffer
1323 * (detailing the location on disk and the type of request) and one
1324 * input buffer (to hold the result). */
1301 if (out_num == 0 || in_num == 0) 1325 if (out_num == 0 || in_num == 0)
1302 errx(1, "Bad virtblk cmd %u out=%u in=%u", 1326 errx(1, "Bad virtblk cmd %u out=%u in=%u",
1303 head, out_num, in_num); 1327 head, out_num, in_num);
@@ -1306,10 +1330,15 @@ static bool service_io(struct device *dev)
1306 in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); 1330 in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr);
1307 off = out->sector * 512; 1331 off = out->sector * 512;
1308 1332
1309 /* This is how we implement barriers. Pretty poor, no? */ 1333 /* The block device implements "barriers", where the Guest indicates
1334 * that it wants all previous writes to occur before this write. We
1335 * don't have a way of asking our kernel to do a barrier, so we just
1336 * synchronize all the data in the file. Pretty poor, no? */
1310 if (out->type & VIRTIO_BLK_T_BARRIER) 1337 if (out->type & VIRTIO_BLK_T_BARRIER)
1311 fdatasync(vblk->fd); 1338 fdatasync(vblk->fd);
1312 1339
1340 /* In general the virtio block driver is allowed to try SCSI commands.
1341 * It'd be nice if we supported eject, for example, but we don't. */
1313 if (out->type & VIRTIO_BLK_T_SCSI_CMD) { 1342 if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
1314 fprintf(stderr, "Scsi commands unsupported\n"); 1343 fprintf(stderr, "Scsi commands unsupported\n");
1315 in->status = VIRTIO_BLK_S_UNSUPP; 1344 in->status = VIRTIO_BLK_S_UNSUPP;
@@ -1375,7 +1404,7 @@ static int io_thread(void *_dev)
1375 1404
1376 /* When this read fails, it means Launcher died, so we follow. */ 1405 /* When this read fails, it means Launcher died, so we follow. */
1377 while (read(vblk->workpipe[0], &c, 1) == 1) { 1406 while (read(vblk->workpipe[0], &c, 1) == 1) {
1378 /* We acknowledge each request immediately, to reduce latency, 1407 /* We acknowledge each request immediately to reduce latency,
1379 * rather than waiting until we've done them all. I haven't 1408 * rather than waiting until we've done them all. I haven't
1380 * measured to see if it makes any difference. */ 1409 * measured to see if it makes any difference. */
1381 while (service_io(dev)) 1410 while (service_io(dev))
@@ -1384,12 +1413,14 @@ static int io_thread(void *_dev)
1384 return 0; 1413 return 0;
1385} 1414}
1386 1415
1387/* When the thread says some I/O is done, we interrupt the Guest. */ 1416/* Now we've seen the I/O thread, we return to the Launcher to see what happens
1417 * when the thread tells us it's completed some I/O. */
1388static bool handle_io_finish(int fd, struct device *dev) 1418static bool handle_io_finish(int fd, struct device *dev)
1389{ 1419{
1390 char c; 1420 char c;
1391 1421
1392 /* If child died, presumably it printed message. */ 1422 /* If the I/O thread died, presumably it printed the error, so we
1423 * simply exit. */
1393 if (read(dev->fd, &c, 1) != 1) 1424 if (read(dev->fd, &c, 1) != 1)
1394 exit(1); 1425 exit(1);
1395 1426
@@ -1398,7 +1429,7 @@ static bool handle_io_finish(int fd, struct device *dev)
1398 return true; 1429 return true;
1399} 1430}
1400 1431
1401/* When the Guest submits some I/O, we wake the I/O thread. */ 1432/* When the Guest submits some I/O, we just need to wake the I/O thread. */
1402static void handle_virtblk_output(int fd, struct virtqueue *vq) 1433static void handle_virtblk_output(int fd, struct virtqueue *vq)
1403{ 1434{
1404 struct vblk_info *vblk = vq->dev->priv; 1435 struct vblk_info *vblk = vq->dev->priv;
@@ -1410,7 +1441,7 @@ static void handle_virtblk_output(int fd, struct virtqueue *vq)
1410 exit(1); 1441 exit(1);
1411} 1442}
1412 1443
1413/* This creates a virtual block device. */ 1444/*L:198 This actually sets up a virtual block device. */
1414static void setup_block_file(const char *filename) 1445static void setup_block_file(const char *filename)
1415{ 1446{
1416 int p[2]; 1447 int p[2];
@@ -1426,7 +1457,7 @@ static void setup_block_file(const char *filename)
1426 /* The device responds to return from I/O thread. */ 1457 /* The device responds to return from I/O thread. */
1427 dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish); 1458 dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish);
1428 1459
1429 /* The device has a virtqueue. */ 1460 /* The device has one virtqueue, where the Guest places requests. */
1430 add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output); 1461 add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);
1431 1462
1432 /* Allocate the room for our own bookkeeping */ 1463 /* Allocate the room for our own bookkeeping */
@@ -1448,7 +1479,8 @@ static void setup_block_file(const char *filename)
1448 /* The I/O thread writes to this end of the pipe when done. */ 1479 /* The I/O thread writes to this end of the pipe when done. */
1449 vblk->done_fd = p[1]; 1480 vblk->done_fd = p[1];
1450 1481
1451 /* This is how we tell the I/O thread about more work. */ 1482 /* This is the second pipe, which is how we tell the I/O thread about
1483 * more work. */
1452 pipe(vblk->workpipe); 1484 pipe(vblk->workpipe);
1453 1485
1454 /* Create stack for thread and run it */ 1486 /* Create stack for thread and run it */
@@ -1487,24 +1519,25 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1487 char reason[1024] = { 0 }; 1519 char reason[1024] = { 0 };
1488 read(lguest_fd, reason, sizeof(reason)-1); 1520 read(lguest_fd, reason, sizeof(reason)-1);
1489 errx(1, "%s", reason); 1521 errx(1, "%s", reason);
1490 /* EAGAIN means the waker wanted us to look at some input. 1522 /* EAGAIN means the Waker wanted us to look at some input.
1491 * Anything else means a bug or incompatible change. */ 1523 * Anything else means a bug or incompatible change. */
1492 } else if (errno != EAGAIN) 1524 } else if (errno != EAGAIN)
1493 err(1, "Running guest failed"); 1525 err(1, "Running guest failed");
1494 1526
1495 /* Service input, then unset the BREAK which releases 1527 /* Service input, then unset the BREAK to release the Waker. */
1496 * the Waker. */
1497 handle_input(lguest_fd); 1528 handle_input(lguest_fd);
1498 if (write(lguest_fd, args, sizeof(args)) < 0) 1529 if (write(lguest_fd, args, sizeof(args)) < 0)
1499 err(1, "Resetting break"); 1530 err(1, "Resetting break");
1500 } 1531 }
1501} 1532}
1502/* 1533/*
1503 * This is the end of the Launcher. 1534 * This is the end of the Launcher. The good news: we are over halfway
1535 * through! The bad news: the most fiendish part of the code still lies ahead
1536 * of us.
1504 * 1537 *
1505 * But wait! We've seen I/O from the Launcher, and we've seen I/O from the 1538 * Are you ready? Take a deep breath and join me in the core of the Host, in
1506 * Drivers. If we were to see the Host kernel I/O code, our understanding 1539 * "make Host".
1507 * would be complete... :*/ 1540 :*/
1508 1541
1509static struct option opts[] = { 1542static struct option opts[] = {
1510 { "verbose", 0, NULL, 'v' }, 1543 { "verbose", 0, NULL, 'v' },
@@ -1527,7 +1560,7 @@ int main(int argc, char *argv[])
1527 /* Memory, top-level pagetable, code startpoint and size of the 1560 /* Memory, top-level pagetable, code startpoint and size of the
1528 * (optional) initrd. */ 1561 * (optional) initrd. */
1529 unsigned long mem = 0, pgdir, start, initrd_size = 0; 1562 unsigned long mem = 0, pgdir, start, initrd_size = 0;
1530 /* A temporary and the /dev/lguest file descriptor. */ 1563 /* Two temporaries and the /dev/lguest file descriptor. */
1531 int i, c, lguest_fd; 1564 int i, c, lguest_fd;
1532 /* The boot information for the Guest. */ 1565 /* The boot information for the Guest. */
1533 struct boot_params *boot; 1566 struct boot_params *boot;
@@ -1622,6 +1655,7 @@ int main(int argc, char *argv[])
1622 /* The boot header contains a command line pointer: we put the command 1655 /* The boot header contains a command line pointer: we put the command
1623 * line after the boot header. */ 1656 * line after the boot header. */
1624 boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); 1657 boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1);
1658 /* We use a simple helper to copy the arguments separated by spaces. */
1625 concat((char *)(boot + 1), argv+optind+2); 1659 concat((char *)(boot + 1), argv+optind+2);
1626 1660
1627 /* Boot protocol version: 2.07 supports the fields for lguest. */ 1661 /* Boot protocol version: 2.07 supports the fields for lguest. */
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 153d84d281e6..f5a5e6d3d541 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -80,8 +80,6 @@ multicast.txt
80 - Behaviour of cards under Multicast 80 - Behaviour of cards under Multicast
81ncsa-telnet 81ncsa-telnet
82 - notes on how NCSA telnet (DOS) breaks with MTU discovery enabled. 82 - notes on how NCSA telnet (DOS) breaks with MTU discovery enabled.
83net-modules.txt
84 - info and "insmod" parameters for all network driver modules.
85netdevices.txt 83netdevices.txt
86 - info on network device driver functions exported to the kernel. 84 - info on network device driver functions exported to the kernel.
87olympic.txt 85olympic.txt
diff --git a/Documentation/networking/net-modules.txt b/Documentation/networking/net-modules.txt
deleted file mode 100644
index 98c4392dd0fd..000000000000
--- a/Documentation/networking/net-modules.txt
+++ /dev/null
@@ -1,315 +0,0 @@
1Wed 2-Aug-95 <matti.aarnio@utu.fi>
2
3 Linux network driver modules
4
5 Do not mistake this for "README.modules" at the top-level
6 directory! That document tells about modules in general, while
7 this one tells only about network device driver modules.
8
9 This is a potpourri of INSMOD-time(*) configuration options
10 (if such exists) and their default values of various modules
11 in the Linux network drivers collection.
12
13 Some modules have also hidden (= non-documented) tunable values.
14 The choice of not documenting them is based on general belief, that
15 the less the user needs to know, the better. (There are things that
16 driver developers can use, others should not confuse themselves.)
17
18 In many cases it is highly preferred that insmod:ing is done
19 ONLY with defining an explicit address for the card, AND BY
20 NOT USING AUTO-PROBING!
21
22 Now most cards have some explicitly defined base address that they
23 are compiled with (to avoid auto-probing, among other things).
24 If that compiled value does not match your actual configuration,
25 do use the "io=0xXXX" -parameter for the insmod, and give there
26 a value matching your environment.
27
28 If you are adventurous, you can ask the driver to autoprobe
29 by using the "io=0" parameter, however it is a potentially dangerous
30 thing to do in a live system. (If you don't know where the
31 card is located, you can try autoprobing, and after possible
32 crash recovery, insmod with proper IO-address..)
33
34 --------------------------
35 (*) "INSMOD-time" means when you load module with
36 /sbin/insmod you can feed it optional parameters.
37 See "man insmod".
38 --------------------------
39
40
41 8390 based Network Modules (Paul Gortmaker, Nov 12, 1995)
42 --------------------------
43
44(Includes: smc-ultra, ne, wd, 3c503, hp, hp-plus, e2100 and ac3200)
45
46The 8390 series of network drivers now support multiple card systems without
47reloading the same module multiple times (memory efficient!) This is done by
48specifying multiple comma separated values, such as:
49
50 insmod 3c503.o io=0x280,0x300,0x330,0x350 xcvr=0,1,0,1
51
52The above would have the one module controlling four 3c503 cards, with card 2
53and 4 using external transceivers. The "insmod" manual describes the usage
54of comma separated value lists.
55
56It is *STRONGLY RECOMMENDED* that you supply "io=" instead of autoprobing.
57If an "io=" argument is not supplied, then the ISA drivers will complain
58about autoprobing being not recommended, and begrudgingly autoprobe for
59a *SINGLE CARD ONLY* -- if you want to use multiple cards you *have* to
60supply an "io=0xNNN,0xQQQ,..." argument.
61
62The ne module is an exception to the above. A NE2000 is essentially an
638390 chip, some bus glue and some RAM. Because of this, the ne probe is
64more invasive than the rest, and so at boot we make sure the ne probe is
65done last of all the 8390 cards (so that it won't trip over other 8390 based
66cards) With modules we can't ensure that all other non-ne 8390 cards have
67already been found. Because of this, the ne module REQUIRES an "io=0xNNN"
68argument passed in via insmod. It will refuse to autoprobe.
69
70It is also worth noting that auto-IRQ probably isn't as reliable during
71the flurry of interrupt activity on a running machine. Cards such as the
72ne2000 that can't get the IRQ setting from an EEPROM or configuration
73register are probably best supplied with an "irq=M" argument as well.
74
75
76----------------------------------------------------------------------
77Card/Module List - Configurable Parameters and Default Values
78----------------------------------------------------------------------
79
803c501.c:
81 io = 0x280 IO base address
82 irq = 5 IRQ
83 (Probes ports: 0x280, 0x300)
84
853c503.c:
86 io = 0 (It will complain if you don't supply an "io=0xNNN")
87 irq = 0 (IRQ software selected by driver using autoIRQ)
88 xcvr = 0 (Use xcvr=1 to select external transceiver.)
89 (Probes ports: 0x300, 0x310, 0x330, 0x350, 0x250, 0x280, 0x2A0, 0x2E0)
90
913c505.c:
92 io = 0
93 irq = 0
94 dma = 6 (not autoprobed)
95 (Probes ports: 0x300, 0x280, 0x310)
96
973c507.c:
98 io = 0x300
99 irq = 0
100 (Probes ports: 0x300, 0x320, 0x340, 0x280)
101
1023c509.c:
103 io = 0
104 irq = 0
105 ( Module load-time probing Works reliably only on EISA, ISA ID-PROBE
106 IS NOT RELIABLE! Compile this driver statically into kernel for
107 now, if you need it auto-probing on an ISA-bus machine. )
108
1098390.c:
110 (No public options, several other modules need this one)
111
112a2065.c:
113 Since this is a Zorro board, it supports full autoprobing, even for
114 multiple boards. (m68k/Amiga)
115
116ac3200.c:
117 io = 0 (Checks 0x1000 to 0x8fff in 0x1000 intervals)
118 irq = 0 (Read from config register)
119 (EISA probing..)
120
121apricot.c:
122 io = 0x300 (Can't be altered!)
123 irq = 10
124
125arcnet.c:
126 io = 0
127 irqnum = 0
128 shmem = 0
129 num = 0
130 DO SET THESE MANUALLY AT INSMOD!
131 (When probing, looks at the following possible addresses:
132 Suggested ones:
133 0x300, 0x2E0, 0x2F0, 0x2D0
134 Other ones:
135 0x200, 0x210, 0x220, 0x230, 0x240, 0x250, 0x260, 0x270,
136 0x280, 0x290, 0x2A0, 0x2B0, 0x2C0,
137 0x310, 0x320, 0x330, 0x340, 0x350, 0x360, 0x370,
138 0x380, 0x390, 0x3A0, 0x3E0, 0x3F0 )
139
140ariadne.c:
141 Since this is a Zorro board, it supports full autoprobing, even for
142 multiple boards. (m68k/Amiga)
143
144at1700.c:
145 io = 0x260
146 irq = 0
147 (Probes ports: 0x260, 0x280, 0x2A0, 0x240, 0x340, 0x320, 0x380, 0x300)
148
149atarilance.c:
150 Supports full autoprobing. (m68k/Atari)
151
152atp.c: *Not modularized*
153 (Probes ports: 0x378, 0x278, 0x3BC;
154 fixed IRQs: 5 and 7 )
155
156cops.c:
157 io = 0x240
158 irq = 5
159 nodeid = 0 (AutoSelect = 0, NodeID 1-254 is hand selected.)
160 (Probes ports: 0x240, 0x340, 0x200, 0x210, 0x220, 0x230, 0x260,
161 0x2A0, 0x300, 0x310, 0x320, 0x330, 0x350, 0x360)
162
163de4x5.c:
164 io = 0x000b
165 irq = 10
166 is_not_dec = 0 -- For non-DEC card using DEC 21040/21041/21140 chip, set this to 1
167 (EISA, and PCI probing)
168
169de600.c:
170 de600_debug = 0
171 (On port 0x378, irq 7 -- lpt1; compile time configurable)
172
173de620.c:
174 bnc = 0, utp = 0 <-- Force media by setting either.
175 io = 0x378 (also compile-time configurable)
176 irq = 7
177
178depca.c:
179 io = 0x200
180 irq = 7
181 (Probes ports: ISA: 0x300, 0x200;
182 EISA: 0x0c00 )
183
184dummy.c:
185 No options
186
187e2100.c:
188 io = 0 (It will complain if you don't supply an "io=0xNNN")
189 irq = 0 (IRQ software selected by driver)
190 mem = 0 (Override default shared memory start of 0xd0000)
191 xcvr = 0 (Use xcvr=1 to select external transceiver.)
192 (Probes ports: 0x300, 0x280, 0x380, 0x220)
193
194eepro.c:
195 io = 0x200
196 irq = 0
197 (Probes ports: 0x200, 0x240, 0x280, 0x2C0, 0x300, 0x320, 0x340, 0x360)
198
199eexpress.c:
200 io = 0x300
201 irq = 0 (IRQ value read from EEPROM)
202 (Probes ports: 0x300, 0x270, 0x320, 0x340)
203
204eql.c:
205 (No parameters)
206
207ewrk3.c:
208 io = 0x300
209 irq = 5
210 (With module no autoprobing!
211 On EISA-bus does EISA probing.
212 Static linkage probes ports on ISA bus:
213 0x100, 0x120, 0x140, 0x160, 0x180, 0x1A0, 0x1C0,
214 0x200, 0x220, 0x240, 0x260, 0x280, 0x2A0, 0x2C0, 0x2E0,
215 0x300, 0x340, 0x360, 0x380, 0x3A0, 0x3C0)
216
217hp-plus.c:
218 io = 0 (It will complain if you don't supply an "io=0xNNN")
219 irq = 0 (IRQ read from configuration register)
220 (Probes ports: 0x200, 0x240, 0x280, 0x2C0, 0x300, 0x320, 0x340)
221
222hp.c:
223 io = 0 (It will complain if you don't supply an "io=0xNNN")
224 irq = 0 (IRQ software selected by driver using autoIRQ)
225 (Probes ports: 0x300, 0x320, 0x340, 0x280, 0x2C0, 0x200, 0x240)
226
227hp100.c:
228 hp100_port = 0 (IO-base address)
229 (Does EISA-probing, if on EISA-slot;
230 On ISA-bus probes all ports from 0x100 thru to 0x3E0
231 in increments of 0x020)
232
233hydra.c:
234 Since this is a Zorro board, it supports full autoprobing, even for
235 multiple boards. (m68k/Amiga)
236
237ibmtr.c:
238 io = 0xa20, 0xa24 (autoprobed by default)
239 irq = 0 (driver cannot select irq - read from hardware)
240 mem = 0 (shared memory base set at 0xd0000 and not yet
241 able to override thru mem= parameter.)
242
243lance.c: *Not modularized*
244 (PCI, and ISA probing; "CONFIG_PCI" needed for PCI support)
245 (Probes ISA ports: 0x300, 0x320, 0x340, 0x360)
246
247loopback.c: *Static kernel component*
248
249ne.c:
250 io = 0 (Explicitly *requires* an "io=0xNNN" value)
251 irq = 0 (Tries to determine configured IRQ via autoIRQ)
252 (Probes ports: 0x300, 0x280, 0x320, 0x340, 0x360)
253
254net_init.c: *Static kernel component*
255
256ni52.c: *Not modularized*
257 (Probes ports: 0x300, 0x280, 0x360, 0x320, 0x340
258 mems: 0xD0000, 0xD2000, 0xC8000, 0xCA000,
259 0xD4000, 0xD6000, 0xD8000 )
260
261ni65.c: *Not modularized* **16MB MEMORY BARRIER BUG**
262 (Probes ports: 0x300, 0x320, 0x340, 0x360)
263
264pi2.c: *Not modularized* (well, NON-STANDARD modularization!)
265 Only one card supported at this time.
266 (Probes ports: 0x380, 0x300, 0x320, 0x340, 0x360, 0x3A0)
267
268plip.c:
269 io = 0
270 irq = 0 (by default, uses IRQ 5 for port at 0x3bc, IRQ 7
271 for port at 0x378, and IRQ 2 for port at 0x278)
272 (Probes ports: 0x278, 0x378, 0x3bc)
273
274ppp.c:
275 No options (ppp-2.2+ has some, this is based on non-dynamic
276 version from ppp-2.1.2d)
277
278seeq8005.c: *Not modularized*
279 (Probes ports: 0x300, 0x320, 0x340, 0x360)
280
281skeleton.c: *Skeleton*
282
283slhc.c:
284 No configuration parameters
285
286slip.c:
287 slip_maxdev = 256 (default value from SL_NRUNIT on slip.h)
288
289
290smc-ultra.c:
291 io = 0 (It will complain if you don't supply an "io=0xNNN")
292 irq = 0 (IRQ val. read from EEPROM)
293 (Probes ports: 0x200, 0x220, 0x240, 0x280, 0x300, 0x340, 0x380)
294
295tulip.c: *Partial modularization*
296 (init-time memory allocation makes problems..)
297
298tunnel.c:
299 No insmod parameters
300
301wavelan.c:
302 io = 0x390 (Settable, but change not recommended)
303 irq = 0 (Not honoured, if changed..)
304
305wd.c:
306 io = 0 (It will complain if you don't supply an "io=0xNNN")
307 irq = 0 (IRQ val. read from EEPROM, ancient cards use autoIRQ)
308 mem = 0 (Force shared-memory on address 0xC8000, or whatever..)
309 mem_end = 0 (Force non-std. mem. size via supplying mem_end val.)
310 (eg. for 32k WD8003EBT, use mem=0xd0000 mem_end=0xd8000)
311 (Probes ports: 0x300, 0x280, 0x380, 0x240)
312
313znet.c: *Not modularized*
314 (Only one device on Zenith Z-Note (notebook?) systems,
315 configuration information from (EE)PROM)