author    Ville Tervo <ville.tervo@nokia.com>         2011-02-10 20:38:47 -0500
committer Gustavo F. Padovan <padovan@profusion.mobi> 2011-02-16 14:32:45 -0500
commit    fcd89c09a59a054fb986861e0862aa2fff7d7c40 (patch)
tree      115d525a9789e974b0a118d9cc22b792370f40f7 /net
parent    63185f64ef06464706b32c9a301f71f68cd93e52 (diff)
Bluetooth: Add LE connect support
Bluetooth V4.0 adds support for Low Energy (LE) connections. The
specification introduces a new set of HCI commands to control LE
connections. This patch adds the logic to create, cancel and disconnect
LE connections.

Signed-off-by: Ville Tervo <ville.tervo@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
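For orientation, a minimal caller sketch, not part of this patch: assuming the existing hci_connect() contract ("device must be locked") and the standard BT_SECURITY_LOW / HCI_AT_NO_BONDING constants from hci_core.h, an upper layer such as L2CAP could now request an LE link like this:

	struct hci_conn *conn;

	hci_dev_lock(hdev);
	/* LE_LINK routes into the new LE branch of hci_connect() */
	conn = hci_connect(hdev, LE_LINK, dst, BT_SECURITY_LOW,
							HCI_AT_NO_BONDING);
	hci_dev_unlock(hdev);
	if (!conn)
		return -ENOMEM;
	/* hci_connect() took a reference via hci_conn_hold();
	 * release it with hci_conn_put() when finished. */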
Diffstat (limited to 'net')
-rw-r--r--  net/bluetooth/hci_conn.c   51
-rw-r--r--  net/bluetooth/hci_event.c  93
2 files changed, 141 insertions(+), 3 deletions(-)
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 42dc39f25b72..d0c470c18f9d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -45,6 +45,32 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
+static void hci_le_connect(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+	struct hci_cp_le_create_conn cp;
+
+	conn->state = BT_CONNECT;
+	conn->out = 1;
+
+	memset(&cp, 0, sizeof(cp));
+	cp.scan_interval = cpu_to_le16(0x0004);
+	cp.scan_window = cpu_to_le16(0x0004);
+	bacpy(&cp.peer_addr, &conn->dst);
+	cp.conn_interval_min = cpu_to_le16(0x0008);
+	cp.conn_interval_max = cpu_to_le16(0x0100);
+	cp.supervision_timeout = cpu_to_le16(0x0064);
+	cp.min_ce_len = cpu_to_le16(0x0001);
+	cp.max_ce_len = cpu_to_le16(0x0001);
+
+	hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
+}
+
+static void hci_le_connect_cancel(struct hci_conn *conn)
+{
+	hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
+}
+
 void hci_acl_connect(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
@@ -193,8 +219,12 @@ static void hci_conn_timeout(unsigned long arg)
 	switch (conn->state) {
 	case BT_CONNECT:
 	case BT_CONNECT2:
-		if (conn->type == ACL_LINK && conn->out)
-			hci_acl_connect_cancel(conn);
+		if (conn->out) {
+			if (conn->type == ACL_LINK)
+				hci_acl_connect_cancel(conn);
+			else if (conn->type == LE_LINK)
+				hci_le_connect_cancel(conn);
+		}
 		break;
 	case BT_CONFIG:
 	case BT_CONNECTED:
@@ -361,15 +391,30 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
 }
 EXPORT_SYMBOL(hci_get_route);
 
-/* Create SCO or ACL connection.
+/* Create SCO, ACL or LE connection.
  * Device _must_ be locked */
 struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
 {
 	struct hci_conn *acl;
 	struct hci_conn *sco;
+	struct hci_conn *le;
 
 	BT_DBG("%s dst %s", hdev->name, batostr(dst));
 
+	if (type == LE_LINK) {
+		le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+		if (!le)
+			le = hci_conn_add(hdev, LE_LINK, dst);
+		if (!le)
+			return NULL;
+		if (le->state == BT_OPEN)
+			hci_le_connect(le);
+
+		hci_conn_hold(le);
+
+		return le;
+	}
+
 	acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
 	if (!acl) {
 		acl = hci_conn_add(hdev, ACL_LINK, dst);
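For reference, the hard-coded parameters in hci_le_connect() above use the units the Bluetooth Core Specification defines for the LE Create Connection command. The values below mirror the patch; the unit arithmetic in the comments comes from the specification:

	cp.scan_interval       = cpu_to_le16(0x0004); /*   4 * 0.625 ms = 2.5 ms */
	cp.scan_window         = cpu_to_le16(0x0004); /*   4 * 0.625 ms = 2.5 ms */
	cp.conn_interval_min   = cpu_to_le16(0x0008); /*   8 * 1.25 ms  = 10 ms  */
	cp.conn_interval_max   = cpu_to_le16(0x0100); /* 256 * 1.25 ms  = 320 ms */
	cp.supervision_timeout = cpu_to_le16(0x0064); /* 100 * 10 ms    = 1 s    */
	cp.min_ce_len          = cpu_to_le16(0x0001); /*   1 * 0.625 ms          */
	cp.max_ce_len          = cpu_to_le16(0x0001); /*   1 * 0.625 ms          */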
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cee46cbe7aeb..47c6e9316ce8 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1107,6 +1107,43 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_cp_le_create_conn *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
+
+	BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr),
+		conn);
+
+	if (status) {
+		if (conn && conn->state == BT_CONNECT) {
+			conn->state = BT_CLOSED;
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_del(conn);
+		}
+	} else {
+		if (!conn) {
+			conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr);
+			if (conn)
+				conn->out = 1;
+			else
+				BT_ERR("No memory for new connection");
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	__u8 status = *((__u8 *) skb->data);
@@ -1738,6 +1775,10 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		mgmt_disconnect_failed(hdev->id);
 		break;
 
+	case HCI_OP_LE_CREATE_CONN:
+		hci_cs_le_create_conn(hdev, ev->status);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%x", hdev->name, opcode);
 		break;
@@ -2321,6 +2362,54 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_
 	hci_dev_unlock(hdev);
 }
 
+static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_le_conn_complete *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);
+	if (!conn)
+		goto unlock;
+
+	if (ev->status) {
+		hci_proto_connect_cfm(conn, ev->status);
+		conn->state = BT_CLOSED;
+		hci_conn_del(conn);
+		goto unlock;
+	}
+
+	conn->handle = __le16_to_cpu(ev->handle);
+	conn->state = BT_CONNECTED;
+
+	hci_conn_hold_device(conn);
+	hci_conn_add_sysfs(conn);
+
+	hci_proto_connect_cfm(conn, ev->status);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static inline void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_le_meta *le_ev = (void *) skb->data;
+
+	skb_pull(skb, sizeof(*le_ev));
+
+	switch (le_ev->subevent) {
+	case HCI_EV_LE_CONN_COMPLETE:
+		hci_le_conn_complete_evt(hdev, skb);
+		break;
+
+	default:
+		break;
+	}
+}
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (void *) skb->data;
@@ -2461,6 +2550,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_remote_host_features_evt(hdev, skb);
 		break;
 
+	case HCI_EV_LE_META:
+		hci_le_meta_evt(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s event 0x%x", hdev->name, event);
 		break;
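Read together, the two files implement the full round trip for an outgoing LE connection. A rough sketch of the command/event flow as wired up by this patch:

	/*
	 * hci_connect(hdev, LE_LINK, dst, ...)    caller, hdev locked
	 *   -> hci_le_connect()                   sends HCI_OP_LE_CREATE_CONN
	 *   <- Command Status                     hci_cs_le_create_conn()
	 *        on failure: BT_CLOSED, hci_proto_connect_cfm(), hci_conn_del()
	 *   <- LE Meta / LE Connection Complete   hci_le_meta_evt() ->
	 *                                         hci_le_conn_complete_evt()
	 *        on success: handle stored, state = BT_CONNECTED,
	 *        hci_proto_connect_cfm() notifies the upper layer
	 *
	 * A connection stuck in BT_CONNECT is reaped by hci_conn_timeout(),
	 * which now issues HCI_OP_LE_CREATE_CONN_CANCEL for LE links.
	 */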