aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@linux.intel.com>2012-01-19 15:56:50 -0500
committerH. Peter Anvin <hpa@linux.intel.com>2012-01-19 15:56:50 -0500
commit282f445a779ed76fca9884fe377bf56a3088b208 (patch)
treed9abcf526baee0100672851e0a8894c19e762a39 /tools
parent68f30fbee19cc67849b9fa8e153ede70758afe81 (diff)
parent90a4c0f51e8e44111a926be6f4c87af3938a79c3 (diff)
Merge remote-tracking branch 'linus/master' into x86/urgent
Diffstat (limited to 'tools')
-rw-r--r--tools/lguest/.gitignore1
-rw-r--r--tools/lguest/Makefile8
-rw-r--r--tools/lguest/extract58
-rw-r--r--tools/lguest/lguest.c2065
-rw-r--r--tools/lguest/lguest.txt129
-rwxr-xr-xtools/nfsd/inject_fault.sh49
-rw-r--r--tools/perf/Documentation/examples.txt34
-rw-r--r--tools/perf/Documentation/perf-list.txt2
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/arch/powerpc/util/dwarf-regs.c3
-rw-r--r--tools/perf/builtin-annotate.c7
-rw-r--r--tools/perf/builtin-kmem.c3
-rw-r--r--tools/perf/builtin-kvm.c6
-rw-r--r--tools/perf/builtin-script.c4
-rw-r--r--tools/perf/builtin-test.c2
-rw-r--r--tools/perf/builtin-top.c5
-rw-r--r--tools/perf/util/evlist.c5
-rw-r--r--tools/perf/util/hist.c131
-rw-r--r--tools/perf/util/hist.h7
-rw-r--r--tools/perf/util/parse-events.c15
-rw-r--r--tools/perf/util/trace-event-info.c1
-rw-r--r--tools/perf/util/util.c15
-rw-r--r--tools/perf/util/util.h4
-rw-r--r--tools/power/x86/turbostat/turbostat.88
-rw-r--r--tools/power/x86/turbostat/turbostat.c2
-rwxr-xr-xtools/testing/ktest/compare-ktest-sample.pl4
-rwxr-xr-xtools/testing/ktest/ktest.pl682
-rw-r--r--tools/testing/ktest/sample.conf89
-rw-r--r--tools/testing/selftests/Makefile11
-rw-r--r--tools/testing/selftests/breakpoints/Makefile20
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c394
-rw-r--r--tools/testing/selftests/run_tests8
-rw-r--r--tools/virtio/linux/virtio.h22
-rw-r--r--tools/virtio/virtio_test.c6
34 files changed, 3486 insertions, 315 deletions
diff --git a/tools/lguest/.gitignore b/tools/lguest/.gitignore
new file mode 100644
index 000000000000..115587fd5f65
--- /dev/null
+++ b/tools/lguest/.gitignore
@@ -0,0 +1 @@
lguest
diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile
new file mode 100644
index 000000000000..0ac34206f7a7
--- /dev/null
+++ b/tools/lguest/Makefile
@@ -0,0 +1,8 @@
# This creates the demonstration utility "lguest" which runs a Linux guest.
# Missing headers?  Add "-I../../../include -I../../../arch/x86/include"
CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE

# Neither of these names a file we build, so a stray file called "all" or
# "clean" must not stop the recipe from running.
.PHONY: all clean

all: lguest

clean:
	rm -f lguest
diff --git a/tools/lguest/extract b/tools/lguest/extract
new file mode 100644
index 000000000000..7730bb6e4b94
--- /dev/null
+++ b/tools/lguest/extract
@@ -0,0 +1,58 @@
#! /bin/sh
#
# Usage: extract PREFIX FILE...
#
# Collects the numbered documentation comments tagged "PREFIX:NUM" from the
# given files into one scratch file per number, then prints them in glob
# order, each run headed by the name of the file it came from.

set -e

PREFIX=$1
shift

# Only arm the cleanup trap once mktemp has succeeded: TMPDIR is also a
# standard environment variable, and we must never remove the caller's.
TMPDIR=`mktemp -d`
trap 'rm -r "$TMPDIR"' 0

# fd 3 is "where the current comment goes"; outside a comment it is /dev/null.
exec 3>/dev/null
for f; do
	while IFS="
" read -r LINE; do
		case "$LINE" in
			# A comment which starts and ends on the same line.
			*$PREFIX:[0-9]*:\**)
				NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
				if [ -f "$TMPDIR/$NUM" ]; then
					echo "$TMPDIR/$NUM already exists prior to $f"
					exit 1
				fi
				exec 3>>"$TMPDIR/$NUM"
				echo "$f" | sed 's,\.\./,,g' > "$TMPDIR/.$NUM"
				/bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
				;;
			# Start of a tagged comment: begin capturing.
			*$PREFIX:[0-9]*)
				NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
				if [ -f "$TMPDIR/$NUM" ]; then
					echo "$TMPDIR/$NUM already exists prior to $f"
					exit 1
				fi
				exec 3>>"$TMPDIR/$NUM"
				echo "$f" | sed 's,\.\./,,g' > "$TMPDIR/.$NUM"
				/bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
				;;
			# End marker: emit the line, then stop capturing.
			*:\**)
				/bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
				echo >&3
				exec 3>/dev/null
				;;
			*)
				/bin/echo "$LINE" >&3
				;;
		esac
	done < "$f"
	echo >&3
	exec 3>/dev/null
done

LASTFILE=""
for f in "$TMPDIR"/*; do
	# Nothing was extracted: the glob stays literal, so skip it.
	[ -f "$f" ] || continue
	SRC=$(cat "$TMPDIR/.$(basename "$f")")
	if [ "$LASTFILE" != "$SRC" ]; then
		LASTFILE=$SRC
		echo "[ $LASTFILE ]"
	fi
	cat "$f"
done
58
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
new file mode 100644
index 000000000000..f759f4f097c7
--- /dev/null
+++ b/tools/lguest/lguest.c
@@ -0,0 +1,2065 @@
1/*P:100
2 * This is the Launcher code, a simple program which lays out the "physical"
3 * memory for the new Guest by mapping the kernel image and the virtual
4 * devices, then opens /dev/lguest to tell the kernel about the Guest and
5 * control it.
6:*/
7#define _LARGEFILE64_SOURCE
8#define _GNU_SOURCE
9#include <stdio.h>
10#include <string.h>
11#include <unistd.h>
12#include <err.h>
13#include <stdint.h>
14#include <stdlib.h>
15#include <elf.h>
16#include <sys/mman.h>
17#include <sys/param.h>
18#include <sys/types.h>
19#include <sys/stat.h>
20#include <sys/wait.h>
21#include <sys/eventfd.h>
22#include <fcntl.h>
23#include <stdbool.h>
24#include <errno.h>
25#include <ctype.h>
26#include <sys/socket.h>
27#include <sys/ioctl.h>
28#include <sys/time.h>
29#include <time.h>
30#include <netinet/in.h>
31#include <net/if.h>
32#include <linux/sockios.h>
33#include <linux/if_tun.h>
34#include <sys/uio.h>
35#include <termios.h>
36#include <getopt.h>
37#include <assert.h>
38#include <sched.h>
39#include <limits.h>
40#include <stddef.h>
41#include <signal.h>
42#include <pwd.h>
43#include <grp.h>
44
45#include <linux/virtio_config.h>
46#include <linux/virtio_net.h>
47#include <linux/virtio_blk.h>
48#include <linux/virtio_console.h>
49#include <linux/virtio_rng.h>
50#include <linux/virtio_ring.h>
51#include <asm/bootparam.h>
52#include "../../include/linux/lguest_launcher.h"
53/*L:110
54 * We can ignore the 43 include files we need for this program, but I do want
55 * to draw attention to the use of kernel-style types.
56 *
57 * As Linus said, "C is a Spartan language, and so should your naming be." I
58 * like these abbreviations, so we define them here. Note that u64 is always
59 * unsigned long long, which works on all Linux systems: this means that we can
60 * use %llu in printf for any u64.
61 */
62typedef unsigned long long u64;
63typedef uint32_t u32;
64typedef uint16_t u16;
65typedef uint8_t u8;
66/*:*/
67
68#define BRIDGE_PFX "bridge:"
69#ifndef SIOCBRADDIF
70#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
71#endif
72/* We can have up to 256 pages for devices. */
73#define DEVICE_PAGES 256
74/* This will occupy 3 pages: it must be a power of 2. */
75#define VIRTQUEUE_NUM 256
76
77/*L:120
78 * verbose is both a global flag and a macro. The C preprocessor allows
79 * this, and although I wouldn't recommend it, it works quite nicely here.
80 */
81static bool verbose;
82#define verbose(args...) \
83 do { if (verbose) printf(args); } while(0)
84/*:*/
85
86/* The pointer to the start of guest memory. */
87static void *guest_base;
88/* The maximum guest physical address allowed, and maximum possible. */
89static unsigned long guest_limit, guest_max;
90/* The /dev/lguest file descriptor. */
91static int lguest_fd;
92
93/* a per-cpu variable indicating whose vcpu is currently running */
94static unsigned int __thread cpu_id;
95
96/* This is our list of devices. */
97struct device_list {
98 /* Counter to assign interrupt numbers. */
99 unsigned int next_irq;
100
101 /* Counter to print out convenient device numbers. */
102 unsigned int device_num;
103
104 /* The descriptor page for the devices. */
105 u8 *descpage;
106
107 /* A single linked list of devices. */
108 struct device *dev;
109 /* And a pointer to the last device for easy append. */
110 struct device *lastdev;
111};
112
113/* The list of Guest devices, based on command line arguments. */
114static struct device_list devices;
115
116/* The device structure describes a single device. */
117struct device {
118 /* The linked-list pointer. */
119 struct device *next;
120
121 /* The device's descriptor, as mapped into the Guest. */
122 struct lguest_device_desc *desc;
123
124 /* We can't trust desc values once Guest has booted: we use these. */
125 unsigned int feature_len;
126 unsigned int num_vq;
127
128 /* The name of this device, for --verbose. */
129 const char *name;
130
131 /* Any queues attached to this device */
132 struct virtqueue *vq;
133
134 /* Is it operational */
135 bool running;
136
137 /* Device-specific data. */
138 void *priv;
139};
140
141/* The virtqueue structure describes a queue attached to a device. */
142struct virtqueue {
143 struct virtqueue *next;
144
145 /* Which device owns me. */
146 struct device *dev;
147
148 /* The configuration for this queue. */
149 struct lguest_vqconfig config;
150
151 /* The actual ring of buffers. */
152 struct vring vring;
153
154 /* Last available index we saw. */
155 u16 last_avail_idx;
156
157 /* How many are used since we sent last irq? */
158 unsigned int pending_used;
159
160 /* Eventfd where Guest notifications arrive. */
161 int eventfd;
162
163 /* Function for the thread which is servicing this virtqueue. */
164 void (*service)(struct virtqueue *vq);
165 pid_t thread;
166};
167
168/* Remember the arguments to the program so we can "reboot" */
169static char **main_args;
170
171/* The original tty settings to restore on exit. */
172static struct termios orig_term;
173
174/*
175 * We have to be careful with barriers: our devices are all run in separate
176 * threads and so we need to make sure that changes visible to the Guest happen
177 * in precise order.
178 */
179#define wmb() __asm__ __volatile__("" : : : "memory")
180#define mb() __asm__ __volatile__("" : : : "memory")
181
182/*
183 * Convert an iovec element to the given type.
184 *
185 * This is a fairly ugly trick: we need to know the size of the type and
186 * alignment requirement to check the pointer is kosher. It's also nice to
187 * have the name of the type in case we report failure.
188 *
189 * Typing those three things all the time is cumbersome and error prone, so we
190 * have a macro which sets them all up and passes to the real function.
191 */
192#define convert(iov, type) \
193 ((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
194
195static void *_convert(struct iovec *iov, size_t size, size_t align,
196 const char *name)
197{
198 if (iov->iov_len != size)
199 errx(1, "Bad iovec size %zu for %s", iov->iov_len, name);
200 if ((unsigned long)iov->iov_base % align != 0)
201 errx(1, "Bad alignment %p for %s", iov->iov_base, name);
202 return iov->iov_base;
203}
204
205/* Wrapper for the last available index. Makes it easier to change. */
206#define lg_last_avail(vq) ((vq)->last_avail_idx)
207
208/*
209 * The virtio configuration space is defined to be little-endian. x86 is
210 * little-endian too, but it's nice to be explicit so we have these helpers.
211 */
212#define cpu_to_le16(v16) (v16)
213#define cpu_to_le32(v32) (v32)
214#define cpu_to_le64(v64) (v64)
215#define le16_to_cpu(v16) (v16)
216#define le32_to_cpu(v32) (v32)
217#define le64_to_cpu(v64) (v64)
218
219/* Is this iovec empty? */
220static bool iov_empty(const struct iovec iov[], unsigned int num_iov)
221{
222 unsigned int i;
223
224 for (i = 0; i < num_iov; i++)
225 if (iov[i].iov_len)
226 return false;
227 return true;
228}
229
230/* Take len bytes from the front of this iovec. */
231static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
232{
233 unsigned int i;
234
235 for (i = 0; i < num_iov; i++) {
236 unsigned int used;
237
238 used = iov[i].iov_len < len ? iov[i].iov_len : len;
239 iov[i].iov_base += used;
240 iov[i].iov_len -= used;
241 len -= used;
242 }
243 assert(len == 0);
244}
245
246/* The device virtqueue descriptors are followed by feature bitmasks. */
247static u8 *get_feature_bits(struct device *dev)
248{
249 return (u8 *)(dev->desc + 1)
250 + dev->num_vq * sizeof(struct lguest_vqconfig);
251}
252
253/*L:100
254 * The Launcher code itself takes us out into userspace, that scary place where
255 * pointers run wild and free! Unfortunately, like most userspace programs,
256 * it's quite boring (which is why everyone likes to hack on the kernel!).
257 * Perhaps if you make up an Lguest Drinking Game at this point, it will get
258 * you through this section. Or, maybe not.
259 *
260 * The Launcher sets up a big chunk of memory to be the Guest's "physical"
261 * memory and stores it in "guest_base". In other words, Guest physical ==
262 * Launcher virtual with an offset.
263 *
264 * This can be tough to get your head around, but usually it just means that we
265 * use these trivial conversion functions when the Guest gives us its
266 * "physical" addresses:
267 */
268static void *from_guest_phys(unsigned long addr)
269{
270 return guest_base + addr;
271}
272
273static unsigned long to_guest_phys(const void *addr)
274{
275 return (addr - guest_base);
276}
277
278/*L:130
279 * Loading the Kernel.
280 *
281 * We start with couple of simple helper routines. open_or_die() avoids
282 * error-checking code cluttering the callers:
283 */
284static int open_or_die(const char *name, int flags)
285{
286 int fd = open(name, flags);
287 if (fd < 0)
288 err(1, "Failed to open %s", name);
289 return fd;
290}
291
292/* map_zeroed_pages() takes a number of pages. */
293static void *map_zeroed_pages(unsigned int num)
294{
295 int fd = open_or_die("/dev/zero", O_RDONLY);
296 void *addr;
297
298 /*
299 * We use a private mapping (ie. if we write to the page, it will be
300 * copied). We allocate an extra two pages PROT_NONE to act as guard
301 * pages against read/write attempts that exceed allocated space.
302 */
303 addr = mmap(NULL, getpagesize() * (num+2),
304 PROT_NONE, MAP_PRIVATE, fd, 0);
305
306 if (addr == MAP_FAILED)
307 err(1, "Mmapping %u pages of /dev/zero", num);
308
309 if (mprotect(addr + getpagesize(), getpagesize() * num,
310 PROT_READ|PROT_WRITE) == -1)
311 err(1, "mprotect rw %u pages failed", num);
312
313 /*
314 * One neat mmap feature is that you can close the fd, and it
315 * stays mapped.
316 */
317 close(fd);
318
319 /* Return address after PROT_NONE page */
320 return addr + getpagesize();
321}
322
323/* Get some more pages for a device. */
324static void *get_pages(unsigned int num)
325{
326 void *addr = from_guest_phys(guest_limit);
327
328 guest_limit += num * getpagesize();
329 if (guest_limit > guest_max)
330 errx(1, "Not enough memory for devices");
331 return addr;
332}
333
334/*
335 * This routine is used to load the kernel or initrd. It tries mmap, but if
336 * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
337 * it falls back to reading the memory in.
338 */
339static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
340{
341 ssize_t r;
342
343 /*
344 * We map writable even though for some segments are marked read-only.
345 * The kernel really wants to be writable: it patches its own
346 * instructions.
347 *
348 * MAP_PRIVATE means that the page won't be copied until a write is
349 * done to it. This allows us to share untouched memory between
350 * Guests.
351 */
352 if (mmap(addr, len, PROT_READ|PROT_WRITE,
353 MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED)
354 return;
355
356 /* pread does a seek and a read in one shot: saves a few lines. */
357 r = pread(fd, addr, len, offset);
358 if (r != len)
359 err(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
360}
361
362/*
363 * This routine takes an open vmlinux image, which is in ELF, and maps it into
364 * the Guest memory. ELF = Embedded Linking Format, which is the format used
365 * by all modern binaries on Linux including the kernel.
366 *
367 * The ELF headers give *two* addresses: a physical address, and a virtual
368 * address. We use the physical address; the Guest will map itself to the
369 * virtual address.
370 *
371 * We return the starting address.
372 */
373static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
374{
375 Elf32_Phdr phdr[ehdr->e_phnum];
376 unsigned int i;
377
378 /*
379 * Sanity checks on the main ELF header: an x86 executable with a
380 * reasonable number of correctly-sized program headers.
381 */
382 if (ehdr->e_type != ET_EXEC
383 || ehdr->e_machine != EM_386
384 || ehdr->e_phentsize != sizeof(Elf32_Phdr)
385 || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
386 errx(1, "Malformed elf header");
387
388 /*
389 * An ELF executable contains an ELF header and a number of "program"
390 * headers which indicate which parts ("segments") of the program to
391 * load where.
392 */
393
394 /* We read in all the program headers at once: */
395 if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
396 err(1, "Seeking to program headers");
397 if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
398 err(1, "Reading program headers");
399
400 /*
401 * Try all the headers: there are usually only three. A read-only one,
402 * a read-write one, and a "note" section which we don't load.
403 */
404 for (i = 0; i < ehdr->e_phnum; i++) {
405 /* If this isn't a loadable segment, we ignore it */
406 if (phdr[i].p_type != PT_LOAD)
407 continue;
408
409 verbose("Section %i: size %i addr %p\n",
410 i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);
411
412 /* We map this section of the file at its physical address. */
413 map_at(elf_fd, from_guest_phys(phdr[i].p_paddr),
414 phdr[i].p_offset, phdr[i].p_filesz);
415 }
416
417 /* The entry point is given in the ELF header. */
418 return ehdr->e_entry;
419}
420
421/*L:150
422 * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed
423 * to jump into it and it will unpack itself. We used to have to perform some
424 * hairy magic because the unpacking code scared me.
425 *
426 * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote
427 * a small patch to jump over the tricky bits in the Guest, so now we just read
428 * the funky header so we know where in the file to load, and away we go!
429 */
430static unsigned long load_bzimage(int fd)
431{
432 struct boot_params boot;
433 int r;
434 /* Modern bzImages get loaded at 1M. */
435 void *p = from_guest_phys(0x100000);
436
437 /*
438 * Go back to the start of the file and read the header. It should be
439 * a Linux boot header (see Documentation/x86/boot.txt)
440 */
441 lseek(fd, 0, SEEK_SET);
442 read(fd, &boot, sizeof(boot));
443
444 /* Inside the setup_hdr, we expect the magic "HdrS" */
445 if (memcmp(&boot.hdr.header, "HdrS", 4) != 0)
446 errx(1, "This doesn't look like a bzImage to me");
447
448 /* Skip over the extra sectors of the header. */
449 lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET);
450
451 /* Now read everything into memory. in nice big chunks. */
452 while ((r = read(fd, p, 65536)) > 0)
453 p += r;
454
455 /* Finally, code32_start tells us where to enter the kernel. */
456 return boot.hdr.code32_start;
457}
458
459/*L:140
460 * Loading the kernel is easy when it's a "vmlinux", but most kernels
461 * come wrapped up in the self-decompressing "bzImage" format. With a little
462 * work, we can load those, too.
463 */
464static unsigned long load_kernel(int fd)
465{
466 Elf32_Ehdr hdr;
467
468 /* Read in the first few bytes. */
469 if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
470 err(1, "Reading kernel");
471
472 /* If it's an ELF file, it starts with "\177ELF" */
473 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
474 return map_elf(fd, &hdr);
475
476 /* Otherwise we assume it's a bzImage, and try to load it. */
477 return load_bzimage(fd);
478}
479
480/*
481 * This is a trivial little helper to align pages. Andi Kleen hated it because
482 * it calls getpagesize() twice: "it's dumb code."
483 *
484 * Kernel guys get really het up about optimization, even when it's not
485 * necessary. I leave this code as a reaction against that.
486 */
487static inline unsigned long page_align(unsigned long addr)
488{
489 /* Add upwards and truncate downwards. */
490 return ((addr + getpagesize()-1) & ~(getpagesize()-1));
491}
492
493/*L:180
494 * An "initial ram disk" is a disk image loaded into memory along with the
495 * kernel which the kernel can use to boot from without needing any drivers.
496 * Most distributions now use this as standard: the initrd contains the code to
497 * load the appropriate driver modules for the current machine.
498 *
499 * Importantly, James Morris works for RedHat, and Fedora uses initrds for its
500 * kernels. He sent me this (and tells me when I break it).
501 */
502static unsigned long load_initrd(const char *name, unsigned long mem)
503{
504 int ifd;
505 struct stat st;
506 unsigned long len;
507
508 ifd = open_or_die(name, O_RDONLY);
509 /* fstat() is needed to get the file size. */
510 if (fstat(ifd, &st) < 0)
511 err(1, "fstat() on initrd '%s'", name);
512
513 /*
514 * We map the initrd at the top of memory, but mmap wants it to be
515 * page-aligned, so we round the size up for that.
516 */
517 len = page_align(st.st_size);
518 map_at(ifd, from_guest_phys(mem - len), 0, st.st_size);
519 /*
520 * Once a file is mapped, you can close the file descriptor. It's a
521 * little odd, but quite useful.
522 */
523 close(ifd);
524 verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len);
525
526 /* We return the initrd size. */
527 return len;
528}
529/*:*/
530
531/*
532 * Simple routine to roll all the commandline arguments together with spaces
533 * between them.
534 */
535static void concat(char *dst, char *args[])
536{
537 unsigned int i, len = 0;
538
539 for (i = 0; args[i]; i++) {
540 if (i) {
541 strcat(dst+len, " ");
542 len++;
543 }
544 strcpy(dst+len, args[i]);
545 len += strlen(args[i]);
546 }
547 /* In case it's empty. */
548 dst[len] = '\0';
549}
550
551/*L:185
552 * This is where we actually tell the kernel to initialize the Guest. We
553 * saw the arguments it expects when we looked at initialize() in lguest_user.c:
554 * the base of Guest "physical" memory, the top physical page to allow and the
555 * entry point for the Guest.
556 */
557static void tell_kernel(unsigned long start)
558{
559 unsigned long args[] = { LHREQ_INITIALIZE,
560 (unsigned long)guest_base,
561 guest_limit / getpagesize(), start };
562 verbose("Guest: %p - %p (%#lx)\n",
563 guest_base, guest_base + guest_limit, guest_limit);
564 lguest_fd = open_or_die("/dev/lguest", O_RDWR);
565 if (write(lguest_fd, args, sizeof(args)) < 0)
566 err(1, "Writing to /dev/lguest");
567}
568/*:*/
569
570/*L:200
571 * Device Handling.
572 *
573 * When the Guest gives us a buffer, it sends an array of addresses and sizes.
574 * We need to make sure it's not trying to reach into the Launcher itself, so
575 * we have a convenient routine which checks it and exits with an error message
576 * if something funny is going on:
577 */
578static void *_check_pointer(unsigned long addr, unsigned int size,
579 unsigned int line)
580{
581 /*
582 * Check if the requested address and size exceeds the allocated memory,
583 * or addr + size wraps around.
584 */
585 if ((addr + size) > guest_limit || (addr + size) < addr)
586 errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);
587 /*
588 * We return a pointer for the caller's convenience, now we know it's
589 * safe to use.
590 */
591 return from_guest_phys(addr);
592}
593/* A macro which transparently hands the line number to the real function. */
594#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
595
596/*
597 * Each buffer in the virtqueues is actually a chain of descriptors. This
598 * function returns the next descriptor in the chain, or vq->vring.num if we're
599 * at the end.
600 */
601static unsigned next_desc(struct vring_desc *desc,
602 unsigned int i, unsigned int max)
603{
604 unsigned int next;
605
606 /* If this descriptor says it doesn't chain, we're done. */
607 if (!(desc[i].flags & VRING_DESC_F_NEXT))
608 return max;
609
610 /* Check they're not leading us off end of descriptors. */
611 next = desc[i].next;
612 /* Make sure compiler knows to grab that: we don't want it changing! */
613 wmb();
614
615 if (next >= max)
616 errx(1, "Desc next is %u", next);
617
618 return next;
619}
620
621/*
622 * This actually sends the interrupt for this virtqueue, if we've used a
623 * buffer.
624 */
625static void trigger_irq(struct virtqueue *vq)
626{
627 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
628
629 /* Don't inform them if nothing used. */
630 if (!vq->pending_used)
631 return;
632 vq->pending_used = 0;
633
634 /* If they don't want an interrupt, don't send one... */
635 if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
636 return;
637 }
638
639 /* Send the Guest an interrupt tell them we used something up. */
640 if (write(lguest_fd, buf, sizeof(buf)) != 0)
641 err(1, "Triggering irq %i", vq->config.irq);
642}
643
644/*
645 * This looks in the virtqueue for the first available buffer, and converts
646 * it to an iovec for convenient access. Since descriptors consist of some
647 * number of output then some number of input descriptors, it's actually two
648 * iovecs, but we pack them into one and note how many of each there were.
649 *
650 * This function waits if necessary, and returns the descriptor number found.
651 */
652static unsigned wait_for_vq_desc(struct virtqueue *vq,
653 struct iovec iov[],
654 unsigned int *out_num, unsigned int *in_num)
655{
656 unsigned int i, head, max;
657 struct vring_desc *desc;
658 u16 last_avail = lg_last_avail(vq);
659
660 /* There's nothing available? */
661 while (last_avail == vq->vring.avail->idx) {
662 u64 event;
663
664 /*
665 * Since we're about to sleep, now is a good time to tell the
666 * Guest about what we've used up to now.
667 */
668 trigger_irq(vq);
669
670 /* OK, now we need to know about added descriptors. */
671 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
672
673 /*
674 * They could have slipped one in as we were doing that: make
675 * sure it's written, then check again.
676 */
677 mb();
678 if (last_avail != vq->vring.avail->idx) {
679 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
680 break;
681 }
682
683 /* Nothing new? Wait for eventfd to tell us they refilled. */
684 if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
685 errx(1, "Event read failed?");
686
687 /* We don't need to be notified again. */
688 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
689 }
690
691 /* Check it isn't doing very strange things with descriptor numbers. */
692 if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
693 errx(1, "Guest moved used index from %u to %u",
694 last_avail, vq->vring.avail->idx);
695
696 /*
697 * Grab the next descriptor number they're advertising, and increment
698 * the index we've seen.
699 */
700 head = vq->vring.avail->ring[last_avail % vq->vring.num];
701 lg_last_avail(vq)++;
702
703 /* If their number is silly, that's a fatal mistake. */
704 if (head >= vq->vring.num)
705 errx(1, "Guest says index %u is available", head);
706
707 /* When we start there are none of either input nor output. */
708 *out_num = *in_num = 0;
709
710 max = vq->vring.num;
711 desc = vq->vring.desc;
712 i = head;
713
714 /*
715 * If this is an indirect entry, then this buffer contains a descriptor
716 * table which we handle as if it's any normal descriptor chain.
717 */
718 if (desc[i].flags & VRING_DESC_F_INDIRECT) {
719 if (desc[i].len % sizeof(struct vring_desc))
720 errx(1, "Invalid size for indirect buffer table");
721
722 max = desc[i].len / sizeof(struct vring_desc);
723 desc = check_pointer(desc[i].addr, desc[i].len);
724 i = 0;
725 }
726
727 do {
728 /* Grab the first descriptor, and check it's OK. */
729 iov[*out_num + *in_num].iov_len = desc[i].len;
730 iov[*out_num + *in_num].iov_base
731 = check_pointer(desc[i].addr, desc[i].len);
732 /* If this is an input descriptor, increment that count. */
733 if (desc[i].flags & VRING_DESC_F_WRITE)
734 (*in_num)++;
735 else {
736 /*
737 * If it's an output descriptor, they're all supposed
738 * to come before any input descriptors.
739 */
740 if (*in_num)
741 errx(1, "Descriptor has out after in");
742 (*out_num)++;
743 }
744
745 /* If we've got too many, that implies a descriptor loop. */
746 if (*out_num + *in_num > max)
747 errx(1, "Looped descriptor");
748 } while ((i = next_desc(desc, i, max)) != max);
749
750 return head;
751}
752
753/*
754 * After we've used one of their buffers, we tell the Guest about it. Sometime
755 * later we'll want to send them an interrupt using trigger_irq(); note that
756 * wait_for_vq_desc() does that for us if it has to wait.
757 */
758static void add_used(struct virtqueue *vq, unsigned int head, int len)
759{
760 struct vring_used_elem *used;
761
762 /*
763 * The virtqueue contains a ring of used buffers. Get a pointer to the
764 * next entry in that used ring.
765 */
766 used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
767 used->id = head;
768 used->len = len;
769 /* Make sure buffer is written before we update index. */
770 wmb();
771 vq->vring.used->idx++;
772 vq->pending_used++;
773}
774
775/* And here's the combo meal deal. Supersize me! */
776static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
777{
778 add_used(vq, head, len);
779 trigger_irq(vq);
780}
781
782/*
783 * The Console
784 *
785 * We associate some data with the console for our exit hack.
786 */
787struct console_abort {
788 /* How many times have they hit ^C? */
789 int count;
790 /* When did they start? */
791 struct timeval start;
792};
793
794/* This is the routine which handles console input (ie. stdin). */
795static void console_input(struct virtqueue *vq)
796{
797 int len;
798 unsigned int head, in_num, out_num;
799 struct console_abort *abort = vq->dev->priv;
800 struct iovec iov[vq->vring.num];
801
802 /* Make sure there's a descriptor available. */
803 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
804 if (out_num)
805 errx(1, "Output buffers in console in queue?");
806
807 /* Read into it. This is where we usually wait. */
808 len = readv(STDIN_FILENO, iov, in_num);
809 if (len <= 0) {
810 /* Ran out of input? */
811 warnx("Failed to get console input, ignoring console.");
812 /*
813 * For simplicity, dying threads kill the whole Launcher. So
814 * just nap here.
815 */
816 for (;;)
817 pause();
818 }
819
820 /* Tell the Guest we used a buffer. */
821 add_used_and_trigger(vq, head, len);
822
823 /*
824 * Three ^C within one second? Exit.
825 *
826 * This is such a hack, but works surprisingly well. Each ^C has to
827 * be in a buffer by itself, so they can't be too fast. But we check
828 * that we get three within about a second, so they can't be too
829 * slow.
830 */
831 if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
832 abort->count = 0;
833 return;
834 }
835
836 abort->count++;
837 if (abort->count == 1)
838 gettimeofday(&abort->start, NULL);
839 else if (abort->count == 3) {
840 struct timeval now;
841 gettimeofday(&now, NULL);
842 /* Kill all Launcher processes with SIGINT, like normal ^C */
843 if (now.tv_sec <= abort->start.tv_sec+1)
844 kill(0, SIGINT);
845 abort->count = 0;
846 }
847}
848
849/* This is the routine which handles console output (ie. stdout). */
850static void console_output(struct virtqueue *vq)
851{
852 unsigned int head, out, in;
853 struct iovec iov[vq->vring.num];
854
855 /* We usually wait in here, for the Guest to give us something. */
856 head = wait_for_vq_desc(vq, iov, &out, &in);
857 if (in)
858 errx(1, "Input buffers in console output queue?");
859
860 /* writev can return a partial write, so we loop here. */
861 while (!iov_empty(iov, out)) {
862 int len = writev(STDOUT_FILENO, iov, out);
863 if (len <= 0) {
864 warn("Write to stdout gave %i (%d)", len, errno);
865 break;
866 }
867 iov_consume(iov, out, len);
868 }
869
870 /*
871 * We're finished with that buffer: if we're going to sleep,
872 * wait_for_vq_desc() will prod the Guest with an interrupt.
873 */
874 add_used(vq, head, 0);
875}
876
877/*
878 * The Network
879 *
880 * Handling output for network is also simple: we get all the output buffers
881 * and write them to /dev/net/tun.
882 */
883struct net_info {
884 int tunfd;
885};
886
887static void net_output(struct virtqueue *vq)
888{
889 struct net_info *net_info = vq->dev->priv;
890 unsigned int head, out, in;
891 struct iovec iov[vq->vring.num];
892
893 /* We usually wait in here for the Guest to give us a packet. */
894 head = wait_for_vq_desc(vq, iov, &out, &in);
895 if (in)
896 errx(1, "Input buffers in net output queue?");
897 /*
898 * Send the whole thing through to /dev/net/tun. It expects the exact
899 * same format: what a coincidence!
900 */
901 if (writev(net_info->tunfd, iov, out) < 0)
902 warnx("Write to tun failed (%d)?", errno);
903
904 /*
905 * Done with that one; wait_for_vq_desc() will send the interrupt if
906 * all packets are processed.
907 */
908 add_used(vq, head, 0);
909}
910
911/*
912 * Handling network input is a bit trickier, because I've tried to optimize it.
913 *
914 * First we have a helper routine which tells us if reading from this file
915 * descriptor (ie. the /dev/net/tun device) will block:
916 */
/*
 * Returns true unless select() reports fd as immediately readable.  The
 * zero timeout makes this a pure poll: it never sleeps.
 */
static bool will_block(int fd)
{
	struct timeval no_wait = { 0, 0 };
	fd_set readable;
	int ready;

	FD_ZERO(&readable);
	FD_SET(fd, &readable);

	/* Exactly one ready descriptor means a read would not block. */
	ready = select(fd + 1, &readable, NULL, NULL, &no_wait);
	return ready != 1;
}
925
926/*
927 * This handles packets coming in from the tun device to our Guest. Like all
928 * service routines, it gets called again as soon as it returns, so you don't
929 * see a while(1) loop here.
930 */
931static void net_input(struct virtqueue *vq)
932{
933 int len;
934 unsigned int head, out, in;
935 struct iovec iov[vq->vring.num];
936 struct net_info *net_info = vq->dev->priv;
937
938 /*
939 * Get a descriptor to write an incoming packet into. This will also
940 * send an interrupt if they're out of descriptors.
941 */
942 head = wait_for_vq_desc(vq, iov, &out, &in);
943 if (out)
944 errx(1, "Output buffers in net input queue?");
945
946 /*
947 * If it looks like we'll block reading from the tun device, send them
948 * an interrupt.
949 */
950 if (vq->pending_used && will_block(net_info->tunfd))
951 trigger_irq(vq);
952
953 /*
954 * Read in the packet. This is where we normally wait (when there's no
955 * incoming network traffic).
956 */
957 len = readv(net_info->tunfd, iov, in);
958 if (len <= 0)
959 warn("Failed to read from tun (%d).", errno);
960
961 /*
962 * Mark that packet buffer as used, but don't interrupt here. We want
963 * to wait until we've done as much work as we can.
964 */
965 add_used(vq, head, len);
966}
967/*:*/
968
969/* This is the helper to create threads: run the service routine in a loop. */
970static int do_thread(void *_vq)
971{
972 struct virtqueue *vq = _vq;
973
974 for (;;)
975 vq->service(vq);
976 return 0;
977}
978
979/*
980 * When a child dies, we kill our entire process group with SIGTERM. This
981 * also has the side effect that the shell restores the console for us!
982 */
/* Signal handler: the delivered signal number is not used. */
static void kill_launcher(int signal)
{
	(void)signal;

	/* pid 0 addresses every process in our own process group. */
	kill(0, SIGTERM);
}
987
988static void reset_device(struct device *dev)
989{
990 struct virtqueue *vq;
991
992 verbose("Resetting device %s\n", dev->name);
993
994 /* Clear any features they've acked. */
995 memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
996
997 /* We're going to be explicitly killing threads, so ignore them. */
998 signal(SIGCHLD, SIG_IGN);
999
1000 /* Zero out the virtqueues, get rid of their threads */
1001 for (vq = dev->vq; vq; vq = vq->next) {
1002 if (vq->thread != (pid_t)-1) {
1003 kill(vq->thread, SIGTERM);
1004 waitpid(vq->thread, NULL, 0);
1005 vq->thread = (pid_t)-1;
1006 }
1007 memset(vq->vring.desc, 0,
1008 vring_size(vq->config.num, LGUEST_VRING_ALIGN));
1009 lg_last_avail(vq) = 0;
1010 }
1011 dev->running = false;
1012
1013 /* Now we care if threads die. */
1014 signal(SIGCHLD, (void *)kill_launcher);
1015}
1016
1017/*L:216
1018 * This actually creates the thread which services the virtqueue for a device.
1019 */
1020static void create_thread(struct virtqueue *vq)
1021{
1022 /*
1023 * Create stack for thread. Since the stack grows upwards, we point
1024 * the stack pointer to the end of this region.
1025 */
1026 char *stack = malloc(32768);
1027 unsigned long args[] = { LHREQ_EVENTFD,
1028 vq->config.pfn*getpagesize(), 0 };
1029
1030 /* Create a zero-initialized eventfd. */
1031 vq->eventfd = eventfd(0, 0);
1032 if (vq->eventfd < 0)
1033 err(1, "Creating eventfd");
1034 args[2] = vq->eventfd;
1035
1036 /*
1037 * Attach an eventfd to this virtqueue: it will go off when the Guest
1038 * does an LHCALL_NOTIFY for this vq.
1039 */
1040 if (write(lguest_fd, &args, sizeof(args)) != 0)
1041 err(1, "Attaching eventfd");
1042
1043 /*
1044 * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so
1045 * we get a signal if it dies.
1046 */
1047 vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
1048 if (vq->thread == (pid_t)-1)
1049 err(1, "Creating clone");
1050
1051 /* We close our local copy now the child has it. */
1052 close(vq->eventfd);
1053}
1054
1055static void start_device(struct device *dev)
1056{
1057 unsigned int i;
1058 struct virtqueue *vq;
1059
1060 verbose("Device %s OK: offered", dev->name);
1061 for (i = 0; i < dev->feature_len; i++)
1062 verbose(" %02x", get_feature_bits(dev)[i]);
1063 verbose(", accepted");
1064 for (i = 0; i < dev->feature_len; i++)
1065 verbose(" %02x", get_feature_bits(dev)
1066 [dev->feature_len+i]);
1067
1068 for (vq = dev->vq; vq; vq = vq->next) {
1069 if (vq->service)
1070 create_thread(vq);
1071 }
1072 dev->running = true;
1073}
1074
1075static void cleanup_devices(void)
1076{
1077 struct device *dev;
1078
1079 for (dev = devices.dev; dev; dev = dev->next)
1080 reset_device(dev);
1081
1082 /* If we saved off the original terminal settings, restore them now. */
1083 if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
1084 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
1085}
1086
1087/* When the Guest tells us they updated the status field, we handle it. */
1088static void update_device_status(struct device *dev)
1089{
1090 /* A zero status is a reset, otherwise it's a set of flags. */
1091 if (dev->desc->status == 0)
1092 reset_device(dev);
1093 else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
1094 warnx("Device %s configuration FAILED", dev->name);
1095 if (dev->running)
1096 reset_device(dev);
1097 } else {
1098 if (dev->running)
1099 err(1, "Device %s features finalized twice", dev->name);
1100 start_device(dev);
1101 }
1102}
1103
1104/*L:215
1105 * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In
1106 * particular, it's used to notify us of device status changes during boot.
1107 */
1108static void handle_output(unsigned long addr)
1109{
1110 struct device *i;
1111
1112 /* Check each device. */
1113 for (i = devices.dev; i; i = i->next) {
1114 struct virtqueue *vq;
1115
1116 /*
1117 * Notifications to device descriptors mean they updated the
1118 * device status.
1119 */
1120 if (from_guest_phys(addr) == i->desc) {
1121 update_device_status(i);
1122 return;
1123 }
1124
1125 /* Devices should not be used before features are finalized. */
1126 for (vq = i->vq; vq; vq = vq->next) {
1127 if (addr != vq->config.pfn*getpagesize())
1128 continue;
1129 errx(1, "Notification on %s before setup!", i->name);
1130 }
1131 }
1132
1133 /*
1134 * Early console write is done using notify on a nul-terminated string
1135 * in Guest memory. It's also great for hacking debugging messages
1136 * into a Guest.
1137 */
1138 if (addr >= guest_limit)
1139 errx(1, "Bad NOTIFY %#lx", addr);
1140
1141 write(STDOUT_FILENO, from_guest_phys(addr),
1142 strnlen(from_guest_phys(addr), guest_limit - addr));
1143}
1144
1145/*L:190
1146 * Device Setup
1147 *
1148 * All devices need a descriptor so the Guest knows it exists, and a "struct
1149 * device" so the Launcher can keep track of it. We have common helper
1150 * routines to allocate and manage them.
1151 */
1152
1153/*
1154 * The layout of the device page is a "struct lguest_device_desc" followed by a
1155 * number of virtqueue descriptors, then two sets of feature bits, then an
1156 * array of configuration bytes. This routine returns the configuration
1157 * pointer.
1158 */
1159static u8 *device_config(const struct device *dev)
1160{
1161 return (void *)(dev->desc + 1)
1162 + dev->num_vq * sizeof(struct lguest_vqconfig)
1163 + dev->feature_len * 2;
1164}
1165
1166/*
1167 * This routine allocates a new "struct lguest_device_desc" from descriptor
1168 * table page just above the Guest's normal memory. It returns a pointer to
1169 * that descriptor.
1170 */
1171static struct lguest_device_desc *new_dev_desc(u16 type)
1172{
1173 struct lguest_device_desc d = { .type = type };
1174 void *p;
1175
1176 /* Figure out where the next device config is, based on the last one. */
1177 if (devices.lastdev)
1178 p = device_config(devices.lastdev)
1179 + devices.lastdev->desc->config_len;
1180 else
1181 p = devices.descpage;
1182
1183 /* We only have one page for all the descriptors. */
1184 if (p + sizeof(d) > (void *)devices.descpage + getpagesize())
1185 errx(1, "Too many devices");
1186
1187 /* p might not be aligned, so we memcpy in. */
1188 return memcpy(p, &d, sizeof(d));
1189}
1190
1191/*
1192 * Each device descriptor is followed by the description of its virtqueues. We
1193 * specify how many descriptors the virtqueue is to have.
1194 */
1195static void add_virtqueue(struct device *dev, unsigned int num_descs,
1196 void (*service)(struct virtqueue *))
1197{
1198 unsigned int pages;
1199 struct virtqueue **i, *vq = malloc(sizeof(*vq));
1200 void *p;
1201
1202 /* First we need some memory for this virtqueue. */
1203 pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1)
1204 / getpagesize();
1205 p = get_pages(pages);
1206
1207 /* Initialize the virtqueue */
1208 vq->next = NULL;
1209 vq->last_avail_idx = 0;
1210 vq->dev = dev;
1211
1212 /*
1213 * This is the routine the service thread will run, and its Process ID
1214 * once it's running.
1215 */
1216 vq->service = service;
1217 vq->thread = (pid_t)-1;
1218
1219 /* Initialize the configuration. */
1220 vq->config.num = num_descs;
1221 vq->config.irq = devices.next_irq++;
1222 vq->config.pfn = to_guest_phys(p) / getpagesize();
1223
1224 /* Initialize the vring. */
1225 vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN);
1226
1227 /*
1228 * Append virtqueue to this device's descriptor. We use
1229 * device_config() to get the end of the device's current virtqueues;
1230 * we check that we haven't added any config or feature information
1231 * yet, otherwise we'd be overwriting them.
1232 */
1233 assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
1234 memcpy(device_config(dev), &vq->config, sizeof(vq->config));
1235 dev->num_vq++;
1236 dev->desc->num_vq++;
1237
1238 verbose("Virtqueue page %#lx\n", to_guest_phys(p));
1239
1240 /*
1241 * Add to tail of list, so dev->vq is first vq, dev->vq->next is
1242 * second.
1243 */
1244 for (i = &dev->vq; *i; i = &(*i)->next);
1245 *i = vq;
1246}
1247
1248/*
1249 * The first half of the feature bitmask is for us to advertise features. The
1250 * second half is for the Guest to accept features.
1251 */
1252static void add_feature(struct device *dev, unsigned bit)
1253{
1254 u8 *features = get_feature_bits(dev);
1255
1256 /* We can't extend the feature bits once we've added config bytes */
1257 if (dev->desc->feature_len <= bit / CHAR_BIT) {
1258 assert(dev->desc->config_len == 0);
1259 dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
1260 }
1261
1262 features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
1263}
1264
1265/*
1266 * This routine sets the configuration fields for an existing device's
1267 * descriptor. It only works for the last device, but that's OK because that's
1268 * how we use it.
1269 */
1270static void set_config(struct device *dev, unsigned len, const void *conf)
1271{
1272 /* Check we haven't overflowed our single page. */
1273 if (device_config(dev) + len > devices.descpage + getpagesize())
1274 errx(1, "Too many devices");
1275
1276 /* Copy in the config information, and store the length. */
1277 memcpy(device_config(dev), conf, len);
1278 dev->desc->config_len = len;
1279
1280 /* Size must fit in config_len field (8 bits)! */
1281 assert(dev->desc->config_len == len);
1282}
1283
1284/*
1285 * This routine does all the creation and setup of a new device, including
1286 * calling new_dev_desc() to allocate the descriptor and device memory. We
1287 * don't actually start the service threads until later.
1288 *
1289 * See what I mean about userspace being boring?
1290 */
1291static struct device *new_device(const char *name, u16 type)
1292{
1293 struct device *dev = malloc(sizeof(*dev));
1294
1295 /* Now we populate the fields one at a time. */
1296 dev->desc = new_dev_desc(type);
1297 dev->name = name;
1298 dev->vq = NULL;
1299 dev->feature_len = 0;
1300 dev->num_vq = 0;
1301 dev->running = false;
1302
1303 /*
1304 * Append to device list. Prepending to a single-linked list is
1305 * easier, but the user expects the devices to be arranged on the bus
1306 * in command-line order. The first network device on the command line
1307 * is eth0, the first block device /dev/vda, etc.
1308 */
1309 if (devices.lastdev)
1310 devices.lastdev->next = dev;
1311 else
1312 devices.dev = dev;
1313 devices.lastdev = dev;
1314
1315 return dev;
1316}
1317
1318/*
1319 * Our first setup routine is the console. It's a fairly simple device, but
1320 * UNIX tty handling makes it uglier than it could be.
1321 */
1322static void setup_console(void)
1323{
1324 struct device *dev;
1325
1326 /* If we can save the initial standard input settings... */
1327 if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
1328 struct termios term = orig_term;
1329 /*
1330 * Then we turn off echo, line buffering and ^C etc: We want a
1331 * raw input stream to the Guest.
1332 */
1333 term.c_lflag &= ~(ISIG|ICANON|ECHO);
1334 tcsetattr(STDIN_FILENO, TCSANOW, &term);
1335 }
1336
1337 dev = new_device("console", VIRTIO_ID_CONSOLE);
1338
1339 /* We store the console state in dev->priv, and initialize it. */
1340 dev->priv = malloc(sizeof(struct console_abort));
1341 ((struct console_abort *)dev->priv)->count = 0;
1342
1343 /*
1344 * The console needs two virtqueues: the input then the output. When
1345 * they put something the input queue, we make sure we're listening to
1346 * stdin. When they put something in the output queue, we write it to
1347 * stdout.
1348 */
1349 add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
1350 add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
1351
1352 verbose("device %u: console\n", ++devices.device_num);
1353}
1354/*:*/
1355
1356/*M:010
1357 * Inter-guest networking is an interesting area. Simplest is to have a
1358 * --sharenet=<name> option which opens or creates a named pipe. This can be
1359 * used to send packets to another guest in a 1:1 manner.
1360 *
1361 * More sophisticated is to use one of the tools developed for projects like UML
1362 * to do networking.
1363 *
1364 * Faster is to do virtio bonding in kernel. Doing this 1:1 would be
1365 * completely generic ("here's my vring, attach to your vring") and would work
1366 * for any traffic. Of course, namespace and permissions issues need to be
1367 * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide
1368 * multiple inter-guest channels behind one interface, although it would
1369 * require some manner of hotplugging new virtio channels.
1370 *
1371 * Finally, we could use a virtio network switch in the kernel, ie. vhost.
1372:*/
1373
1374static u32 str2ip(const char *ipaddr)
1375{
1376 unsigned int b[4];
1377
1378 if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4)
1379 errx(1, "Failed to parse IP address '%s'", ipaddr);
1380 return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
1381}
1382
/*
 * Parse a colon-separated MAC address ("xx:xx:xx:xx:xx:xx", hex) into the
 * six bytes of mac.  Exits the Launcher if six fields cannot be read.
 */
static void str2mac(const char *macaddr, unsigned char mac[6])
{
	unsigned int m[6];
	int parsed, i;

	/* Scan into ints first: %x needs an unsigned int destination. */
	parsed = sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x",
			m, m + 1, m + 2, m + 3, m + 4, m + 5);
	if (parsed != 6)
		errx(1, "Failed to parse mac address '%s'", macaddr);

	for (i = 0; i < 6; i++)
		mac[i] = m[i];
}
1396
1397/*
1398 * This code is "adapted" from libbridge: it attaches the Host end of the
1399 * network device to the bridge device specified by the command line.
1400 *
1401 * This is yet another James Morris contribution (I'm an IP-level guy, so I
1402 * dislike bridging), and I just try not to break it.
1403 */
1404static void add_to_bridge(int fd, const char *if_name, const char *br_name)
1405{
1406 int ifidx;
1407 struct ifreq ifr;
1408
1409 if (!*br_name)
1410 errx(1, "must specify bridge name");
1411
1412 ifidx = if_nametoindex(if_name);
1413 if (!ifidx)
1414 errx(1, "interface %s does not exist!", if_name);
1415
1416 strncpy(ifr.ifr_name, br_name, IFNAMSIZ);
1417 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1418 ifr.ifr_ifindex = ifidx;
1419 if (ioctl(fd, SIOCBRADDIF, &ifr) < 0)
1420 err(1, "can't add %s to bridge %s", if_name, br_name);
1421}
1422
1423/*
1424 * This sets up the Host end of the network device with an IP address, and
1425 * brings it up so packets will flow.  (It does not touch the MAC address:
1426 * that is parsed separately in setup_tun_net().)
1427 */
1428static void configure_device(int fd, const char *tapif, u32 ipaddr)
1429{
1430 struct ifreq ifr;
1431 struct sockaddr_in sin;
1432
1433 memset(&ifr, 0, sizeof(ifr));
1434 strcpy(ifr.ifr_name, tapif);
1435
1436 /* Don't read these incantations. Just cut & paste them like I did! */
1437 sin.sin_family = AF_INET;
1438 sin.sin_addr.s_addr = htonl(ipaddr);
1439 memcpy(&ifr.ifr_addr, &sin, sizeof(sin));
1440 if (ioctl(fd, SIOCSIFADDR, &ifr) != 0)
1441 err(1, "Setting %s interface address", tapif);
1442 ifr.ifr_flags = IFF_UP;
1443 if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
1444 err(1, "Bringing interface %s up", tapif);
1445}
1446
1447static int get_tun_device(char tapif[IFNAMSIZ])
1448{
1449 struct ifreq ifr;
1450 int netfd;
1451
1452 /* Start with this zeroed. Messy but sure. */
1453 memset(&ifr, 0, sizeof(ifr));
1454
1455 /*
1456 * We open the /dev/net/tun device and tell it we want a tap device. A
1457 * tap device is like a tun device, only somehow different. To tell
1458 * the truth, I completely blundered my way through this code, but it
1459 * works now!
1460 */
1461 netfd = open_or_die("/dev/net/tun", O_RDWR);
1462 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
1463 strcpy(ifr.ifr_name, "tap%d");
1464 if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
1465 err(1, "configuring /dev/net/tun");
1466
1467 if (ioctl(netfd, TUNSETOFFLOAD,
1468 TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0)
1469 err(1, "Could not set features for tun device");
1470
1471 /*
1472 * We don't need checksums calculated for packets coming in this
1473 * device: trust us!
1474 */
1475 ioctl(netfd, TUNSETNOCSUM, 1);
1476
1477 memcpy(tapif, ifr.ifr_name, IFNAMSIZ);
1478 return netfd;
1479}
1480
1481/*L:195
1482 * Our network is a Host<->Guest network. This can either use bridging or
1483 * routing, but the principle is the same: it uses the "tun" device to inject
1484 * packets into the Host as if they came in from a normal network card. We
1485 * just shunt packets between the Guest and the tun device.
1486 */
1487static void setup_tun_net(char *arg)
1488{
1489 struct device *dev;
1490 struct net_info *net_info = malloc(sizeof(*net_info));
1491 int ipfd;
1492 u32 ip = INADDR_ANY;
1493 bool bridging = false;
1494 char tapif[IFNAMSIZ], *p;
1495 struct virtio_net_config conf;
1496
1497 net_info->tunfd = get_tun_device(tapif);
1498
1499 /* First we create a new network device. */
1500 dev = new_device("net", VIRTIO_ID_NET);
1501 dev->priv = net_info;
1502
1503 /* Network devices need a recv and a send queue, just like console. */
1504 add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
1505 add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
1506
1507 /*
1508 * We need a socket to perform the magic network ioctls to bring up the
1509 * tap interface, connect to the bridge etc. Any socket will do!
1510 */
1511 ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
1512 if (ipfd < 0)
1513 err(1, "opening IP socket");
1514
1515 /* If the command line was --tunnet=bridge:<name> do bridging. */
1516 if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
1517 arg += strlen(BRIDGE_PFX);
1518 bridging = true;
1519 }
1520
1521 /* A mac address may follow the bridge name or IP address */
1522 p = strchr(arg, ':');
1523 if (p) {
1524 str2mac(p+1, conf.mac);
1525 add_feature(dev, VIRTIO_NET_F_MAC);
1526 *p = '\0';
1527 }
1528
1529 /* arg is now either an IP address or a bridge name */
1530 if (bridging)
1531 add_to_bridge(ipfd, tapif, arg);
1532 else
1533 ip = str2ip(arg);
1534
1535 /* Set up the tun device. */
1536 configure_device(ipfd, tapif, ip);
1537
1538 /* Expect Guest to handle everything except UFO */
1539 add_feature(dev, VIRTIO_NET_F_CSUM);
1540 add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
1541 add_feature(dev, VIRTIO_NET_F_GUEST_TSO4);
1542 add_feature(dev, VIRTIO_NET_F_GUEST_TSO6);
1543 add_feature(dev, VIRTIO_NET_F_GUEST_ECN);
1544 add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
1545 add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
1546 add_feature(dev, VIRTIO_NET_F_HOST_ECN);
1547 /* We handle indirect ring entries */
1548 add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
1549 set_config(dev, sizeof(conf), &conf);
1550
1551 /* We don't need the socket any more; setup is done. */
1552 close(ipfd);
1553
1554 devices.device_num++;
1555
1556 if (bridging)
1557 verbose("device %u: tun %s attached to bridge: %s\n",
1558 devices.device_num, tapif, arg);
1559 else
1560 verbose("device %u: tun %s: %s\n",
1561 devices.device_num, tapif, arg);
1562}
1563/*:*/
1564
1565/* This hangs off device->priv. */
struct vblk_info {
	/*
	 * The size of the backing file in bytes, as found by seeking to its
	 * end in setup_block_file().  Writes past this are treated as fatal.
	 */
	off64_t len;

	/* The file descriptor for the backing file (opened read-write). */
	int fd;

};
1574
1575/*L:210
1576 * The Disk
1577 *
1578 * The disk only has one virtqueue, so it only has one thread. It is really
1579 * simple: the Guest asks for a block number and we read or write that position
1580 * in the file.
1581 *
1582 * Before we serviced each virtqueue in a separate thread, that was unacceptably
1583 * slow: the Guest waits until the read is finished before running anything
1584 * else, even if it could have been doing useful work.
1585 *
1586 * We could have used async I/O, except it's reputed to suck so hard that
1587 * characters actually go missing from your code when you try to use it.
1588 */
1589static void blk_request(struct virtqueue *vq)
1590{
1591 struct vblk_info *vblk = vq->dev->priv;
1592 unsigned int head, out_num, in_num, wlen;
1593 int ret;
1594 u8 *in;
1595 struct virtio_blk_outhdr *out;
1596 struct iovec iov[vq->vring.num];
1597 off64_t off;
1598
1599 /*
1600 * Get the next request, where we normally wait. It triggers the
1601 * interrupt to acknowledge previously serviced requests (if any).
1602 */
1603 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1604
1605 /*
1606 * Every block request should contain at least one output buffer
1607 * (detailing the location on disk and the type of request) and one
1608 * input buffer (to hold the result).
1609 */
1610 if (out_num == 0 || in_num == 0)
1611 errx(1, "Bad virtblk cmd %u out=%u in=%u",
1612 head, out_num, in_num);
1613
1614 out = convert(&iov[0], struct virtio_blk_outhdr);
1615 in = convert(&iov[out_num+in_num-1], u8);
1616 /*
1617 * For historical reasons, block operations are expressed in 512 byte
1618 * "sectors".
1619 */
1620 off = out->sector * 512;
1621
1622 /*
1623 * In general the virtio block driver is allowed to try SCSI commands.
1624 * It'd be nice if we supported eject, for example, but we don't.
1625 */
1626 if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
1627 fprintf(stderr, "Scsi commands unsupported\n");
1628 *in = VIRTIO_BLK_S_UNSUPP;
1629 wlen = sizeof(*in);
1630 } else if (out->type & VIRTIO_BLK_T_OUT) {
1631 /*
1632 * Write
1633 *
1634 * Move to the right location in the block file. This can fail
1635 * if they try to write past end.
1636 */
1637 if (lseek64(vblk->fd, off, SEEK_SET) != off)
1638 err(1, "Bad seek to sector %llu", out->sector);
1639
1640 ret = writev(vblk->fd, iov+1, out_num-1);
1641 verbose("WRITE to sector %llu: %i\n", out->sector, ret);
1642
1643 /*
1644 * Grr... Now we know how long the descriptor they sent was, we
1645 * make sure they didn't try to write over the end of the block
1646 * file (possibly extending it).
1647 */
1648 if (ret > 0 && off + ret > vblk->len) {
1649 /* Trim it back to the correct length */
1650 ftruncate64(vblk->fd, vblk->len);
1651 /* Die, bad Guest, die. */
1652 errx(1, "Write past end %llu+%u", off, ret);
1653 }
1654
1655 wlen = sizeof(*in);
1656 *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
1657 } else if (out->type & VIRTIO_BLK_T_FLUSH) {
1658 /* Flush */
1659 ret = fdatasync(vblk->fd);
1660 verbose("FLUSH fdatasync: %i\n", ret);
1661 wlen = sizeof(*in);
1662 *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
1663 } else {
1664 /*
1665 * Read
1666 *
1667 * Move to the right location in the block file. This can fail
1668 * if they try to read past end.
1669 */
1670 if (lseek64(vblk->fd, off, SEEK_SET) != off)
1671 err(1, "Bad seek to sector %llu", out->sector);
1672
1673 ret = readv(vblk->fd, iov+1, in_num-1);
1674 verbose("READ from sector %llu: %i\n", out->sector, ret);
1675 if (ret >= 0) {
1676 wlen = sizeof(*in) + ret;
1677 *in = VIRTIO_BLK_S_OK;
1678 } else {
1679 wlen = sizeof(*in);
1680 *in = VIRTIO_BLK_S_IOERR;
1681 }
1682 }
1683
1684 /* Finished that request. */
1685 add_used(vq, head, wlen);
1686}
1687
1688/*L:198 This actually sets up a virtual block device. */
1689static void setup_block_file(const char *filename)
1690{
1691 struct device *dev;
1692 struct vblk_info *vblk;
1693 struct virtio_blk_config conf;
1694
1695 /* Creat the device. */
1696 dev = new_device("block", VIRTIO_ID_BLOCK);
1697
1698 /* The device has one virtqueue, where the Guest places requests. */
1699 add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
1700
1701 /* Allocate the room for our own bookkeeping */
1702 vblk = dev->priv = malloc(sizeof(*vblk));
1703
1704 /* First we open the file and store the length. */
1705 vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
1706 vblk->len = lseek64(vblk->fd, 0, SEEK_END);
1707
1708 /* We support FLUSH. */
1709 add_feature(dev, VIRTIO_BLK_F_FLUSH);
1710
1711 /* Tell Guest how many sectors this device has. */
1712 conf.capacity = cpu_to_le64(vblk->len / 512);
1713
1714 /*
1715 * Tell Guest not to put in too many descriptors at once: two are used
1716 * for the in and out elements.
1717 */
1718 add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
1719 conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2);
1720
1721 /* Don't try to put whole struct: we have 8 bit limit. */
1722 set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf);
1723
1724 verbose("device %u: virtblock %llu sectors\n",
1725 ++devices.device_num, le64_to_cpu(conf.capacity));
1726}
1727
1728/*L:211
1729 * Our random number generator device reads from /dev/random into the Guest's
1730 * input buffers. The usual case is that the Guest doesn't want random numbers
1731 * and so has no buffers although /dev/random is still readable, whereas
1732 * console is the reverse.
1733 *
1734 * The same logic applies, however.
1735 */
/* Private state of the random number device; hangs off device->priv. */
struct rng_info {
	/* Descriptor for /dev/random, opened read-only by setup_rng(). */
	int rfd;
};
1739
1740static void rng_input(struct virtqueue *vq)
1741{
1742 int len;
1743 unsigned int head, in_num, out_num, totlen = 0;
1744 struct rng_info *rng_info = vq->dev->priv;
1745 struct iovec iov[vq->vring.num];
1746
1747 /* First we need a buffer from the Guests's virtqueue. */
1748 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1749 if (out_num)
1750 errx(1, "Output buffers in rng?");
1751
1752 /*
1753 * Just like the console write, we loop to cover the whole iovec.
1754 * In this case, short reads actually happen quite a bit.
1755 */
1756 while (!iov_empty(iov, in_num)) {
1757 len = readv(rng_info->rfd, iov, in_num);
1758 if (len <= 0)
1759 err(1, "Read from /dev/random gave %i", len);
1760 iov_consume(iov, in_num, len);
1761 totlen += len;
1762 }
1763
1764 /* Tell the Guest about the new input. */
1765 add_used(vq, head, totlen);
1766}
1767
1768/*L:199
1769 * This creates a "hardware" random number device for the Guest.
1770 */
1771static void setup_rng(void)
1772{
1773 struct device *dev;
1774 struct rng_info *rng_info = malloc(sizeof(*rng_info));
1775
1776 /* Our device's privat info simply contains the /dev/random fd. */
1777 rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
1778
1779 /* Create the new device. */
1780 dev = new_device("rng", VIRTIO_ID_RNG);
1781 dev->priv = rng_info;
1782
1783 /* The device has one virtqueue, where the Guest places inbufs. */
1784 add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
1785
1786 verbose("device %u: rng\n", devices.device_num++);
1787}
1788/* That's the end of device setup. */
1789
1790/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
1791static void __attribute__((noreturn)) restart_guest(void)
1792{
1793 unsigned int i;
1794
1795 /*
1796 * Since we don't track all open fds, we simply close everything beyond
1797 * stderr.
1798 */
1799 for (i = 3; i < FD_SETSIZE; i++)
1800 close(i);
1801
1802 /* Reset all the devices (kills all threads). */
1803 cleanup_devices();
1804
1805 execv(main_args[0], main_args);
1806 err(1, "Could not exec %s", main_args[0]);
1807}
1808
1809/*L:220
1810 * Finally we reach the core of the Launcher which runs the Guest, serves
1811 * its input and output, and finally, lays it to rest.
1812 */
1813static void __attribute__((noreturn)) run_guest(void)
1814{
1815 for (;;) {
1816 unsigned long notify_addr;
1817 int readval;
1818
1819 /* We read from the /dev/lguest device to run the Guest. */
1820 readval = pread(lguest_fd, &notify_addr,
1821 sizeof(notify_addr), cpu_id);
1822
1823 /* One unsigned long means the Guest did HCALL_NOTIFY */
1824 if (readval == sizeof(notify_addr)) {
1825 verbose("Notify on address %#lx\n", notify_addr);
1826 handle_output(notify_addr);
1827 /* ENOENT means the Guest died. Reading tells us why. */
1828 } else if (errno == ENOENT) {
1829 char reason[1024] = { 0 };
1830 pread(lguest_fd, reason, sizeof(reason)-1, cpu_id);
1831 errx(1, "%s", reason);
1832 /* ERESTART means that we need to reboot the guest */
1833 } else if (errno == ERESTART) {
1834 restart_guest();
1835 /* Anything else means a bug or incompatible change. */
1836 } else
1837 err(1, "Running guest failed");
1838 }
1839}
1840/*L:240
1841 * This is the end of the Launcher. The good news: we are over halfway
1842 * through! The bad news: the most fiendish part of the code still lies ahead
1843 * of us.
1844 *
1845 * Are you ready? Take a deep breath and join me in the core of the Host, in
1846 * "make Host".
1847:*/
1848
/*
 * Long options understood by the Launcher.  Each maps to the single
 * character that getopt_long() returns for it; the table ends with an
 * all-zero entry.
 */
static struct option opts[] = {
	{ "verbose",  no_argument,       NULL, 'v' },
	{ "tunnet",   required_argument, NULL, 't' },
	{ "block",    required_argument, NULL, 'b' },
	{ "rng",      no_argument,       NULL, 'r' },
	{ "initrd",   required_argument, NULL, 'i' },
	{ "username", required_argument, NULL, 'u' },
	{ "chroot",   required_argument, NULL, 'c' },
	{ NULL },
};
/*
 * Print the command-line synopsis and exit with status 1.  Lists every
 * option present in the opts[] table.
 */
static void usage(void)
{
	errx(1, "Usage: lguest [--verbose] "
	     "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n"
	     "|--block=<filename>|--initrd=<filename>|--rng\n"
	     "|--username=<username>|--chroot=<path>]...\n"
	     "<mem-in-mb> vmlinux [args...]");
}
1866
1867/*L:105 The main routine is where the real work begins: */
1868int main(int argc, char *argv[])
1869{
1870 /* Memory, code startpoint and size of the (optional) initrd. */
1871 unsigned long mem = 0, start, initrd_size = 0;
1872 /* Two temporaries. */
1873 int i, c;
1874 /* The boot information for the Guest. */
1875 struct boot_params *boot;
1876 /* If they specify an initrd file to load. */
1877 const char *initrd_name = NULL;
1878
1879 /* Password structure for initgroups/setres[gu]id */
1880 struct passwd *user_details = NULL;
1881
1882 /* Directory to chroot to */
1883 char *chroot_path = NULL;
1884
1885 /* Save the args: we "reboot" by execing ourselves again. */
1886 main_args = argv;
1887
1888 /*
1889 * First we initialize the device list. We keep a pointer to the last
1890 * device, and the next interrupt number to use for devices (1:
1891 * remember that 0 is used by the timer).
1892 */
1893 devices.lastdev = NULL;
1894 devices.next_irq = 1;
1895
1896 /* We're CPU 0. In fact, that's the only CPU possible right now. */
1897 cpu_id = 0;
1898
1899 /*
1900 * We need to know how much memory so we can set up the device
1901 * descriptor and memory pages for the devices as we parse the command
1902 * line. So we quickly look through the arguments to find the amount
1903 * of memory now.
1904 */
1905 for (i = 1; i < argc; i++) {
1906 if (argv[i][0] != '-') {
1907 mem = atoi(argv[i]) * 1024 * 1024;
1908 /*
1909 * We start by mapping anonymous pages over all of
1910 * guest-physical memory range. This fills it with 0,
1911 * and ensures that the Guest won't be killed when it
1912 * tries to access it.
1913 */
1914 guest_base = map_zeroed_pages(mem / getpagesize()
1915 + DEVICE_PAGES);
1916 guest_limit = mem;
1917 guest_max = mem + DEVICE_PAGES*getpagesize();
1918 devices.descpage = get_pages(1);
1919 break;
1920 }
1921 }
1922
1923 /* The options are fairly straight-forward */
1924 while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
1925 switch (c) {
1926 case 'v':
1927 verbose = true;
1928 break;
1929 case 't':
1930 setup_tun_net(optarg);
1931 break;
1932 case 'b':
1933 setup_block_file(optarg);
1934 break;
1935 case 'r':
1936 setup_rng();
1937 break;
1938 case 'i':
1939 initrd_name = optarg;
1940 break;
1941 case 'u':
1942 user_details = getpwnam(optarg);
1943 if (!user_details)
1944 err(1, "getpwnam failed, incorrect username?");
1945 break;
1946 case 'c':
1947 chroot_path = optarg;
1948 break;
1949 default:
1950 warnx("Unknown argument %s", argv[optind]);
1951 usage();
1952 }
1953 }
1954 /*
1955 * After the other arguments we expect memory and kernel image name,
1956 * followed by command line arguments for the kernel.
1957 */
1958 if (optind + 2 > argc)
1959 usage();
1960
1961 verbose("Guest base is at %p\n", guest_base);
1962
1963 /* We always have a console device */
1964 setup_console();
1965
1966 /* Now we load the kernel */
1967 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
1968
1969 /* Boot information is stashed at physical address 0 */
1970 boot = from_guest_phys(0);
1971
1972 /* Map the initrd image if requested (at top of physical memory) */
1973 if (initrd_name) {
1974 initrd_size = load_initrd(initrd_name, mem);
1975 /*
1976 * These are the location in the Linux boot header where the
1977 * start and size of the initrd are expected to be found.
1978 */
1979 boot->hdr.ramdisk_image = mem - initrd_size;
1980 boot->hdr.ramdisk_size = initrd_size;
1981 /* The bootloader type 0xFF means "unknown"; that's OK. */
1982 boot->hdr.type_of_loader = 0xFF;
1983 }
1984
1985 /*
1986 * The Linux boot header contains an "E820" memory map: ours is a
1987 * simple, single region.
1988 */
1989 boot->e820_entries = 1;
1990 boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM });
1991 /*
1992 * The boot header contains a command line pointer: we put the command
1993 * line after the boot header.
1994 */
1995 boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1);
1996 /* We use a simple helper to copy the arguments separated by spaces. */
1997 concat((char *)(boot + 1), argv+optind+2);
1998
1999 /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */
2000 boot->hdr.kernel_alignment = 0x1000000;
2001
2002 /* Boot protocol version: 2.07 supports the fields for lguest. */
2003 boot->hdr.version = 0x207;
2004
2005 /* The hardware_subarch value of "1" tells the Guest it's an lguest. */
2006 boot->hdr.hardware_subarch = 1;
2007
2008 /* Tell the entry path not to try to reload segment registers. */
2009 boot->hdr.loadflags |= KEEP_SEGMENTS;
2010
2011 /* We tell the kernel to initialize the Guest. */
2012 tell_kernel(start);
2013
2014 /* Ensure that we terminate if a device-servicing child dies. */
2015 signal(SIGCHLD, kill_launcher);
2016
2017 /* If we exit via err(), this kills all the threads, restores tty. */
2018 atexit(cleanup_devices);
2019
2020 /* If requested, chroot to a directory */
2021 if (chroot_path) {
2022 if (chroot(chroot_path) != 0)
2023 err(1, "chroot(\"%s\") failed", chroot_path);
2024
2025 if (chdir("/") != 0)
2026 err(1, "chdir(\"/\") failed");
2027
2028 verbose("chroot done\n");
2029 }
2030
2031 /* If requested, drop privileges */
2032 if (user_details) {
2033 uid_t u;
2034 gid_t g;
2035
2036 u = user_details->pw_uid;
2037 g = user_details->pw_gid;
2038
2039 if (initgroups(user_details->pw_name, g) != 0)
2040 err(1, "initgroups failed");
2041
2042 if (setresgid(g, g, g) != 0)
2043 err(1, "setresgid failed");
2044
2045 if (setresuid(u, u, u) != 0)
2046 err(1, "setresuid failed");
2047
2048 verbose("Dropping privileges completed\n");
2049 }
2050
2051 /* Finally, run the Guest. This doesn't return. */
2052 run_guest();
2053}
2054/*:*/
2055
2056/*M:999
2057 * Mastery is done: you now know everything I do.
2058 *
2059 * But surely you have seen code, features and bugs in your wanderings which
2060 * you now yearn to attack? That is the real game, and I look forward to you
2061 * patching and forking lguest into the Your-Name-Here-visor.
2062 *
2063 * Farewell, and good coding!
2064 * Rusty Russell.
2065 */
diff --git a/tools/lguest/lguest.txt b/tools/lguest/lguest.txt
new file mode 100644
index 000000000000..bff0c554485d
--- /dev/null
+++ b/tools/lguest/lguest.txt
@@ -0,0 +1,129 @@
1 __
2 (___()'`; Rusty's Remarkably Unreliable Guide to Lguest
3 /, /` - or, A Young Coder's Illustrated Hypervisor
4 \\"--\\ http://lguest.ozlabs.org
5
6Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel,
7for Linux developers and users to experiment with virtualization with the
8minimum of complexity. Nonetheless, it should have sufficient features to
9make it useful for specific tasks, and, of course, you are encouraged to fork
10and enhance it (see drivers/lguest/README).
11
12Features:
13
14- Kernel module which runs in a normal kernel.
15- Simple I/O model for communication.
16- Simple program to create new guests.
17- Logo contains cute puppies: http://lguest.ozlabs.org
18
19Developer features:
20
21- Fun to hack on.
22- No ABI: being tied to a specific kernel anyway, you can change anything.
23- Many opportunities for improvement or feature implementation.
24
25Running Lguest:
26
27- The easiest way to run lguest is to use the same kernel as guest and host.
28 You can configure them differently, but usually it's easiest not to.
29
30 You will need to configure your kernel with the following options:
31
32 "General setup":
33 "Prompt for development and/or incomplete code/drivers" = Y
34 (CONFIG_EXPERIMENTAL=y)
35
36 "Processor type and features":
37 "Paravirtualized guest support" = Y
38 "Lguest guest support" = Y
39 "High Memory Support" = off/4GB
40 "Alignment value to which kernel should be aligned" = 0x100000
41 (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
42 CONFIG_PHYSICAL_ALIGN=0x100000)
43
44 "Device Drivers":
45 "Block devices"
46 "Virtio block driver (EXPERIMENTAL)" = M/Y
47 "Network device support"
48 "Universal TUN/TAP device driver support" = M/Y
49 "Virtio network driver (EXPERIMENTAL)" = M/Y
50 (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m)
51
52 "Virtualization"
53 "Linux hypervisor example code" = M/Y
54 (CONFIG_LGUEST=m)
55
56- A tool called "lguest" is available in this directory: type "make"
57 to build it. If you didn't build your kernel in-tree, use "make
58 O=<builddir>".
59
60- Create or find a root disk image. There are several useful ones
61 around, such as the xm-test tiny root image at
62 http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img
63
64 For more serious work, I usually use a distribution ISO image and
65 install it under qemu, then make multiple copies:
66
67 dd if=/dev/zero of=rootfile bs=1M count=2048
68 qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d
69
70 Make sure that you install a getty on /dev/hvc0 if you want to log in on the
71 console!
72
73- "modprobe lg" if you built it as a module.
74
75- Run an lguest as root:
76
77 tools/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
78 --block=rootfile root=/dev/vda
79
80 Explanation:
81 64: the amount of memory to use, in MB.
82
83 vmlinux: the kernel image found in the top of your build directory. You
84 can also use a standard bzImage.
85
86 --tunnet=192.168.19.1: configures a "tap" device for networking with this
87 IP address.
88
89 --block=rootfile: a file or block device which becomes /dev/vda
90 inside the guest.
91
92 root=/dev/vda: this (and anything else on the command line) are
93 kernel boot parameters.
94
95- Configuring networking. I usually have the host masquerade, using
96 "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 >
97 /proc/sys/net/ipv4/ip_forward". In this example, I would configure
98 eth0 inside the guest at 192.168.19.2.
99
100 Another method is to bridge the tap device to an external interface
101 using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest
102 to obtain an IP address. The bridge needs to be configured first:
103 this option simply adds the tap interface to it.
104
105 A simple example on my system:
106
107 ifconfig eth0 0.0.0.0
108 brctl addbr lg0
109 ifconfig lg0 up
110 brctl addif lg0 eth0
111 dhclient lg0
112
113 Then use --tunnet=bridge:lg0 when launching the guest.
114
115 See:
116
117 http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
118
119 for general information on how to get bridging to work.
120
121- Random number generation. Using the --rng option will provide a
122 /dev/hwrng in the guest that will read from the host's /dev/random.
123 Use this option in conjunction with rng-tools (see Documentation/hw_random.txt)
124 to provide entropy to the guest kernel's /dev/random.
125
126There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest
127
128Good luck!
129Rusty Russell rusty@rustcorp.com.au.
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
new file mode 100755
index 000000000000..06a399ac8b2f
--- /dev/null
+++ b/tools/nfsd/inject_fault.sh
@@ -0,0 +1,49 @@
#!/bin/bash
#
# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
#
# Script for easier NFSD fault injection
#
# Usage: inject_fault.sh injection_type [count]
#
# Writes "count" (0 when not given) to the file named injection_type in the
# nfsd directory under debugfs, then prints the dmesg lines that appeared
# as a result.

# Check that debugfs has been mounted.  Match on the filesystem-type field
# so that an unrelated line merely containing "debugfs" is not picked up,
# and take only the first mount point if there are several.
DEBUGFS=$(awk '$3 == "debugfs" { print $2; exit }' /proc/mounts)
if [ -z "$DEBUGFS" ]; then
	echo "debugfs does not appear to be mounted!"
	echo "Please mount debugfs and try again"
	exit 1
fi

# Check that the fault injection directory exists
DEBUGDIR="$DEBUGFS/nfsd"
if [ ! -d "$DEBUGDIR" ]; then
	echo "$DEBUGDIR does not exist"
	echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
	exit 1
fi

# Print the usage text plus the available injection types, then exit 1.
function help()
{
	echo "Usage $0 injection_type [count]"
	echo ""
	echo "Injection types are:"
	ls "$DEBUGDIR"
	exit 1
}

# No arguments, or an unknown injection type: show help (which exits).
# Otherwise the count is 0 unless exactly two arguments were given.
if [ $# -eq 0 ]; then
	help
elif [ ! -f "$DEBUGDIR/$1" ]; then
	help
elif [ $# -ne 2 ]; then
	COUNT=0
else
	COUNT=$2
fi

BEFORE=$(mktemp) || exit 1
AFTER=$(mktemp) || { rm -f "$BEFORE"; exit 1; }
# Do not leave the temporary files behind if we are interrupted.
trap 'rm -f "$BEFORE" "$AFTER"' EXIT

dmesg > "$BEFORE"
echo "$COUNT" > "$DEBUGDIR/$1"
dmesg > "$AFTER"
# Capture lines that only exist in the $AFTER file
diff "$BEFORE" "$AFTER" | grep ">"
rm -f "$BEFORE" "$AFTER"
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
index 8eb6c489fb15..77f952762426 100644
--- a/tools/perf/Documentation/examples.txt
+++ b/tools/perf/Documentation/examples.txt
@@ -17,8 +17,8 @@ titan:~> perf list
17 kmem:kmem_cache_alloc_node [Tracepoint event] 17 kmem:kmem_cache_alloc_node [Tracepoint event]
18 kmem:kfree [Tracepoint event] 18 kmem:kfree [Tracepoint event]
19 kmem:kmem_cache_free [Tracepoint event] 19 kmem:kmem_cache_free [Tracepoint event]
20 kmem:mm_page_free_direct [Tracepoint event] 20 kmem:mm_page_free [Tracepoint event]
21 kmem:mm_pagevec_free [Tracepoint event] 21 kmem:mm_page_free_batched [Tracepoint event]
22 kmem:mm_page_alloc [Tracepoint event] 22 kmem:mm_page_alloc [Tracepoint event]
23 kmem:mm_page_alloc_zone_locked [Tracepoint event] 23 kmem:mm_page_alloc_zone_locked [Tracepoint event]
24 kmem:mm_page_pcpu_drain [Tracepoint event] 24 kmem:mm_page_pcpu_drain [Tracepoint event]
@@ -29,15 +29,15 @@ measured. For example the page alloc/free properties of a 'hackbench
29run' are: 29run' are:
30 30
31 titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc 31 titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
32 -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 32 -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10
33 Time: 0.575 33 Time: 0.575
34 34
35 Performance counter stats for './hackbench 10': 35 Performance counter stats for './hackbench 10':
36 36
37 13857 kmem:mm_page_pcpu_drain 37 13857 kmem:mm_page_pcpu_drain
38 27576 kmem:mm_page_alloc 38 27576 kmem:mm_page_alloc
39 6025 kmem:mm_pagevec_free 39 6025 kmem:mm_page_free_batched
40 20934 kmem:mm_page_free_direct 40 20934 kmem:mm_page_free
41 41
42 0.613972165 seconds time elapsed 42 0.613972165 seconds time elapsed
43 43
@@ -45,8 +45,8 @@ You can observe the statistical properties as well, by using the
45'repeat the workload N times' feature of perf stat: 45'repeat the workload N times' feature of perf stat:
46 46
47 titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e 47 titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
48 kmem:mm_page_alloc -e kmem:mm_pagevec_free -e 48 kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
49 kmem:mm_page_free_direct ./hackbench 10 49 kmem:mm_page_free ./hackbench 10
50 Time: 0.627 50 Time: 0.627
51 Time: 0.644 51 Time: 0.644
52 Time: 0.564 52 Time: 0.564
@@ -57,8 +57,8 @@ You can observe the statistical properties as well, by using the
57 57
58 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) 58 12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
59 25035 kmem:mm_page_alloc ( +- 3.783% ) 59 25035 kmem:mm_page_alloc ( +- 3.783% )
60 6104 kmem:mm_pagevec_free ( +- 0.934% ) 60 6104 kmem:mm_page_free_batched ( +- 0.934% )
61 18376 kmem:mm_page_free_direct ( +- 4.941% ) 61 18376 kmem:mm_page_free ( +- 4.941% )
62 62
63 0.643954516 seconds time elapsed ( +- 2.363% ) 63 0.643954516 seconds time elapsed ( +- 2.363% )
64 64
@@ -158,15 +158,15 @@ Or you can observe the whole system's page allocations for 10
158seconds: 158seconds:
159 159
160titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e 160titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
161kmem:mm_page_alloc -e kmem:mm_pagevec_free -e 161kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
162kmem:mm_page_free_direct sleep 10 162kmem:mm_page_free sleep 10
163 163
164 Performance counter stats for 'sleep 10': 164 Performance counter stats for 'sleep 10':
165 165
166 171585 kmem:mm_page_pcpu_drain 166 171585 kmem:mm_page_pcpu_drain
167 322114 kmem:mm_page_alloc 167 322114 kmem:mm_page_alloc
168 73623 kmem:mm_pagevec_free 168 73623 kmem:mm_page_free_batched
169 254115 kmem:mm_page_free_direct 169 254115 kmem:mm_page_free
170 170
171 10.000591410 seconds time elapsed 171 10.000591410 seconds time elapsed
172 172
@@ -174,15 +174,15 @@ Or observe how fluctuating the page allocations are, via statistical
174analysis done over ten 1-second intervals: 174analysis done over ten 1-second intervals:
175 175
176 titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e 176 titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
177 kmem:mm_page_alloc -e kmem:mm_pagevec_free -e 177 kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
178 kmem:mm_page_free_direct sleep 1 178 kmem:mm_page_free sleep 1
179 179
180 Performance counter stats for 'sleep 1' (10 runs): 180 Performance counter stats for 'sleep 1' (10 runs):
181 181
182 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) 182 17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
183 34394 kmem:mm_page_alloc ( +- 4.617% ) 183 34394 kmem:mm_page_alloc ( +- 4.617% )
184 7509 kmem:mm_pagevec_free ( +- 4.820% ) 184 7509 kmem:mm_page_free_batched ( +- 4.820% )
185 25653 kmem:mm_page_free_direct ( +- 3.672% ) 185 25653 kmem:mm_page_free ( +- 3.672% )
186 186
187 1.058135029 seconds time elapsed ( +- 3.089% ) 187 1.058135029 seconds time elapsed ( +- 3.089% )
188 188
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 7a527f7e9da9..ddc22525228d 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -21,6 +21,8 @@ EVENT MODIFIERS
21Events can optionally have a modifer by appending a colon and one or 21Events can optionally have a modifer by appending a colon and one or
22more modifiers. Modifiers allow the user to restrict when events are 22more modifiers. Modifiers allow the user to restrict when events are
23counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. 23counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
24Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
25for host counting (not in KVM guests).
24 26
25The 'p' modifier can be used for specifying how precise the instruction 27The 'p' modifier can be used for specifying how precise the instruction
26address should be. The 'p' modifier is currently only implemented for 28address should be. The 'p' modifier is currently only implemented for
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index c12659d8cb26..1078c5fadd5b 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,5 @@
1tools/perf 1tools/perf
2include/linux/const.h
2include/linux/perf_event.h 3include/linux/perf_event.h
3include/linux/rbtree.h 4include/linux/rbtree.h
4include/linux/list.h 5include/linux/list.h
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 48ae0c5e3f73..7cdd61d0e27c 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -9,7 +9,10 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <stdlib.h>
13#ifndef __UCLIBC__
12#include <libio.h> 14#include <libio.h>
15#endif
13#include <dwarf-regs.h> 16#include <dwarf-regs.h>
14 17
15 18
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 214ba7f9f577..806e0a286634 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -235,7 +235,7 @@ out_delete:
235} 235}
236 236
237static const char * const annotate_usage[] = { 237static const char * const annotate_usage[] = {
238 "perf annotate [<options>] <command>", 238 "perf annotate [<options>]",
239 NULL 239 NULL
240}; 240};
241 241
@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
313 annotate.sym_hist_filter = argv[0]; 313 annotate.sym_hist_filter = argv[0];
314 } 314 }
315 315
316 if (field_sep && *field_sep == '.') {
317 pr_err("'.' is the only non valid --field-separator argument\n");
318 return -1;
319 }
320
321 return __cmd_annotate(&annotate); 316 return __cmd_annotate(&annotate);
322} 317}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fe1ad8f21961..39104c0beea3 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
108 continue; 108 continue;
109 cpunode_map[cpu] = mem; 109 cpunode_map[cpu] = mem;
110 } 110 }
111 closedir(dir2);
111 } 112 }
113 closedir(dir1);
112} 114}
113 115
114static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, 116static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
645 break; 647 break;
646 if (sort_dimension__add(tok, sort_list) < 0) { 648 if (sort_dimension__add(tok, sort_list) < 0) {
647 error("Unknown --sort key: '%s'", tok); 649 error("Unknown --sort key: '%s'", tok);
650 free(str);
648 return -1; 651 return -1;
649 } 652 }
650 } 653 }
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 032324a76b87..9fc6e0fa3dce 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -22,9 +22,6 @@
22static const char *file_name; 22static const char *file_name;
23static char name_buffer[256]; 23static char name_buffer[256];
24 24
25bool perf_host = 1;
26bool perf_guest;
27
28static const char * const kvm_usage[] = { 25static const char * const kvm_usage[] = {
29 "perf kvm [<options>] {top|record|report|diff|buildid-list}", 26 "perf kvm [<options>] {top|record|report|diff|buildid-list}",
30 NULL 27 NULL
@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
107 104
108int cmd_kvm(int argc, const char **argv, const char *prefix __used) 105int cmd_kvm(int argc, const char **argv, const char *prefix __used)
109{ 106{
110 perf_host = perf_guest = 0; 107 perf_host = 0;
108 perf_guest = 1;
111 109
112 argc = parse_options(argc, argv, kvm_options, kvm_usage, 110 argc = parse_options(argc, argv, kvm_options, kvm_usage,
113 PARSE_OPT_STOP_AT_NON_OPTION); 111 PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index fd1909afcfd6..bb68ddf257b7 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
1018 __script_root = get_script_root(&script_dirent, suffix); 1018 __script_root = get_script_root(&script_dirent, suffix);
1019 if (__script_root && !strcmp(script_root, __script_root)) { 1019 if (__script_root && !strcmp(script_root, __script_root)) {
1020 free(__script_root); 1020 free(__script_root);
1021 closedir(lang_dir);
1022 closedir(scripts_dir);
1021 snprintf(script_path, MAXPATHLEN, "%s/%s", 1023 snprintf(script_path, MAXPATHLEN, "%s/%s",
1022 lang_path, script_dirent.d_name); 1024 lang_path, script_dirent.d_name);
1023 return strdup(script_path); 1025 return strdup(script_path);
1024 } 1026 }
1025 free(__script_root); 1027 free(__script_root);
1026 } 1028 }
1029 closedir(lang_dir);
1027 } 1030 }
1031 closedir(scripts_dir);
1028 1032
1029 return NULL; 1033 return NULL;
1030} 1034}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 2b9a7f497a20..3854e869dce1 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
1396 NULL, 1396 NULL,
1397 }; 1397 };
1398 const struct option test_options[] = { 1398 const struct option test_options[] = {
1399 OPT_INTEGER('v', "verbose", &verbose, 1399 OPT_INCR('v', "verbose", &verbose,
1400 "be more verbose (show symbol address, etc)"), 1400 "be more verbose (show symbol address, etc)"),
1401 OPT_END() 1401 OPT_END()
1402 }; 1402 };
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4f81eeb99875..8f80df896038 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
235 if (he == NULL) 235 if (he == NULL)
236 return NULL; 236 return NULL;
237 237
238 evsel->hists.stats.total_period += sample->period;
239 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 238 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
240 return he; 239 return he;
241} 240}
@@ -889,6 +888,10 @@ try_again:
889 ui__warning("The %s event is not supported.\n", 888 ui__warning("The %s event is not supported.\n",
890 event_name(counter)); 889 event_name(counter));
891 goto out_err; 890 goto out_err;
891 } else if (err == EMFILE) {
892 ui__warning("Too many events are opened.\n"
893 "Try again after reducing the number of events\n");
894 goto out_err;
892 } 895 }
893 896
894 ui__warning("The sys_perf_event_open() syscall " 897 ui__warning("The sys_perf_event_open() syscall "
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index fa1837088ca8..3f16e08a5c8d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
111 .type = PERF_TYPE_HARDWARE, 111 .type = PERF_TYPE_HARDWARE,
112 .config = PERF_COUNT_HW_CPU_CYCLES, 112 .config = PERF_COUNT_HW_CPU_CYCLES,
113 }; 113 };
114 struct perf_evsel *evsel = perf_evsel__new(&attr, 0); 114 struct perf_evsel *evsel;
115
116 event_attr_init(&attr);
115 117
118 evsel = perf_evsel__new(&attr, 0);
116 if (evsel == NULL) 119 if (evsel == NULL)
117 goto error; 120 goto error;
118 121
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index abef2703cd24..6f505d1abac7 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
76 } 76 }
77} 77}
78 78
79static void hist_entry__add_cpumode_period(struct hist_entry *self, 79static void hist_entry__add_cpumode_period(struct hist_entry *he,
80 unsigned int cpumode, u64 period) 80 unsigned int cpumode, u64 period)
81{ 81{
82 switch (cpumode) { 82 switch (cpumode) {
83 case PERF_RECORD_MISC_KERNEL: 83 case PERF_RECORD_MISC_KERNEL:
84 self->period_sys += period; 84 he->period_sys += period;
85 break; 85 break;
86 case PERF_RECORD_MISC_USER: 86 case PERF_RECORD_MISC_USER:
87 self->period_us += period; 87 he->period_us += period;
88 break; 88 break;
89 case PERF_RECORD_MISC_GUEST_KERNEL: 89 case PERF_RECORD_MISC_GUEST_KERNEL:
90 self->period_guest_sys += period; 90 he->period_guest_sys += period;
91 break; 91 break;
92 case PERF_RECORD_MISC_GUEST_USER: 92 case PERF_RECORD_MISC_GUEST_USER:
93 self->period_guest_us += period; 93 he->period_guest_us += period;
94 break; 94 break;
95 default: 95 default:
96 break; 96 break;
@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
165static struct hist_entry *hist_entry__new(struct hist_entry *template) 165static struct hist_entry *hist_entry__new(struct hist_entry *template)
166{ 166{
167 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; 167 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
168 struct hist_entry *self = malloc(sizeof(*self) + callchain_size); 168 struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
169 169
170 if (self != NULL) { 170 if (he != NULL) {
171 *self = *template; 171 *he = *template;
172 self->nr_events = 1; 172 he->nr_events = 1;
173 if (self->ms.map) 173 if (he->ms.map)
174 self->ms.map->referenced = true; 174 he->ms.map->referenced = true;
175 if (symbol_conf.use_callchain) 175 if (symbol_conf.use_callchain)
176 callchain_init(self->callchain); 176 callchain_init(he->callchain);
177 } 177 }
178 178
179 return self; 179 return he;
180} 180}
181 181
182static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) 182static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
677 return ret; 677 return ret;
678} 678}
679 679
680static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, 680static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
681 u64 total_samples, int left_margin) 681 u64 total_samples, int left_margin,
682 FILE *fp)
682{ 683{
683 struct rb_node *rb_node; 684 struct rb_node *rb_node;
684 struct callchain_node *chain; 685 struct callchain_node *chain;
685 size_t ret = 0; 686 size_t ret = 0;
686 u32 entries_printed = 0; 687 u32 entries_printed = 0;
687 688
688 rb_node = rb_first(&self->sorted_chain); 689 rb_node = rb_first(&he->sorted_chain);
689 while (rb_node) { 690 while (rb_node) {
690 double percent; 691 double percent;
691 692
@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
730 } 731 }
731} 732}
732 733
733static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s, 734static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
734 size_t size, struct hists *pair_hists, 735 size_t size, struct hists *pair_hists,
735 bool show_displacement, long displacement, 736 bool show_displacement, long displacement,
736 bool color, u64 session_total) 737 bool color, u64 total_period)
737{ 738{
738 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; 739 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
739 u64 nr_events; 740 u64 nr_events;
740 const char *sep = symbol_conf.field_sep; 741 const char *sep = symbol_conf.field_sep;
741 int ret; 742 int ret;
742 743
743 if (symbol_conf.exclude_other && !self->parent) 744 if (symbol_conf.exclude_other && !he->parent)
744 return 0; 745 return 0;
745 746
746 if (pair_hists) { 747 if (pair_hists) {
747 period = self->pair ? self->pair->period : 0; 748 period = he->pair ? he->pair->period : 0;
748 nr_events = self->pair ? self->pair->nr_events : 0; 749 nr_events = he->pair ? he->pair->nr_events : 0;
749 total = pair_hists->stats.total_period; 750 total = pair_hists->stats.total_period;
750 period_sys = self->pair ? self->pair->period_sys : 0; 751 period_sys = he->pair ? he->pair->period_sys : 0;
751 period_us = self->pair ? self->pair->period_us : 0; 752 period_us = he->pair ? he->pair->period_us : 0;
752 period_guest_sys = self->pair ? self->pair->period_guest_sys : 0; 753 period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
753 period_guest_us = self->pair ? self->pair->period_guest_us : 0; 754 period_guest_us = he->pair ? he->pair->period_guest_us : 0;
754 } else { 755 } else {
755 period = self->period; 756 period = he->period;
756 nr_events = self->nr_events; 757 nr_events = he->nr_events;
757 total = session_total; 758 total = total_period;
758 period_sys = self->period_sys; 759 period_sys = he->period_sys;
759 period_us = self->period_us; 760 period_us = he->period_us;
760 period_guest_sys = self->period_guest_sys; 761 period_guest_sys = he->period_guest_sys;
761 period_guest_us = self->period_guest_us; 762 period_guest_us = he->period_guest_us;
762 } 763 }
763 764
764 if (total) { 765 if (total) {
@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
812 813
813 if (total > 0) 814 if (total > 0)
814 old_percent = (period * 100.0) / total; 815 old_percent = (period * 100.0) / total;
815 if (session_total > 0) 816 if (total_period > 0)
816 new_percent = (self->period * 100.0) / session_total; 817 new_percent = (he->period * 100.0) / total_period;
817 818
818 diff = new_percent - old_percent; 819 diff = new_percent - old_percent;
819 820
@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
862 return ret; 863 return ret;
863} 864}
864 865
865int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, 866static int hist_entry__fprintf(struct hist_entry *he, size_t size,
866 struct hists *pair_hists, bool show_displacement, 867 struct hists *hists, struct hists *pair_hists,
867 long displacement, FILE *fp, u64 session_total) 868 bool show_displacement, long displacement,
869 u64 total_period, FILE *fp)
868{ 870{
869 char bf[512]; 871 char bf[512];
870 int ret; 872 int ret;
@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
874 876
875 ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, 877 ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
876 show_displacement, displacement, 878 show_displacement, displacement,
877 true, session_total); 879 true, total_period);
878 hist_entry__snprintf(he, bf + ret, size - ret, hists); 880 hist_entry__snprintf(he, bf + ret, size - ret, hists);
879 return fprintf(fp, "%s\n", bf); 881 return fprintf(fp, "%s\n", bf);
880} 882}
881 883
882static size_t hist_entry__fprintf_callchain(struct hist_entry *self, 884static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
883 struct hists *hists, FILE *fp, 885 struct hists *hists,
884 u64 session_total) 886 u64 total_period, FILE *fp)
885{ 887{
886 int left_margin = 0; 888 int left_margin = 0;
887 889
@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
889 struct sort_entry *se = list_first_entry(&hist_entry__sort_list, 891 struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
890 typeof(*se), list); 892 typeof(*se), list);
891 left_margin = hists__col_len(hists, se->se_width_idx); 893 left_margin = hists__col_len(hists, se->se_width_idx);
892 left_margin -= thread__comm_len(self->thread); 894 left_margin -= thread__comm_len(he->thread);
893 } 895 }
894 896
895 return hist_entry_callchain__fprintf(fp, self, session_total, 897 return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
896 left_margin);
897} 898}
898 899
899size_t hists__fprintf(struct hists *hists, struct hists *pair, 900size_t hists__fprintf(struct hists *hists, struct hists *pair,
@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
903 struct sort_entry *se; 904 struct sort_entry *se;
904 struct rb_node *nd; 905 struct rb_node *nd;
905 size_t ret = 0; 906 size_t ret = 0;
907 u64 total_period;
906 unsigned long position = 1; 908 unsigned long position = 1;
907 long displacement = 0; 909 long displacement = 0;
908 unsigned int width; 910 unsigned int width;
@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
917 919
918 fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); 920 fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
919 921
920 if (symbol_conf.show_nr_samples) {
921 if (sep)
922 fprintf(fp, "%cSamples", *sep);
923 else
924 fputs(" Samples ", fp);
925 }
926
927 if (symbol_conf.show_total_period) {
928 if (sep)
929 ret += fprintf(fp, "%cPeriod", *sep);
930 else
931 ret += fprintf(fp, " Period ");
932 }
933
934 if (symbol_conf.show_cpu_utilization) { 922 if (symbol_conf.show_cpu_utilization) {
935 if (sep) { 923 if (sep) {
936 ret += fprintf(fp, "%csys", *sep); 924 ret += fprintf(fp, "%csys", *sep);
@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
940 ret += fprintf(fp, "%cguest us", *sep); 928 ret += fprintf(fp, "%cguest us", *sep);
941 } 929 }
942 } else { 930 } else {
943 ret += fprintf(fp, " sys "); 931 ret += fprintf(fp, " sys ");
944 ret += fprintf(fp, " us "); 932 ret += fprintf(fp, " us ");
945 if (perf_guest) { 933 if (perf_guest) {
946 ret += fprintf(fp, " guest sys "); 934 ret += fprintf(fp, " guest sys ");
947 ret += fprintf(fp, " guest us "); 935 ret += fprintf(fp, " guest us ");
@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
949 } 937 }
950 } 938 }
951 939
940 if (symbol_conf.show_nr_samples) {
941 if (sep)
942 fprintf(fp, "%cSamples", *sep);
943 else
944 fputs(" Samples ", fp);
945 }
946
947 if (symbol_conf.show_total_period) {
948 if (sep)
949 ret += fprintf(fp, "%cPeriod", *sep);
950 else
951 ret += fprintf(fp, " Period ");
952 }
953
952 if (pair) { 954 if (pair) {
953 if (sep) 955 if (sep)
954 ret += fprintf(fp, "%cDelta", *sep); 956 ret += fprintf(fp, "%cDelta", *sep);
@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
993 goto print_entries; 995 goto print_entries;
994 996
995 fprintf(fp, "# ........"); 997 fprintf(fp, "# ........");
998 if (symbol_conf.show_cpu_utilization)
999 fprintf(fp, " ....... .......");
996 if (symbol_conf.show_nr_samples) 1000 if (symbol_conf.show_nr_samples)
997 fprintf(fp, " .........."); 1001 fprintf(fp, " ..........");
998 if (symbol_conf.show_total_period) 1002 if (symbol_conf.show_total_period)
@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
1025 goto out; 1029 goto out;
1026 1030
1027print_entries: 1031print_entries:
1032 total_period = hists->stats.total_period;
1033
1028 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { 1034 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
1029 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 1035 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1030 1036
@@ -1040,11 +1046,10 @@ print_entries:
1040 ++position; 1046 ++position;
1041 } 1047 }
1042 ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, 1048 ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
1043 displacement, fp, hists->stats.total_period); 1049 displacement, total_period, fp);
1044 1050
1045 if (symbol_conf.use_callchain) 1051 if (symbol_conf.use_callchain)
1046 ret += hist_entry__fprintf_callchain(h, hists, fp, 1052 ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
1047 hists->stats.total_period);
1048 if (max_rows && ++nr_rows >= max_rows) 1053 if (max_rows && ++nr_rows >= max_rows)
1049 goto out; 1054 goto out;
1050 1055
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ff6f9d56ea41..f55f0a8d1f81 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -66,11 +66,8 @@ struct hists {
66struct hist_entry *__hists__add_entry(struct hists *self, 66struct hist_entry *__hists__add_entry(struct hists *self,
67 struct addr_location *al, 67 struct addr_location *al,
68 struct symbol *parent, u64 period); 68 struct symbol *parent, u64 period);
69extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); 69int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
70extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); 70int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
71int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
72 struct hists *pair_hists, bool show_displacement,
73 long displacement, FILE *fp, u64 session_total);
74int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, 71int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
75 struct hists *hists); 72 struct hists *hists);
76void hist_entry__free(struct hist_entry *); 73void hist_entry__free(struct hist_entry *);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 531c283fc0c5..b029296d20d9 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -735,8 +735,8 @@ static int
735parse_event_modifier(const char **strp, struct perf_event_attr *attr) 735parse_event_modifier(const char **strp, struct perf_event_attr *attr)
736{ 736{
737 const char *str = *strp; 737 const char *str = *strp;
738 int exclude = 0; 738 int exclude = 0, exclude_GH = 0;
739 int eu = 0, ek = 0, eh = 0, precise = 0; 739 int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
740 740
741 if (!*str) 741 if (!*str)
742 return 0; 742 return 0;
@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
760 if (!exclude) 760 if (!exclude)
761 exclude = eu = ek = eh = 1; 761 exclude = eu = ek = eh = 1;
762 eh = 0; 762 eh = 0;
763 } else if (*str == 'G') {
764 if (!exclude_GH)
765 exclude_GH = eG = eH = 1;
766 eG = 0;
767 } else if (*str == 'H') {
768 if (!exclude_GH)
769 exclude_GH = eG = eH = 1;
770 eH = 0;
763 } else if (*str == 'p') { 771 } else if (*str == 'p') {
764 precise++; 772 precise++;
765 } else 773 } else
@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
776 attr->exclude_kernel = ek; 784 attr->exclude_kernel = ek;
777 attr->exclude_hv = eh; 785 attr->exclude_hv = eh;
778 attr->precise_ip = precise; 786 attr->precise_ip = precise;
787 attr->exclude_host = eH;
788 attr->exclude_guest = eG;
779 789
780 return 0; 790 return 0;
781} 791}
@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
838 for (;;) { 848 for (;;) {
839 ostr = str; 849 ostr = str;
840 memset(&attr, 0, sizeof(attr)); 850 memset(&attr, 0, sizeof(attr));
851 event_attr_init(&attr);
841 ret = parse_event_symbols(evlist, &str, &attr); 852 ret = parse_event_symbols(evlist, &str, &attr);
842 if (ret == EVT_FAILED) 853 if (ret == EVT_FAILED)
843 return -1; 854 return -1;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index ac6830d8292b..fc22cf5c605f 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -18,7 +18,6 @@
18 * 18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 20 */
21#include <ctype.h>
22#include "util.h" 21#include "util.h"
23#include <dirent.h> 22#include <dirent.h>
24#include <mntent.h> 23#include <mntent.h>
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 5b3ea49aa63e..813141047fc2 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -1,6 +1,21 @@
1#include "../perf.h"
1#include "util.h" 2#include "util.h"
2#include <sys/mman.h> 3#include <sys/mman.h>
3 4
5/*
6 * XXX We need to find a better place for these things...
7 */
8bool perf_host = true;
9bool perf_guest = true;
10
11void event_attr_init(struct perf_event_attr *attr)
12{
13 if (!perf_host)
14 attr->exclude_host = 1;
15 if (!perf_guest)
16 attr->exclude_guest = 1;
17}
18
4int mkdir_p(char *path, mode_t mode) 19int mkdir_p(char *path, mode_t mode)
5{ 20{
6 struct stat st; 21 struct stat st;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 37be34dff798..b9c530cce79a 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
242unsigned long convert_unit(unsigned long value, char *unit); 242unsigned long convert_unit(unsigned long value, char *unit);
243int readn(int fd, void *buf, size_t size); 243int readn(int fd, void *buf, size_t size);
244 244
245struct perf_event_attr;
246
247void event_attr_init(struct perf_event_attr *attr);
248
245#define _STR(x) #x 249#define _STR(x) #x
246#define STR(x) _STR(x) 250#define STR(x) _STR(x)
247 251
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ff75125deed0..555c69a5592a 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -38,8 +38,8 @@ displays the statistics gathered since it was forked.
38.PP 38.PP
39.SH FIELD DESCRIPTIONS 39.SH FIELD DESCRIPTIONS
40.nf 40.nf
41\fBpkg\fP processor package number. 41\fBpk\fP processor package number.
42\fBcore\fP processor core number. 42\fBcr\fP processor core number.
43\fBCPU\fP Linux CPU (logical processor) number. 43\fBCPU\fP Linux CPU (logical processor) number.
44\fB%c0\fP percent of the interval that the CPU retired instructions. 44\fB%c0\fP percent of the interval that the CPU retired instructions.
45\fBGHz\fP average clock rate while the CPU was in c0 state. 45\fBGHz\fP average clock rate while the CPU was in c0 state.
@@ -58,7 +58,7 @@ Subsequent rows show per-CPU statistics.
58 58
59.nf 59.nf
60[root@x980]# ./turbostat 60[root@x980]# ./turbostat
61core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 61cr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
62 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 62 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07
63 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 63 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07
64 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 64 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07
@@ -102,7 +102,7 @@ until ^C while the other CPUs are mostly idle:
102.nf 102.nf
103[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null 103[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
104 104
105^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 105^Ccr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
106 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 106 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00
107 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 107 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00
108 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 108 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 3c6f7808efae..310d3dd5e547 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -811,6 +811,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
811 case 0x2C: /* Westmere EP - Gulftown */ 811 case 0x2C: /* Westmere EP - Gulftown */
812 case 0x2A: /* SNB */ 812 case 0x2A: /* SNB */
813 case 0x2D: /* SNB Xeon */ 813 case 0x2D: /* SNB Xeon */
814 case 0x3A: /* IVB */
815 case 0x3D: /* IVB Xeon */
814 return 1; 816 return 1;
815 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 817 case 0x2E: /* Nehalem-EX Xeon - Beckton */
816 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 818 case 0x2F: /* Westmere-EX Xeon - Eagleton */
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
index 9a571e71683c..a373a5bfff68 100755
--- a/tools/testing/ktest/compare-ktest-sample.pl
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -2,7 +2,9 @@
2 2
3open (IN,"ktest.pl"); 3open (IN,"ktest.pl");
4while (<IN>) { 4while (<IN>) {
5 # hashes are now used
5 if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ || 6 if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
7 /^\s*"?([A-Z].*?)"?\s*=>\s*/ ||
6 /set_test_option\("(.*?)"/) { 8 /set_test_option\("(.*?)"/) {
7 $opt{$1} = 1; 9 $opt{$1} = 1;
8 } 10 }
@@ -11,7 +13,7 @@ close IN;
11 13
12open (IN, "sample.conf"); 14open (IN, "sample.conf");
13while (<IN>) { 15while (<IN>) {
14 if (/^\s*#?\s*(\S+)\s*=/) { 16 if (/^\s*#?\s*([A-Z]\S*)\s*=/) {
15 $samp{$1} = 1; 17 $samp{$1} = 1;
16 } 18 }
17} 19}
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8b4c2535b266..62a134dc421a 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -18,40 +18,50 @@ $| = 1;
18my %opt; 18my %opt;
19my %repeat_tests; 19my %repeat_tests;
20my %repeats; 20my %repeats;
21my %default;
22 21
23#default opts 22#default opts
24$default{"NUM_TESTS"} = 1; 23my %default = (
25$default{"REBOOT_TYPE"} = "grub"; 24 "NUM_TESTS" => 1,
26$default{"TEST_TYPE"} = "test"; 25 "TEST_TYPE" => "build",
27$default{"BUILD_TYPE"} = "randconfig"; 26 "BUILD_TYPE" => "randconfig",
28$default{"MAKE_CMD"} = "make"; 27 "MAKE_CMD" => "make",
29$default{"TIMEOUT"} = 120; 28 "TIMEOUT" => 120,
30$default{"TMP_DIR"} = "/tmp/ktest/\${MACHINE}"; 29 "TMP_DIR" => "/tmp/ktest/\${MACHINE}",
31$default{"SLEEP_TIME"} = 60; # sleep time between tests 30 "SLEEP_TIME" => 60, # sleep time between tests
32$default{"BUILD_NOCLEAN"} = 0; 31 "BUILD_NOCLEAN" => 0,
33$default{"REBOOT_ON_ERROR"} = 0; 32 "REBOOT_ON_ERROR" => 0,
34$default{"POWEROFF_ON_ERROR"} = 0; 33 "POWEROFF_ON_ERROR" => 0,
35$default{"REBOOT_ON_SUCCESS"} = 1; 34 "REBOOT_ON_SUCCESS" => 1,
36$default{"POWEROFF_ON_SUCCESS"} = 0; 35 "POWEROFF_ON_SUCCESS" => 0,
37$default{"BUILD_OPTIONS"} = ""; 36 "BUILD_OPTIONS" => "",
38$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects 37 "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
39$default{"PATCHCHECK_SLEEP_TIME"} = 60; # sleep time between patch checks 38 "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
40$default{"CLEAR_LOG"} = 0; 39 "CLEAR_LOG" => 0,
41$default{"BISECT_MANUAL"} = 0; 40 "BISECT_MANUAL" => 0,
42$default{"BISECT_SKIP"} = 1; 41 "BISECT_SKIP" => 1,
43$default{"SUCCESS_LINE"} = "login:"; 42 "SUCCESS_LINE" => "login:",
44$default{"DETECT_TRIPLE_FAULT"} = 1; 43 "DETECT_TRIPLE_FAULT" => 1,
45$default{"NO_INSTALL"} = 0; 44 "NO_INSTALL" => 0,
46$default{"BOOTED_TIMEOUT"} = 1; 45 "BOOTED_TIMEOUT" => 1,
47$default{"DIE_ON_FAILURE"} = 1; 46 "DIE_ON_FAILURE" => 1,
48$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND"; 47 "SSH_EXEC" => "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND",
49$default{"SCP_TO_TARGET"} = "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE"; 48 "SCP_TO_TARGET" => "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE",
50$default{"REBOOT"} = "ssh \$SSH_USER\@\$MACHINE reboot"; 49 "REBOOT" => "ssh \$SSH_USER\@\$MACHINE reboot",
51$default{"STOP_AFTER_SUCCESS"} = 10; 50 "STOP_AFTER_SUCCESS" => 10,
52$default{"STOP_AFTER_FAILURE"} = 60; 51 "STOP_AFTER_FAILURE" => 60,
53$default{"STOP_TEST_AFTER"} = 600; 52 "STOP_TEST_AFTER" => 600,
54$default{"LOCALVERSION"} = "-test"; 53
54# required, and we will ask users if they don't have them but we keep the default
55# value something that is common.
56 "REBOOT_TYPE" => "grub",
57 "LOCALVERSION" => "-test",
58 "SSH_USER" => "root",
59 "BUILD_TARGET" => "arch/x86/boot/bzImage",
60 "TARGET_IMAGE" => "/boot/vmlinuz-test",
61
62 "LOG_FILE" => undef,
63 "IGNORE_UNUSED" => 0,
64);
55 65
56my $ktest_config; 66my $ktest_config;
57my $version; 67my $version;
@@ -73,6 +83,8 @@ my $reboot_script;
73my $power_cycle; 83my $power_cycle;
74my $reboot; 84my $reboot;
75my $reboot_on_error; 85my $reboot_on_error;
86my $switch_to_good;
87my $switch_to_test;
76my $poweroff_on_error; 88my $poweroff_on_error;
77my $die_on_failure; 89my $die_on_failure;
78my $powercycle_after_reboot; 90my $powercycle_after_reboot;
@@ -92,17 +104,24 @@ my $start_minconfig;
92my $start_minconfig_defined; 104my $start_minconfig_defined;
93my $output_minconfig; 105my $output_minconfig;
94my $ignore_config; 106my $ignore_config;
107my $ignore_errors;
95my $addconfig; 108my $addconfig;
96my $in_bisect = 0; 109my $in_bisect = 0;
97my $bisect_bad = ""; 110my $bisect_bad_commit = "";
98my $reverse_bisect; 111my $reverse_bisect;
99my $bisect_manual; 112my $bisect_manual;
100my $bisect_skip; 113my $bisect_skip;
101my $config_bisect_good; 114my $config_bisect_good;
115my $bisect_ret_good;
116my $bisect_ret_bad;
117my $bisect_ret_skip;
118my $bisect_ret_abort;
119my $bisect_ret_default;
102my $in_patchcheck = 0; 120my $in_patchcheck = 0;
103my $run_test; 121my $run_test;
104my $redirect; 122my $redirect;
105my $buildlog; 123my $buildlog;
124my $testlog;
106my $dmesg; 125my $dmesg;
107my $monitor_fp; 126my $monitor_fp;
108my $monitor_pid; 127my $monitor_pid;
@@ -112,6 +131,7 @@ my $bisect_sleep_time;
112my $patchcheck_sleep_time; 131my $patchcheck_sleep_time;
113my $ignore_warnings; 132my $ignore_warnings;
114my $store_failures; 133my $store_failures;
134my $store_successes;
115my $test_name; 135my $test_name;
116my $timeout; 136my $timeout;
117my $booted_timeout; 137my $booted_timeout;
@@ -124,10 +144,34 @@ my $stop_after_failure;
124my $stop_test_after; 144my $stop_test_after;
125my $build_target; 145my $build_target;
126my $target_image; 146my $target_image;
147my $checkout;
127my $localversion; 148my $localversion;
128my $iteration = 0; 149my $iteration = 0;
129my $successes = 0; 150my $successes = 0;
130 151
152my $bisect_good;
153my $bisect_bad;
154my $bisect_type;
155my $bisect_start;
156my $bisect_replay;
157my $bisect_files;
158my $bisect_reverse;
159my $bisect_check;
160
161my $config_bisect;
162my $config_bisect_type;
163
164my $patchcheck_type;
165my $patchcheck_start;
166my $patchcheck_end;
167
168# set when a test is something other than just building or install
169# which would require more options.
170my $buildonly = 1;
171
172# set when creating a new config
173my $newconfig = 0;
174
131my %entered_configs; 175my %entered_configs;
132my %config_help; 176my %config_help;
133my %variable; 177my %variable;
@@ -136,11 +180,99 @@ my %force_config;
136# do not force reboots on config problems 180# do not force reboots on config problems
137my $no_reboot = 1; 181my $no_reboot = 1;
138 182
183my %option_map = (
184 "MACHINE" => \$machine,
185 "SSH_USER" => \$ssh_user,
186 "TMP_DIR" => \$tmpdir,
187 "OUTPUT_DIR" => \$outputdir,
188 "BUILD_DIR" => \$builddir,
189 "TEST_TYPE" => \$test_type,
190 "BUILD_TYPE" => \$build_type,
191 "BUILD_OPTIONS" => \$build_options,
192 "PRE_BUILD" => \$pre_build,
193 "POST_BUILD" => \$post_build,
194 "PRE_BUILD_DIE" => \$pre_build_die,
195 "POST_BUILD_DIE" => \$post_build_die,
196 "POWER_CYCLE" => \$power_cycle,
197 "REBOOT" => \$reboot,
198 "BUILD_NOCLEAN" => \$noclean,
199 "MIN_CONFIG" => \$minconfig,
200 "OUTPUT_MIN_CONFIG" => \$output_minconfig,
201 "START_MIN_CONFIG" => \$start_minconfig,
202 "IGNORE_CONFIG" => \$ignore_config,
203 "TEST" => \$run_test,
204 "ADD_CONFIG" => \$addconfig,
205 "REBOOT_TYPE" => \$reboot_type,
206 "GRUB_MENU" => \$grub_menu,
207 "POST_INSTALL" => \$post_install,
208 "NO_INSTALL" => \$no_install,
209 "REBOOT_SCRIPT" => \$reboot_script,
210 "REBOOT_ON_ERROR" => \$reboot_on_error,
211 "SWITCH_TO_GOOD" => \$switch_to_good,
212 "SWITCH_TO_TEST" => \$switch_to_test,
213 "POWEROFF_ON_ERROR" => \$poweroff_on_error,
214 "DIE_ON_FAILURE" => \$die_on_failure,
215 "POWER_OFF" => \$power_off,
216 "POWERCYCLE_AFTER_REBOOT" => \$powercycle_after_reboot,
217 "POWEROFF_AFTER_HALT" => \$poweroff_after_halt,
218 "SLEEP_TIME" => \$sleep_time,
219 "BISECT_SLEEP_TIME" => \$bisect_sleep_time,
220 "PATCHCHECK_SLEEP_TIME" => \$patchcheck_sleep_time,
221 "IGNORE_WARNINGS" => \$ignore_warnings,
222 "IGNORE_ERRORS" => \$ignore_errors,
223 "BISECT_MANUAL" => \$bisect_manual,
224 "BISECT_SKIP" => \$bisect_skip,
225 "CONFIG_BISECT_GOOD" => \$config_bisect_good,
226 "BISECT_RET_GOOD" => \$bisect_ret_good,
227 "BISECT_RET_BAD" => \$bisect_ret_bad,
228 "BISECT_RET_SKIP" => \$bisect_ret_skip,
229 "BISECT_RET_ABORT" => \$bisect_ret_abort,
230 "BISECT_RET_DEFAULT" => \$bisect_ret_default,
231 "STORE_FAILURES" => \$store_failures,
232 "STORE_SUCCESSES" => \$store_successes,
233 "TEST_NAME" => \$test_name,
234 "TIMEOUT" => \$timeout,
235 "BOOTED_TIMEOUT" => \$booted_timeout,
236 "CONSOLE" => \$console,
237 "DETECT_TRIPLE_FAULT" => \$detect_triplefault,
238 "SUCCESS_LINE" => \$success_line,
239 "REBOOT_SUCCESS_LINE" => \$reboot_success_line,
240 "STOP_AFTER_SUCCESS" => \$stop_after_success,
241 "STOP_AFTER_FAILURE" => \$stop_after_failure,
242 "STOP_TEST_AFTER" => \$stop_test_after,
243 "BUILD_TARGET" => \$build_target,
244 "SSH_EXEC" => \$ssh_exec,
245 "SCP_TO_TARGET" => \$scp_to_target,
246 "CHECKOUT" => \$checkout,
247 "TARGET_IMAGE" => \$target_image,
248 "LOCALVERSION" => \$localversion,
249
250 "BISECT_GOOD" => \$bisect_good,
251 "BISECT_BAD" => \$bisect_bad,
252 "BISECT_TYPE" => \$bisect_type,
253 "BISECT_START" => \$bisect_start,
254 "BISECT_REPLAY" => \$bisect_replay,
255 "BISECT_FILES" => \$bisect_files,
256 "BISECT_REVERSE" => \$bisect_reverse,
257 "BISECT_CHECK" => \$bisect_check,
258
259 "CONFIG_BISECT" => \$config_bisect,
260 "CONFIG_BISECT_TYPE" => \$config_bisect_type,
261
262 "PATCHCHECK_TYPE" => \$patchcheck_type,
263 "PATCHCHECK_START" => \$patchcheck_start,
264 "PATCHCHECK_END" => \$patchcheck_end,
265);
266
267# Options may be used by other options, record them.
268my %used_options;
269
139# default variables that can be used 270# default variables that can be used
140chomp ($variable{"PWD"} = `pwd`); 271chomp ($variable{"PWD"} = `pwd`);
141 272
142$config_help{"MACHINE"} = << "EOF" 273$config_help{"MACHINE"} = << "EOF"
143 The machine hostname that you will test. 274 The machine hostname that you will test.
275 For build only tests, it is still needed to differentiate log files.
144EOF 276EOF
145 ; 277 ;
146$config_help{"SSH_USER"} = << "EOF" 278$config_help{"SSH_USER"} = << "EOF"
@@ -150,11 +282,15 @@ EOF
150 ; 282 ;
151$config_help{"BUILD_DIR"} = << "EOF" 283$config_help{"BUILD_DIR"} = << "EOF"
152 The directory that contains the Linux source code (full path). 284 The directory that contains the Linux source code (full path).
285 You can use \${PWD} that will be the path where ktest.pl is run, or use
286 \${THIS_DIR} which is assigned \${PWD} but may be changed later.
153EOF 287EOF
154 ; 288 ;
155$config_help{"OUTPUT_DIR"} = << "EOF" 289$config_help{"OUTPUT_DIR"} = << "EOF"
156 The directory that the objects will be built (full path). 290 The directory that the objects will be built (full path).
157 (can not be same as BUILD_DIR) 291 (can not be same as BUILD_DIR)
292 You can use \${PWD} that will be the path where ktest.pl is run, or use
293 \${THIS_DIR} which is assigned \${PWD} but may be changed later.
158EOF 294EOF
159 ; 295 ;
160$config_help{"BUILD_TARGET"} = << "EOF" 296$config_help{"BUILD_TARGET"} = << "EOF"
@@ -162,6 +298,11 @@ $config_help{"BUILD_TARGET"} = << "EOF"
162 (relative to OUTPUT_DIR) 298 (relative to OUTPUT_DIR)
163EOF 299EOF
164 ; 300 ;
301$config_help{"BUILD_OPTIONS"} = << "EOF"
302 Options to add to \"make\" when building.
303 i.e. -j20
304EOF
305 ;
165$config_help{"TARGET_IMAGE"} = << "EOF" 306$config_help{"TARGET_IMAGE"} = << "EOF"
166 The place to put your image on the test machine. 307 The place to put your image on the test machine.
167EOF 308EOF
@@ -227,20 +368,36 @@ $config_help{"REBOOT_SCRIPT"} = << "EOF"
227EOF 368EOF
228 ; 369 ;
229 370
230sub read_yn { 371sub read_prompt {
231 my ($prompt) = @_; 372 my ($cancel, $prompt) = @_;
232 373
233 my $ans; 374 my $ans;
234 375
235 for (;;) { 376 for (;;) {
236 print "$prompt [Y/n] "; 377 if ($cancel) {
378 print "$prompt [y/n/C] ";
379 } else {
380 print "$prompt [Y/n] ";
381 }
237 $ans = <STDIN>; 382 $ans = <STDIN>;
238 chomp $ans; 383 chomp $ans;
239 if ($ans =~ /^\s*$/) { 384 if ($ans =~ /^\s*$/) {
240 $ans = "y"; 385 if ($cancel) {
386 $ans = "c";
387 } else {
388 $ans = "y";
389 }
241 } 390 }
242 last if ($ans =~ /^y$/i || $ans =~ /^n$/i); 391 last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
243 print "Please answer either 'y' or 'n'.\n"; 392 if ($cancel) {
393 last if ($ans =~ /^c$/i);
394 print "Please answer either 'y', 'n' or 'c'.\n";
395 } else {
396 print "Please answer either 'y' or 'n'.\n";
397 }
398 }
399 if ($ans =~ /^c/i) {
400 exit;
244 } 401 }
245 if ($ans !~ /^y$/i) { 402 if ($ans !~ /^y$/i) {
246 return 0; 403 return 0;
@@ -248,6 +405,18 @@ sub read_yn {
248 return 1; 405 return 1;
249} 406}
250 407
408sub read_yn {
409 my ($prompt) = @_;
410
411 return read_prompt 0, $prompt;
412}
413
414sub read_ync {
415 my ($prompt) = @_;
416
417 return read_prompt 1, $prompt;
418}
419
251sub get_ktest_config { 420sub get_ktest_config {
252 my ($config) = @_; 421 my ($config) = @_;
253 my $ans; 422 my $ans;
@@ -261,7 +430,7 @@ sub get_ktest_config {
261 430
262 for (;;) { 431 for (;;) {
263 print "$config = "; 432 print "$config = ";
264 if (defined($default{$config})) { 433 if (defined($default{$config}) && length($default{$config})) {
265 print "\[$default{$config}\] "; 434 print "\[$default{$config}\] ";
266 } 435 }
267 $ans = <STDIN>; 436 $ans = <STDIN>;
@@ -274,22 +443,37 @@ sub get_ktest_config {
274 next; 443 next;
275 } 444 }
276 } 445 }
277 $entered_configs{$config} = process_variables($ans); 446 $entered_configs{$config} = ${ans};
278 last; 447 last;
279 } 448 }
280} 449}
281 450
282sub get_ktest_configs { 451sub get_ktest_configs {
283 get_ktest_config("MACHINE"); 452 get_ktest_config("MACHINE");
284 get_ktest_config("SSH_USER");
285 get_ktest_config("BUILD_DIR"); 453 get_ktest_config("BUILD_DIR");
286 get_ktest_config("OUTPUT_DIR"); 454 get_ktest_config("OUTPUT_DIR");
287 get_ktest_config("BUILD_TARGET"); 455
288 get_ktest_config("TARGET_IMAGE"); 456 if ($newconfig) {
289 get_ktest_config("POWER_CYCLE"); 457 get_ktest_config("BUILD_OPTIONS");
290 get_ktest_config("CONSOLE"); 458 }
459
460 # options required for other than just building a kernel
461 if (!$buildonly) {
462 get_ktest_config("POWER_CYCLE");
463 get_ktest_config("CONSOLE");
464 }
465
466 # options required for install and more
467 if ($buildonly != 1) {
468 get_ktest_config("SSH_USER");
469 get_ktest_config("BUILD_TARGET");
470 get_ktest_config("TARGET_IMAGE");
471 }
472
291 get_ktest_config("LOCALVERSION"); 473 get_ktest_config("LOCALVERSION");
292 474
475 return if ($buildonly);
476
293 my $rtype = $opt{"REBOOT_TYPE"}; 477 my $rtype = $opt{"REBOOT_TYPE"};
294 478
295 if (!defined($rtype)) { 479 if (!defined($rtype)) {
@@ -303,8 +487,6 @@ sub get_ktest_configs {
303 487
304 if ($rtype eq "grub") { 488 if ($rtype eq "grub") {
305 get_ktest_config("GRUB_MENU"); 489 get_ktest_config("GRUB_MENU");
306 } else {
307 get_ktest_config("REBOOT_SCRIPT");
308 } 490 }
309} 491}
310 492
@@ -334,6 +516,10 @@ sub process_variables {
334 } else { 516 } else {
335 # put back the origin piece. 517 # put back the origin piece.
336 $retval = "$retval\$\{$var\}"; 518 $retval = "$retval\$\{$var\}";
519 # This could be an option that is used later, save
520 # it so we don't warn if this option is not one of
521 # ktests options.
522 $used_options{$var} = 1;
337 } 523 }
338 $value = $end; 524 $value = $end;
339 } 525 }
@@ -348,6 +534,19 @@ sub process_variables {
348sub set_value { 534sub set_value {
349 my ($lvalue, $rvalue, $override, $overrides, $name) = @_; 535 my ($lvalue, $rvalue, $override, $overrides, $name) = @_;
350 536
537 my $prvalue = process_variables($rvalue);
538
539 if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") {
540 # Note if a test is something other than build, then we
541 # will need other manditory options.
542 if ($prvalue ne "install") {
543 $buildonly = 0;
544 } else {
545 # install still limits some manditory options.
546 $buildonly = 2;
547 }
548 }
549
351 if (defined($opt{$lvalue})) { 550 if (defined($opt{$lvalue})) {
352 if (!$override || defined(${$overrides}{$lvalue})) { 551 if (!$override || defined(${$overrides}{$lvalue})) {
353 my $extra = ""; 552 my $extra = "";
@@ -356,13 +555,12 @@ sub set_value {
356 } 555 }
357 die "$name: $.: Option $lvalue defined more than once!\n$extra"; 556 die "$name: $.: Option $lvalue defined more than once!\n$extra";
358 } 557 }
359 ${$overrides}{$lvalue} = $rvalue; 558 ${$overrides}{$lvalue} = $prvalue;
360 } 559 }
361 if ($rvalue =~ /^\s*$/) { 560 if ($rvalue =~ /^\s*$/) {
362 delete $opt{$lvalue}; 561 delete $opt{$lvalue};
363 } else { 562 } else {
364 $rvalue = process_variables($rvalue); 563 $opt{$lvalue} = $prvalue;
365 $opt{$lvalue} = $rvalue;
366 } 564 }
367} 565}
368 566
@@ -712,6 +910,15 @@ sub __read_config {
712 return $test_case; 910 return $test_case;
713} 911}
714 912
913sub get_test_case {
914 print "What test case would you like to run?\n";
915 print " (build, install or boot)\n";
916 print " Other tests are available but require editing the config file\n";
917 my $ans = <STDIN>;
918 chomp $ans;
919 $default{"TEST_TYPE"} = $ans;
920}
921
715sub read_config { 922sub read_config {
716 my ($config) = @_; 923 my ($config) = @_;
717 924
@@ -726,10 +933,7 @@ sub read_config {
726 # was a test specified? 933 # was a test specified?
727 if (!$test_case) { 934 if (!$test_case) {
728 print "No test case specified.\n"; 935 print "No test case specified.\n";
729 print "What test case would you like to run?\n"; 936 get_test_case;
730 my $ans = <STDIN>;
731 chomp $ans;
732 $default{"TEST_TYPE"} = $ans;
733 } 937 }
734 938
735 # set any defaults 939 # set any defaults
@@ -739,6 +943,37 @@ sub read_config {
739 $opt{$default} = $default{$default}; 943 $opt{$default} = $default{$default};
740 } 944 }
741 } 945 }
946
947 if ($opt{"IGNORE_UNUSED"} == 1) {
948 return;
949 }
950
951 my %not_used;
952
953 # check if there are any stragglers (typos?)
954 foreach my $option (keys %opt) {
955 my $op = $option;
956 # remove per test labels.
957 $op =~ s/\[.*\]//;
958 if (!exists($option_map{$op}) &&
959 !exists($default{$op}) &&
960 !exists($used_options{$op})) {
961 $not_used{$op} = 1;
962 }
963 }
964
965 if (%not_used) {
966 my $s = "s are";
967 $s = " is" if (keys %not_used == 1);
968 print "The following option$s not used; could be a typo:\n";
969 foreach my $option (keys %not_used) {
970 print "$option\n";
971 }
972 print "Set IGNORE_UNUSED = 1 to have ktest ignore unused variables\n";
973 if (!read_yn "Do you want to continue?") {
974 exit -1;
975 }
976 }
742} 977}
743 978
744sub __eval_option { 979sub __eval_option {
@@ -873,6 +1108,17 @@ sub reboot {
873 } 1108 }
874} 1109}
875 1110
1111sub reboot_to_good {
1112 my ($time) = @_;
1113
1114 if (defined($switch_to_good)) {
1115 run_command $switch_to_good;
1116 return;
1117 }
1118
1119 reboot $time;
1120}
1121
876sub do_not_reboot { 1122sub do_not_reboot {
877 my $i = $iteration; 1123 my $i = $iteration;
878 1124
@@ -889,7 +1135,7 @@ sub dodie {
889 if ($reboot_on_error && !do_not_reboot) { 1135 if ($reboot_on_error && !do_not_reboot) {
890 1136
891 doprint "REBOOTING\n"; 1137 doprint "REBOOTING\n";
892 reboot; 1138 reboot_to_good;
893 1139
894 } elsif ($poweroff_on_error && defined($power_off)) { 1140 } elsif ($poweroff_on_error && defined($power_off)) {
895 doprint "POWERING OFF\n"; 1141 doprint "POWERING OFF\n";
@@ -975,6 +1221,43 @@ sub wait_for_monitor {
975 print "** Monitor flushed **\n"; 1221 print "** Monitor flushed **\n";
976} 1222}
977 1223
# Copy the artifacts of the current test into a fresh directory under
# $basedir named "<machine>-<test_type>-<build_type>-<result>-<timestamp>".
# $result is a short tag such as "fail" or "success".
# Only files that exist are copied (config, buildlog, dmesg, testlog);
# dies if the directory cannot be created or a copy fails.
1224sub save_logs {
1225 my ($result, $basedir) = @_;
1226 my @t = localtime;
# localtime returns the month as 0..11, so add 1 for a calendar month.
1227 my $date = sprintf "%04d%02d%02d%02d%02d%02d",
1228 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1],$t[0];
1229
1230 my $type = $build_type;
# Collapse "useconfig:<path>" style build types to a plain tag so the
# directory name does not contain a path.
1231 if ($type =~ /useconfig/) {
1232 $type = "useconfig";
1233 }
1234
1235 my $dir = "$machine-$test_type-$type-$result-$date";
1236
1237 $dir = "$basedir/$dir";
1238
1239 if (!-d $dir) {
1240 mkpath($dir) or
1241 die "can't create $dir";
1242 }
1243
1244 my %files = (
1245 "config" => $output_config,
1246 "buildlog" => $buildlog,
1247 "dmesg" => $dmesg,
1248 "testlog" => $testlog,
1249 );
1250
1251 while (my ($name, $source) = each(%files)) {
1252 if (-f "$source") {
1253 cp "$source", "$dir/$name" or
1254 die "failed to copy $source";
1255 }
1256 }
1257
1258 doprint "*** Saved info to $dir ***\n";
1259}
1260
978sub fail { 1261sub fail {
979 1262
980 if ($die_on_failure) { 1263 if ($die_on_failure) {
@@ -988,7 +1271,7 @@ sub fail {
988 # no need to reboot for just building. 1271 # no need to reboot for just building.
989 if (!do_not_reboot) { 1272 if (!do_not_reboot) {
990 doprint "REBOOTING\n"; 1273 doprint "REBOOTING\n";
991 reboot $sleep_time; 1274 reboot_to_good $sleep_time;
992 } 1275 }
993 1276
994 my $name = ""; 1277 my $name = "";
@@ -1003,38 +1286,9 @@ sub fail {
1003 doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; 1286 doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
1004 doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; 1287 doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
1005 1288
1006 return 1 if (!defined($store_failures)); 1289 if (defined($store_failures)) {
1007 1290 save_logs "fail", $store_failures;
1008 my @t = localtime; 1291 }
1009 my $date = sprintf "%04d%02d%02d%02d%02d%02d",
1010 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
1011
1012 my $type = $build_type;
1013 if ($type =~ /useconfig/) {
1014 $type = "useconfig";
1015 }
1016
1017 my $dir = "$machine-$test_type-$type-fail-$date";
1018 my $faildir = "$store_failures/$dir";
1019
1020 if (!-d $faildir) {
1021 mkpath($faildir) or
1022 die "can't create $faildir";
1023 }
1024 if (-f "$output_config") {
1025 cp "$output_config", "$faildir/config" or
1026 die "failed to copy .config";
1027 }
1028 if (-f $buildlog) {
1029 cp $buildlog, "$faildir/buildlog" or
1030 die "failed to move $buildlog";
1031 }
1032 if (-f $dmesg) {
1033 cp $dmesg, "$faildir/dmesg" or
1034 die "failed to move $dmesg";
1035 }
1036
1037 doprint "*** Saved info to $faildir ***\n";
1038 1292
1039 return 1; 1293 return 1;
1040} 1294}
@@ -1170,13 +1424,16 @@ sub wait_for_input
1170} 1424}
1171 1425
1172sub reboot_to { 1426sub reboot_to {
1427 if (defined($switch_to_test)) {
1428 run_command $switch_to_test;
1429 }
1430
1173 if ($reboot_type eq "grub") { 1431 if ($reboot_type eq "grub") {
1174 run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; 1432 run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
1175 reboot; 1433 } elsif (defined $reboot_script) {
1176 return; 1434 run_command "$reboot_script";
1177 } 1435 }
1178 1436 reboot;
1179 run_command "$reboot_script";
1180} 1437}
1181 1438
1182sub get_sha1 { 1439sub get_sha1 {
@@ -1274,7 +1531,7 @@ sub monitor {
1274 } 1531 }
1275 1532
1276 if ($full_line =~ /call trace:/i) { 1533 if ($full_line =~ /call trace:/i) {
1277 if (!$bug && !$skip_call_trace) { 1534 if (!$ignore_errors && !$bug && !$skip_call_trace) {
1278 $bug = 1; 1535 $bug = 1;
1279 $failure_start = time; 1536 $failure_start = time;
1280 } 1537 }
@@ -1341,12 +1598,19 @@ sub monitor {
1341 return 1; 1598 return 1;
1342} 1599}
1343 1600
# Return a copy of $option in which every literal "$KERNEL_VERSION"
# has been replaced by the current value of $version.
1601sub eval_kernel_version {
1602 my ($option) = @_;
1603
1604 $option =~ s/\$KERNEL_VERSION/$version/g;
1605
1606 return $option;
1607}
1608
1344sub do_post_install { 1609sub do_post_install {
1345 1610
1346 return if (!defined($post_install)); 1611 return if (!defined($post_install));
1347 1612
1348 my $cp_post_install = $post_install; 1613 my $cp_post_install = eval_kernel_version $post_install;
1349 $cp_post_install =~ s/\$KERNEL_VERSION/$version/g;
1350 run_command "$cp_post_install" or 1614 run_command "$cp_post_install" or
1351 dodie "Failed to run post install"; 1615 dodie "Failed to run post install";
1352} 1616}
@@ -1355,7 +1619,9 @@ sub install {
1355 1619
1356 return if ($no_install); 1620 return if ($no_install);
1357 1621
1358 run_scp "$outputdir/$build_target", "$target_image" or 1622 my $cp_target = eval_kernel_version $target_image;
1623
1624 run_scp "$outputdir/$build_target", "$cp_target" or
1359 dodie "failed to copy image"; 1625 dodie "failed to copy image";
1360 1626
1361 my $install_mods = 0; 1627 my $install_mods = 0;
@@ -1640,9 +1906,13 @@ sub success {
1640 doprint "*******************************************\n"; 1906 doprint "*******************************************\n";
1641 doprint "*******************************************\n"; 1907 doprint "*******************************************\n";
1642 1908
1909 if (defined($store_successes)) {
1910 save_logs "success", $store_successes;
1911 }
1912
1643 if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { 1913 if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
1644 doprint "Reboot and wait $sleep_time seconds\n"; 1914 doprint "Reboot and wait $sleep_time seconds\n";
1645 reboot $sleep_time; 1915 reboot_to_good $sleep_time;
1646 } 1916 }
1647} 1917}
1648 1918
@@ -1669,7 +1939,10 @@ sub child_run_test {
1669 $poweroff_on_error = 0; 1939 $poweroff_on_error = 0;
1670 $die_on_failure = 1; 1940 $die_on_failure = 1;
1671 1941
1942 $redirect = "$testlog";
1672 run_command $run_test or $failed = 1; 1943 run_command $run_test or $failed = 1;
1944 undef $redirect;
1945
1673 exit $failed; 1946 exit $failed;
1674} 1947}
1675 1948
@@ -1744,6 +2017,43 @@ sub do_run_test {
1744 waitpid $child_pid, 0; 2017 waitpid $child_pid, 0;
1745 $child_exit = $?; 2018 $child_exit = $?;
1746 2019
2020 if (!$bug && $in_bisect) {
2021 if (defined($bisect_ret_good)) {
2022 if ($child_exit == $bisect_ret_good) {
2023 return 1;
2024 }
2025 }
2026 if (defined($bisect_ret_skip)) {
2027 if ($child_exit == $bisect_ret_skip) {
2028 return -1;
2029 }
2030 }
2031 if (defined($bisect_ret_abort)) {
2032 if ($child_exit == $bisect_ret_abort) {
2033 fail "test abort" and return -2;
2034 }
2035 }
# BISECT_RET_BAD: the test's exit status that marks this commit as bad.
# Must be compared against $bisect_ret_bad, not the skip code.
2036 if (defined($bisect_ret_bad)) {
2037 if ($child_exit == $bisect_ret_bad) {
2038 return 0;
2039 }
2040 }
2041 if (defined($bisect_ret_default)) {
2042 if ($bisect_ret_default eq "good") {
2043 return 1;
2044 } elsif ($bisect_ret_default eq "bad") {
2045 return 0;
2046 } elsif ($bisect_ret_default eq "skip") {
2047 return -1;
2048 } elsif ($bisect_ret_default eq "abort") {
2049 return -2;
2050 } else {
2051 fail "unknown default action: $bisect_ret_default"
2052 and return -2;
2053 }
2054 }
2055 }
2056
1747 if ($bug || $child_exit) { 2057 if ($bug || $child_exit) {
1748 return 0 if $in_bisect; 2058 return 0 if $in_bisect;
1749 fail "test failed" and return 0; 2059 fail "test failed" and return 0;
@@ -1770,7 +2080,7 @@ sub run_git_bisect {
1770 if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) { 2080 if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
1771 doprint "$1 [$2]\n"; 2081 doprint "$1 [$2]\n";
1772 } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) { 2082 } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
1773 $bisect_bad = $1; 2083 $bisect_bad_commit = $1;
1774 doprint "Found bad commit... $1\n"; 2084 doprint "Found bad commit... $1\n";
1775 return 0; 2085 return 0;
1776 } else { 2086 } else {
@@ -1783,7 +2093,7 @@ sub run_git_bisect {
1783 2093
1784sub bisect_reboot { 2094sub bisect_reboot {
1785 doprint "Reboot and sleep $bisect_sleep_time seconds\n"; 2095 doprint "Reboot and sleep $bisect_sleep_time seconds\n";
1786 reboot $bisect_sleep_time; 2096 reboot_to_good $bisect_sleep_time;
1787} 2097}
1788 2098
1789# returns 1 on success, 0 on failure, -1 on skip 2099# returns 1 on success, 0 on failure, -1 on skip
@@ -1868,21 +2178,28 @@ sub run_bisect {
1868 } 2178 }
1869} 2179}
1870 2180
# Dump the log of the git bisect currently in progress to
# "$tmpdir/ktest_bisect_log" and return that path, so the caller can use
# it as the replay file. Dies if run_command reports failure.
2181sub update_bisect_replay {
2182 my $tmp_log = "$tmpdir/ktest_bisect_log";
2183 run_command "git bisect log > $tmp_log" or
2184 die "can't create bisect log";
2185 return $tmp_log;
2186}
2187
1871sub bisect { 2188sub bisect {
1872 my ($i) = @_; 2189 my ($i) = @_;
1873 2190
1874 my $result; 2191 my $result;
1875 2192
1876 die "BISECT_GOOD[$i] not defined\n" if (!defined($opt{"BISECT_GOOD[$i]"})); 2193 die "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good));
1877 die "BISECT_BAD[$i] not defined\n" if (!defined($opt{"BISECT_BAD[$i]"})); 2194 die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad));
1878 die "BISECT_TYPE[$i] not defined\n" if (!defined($opt{"BISECT_TYPE[$i]"})); 2195 die "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type));
1879 2196
1880 my $good = $opt{"BISECT_GOOD[$i]"}; 2197 my $good = $bisect_good;
1881 my $bad = $opt{"BISECT_BAD[$i]"}; 2198 my $bad = $bisect_bad;
1882 my $type = $opt{"BISECT_TYPE[$i]"}; 2199 my $type = $bisect_type;
1883 my $start = $opt{"BISECT_START[$i]"}; 2200 my $start = $bisect_start;
1884 my $replay = $opt{"BISECT_REPLAY[$i]"}; 2201 my $replay = $bisect_replay;
1885 my $start_files = $opt{"BISECT_FILES[$i]"}; 2202 my $start_files = $bisect_files;
1886 2203
1887 if (defined($start_files)) { 2204 if (defined($start_files)) {
1888 $start_files = " -- " . $start_files; 2205 $start_files = " -- " . $start_files;
@@ -1894,8 +2211,7 @@ sub bisect {
1894 $good = get_sha1($good); 2211 $good = get_sha1($good);
1895 $bad = get_sha1($bad); 2212 $bad = get_sha1($bad);
1896 2213
1897 if (defined($opt{"BISECT_REVERSE[$i]"}) && 2214 if (defined($bisect_reverse) && $bisect_reverse == 1) {
1898 $opt{"BISECT_REVERSE[$i]"} == 1) {
1899 doprint "Performing a reverse bisect (bad is good, good is bad!)\n"; 2215 doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
1900 $reverse_bisect = 1; 2216 $reverse_bisect = 1;
1901 } else { 2217 } else {
@@ -1907,8 +2223,31 @@ sub bisect {
1907 $type = "boot"; 2223 $type = "boot";
1908 } 2224 }
1909 2225
1910 my $check = $opt{"BISECT_CHECK[$i]"}; 2226 # Check if a bisect was running
1911 if (defined($check) && $check ne "0") { 2227 my $bisect_start_file = "$builddir/.git/BISECT_START";
2228
2229 my $check = $bisect_check;
2230 my $do_check = defined($check) && $check ne "0";
2231
2232 if ( -f $bisect_start_file ) {
2233 print "Bisect in progress found\n";
2234 if ($do_check) {
2235 print " If you say yes, then no checks of good or bad will be done\n";
2236 }
2237 if (defined($replay)) {
2238 print "** BISECT_REPLAY is defined in config file **";
2239 print " Ignore config option and perform new git bisect log?\n";
2240 if (read_ync " (yes, no, or cancel) ") {
2241 $replay = update_bisect_replay;
2242 $do_check = 0;
2243 }
2244 } elsif (read_yn "read git log and continue?") {
2245 $replay = update_bisect_replay;
2246 $do_check = 0;
2247 }
2248 }
2249
2250 if ($do_check) {
1912 2251
1913 # get current HEAD 2252 # get current HEAD
1914 my $head = get_sha1("HEAD"); 2253 my $head = get_sha1("HEAD");
@@ -1973,7 +2312,7 @@ sub bisect {
1973 run_command "git bisect reset" or 2312 run_command "git bisect reset" or
1974 dodie "could not reset git bisect"; 2313 dodie "could not reset git bisect";
1975 2314
1976 doprint "Bad commit was [$bisect_bad]\n"; 2315 doprint "Bad commit was [$bisect_bad_commit]\n";
1977 2316
1978 success $i; 2317 success $i;
1979} 2318}
@@ -2129,7 +2468,7 @@ sub run_config_bisect {
2129 } 2468 }
2130 2469
2131 doprint "***** RUN TEST ***\n"; 2470 doprint "***** RUN TEST ***\n";
2132 my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"}; 2471 my $type = $config_bisect_type;
2133 my $ret; 2472 my $ret;
2134 my %current_config; 2473 my %current_config;
2135 2474
@@ -2233,7 +2572,7 @@ sub run_config_bisect {
2233sub config_bisect { 2572sub config_bisect {
2234 my ($i) = @_; 2573 my ($i) = @_;
2235 2574
2236 my $start_config = $opt{"CONFIG_BISECT[$i]"}; 2575 my $start_config = $config_bisect;
2237 2576
2238 my $tmpconfig = "$tmpdir/use_config"; 2577 my $tmpconfig = "$tmpdir/use_config";
2239 2578
@@ -2346,29 +2685,29 @@ sub config_bisect {
2346 2685
2347sub patchcheck_reboot { 2686sub patchcheck_reboot {
2348 doprint "Reboot and sleep $patchcheck_sleep_time seconds\n"; 2687 doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
2349 reboot $patchcheck_sleep_time; 2688 reboot_to_good $patchcheck_sleep_time;
2350} 2689}
2351 2690
2352sub patchcheck { 2691sub patchcheck {
2353 my ($i) = @_; 2692 my ($i) = @_;
2354 2693
2355 die "PATCHCHECK_START[$i] not defined\n" 2694 die "PATCHCHECK_START[$i] not defined\n"
2356 if (!defined($opt{"PATCHCHECK_START[$i]"})); 2695 if (!defined($patchcheck_start));
2357 die "PATCHCHECK_TYPE[$i] not defined\n" 2696 die "PATCHCHECK_TYPE[$i] not defined\n"
2358 if (!defined($opt{"PATCHCHECK_TYPE[$i]"})); 2697 if (!defined($patchcheck_type));
2359 2698
2360 my $start = $opt{"PATCHCHECK_START[$i]"}; 2699 my $start = $patchcheck_start;
2361 2700
2362 my $end = "HEAD"; 2701 my $end = "HEAD";
2363 if (defined($opt{"PATCHCHECK_END[$i]"})) { 2702 if (defined($patchcheck_end)) {
2364 $end = $opt{"PATCHCHECK_END[$i]"}; 2703 $end = $patchcheck_end;
2365 } 2704 }
2366 2705
2367 # Get the true sha1's since we can use things like HEAD~3 2706 # Get the true sha1's since we can use things like HEAD~3
2368 $start = get_sha1($start); 2707 $start = get_sha1($start);
2369 $end = get_sha1($end); 2708 $end = get_sha1($end);
2370 2709
2371 my $type = $opt{"PATCHCHECK_TYPE[$i]"}; 2710 my $type = $patchcheck_type;
2372 2711
2373 # Can't have a test without having a test to run 2712 # Can't have a test without having a test to run
2374 if ($type eq "test" && !defined($run_test)) { 2713 if ($type eq "test" && !defined($run_test)) {
@@ -2963,7 +3302,7 @@ sub make_min_config {
2963 } 3302 }
2964 3303
2965 doprint "Reboot and wait $sleep_time seconds\n"; 3304 doprint "Reboot and wait $sleep_time seconds\n";
2966 reboot $sleep_time; 3305 reboot_to_good $sleep_time;
2967 } 3306 }
2968 3307
2969 success $i; 3308 success $i;
@@ -2985,13 +3324,27 @@ if ($#ARGV == 0) {
2985} 3324}
2986 3325
2987if (! -f $ktest_config) { 3326if (! -f $ktest_config) {
3327 $newconfig = 1;
3328 get_test_case;
2988 open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; 3329 open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
2989 print OUT << "EOF" 3330 print OUT << "EOF"
2990# Generated by ktest.pl 3331# Generated by ktest.pl
2991# 3332#
3333
3334# PWD is a ktest.pl variable that will result in the process working
3335# directory that ktest.pl is executed in.
3336
3337# THIS_DIR is automatically assigned the PWD of the path that generated
3338# the config file. It is best to use this variable when assigning other
3339# directory paths within this directory. This allows you to easily
3340# move the test cases to other locations or to other machines.
3341#
3342THIS_DIR := $variable{"PWD"}
3343
2992# Define each test with TEST_START 3344# Define each test with TEST_START
2993# The config options below it will override the defaults 3345# The config options below it will override the defaults
2994TEST_START 3346TEST_START
3347TEST_TYPE = $default{"TEST_TYPE"}
2995 3348
2996DEFAULTS 3349DEFAULTS
2997EOF 3350EOF
@@ -3011,7 +3364,7 @@ if ($#new_configs >= 0) {
3011 open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; 3364 open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
3012 foreach my $config (@new_configs) { 3365 foreach my $config (@new_configs) {
3013 print OUT "$config = $entered_configs{$config}\n"; 3366 print OUT "$config = $entered_configs{$config}\n";
3014 $opt{$config} = $entered_configs{$config}; 3367 $opt{$config} = process_variables($entered_configs{$config});
3015 } 3368 }
3016} 3369}
3017 3370
@@ -3091,61 +3444,10 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
3091 3444
3092 my $makecmd = set_test_option("MAKE_CMD", $i); 3445 my $makecmd = set_test_option("MAKE_CMD", $i);
3093 3446
3094 $machine = set_test_option("MACHINE", $i); 3447 # Load all the options into their mapped variable names
3095 $ssh_user = set_test_option("SSH_USER", $i); 3448 foreach my $opt (keys %option_map) {
3096 $tmpdir = set_test_option("TMP_DIR", $i); 3449 ${$option_map{$opt}} = set_test_option($opt, $i);
3097 $outputdir = set_test_option("OUTPUT_DIR", $i); 3450 }
3098 $builddir = set_test_option("BUILD_DIR", $i);
3099 $test_type = set_test_option("TEST_TYPE", $i);
3100 $build_type = set_test_option("BUILD_TYPE", $i);
3101 $build_options = set_test_option("BUILD_OPTIONS", $i);
3102 $pre_build = set_test_option("PRE_BUILD", $i);
3103 $post_build = set_test_option("POST_BUILD", $i);
3104 $pre_build_die = set_test_option("PRE_BUILD_DIE", $i);
3105 $post_build_die = set_test_option("POST_BUILD_DIE", $i);
3106 $power_cycle = set_test_option("POWER_CYCLE", $i);
3107 $reboot = set_test_option("REBOOT", $i);
3108 $noclean = set_test_option("BUILD_NOCLEAN", $i);
3109 $minconfig = set_test_option("MIN_CONFIG", $i);
3110 $output_minconfig = set_test_option("OUTPUT_MIN_CONFIG", $i);
3111 $start_minconfig = set_test_option("START_MIN_CONFIG", $i);
3112 $ignore_config = set_test_option("IGNORE_CONFIG", $i);
3113 $run_test = set_test_option("TEST", $i);
3114 $addconfig = set_test_option("ADD_CONFIG", $i);
3115 $reboot_type = set_test_option("REBOOT_TYPE", $i);
3116 $grub_menu = set_test_option("GRUB_MENU", $i);
3117 $post_install = set_test_option("POST_INSTALL", $i);
3118 $no_install = set_test_option("NO_INSTALL", $i);
3119 $reboot_script = set_test_option("REBOOT_SCRIPT", $i);
3120 $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
3121 $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
3122 $die_on_failure = set_test_option("DIE_ON_FAILURE", $i);
3123 $power_off = set_test_option("POWER_OFF", $i);
3124 $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i);
3125 $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
3126 $sleep_time = set_test_option("SLEEP_TIME", $i);
3127 $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
3128 $patchcheck_sleep_time = set_test_option("PATCHCHECK_SLEEP_TIME", $i);
3129 $ignore_warnings = set_test_option("IGNORE_WARNINGS", $i);
3130 $bisect_manual = set_test_option("BISECT_MANUAL", $i);
3131 $bisect_skip = set_test_option("BISECT_SKIP", $i);
3132 $config_bisect_good = set_test_option("CONFIG_BISECT_GOOD", $i);
3133 $store_failures = set_test_option("STORE_FAILURES", $i);
3134 $test_name = set_test_option("TEST_NAME", $i);
3135 $timeout = set_test_option("TIMEOUT", $i);
3136 $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i);
3137 $console = set_test_option("CONSOLE", $i);
3138 $detect_triplefault = set_test_option("DETECT_TRIPLE_FAULT", $i);
3139 $success_line = set_test_option("SUCCESS_LINE", $i);
3140 $reboot_success_line = set_test_option("REBOOT_SUCCESS_LINE", $i);
3141 $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
3142 $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
3143 $stop_test_after = set_test_option("STOP_TEST_AFTER", $i);
3144 $build_target = set_test_option("BUILD_TARGET", $i);
3145 $ssh_exec = set_test_option("SSH_EXEC", $i);
3146 $scp_to_target = set_test_option("SCP_TO_TARGET", $i);
3147 $target_image = set_test_option("TARGET_IMAGE", $i);
3148 $localversion = set_test_option("LOCALVERSION", $i);
3149 3451
3150 $start_minconfig_defined = 1; 3452 $start_minconfig_defined = 1;
3151 3453
@@ -3166,26 +3468,26 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
3166 $ENV{"SSH_USER"} = $ssh_user; 3468 $ENV{"SSH_USER"} = $ssh_user;
3167 $ENV{"MACHINE"} = $machine; 3469 $ENV{"MACHINE"} = $machine;
3168 3470
3169 $target = "$ssh_user\@$machine";
3170
3171 $buildlog = "$tmpdir/buildlog-$machine"; 3471 $buildlog = "$tmpdir/buildlog-$machine";
3472 $testlog = "$tmpdir/testlog-$machine";
3172 $dmesg = "$tmpdir/dmesg-$machine"; 3473 $dmesg = "$tmpdir/dmesg-$machine";
3173 $make = "$makecmd O=$outputdir"; 3474 $make = "$makecmd O=$outputdir";
3174 $output_config = "$outputdir/.config"; 3475 $output_config = "$outputdir/.config";
3175 3476
3176 if ($reboot_type eq "grub") { 3477 if (!$buildonly) {
3177 dodie "GRUB_MENU not defined" if (!defined($grub_menu)); 3478 $target = "$ssh_user\@$machine";
3178 } elsif (!defined($reboot_script)) { 3479 if ($reboot_type eq "grub") {
3179 dodie "REBOOT_SCRIPT not defined" 3480 dodie "GRUB_MENU not defined" if (!defined($grub_menu));
3481 }
3180 } 3482 }
3181 3483
3182 my $run_type = $build_type; 3484 my $run_type = $build_type;
3183 if ($test_type eq "patchcheck") { 3485 if ($test_type eq "patchcheck") {
3184 $run_type = $opt{"PATCHCHECK_TYPE[$i]"}; 3486 $run_type = $patchcheck_type;
3185 } elsif ($test_type eq "bisect") { 3487 } elsif ($test_type eq "bisect") {
3186 $run_type = $opt{"BISECT_TYPE[$i]"}; 3488 $run_type = $bisect_type;
3187 } elsif ($test_type eq "config_bisect") { 3489 } elsif ($test_type eq "config_bisect") {
3188 $run_type = $opt{"CONFIG_BISECT_TYPE[$i]"}; 3490 $run_type = $config_bisect_type;
3189 } 3491 }
3190 3492
3191 if ($test_type eq "make_min_config") { 3493 if ($test_type eq "make_min_config") {
@@ -3205,6 +3507,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
3205 3507
3206 unlink $dmesg; 3508 unlink $dmesg;
3207 unlink $buildlog; 3509 unlink $buildlog;
3510 unlink $testlog;
3208 3511
3209 if (defined($addconfig)) { 3512 if (defined($addconfig)) {
3210 my $min = $minconfig; 3513 my $min = $minconfig;
@@ -3216,7 +3519,6 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
3216 $minconfig = "$tmpdir/add_config"; 3519 $minconfig = "$tmpdir/add_config";
3217 } 3520 }
3218 3521
3219 my $checkout = $opt{"CHECKOUT[$i]"};
3220 if (defined($checkout)) { 3522 if (defined($checkout)) {
3221 run_command "git checkout $checkout" or 3523 run_command "git checkout $checkout" or
3222 die "failed to checkout $checkout"; 3524 die "failed to checkout $checkout";
@@ -3267,7 +3569,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
3267if ($opt{"POWEROFF_ON_SUCCESS"}) { 3569if ($opt{"POWEROFF_ON_SUCCESS"}) {
3268 halt; 3570 halt;
3269} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) { 3571} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) {
3270 reboot; 3572 reboot_to_good;
3271} 3573}
3272 3574
3273doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; 3575doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index dbedfa196727..5ea04c6a71bf 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -346,7 +346,10 @@
346#GRUB_MENU = Test Kernel 346#GRUB_MENU = Test Kernel
347 347
348# A script to reboot the target into the test kernel 348# A script to reboot the target into the test kernel
349# (Only mandatory if REBOOT_TYPE = script) 349# This and SWITCH_TO_TEST are about the same, except
350# SWITCH_TO_TEST is run even for REBOOT_TYPE = grub.
351# This may be left undefined.
352# (default undefined)
350#REBOOT_SCRIPT = 353#REBOOT_SCRIPT =
351 354
352#### Optional Config Options (all have defaults) #### 355#### Optional Config Options (all have defaults) ####
@@ -468,6 +471,27 @@
468# The test will not modify that file. 471# The test will not modify that file.
469#REBOOT_TYPE = grub 472#REBOOT_TYPE = grub
470 473
474# If you are using a machine that doesn't boot with grub, and
475# perhaps gets its kernel from a remote server (tftp), then
476# you can use this option to update the target image with the
477# test image.
478#
479# You could also do the same with POST_INSTALL, but the difference
480# between that option and this option is that POST_INSTALL runs
481# after the install, where this one runs just before a reboot.
482# (default undefined)
483#SWITCH_TO_TEST = cp ${OUTPUT_DIR}/${BUILD_TARGET} ${TARGET_IMAGE}
484
485# If you are using a machine that doesn't boot with grub, and
486# perhaps gets its kernel from a remote server (tftp), then
487# you can use this option to update the target image with the
488# known good image to reboot safely back into.
489#
490# This option holds a command that will execute before needing
491# to reboot to a good known image.
492# (default undefined)
493#SWITCH_TO_GOOD = ssh ${SSH_USER}@${MACHINE} cp good_image ${TARGET_IMAGE}
494
471# The min config that is needed to build for the machine 495# The min config that is needed to build for the machine
472# A nice way to create this is with the following: 496# A nice way to create this is with the following:
473# 497#
@@ -589,6 +613,12 @@
589# (default undefined) 613# (default undefined)
590#STORE_FAILURES = /home/test/failures 614#STORE_FAILURES = /home/test/failures
591 615
616# Directory to store success directories on success. If this is not
617# set, the .config, buildlog, dmesg and testlog will not be saved if a
618# test succeeds.
619# (default undefined)
620#STORE_SUCCESSES = /home/test/successes
621
592# Build without doing a make mrproper, or removing .config 622# Build without doing a make mrproper, or removing .config
593# (default 0) 623# (default 0)
594#BUILD_NOCLEAN = 0 624#BUILD_NOCLEAN = 0
@@ -700,6 +730,25 @@
700# (default 1) 730# (default 1)
701#DETECT_TRIPLE_FAULT = 0 731#DETECT_TRIPLE_FAULT = 0
702 732
733# All options in the config file should be either used by ktest
734# or could be used within a value of another option. If an option
735# in the config file is not used, ktest will warn about it and ask
736# if you want to continue.
737#
738# If you don't care if there are non-used options, enable this
739# option. Be careful though, a non-used option is usually a sign
740# of an option name being typed incorrectly.
741# (default 0)
742#IGNORE_UNUSED = 1
743
744# When testing a kernel that happens to have WARNINGs, and call
745# traces, ktest.pl will detect these and fail a boot or test run
746# due to warnings. By setting this option, ktest will ignore
747# call traces, and will not fail a test if the kernel produces
748# an oops. Use this option with care.
749# (default 0)
750#IGNORE_ERRORS = 1
751
703#### Per test run options #### 752#### Per test run options ####
704# The following options are only allowed in TEST_START sections. 753# The following options are only allowed in TEST_START sections.
705# They are ignored in the DEFAULTS sections. 754# They are ignored in the DEFAULTS sections.
@@ -862,6 +911,42 @@
862# BISECT_BAD with BISECT_CHECK = good or 911# BISECT_BAD with BISECT_CHECK = good or
863# BISECT_CHECK = bad, respectively. 912# BISECT_CHECK = bad, respectively.
864# 913#
914# BISECT_RET_GOOD = 0 (optional, default undefined)
915#
916# In case the specified test returns something other than just
917# 0 for good, and non-zero for bad, you can override 0 being
918# good by defining BISECT_RET_GOOD.
919#
920# BISECT_RET_BAD = 1 (optional, default undefined)
921#
922# In case the specified test returns something other than just
923# 0 for good, and non-zero for bad, you can override non-zero being
924# bad by defining BISECT_RET_BAD.
925#
926# BISECT_RET_ABORT = 255 (optional, default undefined)
927#
928# If you need to abort the bisect if the test discovers something
929# that was wrong, you can define BISECT_RET_ABORT to be the error
930# code returned by the test in order to abort the bisect.
931#
932# BISECT_RET_SKIP = 2 (optional, default undefined)
933#
934# If the test detects that the current commit is neither good
935# nor bad, but something else happened (another bug detected)
936# you can specify BISECT_RET_SKIP to an error code that the
937# test returns when it should skip the current commit.
938#
939# BISECT_RET_DEFAULT = good (optional, default undefined)
940#
941# You can override the default of what to do when the above
942# options are not hit. This may be one of, "good", "bad",
943# "abort" or "skip" (without the quotes).
944#
945# Note, if you do not define any of the previous BISECT_RET_*
946# and define BISECT_RET_DEFAULT, all bisect results will do
947# what BISECT_RET_DEFAULT specifies.
948#
949#
865# Example: 950# Example:
866# TEST_START 951# TEST_START
867# TEST_TYPE = bisect 952# TEST_TYPE = bisect
@@ -950,7 +1035,7 @@
950# TEST_START 1035# TEST_START
951# TEST_TYPE = config_bisect 1036# TEST_TYPE = config_bisect
952# CONFIG_BISECT_TYPE = build 1037# CONFIG_BISECT_TYPE = build
953# CONFIG_BISECT = /home/test/¢onfig-bad 1038# CONFIG_BISECT = /home/test/config-bad
954# MIN_CONFIG = /home/test/config-min 1039# MIN_CONFIG = /home/test/config-min
955# BISECT_MANUAL = 1 1040# BISECT_MANUAL = 1
956# 1041#
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
new file mode 100644
index 000000000000..4ec84018cc13
--- /dev/null
+++ b/tools/testing/selftests/Makefile
@@ -0,0 +1,11 @@
# Top-level selftests driver: run the default / clean target in each
# sub-directory listed in TARGETS.
# $(MAKE) is used for the recursion so that command-line flags and
# variables of the parent make are passed on to the sub-makes.
1TARGETS = breakpoints
2
3all:
4 for TARGET in $(TARGETS); do \
5 $(MAKE) -C $$TARGET; \
6 done;
7
8clean:
9 for TARGET in $(TARGETS); do \
10 $(MAKE) -C $$TARGET clean; \
11 done;
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
new file mode 100644
index 000000000000..f362722cdce7
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -0,0 +1,20 @@
# Build the x86 breakpoint selftest. ARCH is derived from `uname -m`
# unless given by the caller; i386 and x86_64 are both folded into x86.
# On any other architecture the build only prints a notice.
1# Taken from perf makefile
2uname_M := $(shell uname -m 2>/dev/null || echo not)
3ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
4ifeq ($(ARCH),i386)
5 ARCH := x86
6endif
7ifeq ($(ARCH),x86_64)
8 ARCH := x86
9endif
10
11
12all:
13ifeq ($(ARCH),x86)
# Use $(CC) so the compiler can be overridden from the command line.
14 $(CC) breakpoint_test.c -o run_test
15else
16 echo "Not an x86 target, can't build breakpoints selftests"
17endif
18
19clean:
20 rm -fr run_test
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
new file mode 100644
index 000000000000..a0743f3b2b57
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -0,0 +1,394 @@
1/*
2 * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
3 *
4 * Licensed under the terms of the GNU GPL License version 2
5 *
6 * Selftests for breakpoints (and more generally the do_debug() path) in x86.
7 */
8
9
10#include <sys/ptrace.h>
11#include <unistd.h>
12#include <stddef.h>
13#include <sys/user.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <signal.h>
17#include <sys/types.h>
18#include <sys/wait.h>
19
20
/*
 * Breakpoint access modes, one bit each:
 * instruction execution, read-or-write access, write-only access.
 */
enum {
	BP_X	= 1 << 0,
	BP_RW	= 1 << 1,
	BP_W	= 1 << 2,
};
27
28static pid_t child_pid;
29
30/*
31 * Ensures the child and parent are always "talking" about
32 * the same test sequence. (ie: that we haven't forgotten
33 * to call check_trapped() somewhere).
34 */
35static int nr_tests;
36
37static void set_breakpoint_addr(void *addr, int n)
38{
39 int ret;
40
41 ret = ptrace(PTRACE_POKEUSER, child_pid,
42 offsetof(struct user, u_debugreg[n]), addr);
43 if (ret) {
44 perror("Can't set breakpoint addr\n");
45 exit(-1);
46 }
47}
48
49static void toggle_breakpoint(int n, int type, int len,
50 int local, int global, int set)
51{
52 int ret;
53
54 int xtype, xlen;
55 unsigned long vdr7, dr7;
56
57 switch (type) {
58 case BP_X:
59 xtype = 0;
60 break;
61 case BP_W:
62 xtype = 1;
63 break;
64 case BP_RW:
65 xtype = 3;
66 break;
67 }
68
69 switch (len) {
70 case 1:
71 xlen = 0;
72 break;
73 case 2:
74 xlen = 4;
75 break;
76 case 4:
77 xlen = 0xc;
78 break;
79 case 8:
80 xlen = 8;
81 break;
82 }
83
84 dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
85 offsetof(struct user, u_debugreg[7]), 0);
86
87 vdr7 = (xlen | xtype) << 16;
88 vdr7 <<= 4 * n;
89
90 if (local) {
91 vdr7 |= 1 << (2 * n);
92 vdr7 |= 1 << 8;
93 }
94 if (global) {
95 vdr7 |= 2 << (2 * n);
96 vdr7 |= 1 << 9;
97 }
98
99 if (set)
100 dr7 |= vdr7;
101 else
102 dr7 &= ~vdr7;
103
104 ret = ptrace(PTRACE_POKEUSER, child_pid,
105 offsetof(struct user, u_debugreg[7]), dr7);
106 if (ret) {
107 perror("Can't set dr7");
108 exit(-1);
109 }
110}
111
112/* Dummy variables to test read/write accesses */
113static unsigned long long dummy_var[4];
114
/* Empty functions whose addresses are the targets of execution accesses */
static void dummy_func(void)
{
}

static void dummy_func1(void)
{
}

static void dummy_func2(void)
{
}

static void dummy_func3(void)
{
}

/* Table of the dummy functions, indexed by test slot */
static void (*dummy_funcs[])(void) = {
	&dummy_func,
	&dummy_func1,
	&dummy_func2,
	&dummy_func3,
};
127
128static int trapped;
129
130static void check_trapped(void)
131{
132 /*
133 * If we haven't trapped, wake up the parent
134 * so that it notices the failure.
135 */
136 if (!trapped)
137 kill(getpid(), SIGUSR1);
138 trapped = 0;
139
140 nr_tests++;
141}
142
143static void write_var(int len)
144{
145 char *pcval; short *psval; int *pival; long long *plval;
146 int i;
147
148 for (i = 0; i < 4; i++) {
149 switch (len) {
150 case 1:
151 pcval = (char *)&dummy_var[i];
152 *pcval = 0xff;
153 break;
154 case 2:
155 psval = (short *)&dummy_var[i];
156 *psval = 0xffff;
157 break;
158 case 4:
159 pival = (int *)&dummy_var[i];
160 *pival = 0xffffffff;
161 break;
162 case 8:
163 plval = (long long *)&dummy_var[i];
164 *plval = 0xffffffffffffffffLL;
165 break;
166 }
167 check_trapped();
168 }
169}
170
171static void read_var(int len)
172{
173 char cval; short sval; int ival; long long lval;
174 int i;
175
176 for (i = 0; i < 4; i++) {
177 switch (len) {
178 case 1:
179 cval = *(char *)&dummy_var[i];
180 break;
181 case 2:
182 sval = *(short *)&dummy_var[i];
183 break;
184 case 4:
185 ival = *(int *)&dummy_var[i];
186 break;
187 case 8:
188 lval = *(long long *)&dummy_var[i];
189 break;
190 }
191 check_trapped();
192 }
193}
194
195/*
196 * Do the r/w/x accesses to trigger the breakpoints. And run
197 * the usual traps.
198 */
199static void trigger_tests(void)
200{
201 int len, local, global, i;
202 char val;
203 int ret;
204
205 ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
206 if (ret) {
207 perror("Can't be traced?\n");
208 return;
209 }
210
211 /* Wake up father so that it sets up the first test */
212 kill(getpid(), SIGUSR1);
213
214 /* Test instruction breakpoints */
215 for (local = 0; local < 2; local++) {
216 for (global = 0; global < 2; global++) {
217 if (!local && !global)
218 continue;
219
220 for (i = 0; i < 4; i++) {
221 dummy_funcs[i]();
222 check_trapped();
223 }
224 }
225 }
226
227 /* Test write watchpoints */
228 for (len = 1; len <= sizeof(long); len <<= 1) {
229 for (local = 0; local < 2; local++) {
230 for (global = 0; global < 2; global++) {
231 if (!local && !global)
232 continue;
233 write_var(len);
234 }
235 }
236 }
237
238 /* Test read/write watchpoints (on read accesses) */
239 for (len = 1; len <= sizeof(long); len <<= 1) {
240 for (local = 0; local < 2; local++) {
241 for (global = 0; global < 2; global++) {
242 if (!local && !global)
243 continue;
244 read_var(len);
245 }
246 }
247 }
248
249 /* Icebp trap */
250 asm(".byte 0xf1\n");
251 check_trapped();
252
253 /* Int 3 trap */
254 asm("int $3\n");
255 check_trapped();
256
257 kill(getpid(), SIGUSR1);
258}
259
260static void check_success(const char *msg)
261{
262 const char *msg2;
263 int child_nr_tests;
264 int status;
265
266 /* Wait for the child to SIGTRAP */
267 wait(&status);
268
269 msg2 = "Failed";
270
271 if (WSTOPSIG(status) == SIGTRAP) {
272 child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid,
273 &nr_tests, 0);
274 if (child_nr_tests == nr_tests)
275 msg2 = "Ok";
276 if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) {
277 perror("Can't poke\n");
278 exit(-1);
279 }
280 }
281
282 nr_tests++;
283
284 printf("%s [%s]\n", msg, msg2);
285}
286
287static void launch_instruction_breakpoints(char *buf, int local, int global)
288{
289 int i;
290
291 for (i = 0; i < 4; i++) {
292 set_breakpoint_addr(dummy_funcs[i], i);
293 toggle_breakpoint(i, BP_X, 1, local, global, 1);
294 ptrace(PTRACE_CONT, child_pid, NULL, 0);
295 sprintf(buf, "Test breakpoint %d with local: %d global: %d",
296 i, local, global);
297 check_success(buf);
298 toggle_breakpoint(i, BP_X, 1, local, global, 0);
299 }
300}
301
302static void launch_watchpoints(char *buf, int mode, int len,
303 int local, int global)
304{
305 const char *mode_str;
306 int i;
307
308 if (mode == BP_W)
309 mode_str = "write";
310 else
311 mode_str = "read";
312
313 for (i = 0; i < 4; i++) {
314 set_breakpoint_addr(&dummy_var[i], i);
315 toggle_breakpoint(i, mode, len, local, global, 1);
316 ptrace(PTRACE_CONT, child_pid, NULL, 0);
317 sprintf(buf, "Test %s watchpoint %d with len: %d local: "
318 "%d global: %d", mode_str, i, len, local, global);
319 check_success(buf);
320 toggle_breakpoint(i, mode, len, local, global, 0);
321 }
322}
323
324/* Set the breakpoints and check the child successfully trigger them */
325static void launch_tests(void)
326{
327 char buf[1024];
328 int len, local, global, i;
329
330 /* Instruction breakpoints */
331 for (local = 0; local < 2; local++) {
332 for (global = 0; global < 2; global++) {
333 if (!local && !global)
334 continue;
335 launch_instruction_breakpoints(buf, local, global);
336 }
337 }
338
339 /* Write watchpoint */
340 for (len = 1; len <= sizeof(long); len <<= 1) {
341 for (local = 0; local < 2; local++) {
342 for (global = 0; global < 2; global++) {
343 if (!local && !global)
344 continue;
345 launch_watchpoints(buf, BP_W, len,
346 local, global);
347 }
348 }
349 }
350
351 /* Read-Write watchpoint */
352 for (len = 1; len <= sizeof(long); len <<= 1) {
353 for (local = 0; local < 2; local++) {
354 for (global = 0; global < 2; global++) {
355 if (!local && !global)
356 continue;
357 launch_watchpoints(buf, BP_RW, len,
358 local, global);
359 }
360 }
361 }
362
363 /* Icebp traps */
364 ptrace(PTRACE_CONT, child_pid, NULL, 0);
365 check_success("Test icebp");
366
367 /* Int 3 traps */
368 ptrace(PTRACE_CONT, child_pid, NULL, 0);
369 check_success("Test int 3 trap");
370
371 ptrace(PTRACE_CONT, child_pid, NULL, 0);
372}
373
374int main(int argc, char **argv)
375{
376 pid_t pid;
377 int ret;
378
379 pid = fork();
380 if (!pid) {
381 trigger_tests();
382 return 0;
383 }
384
385 child_pid = pid;
386
387 wait(NULL);
388
389 launch_tests();
390
391 wait(NULL);
392
393 return 0;
394}
diff --git a/tools/testing/selftests/run_tests b/tools/testing/selftests/run_tests
new file mode 100644
index 000000000000..320718a4e6bf
--- /dev/null
+++ b/tools/testing/selftests/run_tests
@@ -0,0 +1,8 @@
#!/bin/bash
# Run the run_test program of every selftest target, one after the other.

TARGETS=breakpoints

for TARGET in $TARGETS; do
	$TARGET/run_test
done
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 669bcdd45805..b4fbc91c41b4 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -186,21 +186,12 @@ struct virtqueue {
186#endif 186#endif
187 187
188/* Interfaces exported by virtio_ring. */ 188/* Interfaces exported by virtio_ring. */
189int virtqueue_add_buf_gfp(struct virtqueue *vq, 189int virtqueue_add_buf(struct virtqueue *vq,
190 struct scatterlist sg[], 190 struct scatterlist sg[],
191 unsigned int out_num, 191 unsigned int out_num,
192 unsigned int in_num, 192 unsigned int in_num,
193 void *data, 193 void *data,
194 gfp_t gfp); 194 gfp_t gfp);
195
196static inline int virtqueue_add_buf(struct virtqueue *vq,
197 struct scatterlist sg[],
198 unsigned int out_num,
199 unsigned int in_num,
200 void *data)
201{
202 return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
203}
204 195
205void virtqueue_kick(struct virtqueue *vq); 196void virtqueue_kick(struct virtqueue *vq);
206 197
@@ -214,6 +205,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
214struct virtqueue *vring_new_virtqueue(unsigned int num, 205struct virtqueue *vring_new_virtqueue(unsigned int num,
215 unsigned int vring_align, 206 unsigned int vring_align,
216 struct virtio_device *vdev, 207 struct virtio_device *vdev,
208 bool weak_barriers,
217 void *pages, 209 void *pages,
218 void (*notify)(struct virtqueue *vq), 210 void (*notify)(struct virtqueue *vq),
219 void (*callback)(struct virtqueue *vq), 211 void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 74d3331bdaf9..6bf95f995364 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info *dev, int num)
92 assert(r >= 0); 92 assert(r >= 0);
93 memset(info->ring, 0, vring_size(num, 4096)); 93 memset(info->ring, 0, vring_size(num, 4096));
94 vring_init(&info->vring, num, info->ring, 4096); 94 vring_init(&info->vring, num, info->ring, 4096);
95 info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring, 95 info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
96 true, info->ring,
96 vq_notify, vq_callback, "test"); 97 vq_notify, vq_callback, "test");
97 assert(info->vq); 98 assert(info->vq);
98 info->vq->priv = info; 99 info->vq->priv = info;
@@ -160,7 +161,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
160 if (started < bufs) { 161 if (started < bufs) {
161 sg_init_one(&sl, dev->buf, dev->buf_size); 162 sg_init_one(&sl, dev->buf, dev->buf_size);
162 r = virtqueue_add_buf(vq->vq, &sl, 1, 0, 163 r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
163 dev->buf + started); 164 dev->buf + started,
165 GFP_ATOMIC);
164 if (likely(r >= 0)) { 166 if (likely(r >= 0)) {
165 ++started; 167 ++started;
166 virtqueue_kick(vq->vq); 168 virtqueue_kick(vq->vq);